Merged with mainline at revision 128810.
[official-gcc.git] / gcc / config / i386 / i386.c
blobe208fb4de4f89ab5787f96717c16ad041e789d7f
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "tm_p.h"
28 #include "regs.h"
29 #include "hard-reg-set.h"
30 #include "real.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "output.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "toplev.h"
43 #include "basic-block.h"
44 #include "ggc.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "langhooks.h"
48 #include "cgraph.h"
49 #include "tree-gimple.h"
50 #include "dwarf2.h"
51 #include "df.h"
52 #include "tm-constrs.h"
53 #include "params.h"
55 static int x86_builtin_vectorization_cost (bool);
57 #ifndef CHECK_STACK_LIMIT
58 #define CHECK_STACK_LIMIT (-1)
59 #endif
61 /* Return index of given mode in mult and division cost tables. */
62 #define MODE_INDEX(mode) \
63 ((mode) == QImode ? 0 \
64 : (mode) == HImode ? 1 \
65 : (mode) == SImode ? 2 \
66 : (mode) == DImode ? 3 \
67 : 4)
69 /* Processor costs (relative to an add) */
70 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
71 #define COSTS_N_BYTES(N) ((N) * 2)
73 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
75 static const
76 struct processor_costs size_cost = { /* costs for tuning for size */
77 COSTS_N_BYTES (2), /* cost of an add instruction */
78 COSTS_N_BYTES (3), /* cost of a lea instruction */
79 COSTS_N_BYTES (2), /* variable shift costs */
80 COSTS_N_BYTES (3), /* constant shift costs */
81 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
82 COSTS_N_BYTES (3), /* HI */
83 COSTS_N_BYTES (3), /* SI */
84 COSTS_N_BYTES (3), /* DI */
85 COSTS_N_BYTES (5)}, /* other */
86 0, /* cost of multiply per each bit set */
87 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
88 COSTS_N_BYTES (3), /* HI */
89 COSTS_N_BYTES (3), /* SI */
90 COSTS_N_BYTES (3), /* DI */
91 COSTS_N_BYTES (5)}, /* other */
92 COSTS_N_BYTES (3), /* cost of movsx */
93 COSTS_N_BYTES (3), /* cost of movzx */
94 0, /* "large" insn */
95 2, /* MOVE_RATIO */
96 2, /* cost for loading QImode using movzbl */
97 {2, 2, 2}, /* cost of loading integer registers
98 in QImode, HImode and SImode.
99 Relative to reg-reg move (2). */
100 {2, 2, 2}, /* cost of storing integer registers */
101 2, /* cost of reg,reg fld/fst */
102 {2, 2, 2}, /* cost of loading fp registers
103 in SFmode, DFmode and XFmode */
104 {2, 2, 2}, /* cost of storing fp registers
105 in SFmode, DFmode and XFmode */
106 3, /* cost of moving MMX register */
107 {3, 3}, /* cost of loading MMX registers
108 in SImode and DImode */
109 {3, 3}, /* cost of storing MMX registers
110 in SImode and DImode */
111 3, /* cost of moving SSE register */
112 {3, 3, 3}, /* cost of loading SSE registers
113 in SImode, DImode and TImode */
114 {3, 3, 3}, /* cost of storing SSE registers
115 in SImode, DImode and TImode */
116 3, /* MMX or SSE register to integer */
117 0, /* size of l1 cache */
118 0, /* size of l2 cache */
119 0, /* size of prefetch block */
120 0, /* number of parallel prefetches */
121 2, /* Branch cost */
122 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
123 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
124 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
125 COSTS_N_BYTES (2), /* cost of FABS instruction. */
126 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
127 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
128 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
129 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
130 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
131 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
132 1, /* scalar_stmt_cost. */
133 1, /* scalar load_cost. */
134 1, /* scalar_store_cost. */
135 1, /* vec_stmt_cost. */
136 1, /* vec_to_scalar_cost. */
137 1, /* scalar_to_vec_cost. */
138 1, /* vec_align_load_cost. */
139 1, /* vec_unalign_load_cost. */
140 1, /* vec_store_cost. */
141 1, /* cond_taken_branch_cost. */
142 1, /* cond_not_taken_branch_cost. */
145 /* Processor costs (relative to an add) */
146 static const
147 struct processor_costs i386_cost = { /* 386 specific costs */
148 COSTS_N_INSNS (1), /* cost of an add instruction */
149 COSTS_N_INSNS (1), /* cost of a lea instruction */
150 COSTS_N_INSNS (3), /* variable shift costs */
151 COSTS_N_INSNS (2), /* constant shift costs */
152 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
153 COSTS_N_INSNS (6), /* HI */
154 COSTS_N_INSNS (6), /* SI */
155 COSTS_N_INSNS (6), /* DI */
156 COSTS_N_INSNS (6)}, /* other */
157 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
158 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
159 COSTS_N_INSNS (23), /* HI */
160 COSTS_N_INSNS (23), /* SI */
161 COSTS_N_INSNS (23), /* DI */
162 COSTS_N_INSNS (23)}, /* other */
163 COSTS_N_INSNS (3), /* cost of movsx */
164 COSTS_N_INSNS (2), /* cost of movzx */
165 15, /* "large" insn */
166 3, /* MOVE_RATIO */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of storing fp registers
176 in SFmode, DFmode and XFmode */
177 2, /* cost of moving MMX register */
178 {4, 8}, /* cost of loading MMX registers
179 in SImode and DImode */
180 {4, 8}, /* cost of storing MMX registers
181 in SImode and DImode */
182 2, /* cost of moving SSE register */
183 {4, 8, 16}, /* cost of loading SSE registers
184 in SImode, DImode and TImode */
185 {4, 8, 16}, /* cost of storing SSE registers
186 in SImode, DImode and TImode */
187 3, /* MMX or SSE register to integer */
188 0, /* size of l1 cache */
189 0, /* size of l2 cache */
190 0, /* size of prefetch block */
191 0, /* number of parallel prefetches */
192 1, /* Branch cost */
193 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
194 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
195 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
196 COSTS_N_INSNS (22), /* cost of FABS instruction. */
197 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
198 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
199 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
200 DUMMY_STRINGOP_ALGS},
201 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
202 DUMMY_STRINGOP_ALGS},
203 1, /* scalar_stmt_cost. */
204 1, /* scalar load_cost. */
205 1, /* scalar_store_cost. */
206 1, /* vec_stmt_cost. */
207 1, /* vec_to_scalar_cost. */
208 1, /* scalar_to_vec_cost. */
209 1, /* vec_align_load_cost. */
210 2, /* vec_unalign_load_cost. */
211 1, /* vec_store_cost. */
212 3, /* cond_taken_branch_cost. */
213 1, /* cond_not_taken_branch_cost. */
216 static const
217 struct processor_costs i486_cost = { /* 486 specific costs */
218 COSTS_N_INSNS (1), /* cost of an add instruction */
219 COSTS_N_INSNS (1), /* cost of a lea instruction */
220 COSTS_N_INSNS (3), /* variable shift costs */
221 COSTS_N_INSNS (2), /* constant shift costs */
222 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
223 COSTS_N_INSNS (12), /* HI */
224 COSTS_N_INSNS (12), /* SI */
225 COSTS_N_INSNS (12), /* DI */
226 COSTS_N_INSNS (12)}, /* other */
227 1, /* cost of multiply per each bit set */
228 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
229 COSTS_N_INSNS (40), /* HI */
230 COSTS_N_INSNS (40), /* SI */
231 COSTS_N_INSNS (40), /* DI */
232 COSTS_N_INSNS (40)}, /* other */
233 COSTS_N_INSNS (3), /* cost of movsx */
234 COSTS_N_INSNS (2), /* cost of movzx */
235 15, /* "large" insn */
236 3, /* MOVE_RATIO */
237 4, /* cost for loading QImode using movzbl */
238 {2, 4, 2}, /* cost of loading integer registers
239 in QImode, HImode and SImode.
240 Relative to reg-reg move (2). */
241 {2, 4, 2}, /* cost of storing integer registers */
242 2, /* cost of reg,reg fld/fst */
243 {8, 8, 8}, /* cost of loading fp registers
244 in SFmode, DFmode and XFmode */
245 {8, 8, 8}, /* cost of storing fp registers
246 in SFmode, DFmode and XFmode */
247 2, /* cost of moving MMX register */
248 {4, 8}, /* cost of loading MMX registers
249 in SImode and DImode */
250 {4, 8}, /* cost of storing MMX registers
251 in SImode and DImode */
252 2, /* cost of moving SSE register */
253 {4, 8, 16}, /* cost of loading SSE registers
254 in SImode, DImode and TImode */
255 {4, 8, 16}, /* cost of storing SSE registers
256 in SImode, DImode and TImode */
257 3, /* MMX or SSE register to integer */
258 4, /* size of l1 cache. 486 has 8kB cache
259 shared for code and data, so 4kB is
260 not really precise. */
261 4, /* size of l2 cache */
262 0, /* size of prefetch block */
263 0, /* number of parallel prefetches */
264 1, /* Branch cost */
265 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
266 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
267 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
268 COSTS_N_INSNS (3), /* cost of FABS instruction. */
269 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
270 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
271 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
272 DUMMY_STRINGOP_ALGS},
273 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
274 DUMMY_STRINGOP_ALGS},
275 1, /* scalar_stmt_cost. */
276 1, /* scalar load_cost. */
277 1, /* scalar_store_cost. */
278 1, /* vec_stmt_cost. */
279 1, /* vec_to_scalar_cost. */
280 1, /* scalar_to_vec_cost. */
281 1, /* vec_align_load_cost. */
282 2, /* vec_unalign_load_cost. */
283 1, /* vec_store_cost. */
284 3, /* cond_taken_branch_cost. */
285 1, /* cond_not_taken_branch_cost. */
288 static const
289 struct processor_costs pentium_cost = {
290 COSTS_N_INSNS (1), /* cost of an add instruction */
291 COSTS_N_INSNS (1), /* cost of a lea instruction */
292 COSTS_N_INSNS (4), /* variable shift costs */
293 COSTS_N_INSNS (1), /* constant shift costs */
294 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
295 COSTS_N_INSNS (11), /* HI */
296 COSTS_N_INSNS (11), /* SI */
297 COSTS_N_INSNS (11), /* DI */
298 COSTS_N_INSNS (11)}, /* other */
299 0, /* cost of multiply per each bit set */
300 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
301 COSTS_N_INSNS (25), /* HI */
302 COSTS_N_INSNS (25), /* SI */
303 COSTS_N_INSNS (25), /* DI */
304 COSTS_N_INSNS (25)}, /* other */
305 COSTS_N_INSNS (3), /* cost of movsx */
306 COSTS_N_INSNS (2), /* cost of movzx */
307 8, /* "large" insn */
308 6, /* MOVE_RATIO */
309 6, /* cost for loading QImode using movzbl */
310 {2, 4, 2}, /* cost of loading integer registers
311 in QImode, HImode and SImode.
312 Relative to reg-reg move (2). */
313 {2, 4, 2}, /* cost of storing integer registers */
314 2, /* cost of reg,reg fld/fst */
315 {2, 2, 6}, /* cost of loading fp registers
316 in SFmode, DFmode and XFmode */
317 {4, 4, 6}, /* cost of storing fp registers
318 in SFmode, DFmode and XFmode */
319 8, /* cost of moving MMX register */
320 {8, 8}, /* cost of loading MMX registers
321 in SImode and DImode */
322 {8, 8}, /* cost of storing MMX registers
323 in SImode and DImode */
324 2, /* cost of moving SSE register */
325 {4, 8, 16}, /* cost of loading SSE registers
326 in SImode, DImode and TImode */
327 {4, 8, 16}, /* cost of storing SSE registers
328 in SImode, DImode and TImode */
329 3, /* MMX or SSE register to integer */
330 8, /* size of l1 cache. */
331 8, /* size of l2 cache */
332 0, /* size of prefetch block */
333 0, /* number of parallel prefetches */
334 2, /* Branch cost */
335 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
336 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
337 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
338 COSTS_N_INSNS (1), /* cost of FABS instruction. */
339 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
340 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
341 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
342 DUMMY_STRINGOP_ALGS},
343 {{libcall, {{-1, rep_prefix_4_byte}}},
344 DUMMY_STRINGOP_ALGS},
345 1, /* scalar_stmt_cost. */
346 1, /* scalar load_cost. */
347 1, /* scalar_store_cost. */
348 1, /* vec_stmt_cost. */
349 1, /* vec_to_scalar_cost. */
350 1, /* scalar_to_vec_cost. */
351 1, /* vec_align_load_cost. */
352 2, /* vec_unalign_load_cost. */
353 1, /* vec_store_cost. */
354 3, /* cond_taken_branch_cost. */
355 1, /* cond_not_taken_branch_cost. */
358 static const
359 struct processor_costs pentiumpro_cost = {
360 COSTS_N_INSNS (1), /* cost of an add instruction */
361 COSTS_N_INSNS (1), /* cost of a lea instruction */
362 COSTS_N_INSNS (1), /* variable shift costs */
363 COSTS_N_INSNS (1), /* constant shift costs */
364 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
365 COSTS_N_INSNS (4), /* HI */
366 COSTS_N_INSNS (4), /* SI */
367 COSTS_N_INSNS (4), /* DI */
368 COSTS_N_INSNS (4)}, /* other */
369 0, /* cost of multiply per each bit set */
370 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
371 COSTS_N_INSNS (17), /* HI */
372 COSTS_N_INSNS (17), /* SI */
373 COSTS_N_INSNS (17), /* DI */
374 COSTS_N_INSNS (17)}, /* other */
375 COSTS_N_INSNS (1), /* cost of movsx */
376 COSTS_N_INSNS (1), /* cost of movzx */
377 8, /* "large" insn */
378 6, /* MOVE_RATIO */
379 2, /* cost for loading QImode using movzbl */
380 {4, 4, 4}, /* cost of loading integer registers
381 in QImode, HImode and SImode.
382 Relative to reg-reg move (2). */
383 {2, 2, 2}, /* cost of storing integer registers */
384 2, /* cost of reg,reg fld/fst */
385 {2, 2, 6}, /* cost of loading fp registers
386 in SFmode, DFmode and XFmode */
387 {4, 4, 6}, /* cost of storing fp registers
388 in SFmode, DFmode and XFmode */
389 2, /* cost of moving MMX register */
390 {2, 2}, /* cost of loading MMX registers
391 in SImode and DImode */
392 {2, 2}, /* cost of storing MMX registers
393 in SImode and DImode */
394 2, /* cost of moving SSE register */
395 {2, 2, 8}, /* cost of loading SSE registers
396 in SImode, DImode and TImode */
397 {2, 2, 8}, /* cost of storing SSE registers
398 in SImode, DImode and TImode */
399 3, /* MMX or SSE register to integer */
400 8, /* size of l1 cache. */
401 256, /* size of l2 cache */
402 32, /* size of prefetch block */
403 6, /* number of parallel prefetches */
404 2, /* Branch cost */
405 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
406 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
407 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
408 COSTS_N_INSNS (2), /* cost of FABS instruction. */
409 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
410 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
411 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
412 the alignment). For small blocks inline loop is still a noticeable win, for bigger
413 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
414 more expensive startup time in CPU, but after 4K the difference is down in the noise.
416 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
417 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
418 DUMMY_STRINGOP_ALGS},
419 {{rep_prefix_4_byte, {{1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, libcall}}},
421 DUMMY_STRINGOP_ALGS},
422 1, /* scalar_stmt_cost. */
423 1, /* scalar load_cost. */
424 1, /* scalar_store_cost. */
425 1, /* vec_stmt_cost. */
426 1, /* vec_to_scalar_cost. */
427 1, /* scalar_to_vec_cost. */
428 1, /* vec_align_load_cost. */
429 2, /* vec_unalign_load_cost. */
430 1, /* vec_store_cost. */
431 3, /* cond_taken_branch_cost. */
432 1, /* cond_not_taken_branch_cost. */
435 static const
436 struct processor_costs geode_cost = {
437 COSTS_N_INSNS (1), /* cost of an add instruction */
438 COSTS_N_INSNS (1), /* cost of a lea instruction */
439 COSTS_N_INSNS (2), /* variable shift costs */
440 COSTS_N_INSNS (1), /* constant shift costs */
441 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
442 COSTS_N_INSNS (4), /* HI */
443 COSTS_N_INSNS (7), /* SI */
444 COSTS_N_INSNS (7), /* DI */
445 COSTS_N_INSNS (7)}, /* other */
446 0, /* cost of multiply per each bit set */
447 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
448 COSTS_N_INSNS (23), /* HI */
449 COSTS_N_INSNS (39), /* SI */
450 COSTS_N_INSNS (39), /* DI */
451 COSTS_N_INSNS (39)}, /* other */
452 COSTS_N_INSNS (1), /* cost of movsx */
453 COSTS_N_INSNS (1), /* cost of movzx */
454 8, /* "large" insn */
455 4, /* MOVE_RATIO */
456 1, /* cost for loading QImode using movzbl */
457 {1, 1, 1}, /* cost of loading integer registers
458 in QImode, HImode and SImode.
459 Relative to reg-reg move (2). */
460 {1, 1, 1}, /* cost of storing integer registers */
461 1, /* cost of reg,reg fld/fst */
462 {1, 1, 1}, /* cost of loading fp registers
463 in SFmode, DFmode and XFmode */
464 {4, 6, 6}, /* cost of storing fp registers
465 in SFmode, DFmode and XFmode */
467 1, /* cost of moving MMX register */
468 {1, 1}, /* cost of loading MMX registers
469 in SImode and DImode */
470 {1, 1}, /* cost of storing MMX registers
471 in SImode and DImode */
472 1, /* cost of moving SSE register */
473 {1, 1, 1}, /* cost of loading SSE registers
474 in SImode, DImode and TImode */
475 {1, 1, 1}, /* cost of storing SSE registers
476 in SImode, DImode and TImode */
477 1, /* MMX or SSE register to integer */
478 64, /* size of l1 cache. */
479 128, /* size of l2 cache. */
480 32, /* size of prefetch block */
481 1, /* number of parallel prefetches */
482 1, /* Branch cost */
483 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
484 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
485 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
486 COSTS_N_INSNS (1), /* cost of FABS instruction. */
487 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
488 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
489 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
490 DUMMY_STRINGOP_ALGS},
491 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
492 DUMMY_STRINGOP_ALGS},
493 1, /* scalar_stmt_cost. */
494 1, /* scalar load_cost. */
495 1, /* scalar_store_cost. */
496 1, /* vec_stmt_cost. */
497 1, /* vec_to_scalar_cost. */
498 1, /* scalar_to_vec_cost. */
499 1, /* vec_align_load_cost. */
500 2, /* vec_unalign_load_cost. */
501 1, /* vec_store_cost. */
502 3, /* cond_taken_branch_cost. */
503 1, /* cond_not_taken_branch_cost. */
506 static const
507 struct processor_costs k6_cost = {
508 COSTS_N_INSNS (1), /* cost of an add instruction */
509 COSTS_N_INSNS (2), /* cost of a lea instruction */
510 COSTS_N_INSNS (1), /* variable shift costs */
511 COSTS_N_INSNS (1), /* constant shift costs */
512 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
513 COSTS_N_INSNS (3), /* HI */
514 COSTS_N_INSNS (3), /* SI */
515 COSTS_N_INSNS (3), /* DI */
516 COSTS_N_INSNS (3)}, /* other */
517 0, /* cost of multiply per each bit set */
518 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
519 COSTS_N_INSNS (18), /* HI */
520 COSTS_N_INSNS (18), /* SI */
521 COSTS_N_INSNS (18), /* DI */
522 COSTS_N_INSNS (18)}, /* other */
523 COSTS_N_INSNS (2), /* cost of movsx */
524 COSTS_N_INSNS (2), /* cost of movzx */
525 8, /* "large" insn */
526 4, /* MOVE_RATIO */
527 3, /* cost for loading QImode using movzbl */
528 {4, 5, 4}, /* cost of loading integer registers
529 in QImode, HImode and SImode.
530 Relative to reg-reg move (2). */
531 {2, 3, 2}, /* cost of storing integer registers */
532 4, /* cost of reg,reg fld/fst */
533 {6, 6, 6}, /* cost of loading fp registers
534 in SFmode, DFmode and XFmode */
535 {4, 4, 4}, /* cost of storing fp registers
536 in SFmode, DFmode and XFmode */
537 2, /* cost of moving MMX register */
538 {2, 2}, /* cost of loading MMX registers
539 in SImode and DImode */
540 {2, 2}, /* cost of storing MMX registers
541 in SImode and DImode */
542 2, /* cost of moving SSE register */
543 {2, 2, 8}, /* cost of loading SSE registers
544 in SImode, DImode and TImode */
545 {2, 2, 8}, /* cost of storing SSE registers
546 in SImode, DImode and TImode */
547 6, /* MMX or SSE register to integer */
548 32, /* size of l1 cache. */
549 32, /* size of l2 cache. Some models
550 have integrated l2 cache, but
551 optimizing for k6 is not important
552 enough to worry about that. */
553 32, /* size of prefetch block */
554 1, /* number of parallel prefetches */
555 1, /* Branch cost */
556 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
557 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
558 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
559 COSTS_N_INSNS (2), /* cost of FABS instruction. */
560 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
561 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
562 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
563 DUMMY_STRINGOP_ALGS},
564 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
565 DUMMY_STRINGOP_ALGS},
566 1, /* scalar_stmt_cost. */
567 1, /* scalar load_cost. */
568 1, /* scalar_store_cost. */
569 1, /* vec_stmt_cost. */
570 1, /* vec_to_scalar_cost. */
571 1, /* scalar_to_vec_cost. */
572 1, /* vec_align_load_cost. */
573 2, /* vec_unalign_load_cost. */
574 1, /* vec_store_cost. */
575 3, /* cond_taken_branch_cost. */
576 1, /* cond_not_taken_branch_cost. */
579 static const
580 struct processor_costs athlon_cost = {
581 COSTS_N_INSNS (1), /* cost of an add instruction */
582 COSTS_N_INSNS (2), /* cost of a lea instruction */
583 COSTS_N_INSNS (1), /* variable shift costs */
584 COSTS_N_INSNS (1), /* constant shift costs */
585 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
586 COSTS_N_INSNS (5), /* HI */
587 COSTS_N_INSNS (5), /* SI */
588 COSTS_N_INSNS (5), /* DI */
589 COSTS_N_INSNS (5)}, /* other */
590 0, /* cost of multiply per each bit set */
591 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
592 COSTS_N_INSNS (26), /* HI */
593 COSTS_N_INSNS (42), /* SI */
594 COSTS_N_INSNS (74), /* DI */
595 COSTS_N_INSNS (74)}, /* other */
596 COSTS_N_INSNS (1), /* cost of movsx */
597 COSTS_N_INSNS (1), /* cost of movzx */
598 8, /* "large" insn */
599 9, /* MOVE_RATIO */
600 4, /* cost for loading QImode using movzbl */
601 {3, 4, 3}, /* cost of loading integer registers
602 in QImode, HImode and SImode.
603 Relative to reg-reg move (2). */
604 {3, 4, 3}, /* cost of storing integer registers */
605 4, /* cost of reg,reg fld/fst */
606 {4, 4, 12}, /* cost of loading fp registers
607 in SFmode, DFmode and XFmode */
608 {6, 6, 8}, /* cost of storing fp registers
609 in SFmode, DFmode and XFmode */
610 2, /* cost of moving MMX register */
611 {4, 4}, /* cost of loading MMX registers
612 in SImode and DImode */
613 {4, 4}, /* cost of storing MMX registers
614 in SImode and DImode */
615 2, /* cost of moving SSE register */
616 {4, 4, 6}, /* cost of loading SSE registers
617 in SImode, DImode and TImode */
618 {4, 4, 5}, /* cost of storing SSE registers
619 in SImode, DImode and TImode */
620 5, /* MMX or SSE register to integer */
621 64, /* size of l1 cache. */
622 256, /* size of l2 cache. */
623 64, /* size of prefetch block */
624 6, /* number of parallel prefetches */
625 5, /* Branch cost */
626 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
627 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
628 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
629 COSTS_N_INSNS (2), /* cost of FABS instruction. */
630 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
631 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
632 /* For some reason, Athlon deals better with REP prefix (relative to loops)
633 compared to K8. Alignment becomes important after 8 bytes for memcpy and
634 128 bytes for memset. */
635 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
636 DUMMY_STRINGOP_ALGS},
637 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
638 DUMMY_STRINGOP_ALGS},
639 1, /* scalar_stmt_cost. */
640 1, /* scalar load_cost. */
641 1, /* scalar_store_cost. */
642 1, /* vec_stmt_cost. */
643 1, /* vec_to_scalar_cost. */
644 1, /* scalar_to_vec_cost. */
645 1, /* vec_align_load_cost. */
646 2, /* vec_unalign_load_cost. */
647 1, /* vec_store_cost. */
648 3, /* cond_taken_branch_cost. */
649 1, /* cond_not_taken_branch_cost. */
652 static const
653 struct processor_costs k8_cost = {
654 COSTS_N_INSNS (1), /* cost of an add instruction */
655 COSTS_N_INSNS (2), /* cost of a lea instruction */
656 COSTS_N_INSNS (1), /* variable shift costs */
657 COSTS_N_INSNS (1), /* constant shift costs */
658 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
659 COSTS_N_INSNS (4), /* HI */
660 COSTS_N_INSNS (3), /* SI */
661 COSTS_N_INSNS (4), /* DI */
662 COSTS_N_INSNS (5)}, /* other */
663 0, /* cost of multiply per each bit set */
664 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
665 COSTS_N_INSNS (26), /* HI */
666 COSTS_N_INSNS (42), /* SI */
667 COSTS_N_INSNS (74), /* DI */
668 COSTS_N_INSNS (74)}, /* other */
669 COSTS_N_INSNS (1), /* cost of movsx */
670 COSTS_N_INSNS (1), /* cost of movzx */
671 8, /* "large" insn */
672 9, /* MOVE_RATIO */
673 4, /* cost for loading QImode using movzbl */
674 {3, 4, 3}, /* cost of loading integer registers
675 in QImode, HImode and SImode.
676 Relative to reg-reg move (2). */
677 {3, 4, 3}, /* cost of storing integer registers */
678 4, /* cost of reg,reg fld/fst */
679 {4, 4, 12}, /* cost of loading fp registers
680 in SFmode, DFmode and XFmode */
681 {6, 6, 8}, /* cost of storing fp registers
682 in SFmode, DFmode and XFmode */
683 2, /* cost of moving MMX register */
684 {3, 3}, /* cost of loading MMX registers
685 in SImode and DImode */
686 {4, 4}, /* cost of storing MMX registers
687 in SImode and DImode */
688 2, /* cost of moving SSE register */
689 {4, 3, 6}, /* cost of loading SSE registers
690 in SImode, DImode and TImode */
691 {4, 4, 5}, /* cost of storing SSE registers
692 in SImode, DImode and TImode */
693 5, /* MMX or SSE register to integer */
694 64, /* size of l1 cache. */
695 512, /* size of l2 cache. */
696 64, /* size of prefetch block */
697 /* New AMD processors never drop prefetches; if they cannot be performed
698 immediately, they are queued. We set number of simultaneous prefetches
699 to a large constant to reflect this (it probably is not a good idea not
700 to limit number of prefetches at all, as their execution also takes some
701 time). */
702 100, /* number of parallel prefetches */
703 5, /* Branch cost */
704 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
705 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
706 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
707 COSTS_N_INSNS (2), /* cost of FABS instruction. */
708 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
709 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
710 /* K8 has optimized REP instruction for medium sized blocks, but for very small
711 blocks it is better to use loop. For large blocks, libcall can do
712 nontemporary accesses and beat inline considerably. */
713 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
714 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
715 {{libcall, {{8, loop}, {24, unrolled_loop},
716 {2048, rep_prefix_4_byte}, {-1, libcall}}},
717 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 4, /* scalar_stmt_cost. */
719 2, /* scalar load_cost. */
720 2, /* scalar_store_cost. */
721 5, /* vec_stmt_cost. */
722 0, /* vec_to_scalar_cost. */
723 2, /* scalar_to_vec_cost. */
724 2, /* vec_align_load_cost. */
725 3, /* vec_unalign_load_cost. */
726 3, /* vec_store_cost. */
727 6, /* cond_taken_branch_cost. */
728 1, /* cond_not_taken_branch_cost. */
731 struct processor_costs amdfam10_cost = {
732 COSTS_N_INSNS (1), /* cost of an add instruction */
733 COSTS_N_INSNS (2), /* cost of a lea instruction */
734 COSTS_N_INSNS (1), /* variable shift costs */
735 COSTS_N_INSNS (1), /* constant shift costs */
736 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
737 COSTS_N_INSNS (4), /* HI */
738 COSTS_N_INSNS (3), /* SI */
739 COSTS_N_INSNS (4), /* DI */
740 COSTS_N_INSNS (5)}, /* other */
741 0, /* cost of multiply per each bit set */
742 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
743 COSTS_N_INSNS (35), /* HI */
744 COSTS_N_INSNS (51), /* SI */
745 COSTS_N_INSNS (83), /* DI */
746 COSTS_N_INSNS (83)}, /* other */
747 COSTS_N_INSNS (1), /* cost of movsx */
748 COSTS_N_INSNS (1), /* cost of movzx */
749 8, /* "large" insn */
750 9, /* MOVE_RATIO */
751 4, /* cost for loading QImode using movzbl */
752 {3, 4, 3}, /* cost of loading integer registers
753 in QImode, HImode and SImode.
754 Relative to reg-reg move (2). */
755 {3, 4, 3}, /* cost of storing integer registers */
756 4, /* cost of reg,reg fld/fst */
757 {4, 4, 12}, /* cost of loading fp registers
758 in SFmode, DFmode and XFmode */
759 {6, 6, 8}, /* cost of storing fp registers
760 in SFmode, DFmode and XFmode */
761 2, /* cost of moving MMX register */
762 {3, 3}, /* cost of loading MMX registers
763 in SImode and DImode */
764 {4, 4}, /* cost of storing MMX registers
765 in SImode and DImode */
766 2, /* cost of moving SSE register */
767 {4, 4, 3}, /* cost of loading SSE registers
768 in SImode, DImode and TImode */
769 {4, 4, 5}, /* cost of storing SSE registers
770 in SImode, DImode and TImode */
771 3, /* MMX or SSE register to integer */
772 /* On K8
773 MOVD reg64, xmmreg Double FSTORE 4
774 MOVD reg32, xmmreg Double FSTORE 4
775 On AMDFAM10
776 MOVD reg64, xmmreg Double FADD 3
777 1/1 1/1
778 MOVD reg32, xmmreg Double FADD 3
779 1/1 1/1 */
780 64, /* size of l1 cache. */
781 512, /* size of l2 cache. */
782 64, /* size of prefetch block */
783 /* New AMD processors never drop prefetches; if they cannot be performed
784 immediately, they are queued. We set number of simultaneous prefetches
785 to a large constant to reflect this (it probably is not a good idea not
786 to limit number of prefetches at all, as their execution also takes some
787 time). */
788 100, /* number of parallel prefetches */
789 5, /* Branch cost */
790 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
791 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
792 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
793 COSTS_N_INSNS (2), /* cost of FABS instruction. */
794 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
795 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
797 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
798 very small blocks it is better to use loop. For large blocks, libcall can
799 do nontemporary accesses and beat inline considerably. */
800 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
801 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
802 {{libcall, {{8, loop}, {24, unrolled_loop},
803 {2048, rep_prefix_4_byte}, {-1, libcall}}},
804 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 4, /* scalar_stmt_cost. */
806 2, /* scalar load_cost. */
807 2, /* scalar_store_cost. */
808 6, /* vec_stmt_cost. */
809 0, /* vec_to_scalar_cost. */
810 2, /* scalar_to_vec_cost. */
811 2, /* vec_align_load_cost. */
812 2, /* vec_unalign_load_cost. */
813 2, /* vec_store_cost. */
814 6, /* cond_taken_branch_cost. */
815 1, /* cond_not_taken_branch_cost. */
818 static const
819 struct processor_costs pentium4_cost = {
820 COSTS_N_INSNS (1), /* cost of an add instruction */
821 COSTS_N_INSNS (3), /* cost of a lea instruction */
822 COSTS_N_INSNS (4), /* variable shift costs */
823 COSTS_N_INSNS (4), /* constant shift costs */
824 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
825 COSTS_N_INSNS (15), /* HI */
826 COSTS_N_INSNS (15), /* SI */
827 COSTS_N_INSNS (15), /* DI */
828 COSTS_N_INSNS (15)}, /* other */
829 0, /* cost of multiply per each bit set */
830 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
831 COSTS_N_INSNS (56), /* HI */
832 COSTS_N_INSNS (56), /* SI */
833 COSTS_N_INSNS (56), /* DI */
834 COSTS_N_INSNS (56)}, /* other */
835 COSTS_N_INSNS (1), /* cost of movsx */
836 COSTS_N_INSNS (1), /* cost of movzx */
837 16, /* "large" insn */
838 6, /* MOVE_RATIO */
839 2, /* cost for loading QImode using movzbl */
840 {4, 5, 4}, /* cost of loading integer registers
841 in QImode, HImode and SImode.
842 Relative to reg-reg move (2). */
843 {2, 3, 2}, /* cost of storing integer registers */
844 2, /* cost of reg,reg fld/fst */
845 {2, 2, 6}, /* cost of loading fp registers
846 in SFmode, DFmode and XFmode */
847 {4, 4, 6}, /* cost of storing fp registers
848 in SFmode, DFmode and XFmode */
849 2, /* cost of moving MMX register */
850 {2, 2}, /* cost of loading MMX registers
851 in SImode and DImode */
852 {2, 2}, /* cost of storing MMX registers
853 in SImode and DImode */
854 12, /* cost of moving SSE register */
855 {12, 12, 12}, /* cost of loading SSE registers
856 in SImode, DImode and TImode */
857 {2, 2, 8}, /* cost of storing SSE registers
858 in SImode, DImode and TImode */
859 10, /* MMX or SSE register to integer */
860 8, /* size of l1 cache. */
861 256, /* size of l2 cache. */
862 64, /* size of prefetch block */
863 6, /* number of parallel prefetches */
864 2, /* Branch cost */
865 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
866 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
867 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
868 COSTS_N_INSNS (2), /* cost of FABS instruction. */
869 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
870 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
871 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
872 DUMMY_STRINGOP_ALGS},
873 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
874 {-1, libcall}}},
875 DUMMY_STRINGOP_ALGS},
876 1, /* scalar_stmt_cost. */
877 1, /* scalar load_cost. */
878 1, /* scalar_store_cost. */
879 1, /* vec_stmt_cost. */
880 1, /* vec_to_scalar_cost. */
881 1, /* scalar_to_vec_cost. */
882 1, /* vec_align_load_cost. */
883 2, /* vec_unalign_load_cost. */
884 1, /* vec_store_cost. */
885 3, /* cond_taken_branch_cost. */
886 1, /* cond_not_taken_branch_cost. */
889 static const
890 struct processor_costs nocona_cost = {
891 COSTS_N_INSNS (1), /* cost of an add instruction */
892 COSTS_N_INSNS (1), /* cost of a lea instruction */
893 COSTS_N_INSNS (1), /* variable shift costs */
894 COSTS_N_INSNS (1), /* constant shift costs */
895 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
896 COSTS_N_INSNS (10), /* HI */
897 COSTS_N_INSNS (10), /* SI */
898 COSTS_N_INSNS (10), /* DI */
899 COSTS_N_INSNS (10)}, /* other */
900 0, /* cost of multiply per each bit set */
901 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
902 COSTS_N_INSNS (66), /* HI */
903 COSTS_N_INSNS (66), /* SI */
904 COSTS_N_INSNS (66), /* DI */
905 COSTS_N_INSNS (66)}, /* other */
906 COSTS_N_INSNS (1), /* cost of movsx */
907 COSTS_N_INSNS (1), /* cost of movzx */
908 16, /* "large" insn */
909 17, /* MOVE_RATIO */
910 4, /* cost for loading QImode using movzbl */
911 {4, 4, 4}, /* cost of loading integer registers
912 in QImode, HImode and SImode.
913 Relative to reg-reg move (2). */
914 {4, 4, 4}, /* cost of storing integer registers */
915 3, /* cost of reg,reg fld/fst */
916 {12, 12, 12}, /* cost of loading fp registers
917 in SFmode, DFmode and XFmode */
918 {4, 4, 4}, /* cost of storing fp registers
919 in SFmode, DFmode and XFmode */
920 6, /* cost of moving MMX register */
921 {12, 12}, /* cost of loading MMX registers
922 in SImode and DImode */
923 {12, 12}, /* cost of storing MMX registers
924 in SImode and DImode */
925 6, /* cost of moving SSE register */
926 {12, 12, 12}, /* cost of loading SSE registers
927 in SImode, DImode and TImode */
928 {12, 12, 12}, /* cost of storing SSE registers
929 in SImode, DImode and TImode */
930 8, /* MMX or SSE register to integer */
931 8, /* size of l1 cache. */
932 1024, /* size of l2 cache. */
933 128, /* size of prefetch block */
934 8, /* number of parallel prefetches */
935 1, /* Branch cost */
936 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
937 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
938 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
939 COSTS_N_INSNS (3), /* cost of FABS instruction. */
940 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
941 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
942 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
943 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
944 {100000, unrolled_loop}, {-1, libcall}}}},
945 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
946 {-1, libcall}}},
947 {libcall, {{24, loop}, {64, unrolled_loop},
948 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
949 1, /* scalar_stmt_cost. */
950 1, /* scalar load_cost. */
951 1, /* scalar_store_cost. */
952 1, /* vec_stmt_cost. */
953 1, /* vec_to_scalar_cost. */
954 1, /* scalar_to_vec_cost. */
955 1, /* vec_align_load_cost. */
956 2, /* vec_unalign_load_cost. */
957 1, /* vec_store_cost. */
958 3, /* cond_taken_branch_cost. */
959 1, /* cond_not_taken_branch_cost. */
962 static const
963 struct processor_costs core2_cost = {
964 COSTS_N_INSNS (1), /* cost of an add instruction */
965 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
966 COSTS_N_INSNS (1), /* variable shift costs */
967 COSTS_N_INSNS (1), /* constant shift costs */
968 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
969 COSTS_N_INSNS (3), /* HI */
970 COSTS_N_INSNS (3), /* SI */
971 COSTS_N_INSNS (3), /* DI */
972 COSTS_N_INSNS (3)}, /* other */
973 0, /* cost of multiply per each bit set */
974 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
975 COSTS_N_INSNS (22), /* HI */
976 COSTS_N_INSNS (22), /* SI */
977 COSTS_N_INSNS (22), /* DI */
978 COSTS_N_INSNS (22)}, /* other */
979 COSTS_N_INSNS (1), /* cost of movsx */
980 COSTS_N_INSNS (1), /* cost of movzx */
981 8, /* "large" insn */
982 16, /* MOVE_RATIO */
983 2, /* cost for loading QImode using movzbl */
984 {6, 6, 6}, /* cost of loading integer registers
985 in QImode, HImode and SImode.
986 Relative to reg-reg move (2). */
987 {4, 4, 4}, /* cost of storing integer registers */
988 2, /* cost of reg,reg fld/fst */
989 {6, 6, 6}, /* cost of loading fp registers
990 in SFmode, DFmode and XFmode */
991 {4, 4, 4}, /* cost of loading integer registers */
992 2, /* cost of moving MMX register */
993 {6, 6}, /* cost of loading MMX registers
994 in SImode and DImode */
995 {4, 4}, /* cost of storing MMX registers
996 in SImode and DImode */
997 2, /* cost of moving SSE register */
998 {6, 6, 6}, /* cost of loading SSE registers
999 in SImode, DImode and TImode */
1000 {4, 4, 4}, /* cost of storing SSE registers
1001 in SImode, DImode and TImode */
1002 2, /* MMX or SSE register to integer */
1003 32, /* size of l1 cache. */
1004 2048, /* size of l2 cache. */
1005 128, /* size of prefetch block */
1006 8, /* number of parallel prefetches */
1007 3, /* Branch cost */
1008 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1009 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1010 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1011 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1012 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1013 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1014 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1015 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1016 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1017 {{libcall, {{8, loop}, {15, unrolled_loop},
1018 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1019 {libcall, {{24, loop}, {32, unrolled_loop},
1020 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1021 1, /* scalar_stmt_cost. */
1022 1, /* scalar load_cost. */
1023 1, /* scalar_store_cost. */
1024 1, /* vec_stmt_cost. */
1025 1, /* vec_to_scalar_cost. */
1026 1, /* scalar_to_vec_cost. */
1027 1, /* vec_align_load_cost. */
1028 2, /* vec_unalign_load_cost. */
1029 1, /* vec_store_cost. */
1030 3, /* cond_taken_branch_cost. */
1031 1, /* cond_not_taken_branch_cost. */
1034 /* Generic64 should produce code tuned for Nocona and K8. */
1035 static const
1036 struct processor_costs generic64_cost = {
1037 COSTS_N_INSNS (1), /* cost of an add instruction */
1038 /* On all chips taken into consideration lea is 2 cycles and more. With
1039 this cost however our current implementation of synth_mult results in
1040 use of unnecessary temporary registers causing regression on several
1041 SPECfp benchmarks. */
1042 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1043 COSTS_N_INSNS (1), /* variable shift costs */
1044 COSTS_N_INSNS (1), /* constant shift costs */
1045 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1046 COSTS_N_INSNS (4), /* HI */
1047 COSTS_N_INSNS (3), /* SI */
1048 COSTS_N_INSNS (4), /* DI */
1049 COSTS_N_INSNS (2)}, /* other */
1050 0, /* cost of multiply per each bit set */
1051 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1052 COSTS_N_INSNS (26), /* HI */
1053 COSTS_N_INSNS (42), /* SI */
1054 COSTS_N_INSNS (74), /* DI */
1055 COSTS_N_INSNS (74)}, /* other */
1056 COSTS_N_INSNS (1), /* cost of movsx */
1057 COSTS_N_INSNS (1), /* cost of movzx */
1058 8, /* "large" insn */
1059 17, /* MOVE_RATIO */
1060 4, /* cost for loading QImode using movzbl */
1061 {4, 4, 4}, /* cost of loading integer registers
1062 in QImode, HImode and SImode.
1063 Relative to reg-reg move (2). */
1064 {4, 4, 4}, /* cost of storing integer registers */
1065 4, /* cost of reg,reg fld/fst */
1066 {12, 12, 12}, /* cost of loading fp registers
1067 in SFmode, DFmode and XFmode */
1068 {6, 6, 8}, /* cost of storing fp registers
1069 in SFmode, DFmode and XFmode */
1070 2, /* cost of moving MMX register */
1071 {8, 8}, /* cost of loading MMX registers
1072 in SImode and DImode */
1073 {8, 8}, /* cost of storing MMX registers
1074 in SImode and DImode */
1075 2, /* cost of moving SSE register */
1076 {8, 8, 8}, /* cost of loading SSE registers
1077 in SImode, DImode and TImode */
1078 {8, 8, 8}, /* cost of storing SSE registers
1079 in SImode, DImode and TImode */
1080 5, /* MMX or SSE register to integer */
1081 32, /* size of l1 cache. */
1082 512, /* size of l2 cache. */
1083 64, /* size of prefetch block */
1084 6, /* number of parallel prefetches */
1085 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1086 is increased to perhaps more appropriate value of 5. */
1087 3, /* Branch cost */
1088 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1089 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1090 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1091 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1092 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1093 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1094 {DUMMY_STRINGOP_ALGS,
1095 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1096 {DUMMY_STRINGOP_ALGS,
1097 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1098 1, /* scalar_stmt_cost. */
1099 1, /* scalar load_cost. */
1100 1, /* scalar_store_cost. */
1101 1, /* vec_stmt_cost. */
1102 1, /* vec_to_scalar_cost. */
1103 1, /* scalar_to_vec_cost. */
1104 1, /* vec_align_load_cost. */
1105 2, /* vec_unalign_load_cost. */
1106 1, /* vec_store_cost. */
1107 3, /* cond_taken_branch_cost. */
1108 1, /* cond_not_taken_branch_cost. */
1111 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1112 static const
1113 struct processor_costs generic32_cost = {
1114 COSTS_N_INSNS (1), /* cost of an add instruction */
1115 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1116 COSTS_N_INSNS (1), /* variable shift costs */
1117 COSTS_N_INSNS (1), /* constant shift costs */
1118 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1119 COSTS_N_INSNS (4), /* HI */
1120 COSTS_N_INSNS (3), /* SI */
1121 COSTS_N_INSNS (4), /* DI */
1122 COSTS_N_INSNS (2)}, /* other */
1123 0, /* cost of multiply per each bit set */
1124 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1125 COSTS_N_INSNS (26), /* HI */
1126 COSTS_N_INSNS (42), /* SI */
1127 COSTS_N_INSNS (74), /* DI */
1128 COSTS_N_INSNS (74)}, /* other */
1129 COSTS_N_INSNS (1), /* cost of movsx */
1130 COSTS_N_INSNS (1), /* cost of movzx */
1131 8, /* "large" insn */
1132 17, /* MOVE_RATIO */
1133 4, /* cost for loading QImode using movzbl */
1134 {4, 4, 4}, /* cost of loading integer registers
1135 in QImode, HImode and SImode.
1136 Relative to reg-reg move (2). */
1137 {4, 4, 4}, /* cost of storing integer registers */
1138 4, /* cost of reg,reg fld/fst */
1139 {12, 12, 12}, /* cost of loading fp registers
1140 in SFmode, DFmode and XFmode */
1141 {6, 6, 8}, /* cost of storing fp registers
1142 in SFmode, DFmode and XFmode */
1143 2, /* cost of moving MMX register */
1144 {8, 8}, /* cost of loading MMX registers
1145 in SImode and DImode */
1146 {8, 8}, /* cost of storing MMX registers
1147 in SImode and DImode */
1148 2, /* cost of moving SSE register */
1149 {8, 8, 8}, /* cost of loading SSE registers
1150 in SImode, DImode and TImode */
1151 {8, 8, 8}, /* cost of storing SSE registers
1152 in SImode, DImode and TImode */
1153 5, /* MMX or SSE register to integer */
1154 32, /* size of l1 cache. */
1155 256, /* size of l2 cache. */
1156 64, /* size of prefetch block */
1157 6, /* number of parallel prefetches */
1158 3, /* Branch cost */
1159 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1160 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1161 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1162 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1163 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1164 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1165 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1166 DUMMY_STRINGOP_ALGS},
1167 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1168 DUMMY_STRINGOP_ALGS},
1169 1, /* scalar_stmt_cost. */
1170 1, /* scalar load_cost. */
1171 1, /* scalar_store_cost. */
1172 1, /* vec_stmt_cost. */
1173 1, /* vec_to_scalar_cost. */
1174 1, /* scalar_to_vec_cost. */
1175 1, /* vec_align_load_cost. */
1176 2, /* vec_unalign_load_cost. */
1177 1, /* vec_store_cost. */
1178 3, /* cond_taken_branch_cost. */
1179 1, /* cond_not_taken_branch_cost. */
1182 const struct processor_costs *ix86_cost = &pentium_cost;
1184 /* Processor feature/optimization bitmasks. */
1185 #define m_386 (1<<PROCESSOR_I386)
1186 #define m_486 (1<<PROCESSOR_I486)
1187 #define m_PENT (1<<PROCESSOR_PENTIUM)
1188 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1189 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1190 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1191 #define m_CORE2 (1<<PROCESSOR_CORE2)
1193 #define m_GEODE (1<<PROCESSOR_GEODE)
1194 #define m_K6 (1<<PROCESSOR_K6)
1195 #define m_K6_GEODE (m_K6 | m_GEODE)
1196 #define m_K8 (1<<PROCESSOR_K8)
1197 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1198 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1199 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1200 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1202 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1203 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1205 /* Generic instruction choice should be common subset of supported CPUs
1206 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1207 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1209 /* Feature tests against the various tunings. */
1210 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1211 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1212 negatively, so enabling for Generic64 seems like good code size
1213 tradeoff. We can't enable it for 32bit generic because it does not
1214 work well with PPro base chips. */
1215 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1217 /* X86_TUNE_PUSH_MEMORY */
1218 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1219 | m_NOCONA | m_CORE2 | m_GENERIC,
1221 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1222 m_486 | m_PENT,
1224 /* X86_TUNE_USE_BIT_TEST */
1225 m_386,
1227 /* X86_TUNE_UNROLL_STRLEN */
1228 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1230 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1231 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1233 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1234 on simulation result. But after P4 was made, no performance benefit
1235 was observed with branch hints. It also increases the code size.
1236 As a result, icc never generates branch hints. */
1239 /* X86_TUNE_DOUBLE_WITH_ADD */
1240 ~m_386,
1242 /* X86_TUNE_USE_SAHF */
1243 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1244 | m_NOCONA | m_CORE2 | m_GENERIC,
1246 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1247 partial dependencies. */
1248 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1249 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1251 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1252 register stalls on Generic32 compilation setting as well. However
1253 in current implementation the partial register stalls are not eliminated
1254 very well - they can be introduced via subregs synthesized by combine
1255 and can happen in caller/callee saving sequences. Because this option
1256 pays back little on PPro based chips and is in conflict with partial reg
1257 dependencies used by Athlon/P4 based chips, it is better to leave it off
1258 for generic32 for now. */
1259 m_PPRO,
1261 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1262 m_CORE2 | m_GENERIC,
1264 /* X86_TUNE_USE_HIMODE_FIOP */
1265 m_386 | m_486 | m_K6_GEODE,
1267 /* X86_TUNE_USE_SIMODE_FIOP */
1268 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1270 /* X86_TUNE_USE_MOV0 */
1271 m_K6,
1273 /* X86_TUNE_USE_CLTD */
1274 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1276 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1277 m_PENT4,
1279 /* X86_TUNE_SPLIT_LONG_MOVES */
1280 m_PPRO,
1282 /* X86_TUNE_READ_MODIFY_WRITE */
1283 ~m_PENT,
1285 /* X86_TUNE_READ_MODIFY */
1286 ~(m_PENT | m_PPRO),
1288 /* X86_TUNE_PROMOTE_QIMODE */
1289 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1290 | m_GENERIC /* | m_PENT4 ? */,
1292 /* X86_TUNE_FAST_PREFIX */
1293 ~(m_PENT | m_486 | m_386),
1295 /* X86_TUNE_SINGLE_STRINGOP */
1296 m_386 | m_PENT4 | m_NOCONA,
1298 /* X86_TUNE_QIMODE_MATH */
1301 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1302 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1303 might be considered for Generic32 if our scheme for avoiding partial
1304 stalls was more effective. */
1305 ~m_PPRO,
1307 /* X86_TUNE_PROMOTE_QI_REGS */
1310 /* X86_TUNE_PROMOTE_HI_REGS */
1311 m_PPRO,
1313 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1314 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1316 /* X86_TUNE_ADD_ESP_8 */
1317 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1318 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1320 /* X86_TUNE_SUB_ESP_4 */
1321 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1323 /* X86_TUNE_SUB_ESP_8 */
1324 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1325 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1327 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1328 for DFmode copies */
1329 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1330 | m_GENERIC | m_GEODE),
1332 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1333 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1335 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1336 conflict here in between PPro/Pentium4 based chips that thread 128bit
1337 SSE registers as single units versus K8 based chips that divide SSE
1338 registers to two 64bit halves. This knob promotes all store destinations
1339 to be 128bit to allow register renaming on 128bit SSE units, but usually
1340 results in one extra microop on 64bit SSE units. Experimental results
1341 shows that disabling this option on P4 brings over 20% SPECfp regression,
1342 while enabling it on K8 brings roughly 2.4% regression that can be partly
1343 masked by careful scheduling of moves. */
1344 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1346 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1347 m_AMDFAM10,
1349 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1350 are resolved on SSE register parts instead of whole registers, so we may
1351 maintain just lower part of scalar values in proper format leaving the
1352 upper part undefined. */
1353 m_ATHLON_K8,
1355 /* X86_TUNE_SSE_TYPELESS_STORES */
1356 m_AMD_MULTIPLE,
1358 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1359 m_PPRO | m_PENT4 | m_NOCONA,
1361 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1362 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1364 /* X86_TUNE_PROLOGUE_USING_MOVE */
1365 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1367 /* X86_TUNE_EPILOGUE_USING_MOVE */
1368 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1370 /* X86_TUNE_SHIFT1 */
1371 ~m_486,
1373 /* X86_TUNE_USE_FFREEP */
1374 m_AMD_MULTIPLE,
1376 /* X86_TUNE_INTER_UNIT_MOVES */
1377 ~(m_AMD_MULTIPLE | m_GENERIC),
1379 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1380 ~(m_AMDFAM10),
1382 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1383 than 4 branch instructions in the 16 byte window. */
1384 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1386 /* X86_TUNE_SCHEDULE */
1387 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1389 /* X86_TUNE_USE_BT */
1390 m_AMD_MULTIPLE,
1392 /* X86_TUNE_USE_INCDEC */
1393 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1395 /* X86_TUNE_PAD_RETURNS */
1396 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1398 /* X86_TUNE_EXT_80387_CONSTANTS */
1399 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1401 /* X86_TUNE_SHORTEN_X87_SSE */
1402 ~m_K8,
1404 /* X86_TUNE_AVOID_VECTOR_DECODE */
1405 m_K8 | m_GENERIC64,
1407 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1408 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1409 ~(m_386 | m_486),
1411 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1412 vector path on AMD machines. */
1413 m_K8 | m_GENERIC64 | m_AMDFAM10,
1415 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1416 machines. */
1417 m_K8 | m_GENERIC64 | m_AMDFAM10,
1419 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1420 than a MOV. */
1421 m_PENT,
1423 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1424 but one byte longer. */
1425 m_PENT,
1427 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1428 operand that cannot be represented using a modRM byte. The XOR
1429 replacement is long decoded, so this split helps here as well. */
1430 m_K6,
1432 /* X86_TUNE_USE_VECTOR_CONVERTS: Preffer vector packed SSE conversion
1433 from integer to FP. */
1434 m_AMDFAM10,
1437 /* Feature tests against the various architecture variations. */
1438 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1439 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1440 ~(m_386 | m_486 | m_PENT | m_K6),
1442 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1443 ~m_386,
1445 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1446 ~(m_386 | m_486),
1448 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1449 ~m_386,
1451 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1452 ~m_386,
1455 static const unsigned int x86_accumulate_outgoing_args
1456 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1458 static const unsigned int x86_arch_always_fancy_math_387
1459 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1460 | m_NOCONA | m_CORE2 | m_GENERIC;
1462 static enum stringop_alg stringop_alg = no_stringop;
1464 /* In case the average insn count for single function invocation is
1465 lower than this constant, emit fast (but longer) prologue and
1466 epilogue code. */
1467 #define FAST_PROLOGUE_INSN_COUNT 20
1469 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1470 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1471 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1472 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1474 /* Array of the smallest class containing reg number REGNO, indexed by
1475 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1477 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1479 /* ax, dx, cx, bx */
1480 AREG, DREG, CREG, BREG,
1481 /* si, di, bp, sp */
1482 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1483 /* FP registers */
1484 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1485 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1486 /* arg pointer */
1487 NON_Q_REGS,
1488 /* flags, fpsr, fpcr, frame */
1489 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1490 /* SSE registers */
1491 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1492 SSE_REGS, SSE_REGS,
1493 /* MMX registers */
1494 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1495 MMX_REGS, MMX_REGS,
1496 /* REX registers */
1497 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1498 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1499 /* SSE REX registers */
1500 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1501 SSE_REGS, SSE_REGS,
1504 /* The "default" register map used in 32bit mode. */
1506 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1508 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1509 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1510 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1511 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1512 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1513 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1514 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1517 static int const x86_64_int_parameter_registers[6] =
1519 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1520 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1523 static int const x86_64_ms_abi_int_parameter_registers[4] =
1525 2 /*RCX*/, 1 /*RDX*/,
1526 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1529 static int const x86_64_int_return_registers[4] =
1531 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1534 /* The "default" register map used in 64bit mode. */
1535 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1537 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1538 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1539 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1540 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1541 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1542 8,9,10,11,12,13,14,15, /* extended integer registers */
1543 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1546 /* Define the register numbers to be used in Dwarf debugging information.
1547 The SVR4 reference port C compiler uses the following register numbers
1548 in its Dwarf output code:
1549 0 for %eax (gcc regno = 0)
1550 1 for %ecx (gcc regno = 2)
1551 2 for %edx (gcc regno = 1)
1552 3 for %ebx (gcc regno = 3)
1553 4 for %esp (gcc regno = 7)
1554 5 for %ebp (gcc regno = 6)
1555 6 for %esi (gcc regno = 4)
1556 7 for %edi (gcc regno = 5)
1557 The following three DWARF register numbers are never generated by
1558 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1559 believes these numbers have these meanings.
1560 8 for %eip (no gcc equivalent)
1561 9 for %eflags (gcc regno = 17)
1562 10 for %trapno (no gcc equivalent)
1563 It is not at all clear how we should number the FP stack registers
1564 for the x86 architecture. If the version of SDB on x86/svr4 were
1565 a bit less brain dead with respect to floating-point then we would
1566 have a precedent to follow with respect to DWARF register numbers
1567 for x86 FP registers, but the SDB on x86/svr4 is so completely
1568 broken with respect to FP registers that it is hardly worth thinking
1569 of it as something to strive for compatibility with.
1570 The version of x86/svr4 SDB I have at the moment does (partially)
1571 seem to believe that DWARF register number 11 is associated with
1572 the x86 register %st(0), but that's about all. Higher DWARF
1573 register numbers don't seem to be associated with anything in
1574 particular, and even for DWARF regno 11, SDB only seems to under-
1575 stand that it should say that a variable lives in %st(0) (when
1576 asked via an `=' command) if we said it was in DWARF regno 11,
1577 but SDB still prints garbage when asked for the value of the
1578 variable in question (via a `/' command).
1579 (Also note that the labels SDB prints for various FP stack regs
1580 when doing an `x' command are all wrong.)
1581 Note that these problems generally don't affect the native SVR4
1582 C compiler because it doesn't allow the use of -O with -g and
1583 because when it is *not* optimizing, it allocates a memory
1584 location for each floating-point variable, and the memory
1585 location is what gets described in the DWARF AT_location
1586 attribute for the variable in question.
1587 Regardless of the severe mental illness of the x86/svr4 SDB, we
1588 do something sensible here and we use the following DWARF
1589 register numbers. Note that these are all stack-top-relative
1590 numbers.
1591 11 for %st(0) (gcc regno = 8)
1592 12 for %st(1) (gcc regno = 9)
1593 13 for %st(2) (gcc regno = 10)
1594 14 for %st(3) (gcc regno = 11)
1595 15 for %st(4) (gcc regno = 12)
1596 16 for %st(5) (gcc regno = 13)
1597 17 for %st(6) (gcc regno = 14)
1598 18 for %st(7) (gcc regno = 15)
1600 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1602 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1603 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1604 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1605 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1606 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1607 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1608 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1611 /* Test and compare insns in i386.md store the information needed to
1612 generate branch and scc insns here. */
1614 rtx ix86_compare_op0 = NULL_RTX;
1615 rtx ix86_compare_op1 = NULL_RTX;
1616 rtx ix86_compare_emitted = NULL_RTX;
1618 /* Size of the register save area. */
1619 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1621 /* Define the structure for the machine field in struct function. */
1623 struct stack_local_entry GTY(())
1625 unsigned short mode;
1626 unsigned short n;
1627 rtx rtl;
1628 struct stack_local_entry *next;
1631 /* Structure describing stack frame layout.
1632 Stack grows downward:
1634 [arguments]
1635 <- ARG_POINTER
1636 saved pc
1638 saved frame pointer if frame_pointer_needed
1639 <- HARD_FRAME_POINTER
1640 [saved regs]
1642 [padding1] \
1644 [va_arg registers] (
1645 > to_allocate <- FRAME_POINTER
1646 [frame] (
1648 [padding2] /
1650 struct ix86_frame
1652 int nregs;
1653 int padding1;
1654 int va_arg_size;
1655 HOST_WIDE_INT frame;
1656 int padding2;
1657 int outgoing_arguments_size;
1658 int red_zone_size;
1660 HOST_WIDE_INT to_allocate;
1661 /* The offsets relative to ARG_POINTER. */
1662 HOST_WIDE_INT frame_pointer_offset;
1663 HOST_WIDE_INT hard_frame_pointer_offset;
1664 HOST_WIDE_INT stack_pointer_offset;
1666 /* When save_regs_using_mov is set, emit prologue using
1667 move instead of push instructions. */
1668 bool save_regs_using_mov;
1671 /* Code model option. */
1672 enum cmodel ix86_cmodel;
1673 /* Asm dialect. */
1674 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1675 /* TLS dialects. */
1676 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1678 /* Which unit we are generating floating point math for. */
1679 enum fpmath_unit ix86_fpmath;
1681 /* Which cpu are we scheduling for. */
1682 enum processor_type ix86_tune;
1684 /* Which instruction set architecture to use. */
1685 enum processor_type ix86_arch;
1687 /* true if sse prefetch instruction is not NOOP. */
1688 int x86_prefetch_sse;
1690 /* ix86_regparm_string as a number */
1691 static int ix86_regparm;
1693 /* -mstackrealign option */
1694 extern int ix86_force_align_arg_pointer;
1695 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1697 /* Preferred alignment for stack boundary in bits. */
1698 unsigned int ix86_preferred_stack_boundary;
1700 /* Values 1-5: see jump.c */
1701 int ix86_branch_cost;
1703 /* Variables which are this size or smaller are put in the data/bss
1704 or ldata/lbss sections. */
1706 int ix86_section_threshold = 65536;
1708 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1709 char internal_label_prefix[16];
1710 int internal_label_prefix_len;
1712 /* Fence to use after loop using movnt. */
1713 tree x86_mfence;
1715 /* Register class used for passing given 64bit part of the argument.
1716 These represent classes as documented by the PS ABI, with the exception
1717 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1718 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1720 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1721 whenever possible (upper half does contain padding). */
1722 enum x86_64_reg_class
1724 X86_64_NO_CLASS,
1725 X86_64_INTEGER_CLASS,
1726 X86_64_INTEGERSI_CLASS,
1727 X86_64_SSE_CLASS,
1728 X86_64_SSESF_CLASS,
1729 X86_64_SSEDF_CLASS,
1730 X86_64_SSEUP_CLASS,
1731 X86_64_X87_CLASS,
1732 X86_64_X87UP_CLASS,
1733 X86_64_COMPLEX_X87_CLASS,
1734 X86_64_MEMORY_CLASS
1736 static const char * const x86_64_reg_class_name[] =
1738 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1739 "sseup", "x87", "x87up", "cplx87", "no"
1742 #define MAX_CLASSES 4
1744 /* Table of constants used by fldpi, fldln2, etc.... */
1745 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1746 static bool ext_80387_constants_init = 0;
1749 static struct machine_function * ix86_init_machine_status (void);
1750 static rtx ix86_function_value (const_tree, const_tree, bool);
1751 static int ix86_function_regparm (const_tree, const_tree);
1752 static void ix86_compute_frame_layout (struct ix86_frame *);
1753 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1754 rtx, rtx, int);
1757 /* The svr4 ABI for the i386 says that records and unions are returned
1758 in memory. */
1759 #ifndef DEFAULT_PCC_STRUCT_RETURN
1760 #define DEFAULT_PCC_STRUCT_RETURN 1
1761 #endif
1763 /* Bit flags that specify the ISA we are compiling for. */
1764 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1766 /* A mask of ix86_isa_flags that includes bit X if X
1767 was set or cleared on the command line. */
1768 static int ix86_isa_flags_explicit;
1770 /* Define a set of ISAs which aren't available for a given ISA. MMX
1771 and SSE ISAs are handled separately. */
1773 #define OPTION_MASK_ISA_MMX_UNSET \
1774 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
1775 #define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A
1777 #define OPTION_MASK_ISA_SSE_UNSET \
1778 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
1779 #define OPTION_MASK_ISA_SSE2_UNSET \
1780 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
1781 #define OPTION_MASK_ISA_SSE3_UNSET \
1782 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
1783 #define OPTION_MASK_ISA_SSSE3_UNSET \
1784 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
1785 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1786 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
1787 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A
1789 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1790 as -msse4.1 -msse4.2. -mno-sse4 should the same as -mno-sse4.1. */
1791 #define OPTION_MASK_ISA_SSE4 \
1792 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
1793 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1795 #define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
1797 #define OPTION_MASK_ISA_SSE5_UNSET \
1798 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
1800 /* Vectorization library interface and handlers. */
1801 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1802 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1804 /* Implement TARGET_HANDLE_OPTION. */
1806 static bool
1807 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1809 switch (code)
1811 case OPT_mmmx:
1812 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX;
1813 if (!value)
1815 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1816 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1818 return true;
1820 case OPT_m3dnow:
1821 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW;
1822 if (!value)
1824 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1825 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1827 return true;
1829 case OPT_m3dnowa:
1830 return false;
1832 case OPT_msse:
1833 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE;
1834 if (!value)
1836 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1837 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
1839 return true;
1841 case OPT_msse2:
1842 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2;
1843 if (!value)
1845 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1846 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
1848 return true;
1850 case OPT_msse3:
1851 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3;
1852 if (!value)
1854 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1855 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
1857 return true;
1859 case OPT_mssse3:
1860 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3;
1861 if (!value)
1863 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1864 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
1866 return true;
1868 case OPT_msse4_1:
1869 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
1870 if (!value)
1872 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1873 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1875 return true;
1877 case OPT_msse4_2:
1878 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2;
1879 if (!value)
1881 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1882 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
1884 return true;
1886 case OPT_msse4:
1887 ix86_isa_flags |= OPTION_MASK_ISA_SSE4;
1888 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4;
1889 return true;
1891 case OPT_mno_sse4:
1892 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1893 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1894 return true;
1896 case OPT_msse4a:
1897 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
1898 if (!value)
1900 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1901 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
1903 return true;
1905 case OPT_msse5:
1906 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5;
1907 if (!value)
1909 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
1910 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
1912 return true;
1914 default:
1915 return true;
1919 /* Sometimes certain combinations of command options do not make
1920 sense on a particular target machine. You can define a macro
1921 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1922 defined, is executed once just after all the command options have
1923 been parsed.
1925 Don't use this macro to turn on various extra optimizations for
1926 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1928 void
1929 override_options (void)
1931 int i;
1932 int ix86_tune_defaulted = 0;
1933 int ix86_arch_specified = 0;
1934 unsigned int ix86_arch_mask, ix86_tune_mask;
1936 /* Comes from final.c -- no real reason to change it. */
1937 #define MAX_CODE_ALIGN 16
1939 static struct ptt
1941 const struct processor_costs *cost; /* Processor costs */
1942 const int align_loop; /* Default alignments. */
1943 const int align_loop_max_skip;
1944 const int align_jump;
1945 const int align_jump_max_skip;
1946 const int align_func;
1948 const processor_target_table[PROCESSOR_max] =
1950 {&i386_cost, 4, 3, 4, 3, 4},
1951 {&i486_cost, 16, 15, 16, 15, 16},
1952 {&pentium_cost, 16, 7, 16, 7, 16},
1953 {&pentiumpro_cost, 16, 15, 16, 10, 16},
1954 {&geode_cost, 0, 0, 0, 0, 0},
1955 {&k6_cost, 32, 7, 32, 7, 32},
1956 {&athlon_cost, 16, 7, 16, 7, 16},
1957 {&pentium4_cost, 0, 0, 0, 0, 0},
1958 {&k8_cost, 16, 7, 16, 7, 16},
1959 {&nocona_cost, 0, 0, 0, 0, 0},
1960 {&core2_cost, 16, 10, 16, 10, 16},
1961 {&generic32_cost, 16, 7, 16, 7, 16},
1962 {&generic64_cost, 16, 10, 16, 10, 16},
1963 {&amdfam10_cost, 32, 24, 32, 7, 32}
1966 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1967 enum pta_flags
1969 PTA_SSE = 1 << 0,
1970 PTA_SSE2 = 1 << 1,
1971 PTA_SSE3 = 1 << 2,
1972 PTA_MMX = 1 << 3,
1973 PTA_PREFETCH_SSE = 1 << 4,
1974 PTA_3DNOW = 1 << 5,
1975 PTA_3DNOW_A = 1 << 6,
1976 PTA_64BIT = 1 << 7,
1977 PTA_SSSE3 = 1 << 8,
1978 PTA_CX16 = 1 << 9,
1979 PTA_POPCNT = 1 << 10,
1980 PTA_ABM = 1 << 11,
1981 PTA_SSE4A = 1 << 12,
1982 PTA_NO_SAHF = 1 << 13,
1983 PTA_SSE4_1 = 1 << 14,
1984 PTA_SSE4_2 = 1 << 15,
1985 PTA_SSE5 = 1 << 16
1988 static struct pta
1990 const char *const name; /* processor name or nickname. */
1991 const enum processor_type processor;
1992 const unsigned /*enum pta_flags*/ flags;
1994 const processor_alias_table[] =
1996 {"i386", PROCESSOR_I386, 0},
1997 {"i486", PROCESSOR_I486, 0},
1998 {"i586", PROCESSOR_PENTIUM, 0},
1999 {"pentium", PROCESSOR_PENTIUM, 0},
2000 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
2001 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
2002 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2003 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2004 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2005 {"i686", PROCESSOR_PENTIUMPRO, 0},
2006 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
2007 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
2008 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2009 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2010 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
2011 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
2012 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2013 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2014 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
2015 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2016 | PTA_CX16 | PTA_NO_SAHF)},
2017 {"core2", PROCESSOR_CORE2, (PTA_64BIT
2018 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2019 | PTA_SSSE3
2020 | PTA_CX16)},
2021 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2022 |PTA_PREFETCH_SSE)},
2023 {"k6", PROCESSOR_K6, PTA_MMX},
2024 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2025 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2026 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2027 | PTA_PREFETCH_SSE)},
2028 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2029 | PTA_PREFETCH_SSE)},
2030 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2031 | PTA_SSE)},
2032 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2033 | PTA_SSE)},
2034 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2035 | PTA_SSE)},
2036 {"x86-64", PROCESSOR_K8, (PTA_64BIT
2037 | PTA_MMX | PTA_SSE | PTA_SSE2
2038 | PTA_NO_SAHF)},
2039 {"k8", PROCESSOR_K8, (PTA_64BIT
2040 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2041 | PTA_SSE | PTA_SSE2
2042 | PTA_NO_SAHF)},
2043 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
2044 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2045 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2046 | PTA_NO_SAHF)},
2047 {"opteron", PROCESSOR_K8, (PTA_64BIT
2048 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2049 | PTA_SSE | PTA_SSE2
2050 | PTA_NO_SAHF)},
2051 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
2052 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2053 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2054 | PTA_NO_SAHF)},
2055 {"athlon64", PROCESSOR_K8, (PTA_64BIT
2056 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2057 | PTA_SSE | PTA_SSE2
2058 | PTA_NO_SAHF)},
2059 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
2060 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2061 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2062 | PTA_NO_SAHF)},
2063 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
2064 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2065 | PTA_SSE | PTA_SSE2
2066 | PTA_NO_SAHF)},
2067 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
2068 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2069 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2070 | PTA_SSE4A
2071 | PTA_CX16 | PTA_ABM)},
2072 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
2073 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2074 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2075 | PTA_SSE4A
2076 | PTA_CX16 | PTA_ABM)},
2077 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
2078 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
2081 int const pta_size = ARRAY_SIZE (processor_alias_table);
2083 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2084 SUBTARGET_OVERRIDE_OPTIONS;
2085 #endif
2087 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2088 SUBSUBTARGET_OVERRIDE_OPTIONS;
2089 #endif
2091 /* -fPIC is the default for x86_64. */
2092 if (TARGET_MACHO && TARGET_64BIT)
2093 flag_pic = 2;
2095 /* Set the default values for switches whose default depends on TARGET_64BIT
2096 in case they weren't overwritten by command line options. */
2097 if (TARGET_64BIT)
2099 /* Mach-O doesn't support omitting the frame pointer for now. */
2100 if (flag_omit_frame_pointer == 2)
2101 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2102 if (flag_asynchronous_unwind_tables == 2)
2103 flag_asynchronous_unwind_tables = 1;
2104 if (flag_pcc_struct_return == 2)
2105 flag_pcc_struct_return = 0;
2107 else
2109 if (flag_omit_frame_pointer == 2)
2110 flag_omit_frame_pointer = 0;
2111 if (flag_asynchronous_unwind_tables == 2)
2112 flag_asynchronous_unwind_tables = 0;
2113 if (flag_pcc_struct_return == 2)
2114 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2117 /* Need to check -mtune=generic first. */
2118 if (ix86_tune_string)
2120 if (!strcmp (ix86_tune_string, "generic")
2121 || !strcmp (ix86_tune_string, "i686")
2122 /* As special support for cross compilers we read -mtune=native
2123 as -mtune=generic. With native compilers we won't see the
2124 -mtune=native, as it was changed by the driver. */
2125 || !strcmp (ix86_tune_string, "native"))
2127 if (TARGET_64BIT)
2128 ix86_tune_string = "generic64";
2129 else
2130 ix86_tune_string = "generic32";
2132 else if (!strncmp (ix86_tune_string, "generic", 7))
2133 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2135 else
2137 if (ix86_arch_string)
2138 ix86_tune_string = ix86_arch_string;
2139 if (!ix86_tune_string)
2141 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
2142 ix86_tune_defaulted = 1;
2145 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2146 need to use a sensible tune option. */
2147 if (!strcmp (ix86_tune_string, "generic")
2148 || !strcmp (ix86_tune_string, "x86-64")
2149 || !strcmp (ix86_tune_string, "i686"))
2151 if (TARGET_64BIT)
2152 ix86_tune_string = "generic64";
2153 else
2154 ix86_tune_string = "generic32";
2157 if (ix86_stringop_string)
2159 if (!strcmp (ix86_stringop_string, "rep_byte"))
2160 stringop_alg = rep_prefix_1_byte;
2161 else if (!strcmp (ix86_stringop_string, "libcall"))
2162 stringop_alg = libcall;
2163 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2164 stringop_alg = rep_prefix_4_byte;
2165 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2166 stringop_alg = rep_prefix_8_byte;
2167 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2168 stringop_alg = loop_1_byte;
2169 else if (!strcmp (ix86_stringop_string, "loop"))
2170 stringop_alg = loop;
2171 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2172 stringop_alg = unrolled_loop;
2173 else
2174 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2176 if (!strcmp (ix86_tune_string, "x86-64"))
2177 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2178 "-mtune=generic instead as appropriate.");
2180 if (!ix86_arch_string)
2181 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2182 else
2183 ix86_arch_specified = 1;
2185 if (!strcmp (ix86_arch_string, "generic"))
2186 error ("generic CPU can be used only for -mtune= switch");
2187 if (!strncmp (ix86_arch_string, "generic", 7))
2188 error ("bad value (%s) for -march= switch", ix86_arch_string);
2190 if (ix86_cmodel_string != 0)
2192 if (!strcmp (ix86_cmodel_string, "small"))
2193 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2194 else if (!strcmp (ix86_cmodel_string, "medium"))
2195 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2196 else if (!strcmp (ix86_cmodel_string, "large"))
2197 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2198 else if (flag_pic)
2199 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2200 else if (!strcmp (ix86_cmodel_string, "32"))
2201 ix86_cmodel = CM_32;
2202 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2203 ix86_cmodel = CM_KERNEL;
2204 else
2205 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2207 else
2209 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
2210 use of rip-relative addressing. This eliminates fixups that
2211 would otherwise be needed if this object is to be placed in a
2212 DLL, and is essentially just as efficient as direct addressing. */
2213 if (TARGET_64BIT_MS_ABI)
2214 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2215 else if (TARGET_64BIT)
2216 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2217 else
2218 ix86_cmodel = CM_32;
2220 if (ix86_asm_string != 0)
2222 if (! TARGET_MACHO
2223 && !strcmp (ix86_asm_string, "intel"))
2224 ix86_asm_dialect = ASM_INTEL;
2225 else if (!strcmp (ix86_asm_string, "att"))
2226 ix86_asm_dialect = ASM_ATT;
2227 else
2228 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2230 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2231 error ("code model %qs not supported in the %s bit mode",
2232 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2233 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2234 sorry ("%i-bit mode not compiled in",
2235 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2237 for (i = 0; i < pta_size; i++)
2238 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2240 ix86_arch = processor_alias_table[i].processor;
2241 /* Default cpu tuning to the architecture. */
2242 ix86_tune = ix86_arch;
2244 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2245 error ("CPU you selected does not support x86-64 "
2246 "instruction set");
2248 if (processor_alias_table[i].flags & PTA_MMX
2249 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2250 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2251 if (processor_alias_table[i].flags & PTA_3DNOW
2252 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2253 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2254 if (processor_alias_table[i].flags & PTA_3DNOW_A
2255 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2256 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2257 if (processor_alias_table[i].flags & PTA_SSE
2258 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2259 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2260 if (processor_alias_table[i].flags & PTA_SSE2
2261 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2262 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2263 if (processor_alias_table[i].flags & PTA_SSE3
2264 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2265 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2266 if (processor_alias_table[i].flags & PTA_SSSE3
2267 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2268 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2269 if (processor_alias_table[i].flags & PTA_SSE4_1
2270 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2271 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2272 if (processor_alias_table[i].flags & PTA_SSE4_2
2273 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2274 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2275 if (processor_alias_table[i].flags & PTA_SSE4A
2276 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2277 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2278 if (processor_alias_table[i].flags & PTA_SSE5
2279 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2280 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2282 if (processor_alias_table[i].flags & PTA_ABM)
2283 x86_abm = true;
2284 if (processor_alias_table[i].flags & PTA_CX16)
2285 x86_cmpxchg16b = true;
2286 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2287 x86_popcnt = true;
2288 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2289 x86_prefetch_sse = true;
2290 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2291 x86_sahf = true;
2293 break;
2296 if (i == pta_size)
2297 error ("bad value (%s) for -march= switch", ix86_arch_string);
2299 ix86_arch_mask = 1u << ix86_arch;
2300 for (i = 0; i < X86_ARCH_LAST; ++i)
2301 ix86_arch_features[i] &= ix86_arch_mask;
2303 for (i = 0; i < pta_size; i++)
2304 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2306 ix86_tune = processor_alias_table[i].processor;
2307 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2309 if (ix86_tune_defaulted)
2311 ix86_tune_string = "x86-64";
2312 for (i = 0; i < pta_size; i++)
2313 if (! strcmp (ix86_tune_string,
2314 processor_alias_table[i].name))
2315 break;
2316 ix86_tune = processor_alias_table[i].processor;
2318 else
2319 error ("CPU you selected does not support x86-64 "
2320 "instruction set");
2322 /* Intel CPUs have always interpreted SSE prefetch instructions as
2323 NOPs; so, we can enable SSE prefetch instructions even when
2324 -mtune (rather than -march) points us to a processor that has them.
2325 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2326 higher processors. */
2327 if (TARGET_CMOVE
2328 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2329 x86_prefetch_sse = true;
2330 break;
2332 if (i == pta_size)
2333 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2335 ix86_tune_mask = 1u << ix86_tune;
2336 for (i = 0; i < X86_TUNE_LAST; ++i)
2337 ix86_tune_features[i] &= ix86_tune_mask;
2339 if (optimize_size)
2340 ix86_cost = &size_cost;
2341 else
2342 ix86_cost = processor_target_table[ix86_tune].cost;
2344 /* Arrange to set up i386_stack_locals for all functions. */
2345 init_machine_status = ix86_init_machine_status;
2347 /* Validate -mregparm= value. */
2348 if (ix86_regparm_string)
2350 if (TARGET_64BIT)
2351 warning (0, "-mregparm is ignored in 64-bit mode");
2352 i = atoi (ix86_regparm_string);
2353 if (i < 0 || i > REGPARM_MAX)
2354 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2355 else
2356 ix86_regparm = i;
2358 if (TARGET_64BIT)
2359 ix86_regparm = REGPARM_MAX;
2361 /* If the user has provided any of the -malign-* options,
2362 warn and use that value only if -falign-* is not set.
2363 Remove this code in GCC 3.2 or later. */
2364 if (ix86_align_loops_string)
2366 warning (0, "-malign-loops is obsolete, use -falign-loops");
2367 if (align_loops == 0)
2369 i = atoi (ix86_align_loops_string);
2370 if (i < 0 || i > MAX_CODE_ALIGN)
2371 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2372 else
2373 align_loops = 1 << i;
2377 if (ix86_align_jumps_string)
2379 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2380 if (align_jumps == 0)
2382 i = atoi (ix86_align_jumps_string);
2383 if (i < 0 || i > MAX_CODE_ALIGN)
2384 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2385 else
2386 align_jumps = 1 << i;
2390 if (ix86_align_funcs_string)
2392 warning (0, "-malign-functions is obsolete, use -falign-functions");
2393 if (align_functions == 0)
2395 i = atoi (ix86_align_funcs_string);
2396 if (i < 0 || i > MAX_CODE_ALIGN)
2397 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2398 else
2399 align_functions = 1 << i;
2403 /* Default align_* from the processor table. */
2404 if (align_loops == 0)
2406 align_loops = processor_target_table[ix86_tune].align_loop;
2407 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2409 if (align_jumps == 0)
2411 align_jumps = processor_target_table[ix86_tune].align_jump;
2412 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2414 if (align_functions == 0)
2416 align_functions = processor_target_table[ix86_tune].align_func;
2419 /* Validate -mbranch-cost= value, or provide default. */
2420 ix86_branch_cost = ix86_cost->branch_cost;
2421 if (ix86_branch_cost_string)
2423 i = atoi (ix86_branch_cost_string);
2424 if (i < 0 || i > 5)
2425 error ("-mbranch-cost=%d is not between 0 and 5", i);
2426 else
2427 ix86_branch_cost = i;
2429 if (ix86_section_threshold_string)
2431 i = atoi (ix86_section_threshold_string);
2432 if (i < 0)
2433 error ("-mlarge-data-threshold=%d is negative", i);
2434 else
2435 ix86_section_threshold = i;
2438 if (ix86_tls_dialect_string)
2440 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2441 ix86_tls_dialect = TLS_DIALECT_GNU;
2442 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2443 ix86_tls_dialect = TLS_DIALECT_GNU2;
2444 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2445 ix86_tls_dialect = TLS_DIALECT_SUN;
2446 else
2447 error ("bad value (%s) for -mtls-dialect= switch",
2448 ix86_tls_dialect_string);
2451 if (ix87_precision_string)
2453 i = atoi (ix87_precision_string);
2454 if (i != 32 && i != 64 && i != 80)
2455 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2458 if (TARGET_64BIT)
2460 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2462 /* Enable by default the SSE and MMX builtins. Do allow the user to
2463 explicitly disable any of these. In particular, disabling SSE and
2464 MMX for kernel code is extremely useful. */
2465 if (!ix86_arch_specified)
2466 ix86_isa_flags
2467 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2468 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2470 if (TARGET_RTD)
2471 warning (0, "-mrtd is ignored in 64bit mode");
2473 else
2475 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2477 if (!ix86_arch_specified)
2478 ix86_isa_flags
2479 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2481 /* i386 ABI does not specify red zone. It still makes sense to use it
2482 when programmer takes care to stack from being destroyed. */
2483 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2484 target_flags |= MASK_NO_RED_ZONE;
2487 /* Keep nonleaf frame pointers. */
2488 if (flag_omit_frame_pointer)
2489 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2490 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2491 flag_omit_frame_pointer = 1;
2493 /* If we're doing fast math, we don't care about comparison order
2494 wrt NaNs. This lets us use a shorter comparison sequence. */
2495 if (flag_finite_math_only)
2496 target_flags &= ~MASK_IEEE_FP;
2498 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2499 since the insns won't need emulation. */
2500 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2501 target_flags &= ~MASK_NO_FANCY_MATH_387;
2503 /* Likewise, if the target doesn't have a 387, or we've specified
2504 software floating point, don't use 387 inline intrinsics. */
2505 if (!TARGET_80387)
2506 target_flags |= MASK_NO_FANCY_MATH_387;
2508 /* Turn on SSE4A bultins for -msse5. */
2509 if (TARGET_SSE5)
2510 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2512 /* Turn on SSE4.1 builtins for -msse4.2. */
2513 if (TARGET_SSE4_2)
2514 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2516 /* Turn on SSSE3 builtins for -msse4.1. */
2517 if (TARGET_SSE4_1)
2518 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2520 /* Turn on SSE3 builtins for -mssse3. */
2521 if (TARGET_SSSE3)
2522 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2524 /* Turn on SSE3 builtins for -msse4a. */
2525 if (TARGET_SSE4A)
2526 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2528 /* Turn on SSE2 builtins for -msse3. */
2529 if (TARGET_SSE3)
2530 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2532 /* Turn on SSE builtins for -msse2. */
2533 if (TARGET_SSE2)
2534 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2536 /* Turn on MMX builtins for -msse. */
2537 if (TARGET_SSE)
2539 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2540 x86_prefetch_sse = true;
2543 /* Turn on MMX builtins for 3Dnow. */
2544 if (TARGET_3DNOW)
2545 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2547 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2548 if (TARGET_SSE4_2 || TARGET_ABM)
2549 x86_popcnt = true;
2551 /* Validate -mpreferred-stack-boundary= value, or provide default.
2552 The default of 128 bits is for Pentium III's SSE __m128. We can't
2553 change it because of optimize_size. Otherwise, we can't mix object
2554 files compiled with -Os and -On. */
2555 ix86_preferred_stack_boundary = 128;
2556 if (ix86_preferred_stack_boundary_string)
2558 i = atoi (ix86_preferred_stack_boundary_string);
2559 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2560 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2561 TARGET_64BIT ? 4 : 2);
2562 else
2563 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2566 /* Accept -msseregparm only if at least SSE support is enabled. */
2567 if (TARGET_SSEREGPARM
2568 && ! TARGET_SSE)
2569 error ("-msseregparm used without SSE enabled");
2571 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2572 if (ix86_fpmath_string != 0)
2574 if (! strcmp (ix86_fpmath_string, "387"))
2575 ix86_fpmath = FPMATH_387;
2576 else if (! strcmp (ix86_fpmath_string, "sse"))
2578 if (!TARGET_SSE)
2580 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2581 ix86_fpmath = FPMATH_387;
2583 else
2584 ix86_fpmath = FPMATH_SSE;
2586 else if (! strcmp (ix86_fpmath_string, "387,sse")
2587 || ! strcmp (ix86_fpmath_string, "sse,387"))
2589 if (!TARGET_SSE)
2591 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2592 ix86_fpmath = FPMATH_387;
2594 else if (!TARGET_80387)
2596 warning (0, "387 instruction set disabled, using SSE arithmetics");
2597 ix86_fpmath = FPMATH_SSE;
2599 else
2600 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2602 else
2603 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2606 /* If the i387 is disabled, then do not return values in it. */
2607 if (!TARGET_80387)
2608 target_flags &= ~MASK_FLOAT_RETURNS;
2610 /* Use external vectorized library in vectorizing intrinsics. */
2611 if (ix86_veclibabi_string)
2613 if (strcmp (ix86_veclibabi_string, "acml") == 0)
2614 ix86_veclib_handler = ix86_veclibabi_acml;
2615 else
2616 error ("unknown vectorization library ABI type (%s) for "
2617 "-mveclibabi= switch", ix86_veclibabi_string);
2620 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2621 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2622 && !optimize_size)
2623 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2625 /* ??? Unwind info is not correct around the CFG unless either a frame
2626 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2627 unwind info generation to be aware of the CFG and propagating states
2628 around edges. */
2629 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2630 || flag_exceptions || flag_non_call_exceptions)
2631 && flag_omit_frame_pointer
2632 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2634 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2635 warning (0, "unwind tables currently require either a frame pointer "
2636 "or -maccumulate-outgoing-args for correctness");
2637 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2640 /* For sane SSE instruction set generation we need fcomi instruction.
2641 It is safe to enable all CMOVE instructions. */
2642 if (TARGET_SSE)
2643 TARGET_CMOVE = 1;
2645 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2647 char *p;
2648 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2649 p = strchr (internal_label_prefix, 'X');
2650 internal_label_prefix_len = p - internal_label_prefix;
2651 *p = '\0';
2654 /* When scheduling description is not available, disable scheduler pass
2655 so it won't slow down the compilation and make x87 code slower. */
2656 if (!TARGET_SCHEDULE)
2657 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2659 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2660 set_param_value ("simultaneous-prefetches",
2661 ix86_cost->simultaneous_prefetches);
2662 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2663 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2664 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
2665 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
2666 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
2667 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
2670 /* Return true if this goes in large data/bss. */
2672 static bool
2673 ix86_in_large_data_p (tree exp)
2675 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2676 return false;
2678 /* Functions are never large data. */
2679 if (TREE_CODE (exp) == FUNCTION_DECL)
2680 return false;
2682 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2684 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2685 if (strcmp (section, ".ldata") == 0
2686 || strcmp (section, ".lbss") == 0)
2687 return true;
2688 return false;
2690 else
2692 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2694 /* If this is an incomplete type with size 0, then we can't put it
2695 in data because it might be too big when completed. */
2696 if (!size || size > ix86_section_threshold)
2697 return true;
2700 return false;
2703 /* Switch to the appropriate section for output of DECL.
2704 DECL is either a `VAR_DECL' node or a constant of some sort.
2705 RELOC indicates whether forming the initial value of DECL requires
2706 link-time relocations. */
2708 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2709 ATTRIBUTE_UNUSED;
2711 static section *
2712 x86_64_elf_select_section (tree decl, int reloc,
2713 unsigned HOST_WIDE_INT align)
2715 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2716 && ix86_in_large_data_p (decl))
2718 const char *sname = NULL;
2719 unsigned int flags = SECTION_WRITE;
2720 switch (categorize_decl_for_section (decl, reloc))
2722 case SECCAT_DATA:
2723 sname = ".ldata";
2724 break;
2725 case SECCAT_DATA_REL:
2726 sname = ".ldata.rel";
2727 break;
2728 case SECCAT_DATA_REL_LOCAL:
2729 sname = ".ldata.rel.local";
2730 break;
2731 case SECCAT_DATA_REL_RO:
2732 sname = ".ldata.rel.ro";
2733 break;
2734 case SECCAT_DATA_REL_RO_LOCAL:
2735 sname = ".ldata.rel.ro.local";
2736 break;
2737 case SECCAT_BSS:
2738 sname = ".lbss";
2739 flags |= SECTION_BSS;
2740 break;
2741 case SECCAT_RODATA:
2742 case SECCAT_RODATA_MERGE_STR:
2743 case SECCAT_RODATA_MERGE_STR_INIT:
2744 case SECCAT_RODATA_MERGE_CONST:
2745 sname = ".lrodata";
2746 flags = 0;
2747 break;
2748 case SECCAT_SRODATA:
2749 case SECCAT_SDATA:
2750 case SECCAT_SBSS:
2751 gcc_unreachable ();
2752 case SECCAT_TEXT:
2753 case SECCAT_TDATA:
2754 case SECCAT_TBSS:
2755 /* We don't split these for medium model. Place them into
2756 default sections and hope for best. */
2757 break;
2759 if (sname)
2761 /* We might get called with string constants, but get_named_section
2762 doesn't like them as they are not DECLs. Also, we need to set
2763 flags in that case. */
2764 if (!DECL_P (decl))
2765 return get_section (sname, flags, NULL);
2766 return get_named_section (decl, sname, reloc);
2769 return default_elf_select_section (decl, reloc, align);
2772 /* Build up a unique section name, expressed as a
2773 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2774 RELOC indicates whether the initial value of EXP requires
2775 link-time relocations. */
2777 static void ATTRIBUTE_UNUSED
2778 x86_64_elf_unique_section (tree decl, int reloc)
2780 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2781 && ix86_in_large_data_p (decl))
2783 const char *prefix = NULL;
2784 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2785 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2787 switch (categorize_decl_for_section (decl, reloc))
2789 case SECCAT_DATA:
2790 case SECCAT_DATA_REL:
2791 case SECCAT_DATA_REL_LOCAL:
2792 case SECCAT_DATA_REL_RO:
2793 case SECCAT_DATA_REL_RO_LOCAL:
2794 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2795 break;
2796 case SECCAT_BSS:
2797 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2798 break;
2799 case SECCAT_RODATA:
2800 case SECCAT_RODATA_MERGE_STR:
2801 case SECCAT_RODATA_MERGE_STR_INIT:
2802 case SECCAT_RODATA_MERGE_CONST:
2803 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2804 break;
2805 case SECCAT_SRODATA:
2806 case SECCAT_SDATA:
2807 case SECCAT_SBSS:
2808 gcc_unreachable ();
2809 case SECCAT_TEXT:
2810 case SECCAT_TDATA:
2811 case SECCAT_TBSS:
2812 /* We don't split these for medium model. Place them into
2813 default sections and hope for best. */
2814 break;
2816 if (prefix)
2818 const char *name;
2819 size_t nlen, plen;
2820 char *string;
2821 plen = strlen (prefix);
2823 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2824 name = targetm.strip_name_encoding (name);
2825 nlen = strlen (name);
2827 string = (char *) alloca (nlen + plen + 1);
2828 memcpy (string, prefix, plen);
2829 memcpy (string + plen, name, nlen + 1);
2831 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2832 return;
2835 default_unique_section (decl, reloc);
2838 #ifdef COMMON_ASM_OP
2839 /* This says how to output assembler code to declare an
2840 uninitialized external linkage data object.
2842 For medium model x86-64 we need to use .largecomm opcode for
2843 large objects. */
2844 void
2845 x86_elf_aligned_common (FILE *file,
2846 const char *name, unsigned HOST_WIDE_INT size,
2847 int align)
2849 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2850 && size > (unsigned int)ix86_section_threshold)
2851 fprintf (file, ".largecomm\t");
2852 else
2853 fprintf (file, "%s", COMMON_ASM_OP);
2854 assemble_name (file, name);
2855 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2856 size, align / BITS_PER_UNIT);
2858 #endif
2860 /* Utility function for targets to use in implementing
2861 ASM_OUTPUT_ALIGNED_BSS. */
2863 void
2864 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2865 const char *name, unsigned HOST_WIDE_INT size,
2866 int align)
2868 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2869 && size > (unsigned int)ix86_section_threshold)
2870 switch_to_section (get_named_section (decl, ".lbss", 0));
2871 else
2872 switch_to_section (bss_section);
2873 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2874 #ifdef ASM_DECLARE_OBJECT_NAME
2875 last_assemble_variable_decl = decl;
2876 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2877 #else
2878 /* Standard thing is just output label for the object. */
2879 ASM_OUTPUT_LABEL (file, name);
2880 #endif /* ASM_DECLARE_OBJECT_NAME */
2881 ASM_OUTPUT_SKIP (file, size ? size : 1);
2884 void
2885 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2887 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2888 make the problem with not enough registers even worse. */
2889 #ifdef INSN_SCHEDULING
2890 if (level > 1)
2891 flag_schedule_insns = 0;
2892 #endif
2894 if (TARGET_MACHO)
2895 /* The Darwin libraries never set errno, so we might as well
2896 avoid calling them when that's the only reason we would. */
2897 flag_errno_math = 0;
2899 /* The default values of these switches depend on the TARGET_64BIT
2900 that is not known at this moment. Mark these values with 2 and
2901 let user the to override these. In case there is no command line option
2902 specifying them, we will set the defaults in override_options. */
2903 if (optimize >= 1)
2904 flag_omit_frame_pointer = 2;
2905 flag_pcc_struct_return = 2;
2906 flag_asynchronous_unwind_tables = 2;
2907 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2908 SUBTARGET_OPTIMIZATION_OPTIONS;
2909 #endif
2912 /* Decide whether we can make a sibling call to a function. DECL is the
2913 declaration of the function being targeted by the call and EXP is the
2914 CALL_EXPR representing the call. */
2916 static bool
2917 ix86_function_ok_for_sibcall (tree decl, tree exp)
2919 tree func;
2920 rtx a, b;
2922 /* If we are generating position-independent code, we cannot sibcall
2923 optimize any indirect call, or a direct call to a global function,
2924 as the PLT requires %ebx be live. */
2925 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2926 return false;
2928 if (decl)
2929 func = decl;
2930 else
2932 func = TREE_TYPE (CALL_EXPR_FN (exp));
2933 if (POINTER_TYPE_P (func))
2934 func = TREE_TYPE (func);
2937 /* Check that the return value locations are the same. Like
2938 if we are returning floats on the 80387 register stack, we cannot
2939 make a sibcall from a function that doesn't return a float to a
2940 function that does or, conversely, from a function that does return
2941 a float to a function that doesn't; the necessary stack adjustment
2942 would not be executed. This is also the place we notice
2943 differences in the return value ABI. Note that it is ok for one
2944 of the functions to have void return type as long as the return
2945 value of the other is passed in a register. */
2946 a = ix86_function_value (TREE_TYPE (exp), func, false);
2947 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2948 cfun->decl, false);
2949 if (STACK_REG_P (a) || STACK_REG_P (b))
2951 if (!rtx_equal_p (a, b))
2952 return false;
2954 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2956 else if (!rtx_equal_p (a, b))
2957 return false;
2959 /* If this call is indirect, we'll need to be able to use a call-clobbered
2960 register for the address of the target function. Make sure that all
2961 such registers are not used for passing parameters. */
2962 if (!decl && !TARGET_64BIT)
2964 tree type;
2966 /* We're looking at the CALL_EXPR, we need the type of the function. */
2967 type = CALL_EXPR_FN (exp); /* pointer expression */
2968 type = TREE_TYPE (type); /* pointer type */
2969 type = TREE_TYPE (type); /* function type */
2971 if (ix86_function_regparm (type, NULL) >= 3)
2973 /* ??? Need to count the actual number of registers to be used,
2974 not the possible number of registers. Fix later. */
2975 return false;
2979 /* Dllimport'd functions are also called indirectly. */
2980 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
2981 && decl && DECL_DLLIMPORT_P (decl)
2982 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2983 return false;
2985 /* If we forced aligned the stack, then sibcalling would unalign the
2986 stack, which may break the called function. */
2987 if (cfun->machine->force_align_arg_pointer)
2988 return false;
2990 /* Otherwise okay. That also includes certain types of indirect calls. */
2991 return true;
2994 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2995 calling convention attributes;
2996 arguments as in struct attribute_spec.handler. */
2998 static tree
2999 ix86_handle_cconv_attribute (tree *node, tree name,
3000 tree args,
3001 int flags ATTRIBUTE_UNUSED,
3002 bool *no_add_attrs)
3004 if (TREE_CODE (*node) != FUNCTION_TYPE
3005 && TREE_CODE (*node) != METHOD_TYPE
3006 && TREE_CODE (*node) != FIELD_DECL
3007 && TREE_CODE (*node) != TYPE_DECL)
3009 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3010 IDENTIFIER_POINTER (name));
3011 *no_add_attrs = true;
3012 return NULL_TREE;
3015 /* Can combine regparm with all attributes but fastcall. */
3016 if (is_attribute_p ("regparm", name))
3018 tree cst;
3020 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3022 error ("fastcall and regparm attributes are not compatible");
3025 cst = TREE_VALUE (args);
3026 if (TREE_CODE (cst) != INTEGER_CST)
3028 warning (OPT_Wattributes,
3029 "%qs attribute requires an integer constant argument",
3030 IDENTIFIER_POINTER (name));
3031 *no_add_attrs = true;
3033 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
3035 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
3036 IDENTIFIER_POINTER (name), REGPARM_MAX);
3037 *no_add_attrs = true;
3040 if (!TARGET_64BIT
3041 && lookup_attribute (ix86_force_align_arg_pointer_string,
3042 TYPE_ATTRIBUTES (*node))
3043 && compare_tree_int (cst, REGPARM_MAX-1))
3045 error ("%s functions limited to %d register parameters",
3046 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
3049 return NULL_TREE;
3052 if (TARGET_64BIT)
3054 /* Do not warn when emulating the MS ABI. */
3055 if (!TARGET_64BIT_MS_ABI)
3056 warning (OPT_Wattributes, "%qs attribute ignored",
3057 IDENTIFIER_POINTER (name));
3058 *no_add_attrs = true;
3059 return NULL_TREE;
3062 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
3063 if (is_attribute_p ("fastcall", name))
3065 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3067 error ("fastcall and cdecl attributes are not compatible");
3069 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3071 error ("fastcall and stdcall attributes are not compatible");
3073 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
3075 error ("fastcall and regparm attributes are not compatible");
3079 /* Can combine stdcall with fastcall (redundant), regparm and
3080 sseregparm. */
3081 else if (is_attribute_p ("stdcall", name))
3083 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3085 error ("stdcall and cdecl attributes are not compatible");
3087 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3089 error ("stdcall and fastcall attributes are not compatible");
3093 /* Can combine cdecl with regparm and sseregparm. */
3094 else if (is_attribute_p ("cdecl", name))
3096 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3098 error ("stdcall and cdecl attributes are not compatible");
3100 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3102 error ("fastcall and cdecl attributes are not compatible");
3106 /* Can combine sseregparm with all attributes. */
3108 return NULL_TREE;
3111 /* Return 0 if the attributes for two types are incompatible, 1 if they
3112 are compatible, and 2 if they are nearly compatible (which causes a
3113 warning to be generated). */
3115 static int
3116 ix86_comp_type_attributes (const_tree type1, const_tree type2)
3118 /* Check for mismatch of non-default calling convention. */
3119 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
3121 if (TREE_CODE (type1) != FUNCTION_TYPE
3122 && TREE_CODE (type1) != METHOD_TYPE)
3123 return 1;
3125 /* Check for mismatched fastcall/regparm types. */
3126 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
3127 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
3128 || (ix86_function_regparm (type1, NULL)
3129 != ix86_function_regparm (type2, NULL)))
3130 return 0;
3132 /* Check for mismatched sseregparm types. */
3133 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
3134 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
3135 return 0;
3137 /* Check for mismatched return types (cdecl vs stdcall). */
3138 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
3139 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
3140 return 0;
3142 return 1;
3145 /* Return the regparm value for a function with the indicated TYPE and DECL.
3146 DECL may be NULL when calling function indirectly
3147 or considering a libcall. */
3149 static int
3150 ix86_function_regparm (const_tree type, const_tree decl)
3152 tree attr;
3153 int regparm = ix86_regparm;
3155 if (TARGET_64BIT)
3156 return regparm;
3158 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
3159 if (attr)
3160 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
3162 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
3163 return 2;
3165 /* Use register calling convention for local functions when possible. */
3166 if (decl && TREE_CODE (decl) == FUNCTION_DECL
3167 && flag_unit_at_a_time && !profile_flag)
3169 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3170 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3171 if (i && i->local)
3173 int local_regparm, globals = 0, regno;
3174 struct function *f;
3176 /* Make sure no regparm register is taken by a
3177 global register variable. */
3178 for (local_regparm = 0; local_regparm < 3; local_regparm++)
3179 if (global_regs[local_regparm])
3180 break;
3182 /* We can't use regparm(3) for nested functions as these use
3183 static chain pointer in third argument. */
3184 if (local_regparm == 3
3185 && (decl_function_context (decl)
3186 || ix86_force_align_arg_pointer)
3187 && !DECL_NO_STATIC_CHAIN (decl))
3188 local_regparm = 2;
3190 /* If the function realigns its stackpointer, the prologue will
3191 clobber %ecx. If we've already generated code for the callee,
3192 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3193 scanning the attributes for the self-realigning property. */
3194 f = DECL_STRUCT_FUNCTION (decl);
3195 if (local_regparm == 3
3196 && (f ? !!f->machine->force_align_arg_pointer
3197 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
3198 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3199 local_regparm = 2;
3201 /* Each global register variable increases register preassure,
3202 so the more global reg vars there are, the smaller regparm
3203 optimization use, unless requested by the user explicitly. */
3204 for (regno = 0; regno < 6; regno++)
3205 if (global_regs[regno])
3206 globals++;
3207 local_regparm
3208 = globals < local_regparm ? local_regparm - globals : 0;
3210 if (local_regparm > regparm)
3211 regparm = local_regparm;
3215 return regparm;
3218 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3219 DFmode (2) arguments in SSE registers for a function with the
3220 indicated TYPE and DECL. DECL may be NULL when calling function
3221 indirectly or considering a libcall. Otherwise return 0. */
3223 static int
3224 ix86_function_sseregparm (const_tree type, const_tree decl)
3226 gcc_assert (!TARGET_64BIT);
3228 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3229 by the sseregparm attribute. */
3230 if (TARGET_SSEREGPARM
3231 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
3233 if (!TARGET_SSE)
3235 if (decl)
3236 error ("Calling %qD with attribute sseregparm without "
3237 "SSE/SSE2 enabled", decl);
3238 else
3239 error ("Calling %qT with attribute sseregparm without "
3240 "SSE/SSE2 enabled", type);
3241 return 0;
3244 return 2;
3247 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3248 (and DFmode for SSE2) arguments in SSE registers. */
3249 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3251 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3252 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3253 if (i && i->local)
3254 return TARGET_SSE2 ? 2 : 1;
3257 return 0;
3260 /* Return true if EAX is live at the start of the function. Used by
3261 ix86_expand_prologue to determine if we need special help before
3262 calling allocate_stack_worker. */
3264 static bool
3265 ix86_eax_live_at_start_p (void)
3267 /* Cheat. Don't bother working forward from ix86_function_regparm
3268 to the function type to whether an actual argument is located in
3269 eax. Instead just look at cfg info, which is still close enough
3270 to correct at this point. This gives false positives for broken
3271 functions that might use uninitialized data that happens to be
3272 allocated in eax, but who cares? */
3273 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
3276 /* Value is the number of bytes of arguments automatically
3277 popped when returning from a subroutine call.
3278 FUNDECL is the declaration node of the function (as a tree),
3279 FUNTYPE is the data type of the function (as a tree),
3280 or for a library call it is an identifier node for the subroutine name.
3281 SIZE is the number of bytes of arguments passed on the stack.
3283 On the 80386, the RTD insn may be used to pop them if the number
3284 of args is fixed, but if the number is variable then the caller
3285 must pop them all. RTD can't be used for library calls now
3286 because the library is compiled with the Unix compiler.
3287 Use of RTD is a selectable option, since it is incompatible with
3288 standard Unix calling sequences. If the option is not selected,
3289 the caller must always pop the args.
3291 The attribute stdcall is equivalent to RTD on a per module basis. */
3294 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3296 int rtd;
3298 /* None of the 64-bit ABIs pop arguments. */
3299 if (TARGET_64BIT)
3300 return 0;
3302 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3304 /* Cdecl functions override -mrtd, and never pop the stack. */
3305 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3307 /* Stdcall and fastcall functions will pop the stack if not
3308 variable args. */
3309 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3310 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3311 rtd = 1;
3313 if (rtd && ! stdarg_p (funtype))
3314 return size;
3317 /* Lose any fake structure return argument if it is passed on the stack. */
3318 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3319 && !KEEP_AGGREGATE_RETURN_POINTER)
3321 int nregs = ix86_function_regparm (funtype, fundecl);
3322 if (nregs == 0)
3323 return GET_MODE_SIZE (Pmode);
3326 return 0;
3329 /* Argument support functions. */
3331 /* Return true when register may be used to pass function parameters. */
3332 bool
3333 ix86_function_arg_regno_p (int regno)
3335 int i;
3336 const int *parm_regs;
3338 if (!TARGET_64BIT)
3340 if (TARGET_MACHO)
3341 return (regno < REGPARM_MAX
3342 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3343 else
3344 return (regno < REGPARM_MAX
3345 || (TARGET_MMX && MMX_REGNO_P (regno)
3346 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3347 || (TARGET_SSE && SSE_REGNO_P (regno)
3348 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3351 if (TARGET_MACHO)
3353 if (SSE_REGNO_P (regno) && TARGET_SSE)
3354 return true;
3356 else
3358 if (TARGET_SSE && SSE_REGNO_P (regno)
3359 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3360 return true;
3363 /* RAX is used as hidden argument to va_arg functions. */
3364 if (!TARGET_64BIT_MS_ABI && regno == 0)
3365 return true;
3367 if (TARGET_64BIT_MS_ABI)
3368 parm_regs = x86_64_ms_abi_int_parameter_registers;
3369 else
3370 parm_regs = x86_64_int_parameter_registers;
3371 for (i = 0; i < REGPARM_MAX; i++)
3372 if (regno == parm_regs[i])
3373 return true;
3374 return false;
3377 /* Return if we do not know how to pass TYPE solely in registers. */
3379 static bool
3380 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
3382 if (must_pass_in_stack_var_size_or_pad (mode, type))
3383 return true;
3385 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3386 The layout_type routine is crafty and tries to trick us into passing
3387 currently unsupported vector types on the stack by using TImode. */
3388 return (!TARGET_64BIT && mode == TImode
3389 && type && TREE_CODE (type) != VECTOR_TYPE);
3392 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3393 for a call to a function whose data type is FNTYPE.
3394 For a library call, FNTYPE is 0. */
3396 void
3397 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3398 tree fntype, /* tree ptr for function decl */
3399 rtx libname, /* SYMBOL_REF of library name or 0 */
3400 tree fndecl)
3402 memset (cum, 0, sizeof (*cum));
3404 /* Set up the number of registers to use for passing arguments. */
3405 cum->nregs = ix86_regparm;
3406 if (TARGET_SSE)
3407 cum->sse_nregs = SSE_REGPARM_MAX;
3408 if (TARGET_MMX)
3409 cum->mmx_nregs = MMX_REGPARM_MAX;
3410 cum->warn_sse = true;
3411 cum->warn_mmx = true;
3412 cum->maybe_vaarg = (fntype
3413 ? (!prototype_p (fntype) || stdarg_p (fntype))
3414 : !libname);
3416 if (!TARGET_64BIT)
3418 /* If there are variable arguments, then we won't pass anything
3419 in registers in 32-bit mode. */
3420 if (cum->maybe_vaarg)
3422 cum->nregs = 0;
3423 cum->sse_nregs = 0;
3424 cum->mmx_nregs = 0;
3425 cum->warn_sse = 0;
3426 cum->warn_mmx = 0;
3427 return;
3430 /* Use ecx and edx registers if function has fastcall attribute,
3431 else look for regparm information. */
3432 if (fntype)
3434 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3436 cum->nregs = 2;
3437 cum->fastcall = 1;
3439 else
3440 cum->nregs = ix86_function_regparm (fntype, fndecl);
3443 /* Set up the number of SSE registers used for passing SFmode
3444 and DFmode arguments. Warn for mismatching ABI. */
3445 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3449 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3450 But in the case of vector types, it is some vector mode.
3452 When we have only some of our vector isa extensions enabled, then there
3453 are some modes for which vector_mode_supported_p is false. For these
3454 modes, the generic vector support in gcc will choose some non-vector mode
3455 in order to implement the type. By computing the natural mode, we'll
3456 select the proper ABI location for the operand and not depend on whatever
3457 the middle-end decides to do with these vector types. */
3459 static enum machine_mode
3460 type_natural_mode (const_tree type)
3462 enum machine_mode mode = TYPE_MODE (type);
3464 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3466 HOST_WIDE_INT size = int_size_in_bytes (type);
3467 if ((size == 8 || size == 16)
3468 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3469 && TYPE_VECTOR_SUBPARTS (type) > 1)
3471 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3473 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3474 mode = MIN_MODE_VECTOR_FLOAT;
3475 else
3476 mode = MIN_MODE_VECTOR_INT;
3478 /* Get the mode which has this inner mode and number of units. */
3479 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3480 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3481 && GET_MODE_INNER (mode) == innermode)
3482 return mode;
3484 gcc_unreachable ();
3488 return mode;
3491 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3492 this may not agree with the mode that the type system has chosen for the
3493 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3494 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3496 static rtx
3497 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3498 unsigned int regno)
3500 rtx tmp;
3502 if (orig_mode != BLKmode)
3503 tmp = gen_rtx_REG (orig_mode, regno);
3504 else
3506 tmp = gen_rtx_REG (mode, regno);
3507 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3508 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3511 return tmp;
3514 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3515 of this code is to classify each 8bytes of incoming argument by the register
3516 class and assign registers accordingly. */
3518 /* Return the union class of CLASS1 and CLASS2.
3519 See the x86-64 PS ABI for details. */
3521 static enum x86_64_reg_class
3522 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3524 /* Rule #1: If both classes are equal, this is the resulting class. */
3525 if (class1 == class2)
3526 return class1;
3528 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3529 the other class. */
3530 if (class1 == X86_64_NO_CLASS)
3531 return class2;
3532 if (class2 == X86_64_NO_CLASS)
3533 return class1;
3535 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3536 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3537 return X86_64_MEMORY_CLASS;
3539 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3540 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3541 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3542 return X86_64_INTEGERSI_CLASS;
3543 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3544 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3545 return X86_64_INTEGER_CLASS;
3547 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3548 MEMORY is used. */
3549 if (class1 == X86_64_X87_CLASS
3550 || class1 == X86_64_X87UP_CLASS
3551 || class1 == X86_64_COMPLEX_X87_CLASS
3552 || class2 == X86_64_X87_CLASS
3553 || class2 == X86_64_X87UP_CLASS
3554 || class2 == X86_64_COMPLEX_X87_CLASS)
3555 return X86_64_MEMORY_CLASS;
3557 /* Rule #6: Otherwise class SSE is used. */
3558 return X86_64_SSE_CLASS;
3561 /* Classify the argument of type TYPE and mode MODE.
3562 CLASSES will be filled by the register class used to pass each word
3563 of the operand. The number of words is returned. In case the parameter
3564 should be passed in memory, 0 is returned. As a special case for zero
3565 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3567 BIT_OFFSET is used internally for handling records and specifies offset
3568 of the offset in bits modulo 256 to avoid overflow cases.
3570 See the x86-64 PS ABI for details.
3573 static int
3574 classify_argument (enum machine_mode mode, const_tree type,
3575 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3577 HOST_WIDE_INT bytes =
3578 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3579 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3581 /* Variable sized entities are always passed/returned in memory. */
3582 if (bytes < 0)
3583 return 0;
3585 if (mode != VOIDmode
3586 && targetm.calls.must_pass_in_stack (mode, type))
3587 return 0;
3589 if (type && AGGREGATE_TYPE_P (type))
3591 int i;
3592 tree field;
3593 enum x86_64_reg_class subclasses[MAX_CLASSES];
3595 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3596 if (bytes > 16)
3597 return 0;
3599 for (i = 0; i < words; i++)
3600 classes[i] = X86_64_NO_CLASS;
3602 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3603 signalize memory class, so handle it as special case. */
3604 if (!words)
3606 classes[0] = X86_64_NO_CLASS;
3607 return 1;
3610 /* Classify each field of record and merge classes. */
3611 switch (TREE_CODE (type))
3613 case RECORD_TYPE:
3614 /* And now merge the fields of structure. */
3615 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3617 if (TREE_CODE (field) == FIELD_DECL)
3619 int num;
3621 if (TREE_TYPE (field) == error_mark_node)
3622 continue;
3624 /* Bitfields are always classified as integer. Handle them
3625 early, since later code would consider them to be
3626 misaligned integers. */
3627 if (DECL_BIT_FIELD (field))
3629 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3630 i < ((int_bit_position (field) + (bit_offset % 64))
3631 + tree_low_cst (DECL_SIZE (field), 0)
3632 + 63) / 8 / 8; i++)
3633 classes[i] =
3634 merge_classes (X86_64_INTEGER_CLASS,
3635 classes[i]);
3637 else
3639 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3640 TREE_TYPE (field), subclasses,
3641 (int_bit_position (field)
3642 + bit_offset) % 256);
3643 if (!num)
3644 return 0;
3645 for (i = 0; i < num; i++)
3647 int pos =
3648 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3649 classes[i + pos] =
3650 merge_classes (subclasses[i], classes[i + pos]);
3655 break;
3657 case ARRAY_TYPE:
3658 /* Arrays are handled as small records. */
3660 int num;
3661 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3662 TREE_TYPE (type), subclasses, bit_offset);
3663 if (!num)
3664 return 0;
3666 /* The partial classes are now full classes. */
3667 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3668 subclasses[0] = X86_64_SSE_CLASS;
3669 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3670 subclasses[0] = X86_64_INTEGER_CLASS;
3672 for (i = 0; i < words; i++)
3673 classes[i] = subclasses[i % num];
3675 break;
3677 case UNION_TYPE:
3678 case QUAL_UNION_TYPE:
3679 /* Unions are similar to RECORD_TYPE but offset is always 0.
3681 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3683 if (TREE_CODE (field) == FIELD_DECL)
3685 int num;
3687 if (TREE_TYPE (field) == error_mark_node)
3688 continue;
3690 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3691 TREE_TYPE (field), subclasses,
3692 bit_offset);
3693 if (!num)
3694 return 0;
3695 for (i = 0; i < num; i++)
3696 classes[i] = merge_classes (subclasses[i], classes[i]);
3699 break;
3701 default:
3702 gcc_unreachable ();
3705 /* Final merger cleanup. */
3706 for (i = 0; i < words; i++)
3708 /* If one class is MEMORY, everything should be passed in
3709 memory. */
3710 if (classes[i] == X86_64_MEMORY_CLASS)
3711 return 0;
3713 /* The X86_64_SSEUP_CLASS should be always preceded by
3714 X86_64_SSE_CLASS. */
3715 if (classes[i] == X86_64_SSEUP_CLASS
3716 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3717 classes[i] = X86_64_SSE_CLASS;
3719 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3720 if (classes[i] == X86_64_X87UP_CLASS
3721 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3722 classes[i] = X86_64_SSE_CLASS;
3724 return words;
3727 /* Compute alignment needed. We align all types to natural boundaries with
3728 exception of XFmode that is aligned to 64bits. */
3729 if (mode != VOIDmode && mode != BLKmode)
3731 int mode_alignment = GET_MODE_BITSIZE (mode);
3733 if (mode == XFmode)
3734 mode_alignment = 128;
3735 else if (mode == XCmode)
3736 mode_alignment = 256;
3737 if (COMPLEX_MODE_P (mode))
3738 mode_alignment /= 2;
3739 /* Misaligned fields are always returned in memory. */
3740 if (bit_offset % mode_alignment)
3741 return 0;
3744 /* for V1xx modes, just use the base mode */
3745 if (VECTOR_MODE_P (mode)
3746 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3747 mode = GET_MODE_INNER (mode);
3749 /* Classification of atomic types. */
3750 switch (mode)
3752 case SDmode:
3753 case DDmode:
3754 classes[0] = X86_64_SSE_CLASS;
3755 return 1;
3756 case TDmode:
3757 classes[0] = X86_64_SSE_CLASS;
3758 classes[1] = X86_64_SSEUP_CLASS;
3759 return 2;
3760 case DImode:
3761 case SImode:
3762 case HImode:
3763 case QImode:
3764 case CSImode:
3765 case CHImode:
3766 case CQImode:
3767 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3768 classes[0] = X86_64_INTEGERSI_CLASS;
3769 else
3770 classes[0] = X86_64_INTEGER_CLASS;
3771 return 1;
3772 case CDImode:
3773 case TImode:
3774 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3775 return 2;
3776 case CTImode:
3777 return 0;
3778 case SFmode:
3779 if (!(bit_offset % 64))
3780 classes[0] = X86_64_SSESF_CLASS;
3781 else
3782 classes[0] = X86_64_SSE_CLASS;
3783 return 1;
3784 case DFmode:
3785 classes[0] = X86_64_SSEDF_CLASS;
3786 return 1;
3787 case XFmode:
3788 classes[0] = X86_64_X87_CLASS;
3789 classes[1] = X86_64_X87UP_CLASS;
3790 return 2;
3791 case TFmode:
3792 classes[0] = X86_64_SSE_CLASS;
3793 classes[1] = X86_64_SSEUP_CLASS;
3794 return 2;
3795 case SCmode:
3796 classes[0] = X86_64_SSE_CLASS;
3797 return 1;
3798 case DCmode:
3799 classes[0] = X86_64_SSEDF_CLASS;
3800 classes[1] = X86_64_SSEDF_CLASS;
3801 return 2;
3802 case XCmode:
3803 classes[0] = X86_64_COMPLEX_X87_CLASS;
3804 return 1;
3805 case TCmode:
3806 /* This modes is larger than 16 bytes. */
3807 return 0;
3808 case V4SFmode:
3809 case V4SImode:
3810 case V16QImode:
3811 case V8HImode:
3812 case V2DFmode:
3813 case V2DImode:
3814 classes[0] = X86_64_SSE_CLASS;
3815 classes[1] = X86_64_SSEUP_CLASS;
3816 return 2;
3817 case V2SFmode:
3818 case V2SImode:
3819 case V4HImode:
3820 case V8QImode:
3821 classes[0] = X86_64_SSE_CLASS;
3822 return 1;
3823 case BLKmode:
3824 case VOIDmode:
3825 return 0;
3826 default:
3827 gcc_assert (VECTOR_MODE_P (mode));
3829 if (bytes > 16)
3830 return 0;
3832 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3834 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3835 classes[0] = X86_64_INTEGERSI_CLASS;
3836 else
3837 classes[0] = X86_64_INTEGER_CLASS;
3838 classes[1] = X86_64_INTEGER_CLASS;
3839 return 1 + (bytes > 8);
3843 /* Examine the argument and return set number of register required in each
3844 class. Return 0 iff parameter should be passed in memory. */
3845 static int
3846 examine_argument (enum machine_mode mode, const_tree type, int in_return,
3847 int *int_nregs, int *sse_nregs)
3849 enum x86_64_reg_class regclass[MAX_CLASSES];
3850 int n = classify_argument (mode, type, regclass, 0);
3852 *int_nregs = 0;
3853 *sse_nregs = 0;
3854 if (!n)
3855 return 0;
3856 for (n--; n >= 0; n--)
3857 switch (regclass[n])
3859 case X86_64_INTEGER_CLASS:
3860 case X86_64_INTEGERSI_CLASS:
3861 (*int_nregs)++;
3862 break;
3863 case X86_64_SSE_CLASS:
3864 case X86_64_SSESF_CLASS:
3865 case X86_64_SSEDF_CLASS:
3866 (*sse_nregs)++;
3867 break;
3868 case X86_64_NO_CLASS:
3869 case X86_64_SSEUP_CLASS:
3870 break;
3871 case X86_64_X87_CLASS:
3872 case X86_64_X87UP_CLASS:
3873 if (!in_return)
3874 return 0;
3875 break;
3876 case X86_64_COMPLEX_X87_CLASS:
3877 return in_return ? 2 : 0;
3878 case X86_64_MEMORY_CLASS:
3879 gcc_unreachable ();
3881 return 1;
3884 /* Construct container for the argument used by GCC interface. See
3885 FUNCTION_ARG for the detailed description. */
3887 static rtx
3888 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3889 const_tree type, int in_return, int nintregs, int nsseregs,
3890 const int *intreg, int sse_regno)
3892 /* The following variables hold the static issued_error state. */
3893 static bool issued_sse_arg_error;
3894 static bool issued_sse_ret_error;
3895 static bool issued_x87_ret_error;
3897 enum machine_mode tmpmode;
3898 int bytes =
3899 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3900 enum x86_64_reg_class regclass[MAX_CLASSES];
3901 int n;
3902 int i;
3903 int nexps = 0;
3904 int needed_sseregs, needed_intregs;
3905 rtx exp[MAX_CLASSES];
3906 rtx ret;
3908 n = classify_argument (mode, type, regclass, 0);
3909 if (!n)
3910 return NULL;
3911 if (!examine_argument (mode, type, in_return, &needed_intregs,
3912 &needed_sseregs))
3913 return NULL;
3914 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3915 return NULL;
3917 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3918 some less clueful developer tries to use floating-point anyway. */
3919 if (needed_sseregs && !TARGET_SSE)
3921 if (in_return)
3923 if (!issued_sse_ret_error)
3925 error ("SSE register return with SSE disabled");
3926 issued_sse_ret_error = true;
3929 else if (!issued_sse_arg_error)
3931 error ("SSE register argument with SSE disabled");
3932 issued_sse_arg_error = true;
3934 return NULL;
3937 /* Likewise, error if the ABI requires us to return values in the
3938 x87 registers and the user specified -mno-80387. */
3939 if (!TARGET_80387 && in_return)
3940 for (i = 0; i < n; i++)
3941 if (regclass[i] == X86_64_X87_CLASS
3942 || regclass[i] == X86_64_X87UP_CLASS
3943 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
3945 if (!issued_x87_ret_error)
3947 error ("x87 register return with x87 disabled");
3948 issued_x87_ret_error = true;
3950 return NULL;
3953 /* First construct simple cases. Avoid SCmode, since we want to use
3954 single register to pass this type. */
3955 if (n == 1 && mode != SCmode)
3956 switch (regclass[0])
3958 case X86_64_INTEGER_CLASS:
3959 case X86_64_INTEGERSI_CLASS:
3960 return gen_rtx_REG (mode, intreg[0]);
3961 case X86_64_SSE_CLASS:
3962 case X86_64_SSESF_CLASS:
3963 case X86_64_SSEDF_CLASS:
3964 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3965 case X86_64_X87_CLASS:
3966 case X86_64_COMPLEX_X87_CLASS:
3967 return gen_rtx_REG (mode, FIRST_STACK_REG);
3968 case X86_64_NO_CLASS:
3969 /* Zero sized array, struct or class. */
3970 return NULL;
3971 default:
3972 gcc_unreachable ();
3974 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
3975 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
3976 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3978 if (n == 2
3979 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
3980 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3981 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
3982 && regclass[1] == X86_64_INTEGER_CLASS
3983 && (mode == CDImode || mode == TImode || mode == TFmode)
3984 && intreg[0] + 1 == intreg[1])
3985 return gen_rtx_REG (mode, intreg[0]);
3987 /* Otherwise figure out the entries of the PARALLEL. */
3988 for (i = 0; i < n; i++)
3990 switch (regclass[i])
3992 case X86_64_NO_CLASS:
3993 break;
3994 case X86_64_INTEGER_CLASS:
3995 case X86_64_INTEGERSI_CLASS:
3996 /* Merge TImodes on aligned occasions here too. */
3997 if (i * 8 + 8 > bytes)
3998 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3999 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
4000 tmpmode = SImode;
4001 else
4002 tmpmode = DImode;
4003 /* We've requested 24 bytes we don't have mode for. Use DImode. */
4004 if (tmpmode == BLKmode)
4005 tmpmode = DImode;
4006 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4007 gen_rtx_REG (tmpmode, *intreg),
4008 GEN_INT (i*8));
4009 intreg++;
4010 break;
4011 case X86_64_SSESF_CLASS:
4012 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4013 gen_rtx_REG (SFmode,
4014 SSE_REGNO (sse_regno)),
4015 GEN_INT (i*8));
4016 sse_regno++;
4017 break;
4018 case X86_64_SSEDF_CLASS:
4019 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4020 gen_rtx_REG (DFmode,
4021 SSE_REGNO (sse_regno)),
4022 GEN_INT (i*8));
4023 sse_regno++;
4024 break;
4025 case X86_64_SSE_CLASS:
4026 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
4027 tmpmode = TImode;
4028 else
4029 tmpmode = DImode;
4030 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4031 gen_rtx_REG (tmpmode,
4032 SSE_REGNO (sse_regno)),
4033 GEN_INT (i*8));
4034 if (tmpmode == TImode)
4035 i++;
4036 sse_regno++;
4037 break;
4038 default:
4039 gcc_unreachable ();
4043 /* Empty aligned struct, union or class. */
4044 if (nexps == 0)
4045 return NULL;
4047 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
4048 for (i = 0; i < nexps; i++)
4049 XVECEXP (ret, 0, i) = exp [i];
4050 return ret;
4053 /* Update the data in CUM to advance over an argument of mode MODE
4054 and data type TYPE. (TYPE is null for libcalls where that information
4055 may not be available.) */
4057 static void
4058 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4059 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4061 switch (mode)
4063 default:
4064 break;
4066 case BLKmode:
4067 if (bytes < 0)
4068 break;
4069 /* FALLTHRU */
4071 case DImode:
4072 case SImode:
4073 case HImode:
4074 case QImode:
4075 cum->words += words;
4076 cum->nregs -= words;
4077 cum->regno += words;
4079 if (cum->nregs <= 0)
4081 cum->nregs = 0;
4082 cum->regno = 0;
4084 break;
4086 case DFmode:
4087 if (cum->float_in_sse < 2)
4088 break;
4089 case SFmode:
4090 if (cum->float_in_sse < 1)
4091 break;
4092 /* FALLTHRU */
4094 case TImode:
4095 case V16QImode:
4096 case V8HImode:
4097 case V4SImode:
4098 case V2DImode:
4099 case V4SFmode:
4100 case V2DFmode:
4101 if (!type || !AGGREGATE_TYPE_P (type))
4103 cum->sse_words += words;
4104 cum->sse_nregs -= 1;
4105 cum->sse_regno += 1;
4106 if (cum->sse_nregs <= 0)
4108 cum->sse_nregs = 0;
4109 cum->sse_regno = 0;
4112 break;
4114 case V8QImode:
4115 case V4HImode:
4116 case V2SImode:
4117 case V2SFmode:
4118 if (!type || !AGGREGATE_TYPE_P (type))
4120 cum->mmx_words += words;
4121 cum->mmx_nregs -= 1;
4122 cum->mmx_regno += 1;
4123 if (cum->mmx_nregs <= 0)
4125 cum->mmx_nregs = 0;
4126 cum->mmx_regno = 0;
4129 break;
4133 static void
4134 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4135 tree type, HOST_WIDE_INT words)
4137 int int_nregs, sse_nregs;
4139 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
4140 cum->words += words;
4141 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
4143 cum->nregs -= int_nregs;
4144 cum->sse_nregs -= sse_nregs;
4145 cum->regno += int_nregs;
4146 cum->sse_regno += sse_nregs;
4148 else
4149 cum->words += words;
4152 static void
4153 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
4154 HOST_WIDE_INT words)
4156 /* Otherwise, this should be passed indirect. */
4157 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
4159 cum->words += words;
4160 if (cum->nregs > 0)
4162 cum->nregs -= 1;
4163 cum->regno += 1;
4167 void
4168 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4169 tree type, int named ATTRIBUTE_UNUSED)
4171 HOST_WIDE_INT bytes, words;
4173 if (mode == BLKmode)
4174 bytes = int_size_in_bytes (type);
4175 else
4176 bytes = GET_MODE_SIZE (mode);
4177 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4179 if (type)
4180 mode = type_natural_mode (type);
4182 if (TARGET_64BIT_MS_ABI)
4183 function_arg_advance_ms_64 (cum, bytes, words);
4184 else if (TARGET_64BIT)
4185 function_arg_advance_64 (cum, mode, type, words);
4186 else
4187 function_arg_advance_32 (cum, mode, type, bytes, words);
4190 /* Define where to put the arguments to a function.
4191 Value is zero to push the argument on the stack,
4192 or a hard register in which to store the argument.
4194 MODE is the argument's machine mode.
4195 TYPE is the data type of the argument (as a tree).
4196 This is null for libcalls where that information may
4197 not be available.
4198 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4199 the preceding args and about the function being called.
4200 NAMED is nonzero if this argument is a named parameter
4201 (otherwise it is an extra parameter matching an ellipsis). */
4203 static rtx
4204 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4205 enum machine_mode orig_mode, tree type,
4206 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4208 static bool warnedsse, warnedmmx;
4210 /* Avoid the AL settings for the Unix64 ABI. */
4211 if (mode == VOIDmode)
4212 return constm1_rtx;
4214 switch (mode)
4216 default:
4217 break;
4219 case BLKmode:
4220 if (bytes < 0)
4221 break;
4222 /* FALLTHRU */
4223 case DImode:
4224 case SImode:
4225 case HImode:
4226 case QImode:
4227 if (words <= cum->nregs)
4229 int regno = cum->regno;
4231 /* Fastcall allocates the first two DWORD (SImode) or
4232 smaller arguments to ECX and EDX. */
4233 if (cum->fastcall)
4235 if (mode == BLKmode || mode == DImode)
4236 break;
4238 /* ECX not EAX is the first allocated register. */
4239 if (regno == 0)
4240 regno = 2;
4242 return gen_rtx_REG (mode, regno);
4244 break;
4246 case DFmode:
4247 if (cum->float_in_sse < 2)
4248 break;
4249 case SFmode:
4250 if (cum->float_in_sse < 1)
4251 break;
4252 /* FALLTHRU */
4253 case TImode:
4254 case V16QImode:
4255 case V8HImode:
4256 case V4SImode:
4257 case V2DImode:
4258 case V4SFmode:
4259 case V2DFmode:
4260 if (!type || !AGGREGATE_TYPE_P (type))
4262 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4264 warnedsse = true;
4265 warning (0, "SSE vector argument without SSE enabled "
4266 "changes the ABI");
4268 if (cum->sse_nregs)
4269 return gen_reg_or_parallel (mode, orig_mode,
4270 cum->sse_regno + FIRST_SSE_REG);
4272 break;
4274 case V8QImode:
4275 case V4HImode:
4276 case V2SImode:
4277 case V2SFmode:
4278 if (!type || !AGGREGATE_TYPE_P (type))
4280 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4282 warnedmmx = true;
4283 warning (0, "MMX vector argument without MMX enabled "
4284 "changes the ABI");
4286 if (cum->mmx_nregs)
4287 return gen_reg_or_parallel (mode, orig_mode,
4288 cum->mmx_regno + FIRST_MMX_REG);
4290 break;
4293 return NULL_RTX;
4296 static rtx
4297 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4298 enum machine_mode orig_mode, tree type)
4300 /* Handle a hidden AL argument containing number of registers
4301 for varargs x86-64 functions. */
4302 if (mode == VOIDmode)
4303 return GEN_INT (cum->maybe_vaarg
4304 ? (cum->sse_nregs < 0
4305 ? SSE_REGPARM_MAX
4306 : cum->sse_regno)
4307 : -1);
4309 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4310 cum->sse_nregs,
4311 &x86_64_int_parameter_registers [cum->regno],
4312 cum->sse_regno);
4315 static rtx
4316 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4317 enum machine_mode orig_mode, int named)
4319 unsigned int regno;
4321 /* Avoid the AL settings for the Unix64 ABI. */
4322 if (mode == VOIDmode)
4323 return constm1_rtx;
4325 /* If we've run out of registers, it goes on the stack. */
4326 if (cum->nregs == 0)
4327 return NULL_RTX;
4329 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4331 /* Only floating point modes are passed in anything but integer regs. */
4332 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4334 if (named)
4335 regno = cum->regno + FIRST_SSE_REG;
4336 else
4338 rtx t1, t2;
4340 /* Unnamed floating parameters are passed in both the
4341 SSE and integer registers. */
4342 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4343 t2 = gen_rtx_REG (mode, regno);
4344 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4345 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4346 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4350 return gen_reg_or_parallel (mode, orig_mode, regno);
4354 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4355 tree type, int named)
4357 enum machine_mode mode = omode;
4358 HOST_WIDE_INT bytes, words;
4360 if (mode == BLKmode)
4361 bytes = int_size_in_bytes (type);
4362 else
4363 bytes = GET_MODE_SIZE (mode);
4364 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4366 /* To simplify the code below, represent vector types with a vector mode
4367 even if MMX/SSE are not active. */
4368 if (type && TREE_CODE (type) == VECTOR_TYPE)
4369 mode = type_natural_mode (type);
4371 if (TARGET_64BIT_MS_ABI)
4372 return function_arg_ms_64 (cum, mode, omode, named);
4373 else if (TARGET_64BIT)
4374 return function_arg_64 (cum, mode, omode, type);
4375 else
4376 return function_arg_32 (cum, mode, omode, type, bytes, words);
4379 /* A C expression that indicates when an argument must be passed by
4380 reference. If nonzero for an argument, a copy of that argument is
4381 made in memory and a pointer to the argument is passed instead of
4382 the argument itself. The pointer is passed in whatever way is
4383 appropriate for passing a pointer to that type. */
4385 static bool
4386 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4387 enum machine_mode mode ATTRIBUTE_UNUSED,
4388 const_tree type, bool named ATTRIBUTE_UNUSED)
4390 if (TARGET_64BIT_MS_ABI)
4392 if (type)
4394 /* Arrays are passed by reference. */
4395 if (TREE_CODE (type) == ARRAY_TYPE)
4396 return true;
4398 if (AGGREGATE_TYPE_P (type))
4400 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4401 are passed by reference. */
4402 int el2 = exact_log2 (int_size_in_bytes (type));
4403 return !(el2 >= 0 && el2 <= 3);
4407 /* __m128 is passed by reference. */
4408 /* ??? How to handle complex? For now treat them as structs,
4409 and pass them by reference if they're too large. */
4410 if (GET_MODE_SIZE (mode) > 8)
4411 return true;
4413 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4414 return 1;
4416 return 0;
4419 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4420 ABI. Only called if TARGET_SSE. */
4421 static bool
4422 contains_128bit_aligned_vector_p (tree type)
4424 enum machine_mode mode = TYPE_MODE (type);
4425 if (SSE_REG_MODE_P (mode)
4426 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4427 return true;
4428 if (TYPE_ALIGN (type) < 128)
4429 return false;
4431 if (AGGREGATE_TYPE_P (type))
4433 /* Walk the aggregates recursively. */
4434 switch (TREE_CODE (type))
4436 case RECORD_TYPE:
4437 case UNION_TYPE:
4438 case QUAL_UNION_TYPE:
4440 tree field;
4442 /* Walk all the structure fields. */
4443 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4445 if (TREE_CODE (field) == FIELD_DECL
4446 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4447 return true;
4449 break;
4452 case ARRAY_TYPE:
4453 /* Just for use if some languages passes arrays by value. */
4454 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4455 return true;
4456 break;
4458 default:
4459 gcc_unreachable ();
4462 return false;
4465 /* Gives the alignment boundary, in bits, of an argument with the
4466 specified mode and type. */
4469 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4471 int align;
4472 if (type)
4473 align = TYPE_ALIGN (type);
4474 else
4475 align = GET_MODE_ALIGNMENT (mode);
4476 if (align < PARM_BOUNDARY)
4477 align = PARM_BOUNDARY;
4478 if (!TARGET_64BIT)
4480 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4481 make an exception for SSE modes since these require 128bit
4482 alignment.
4484 The handling here differs from field_alignment. ICC aligns MMX
4485 arguments to 4 byte boundaries, while structure fields are aligned
4486 to 8 byte boundaries. */
4487 if (!TARGET_SSE)
4488 align = PARM_BOUNDARY;
4489 else if (!type)
4491 if (!SSE_REG_MODE_P (mode))
4492 align = PARM_BOUNDARY;
4494 else
4496 if (!contains_128bit_aligned_vector_p (type))
4497 align = PARM_BOUNDARY;
4500 if (align > 128)
4501 align = 128;
4502 return align;
4505 /* Return true if N is a possible register number of function value. */
4507 bool
4508 ix86_function_value_regno_p (int regno)
4510 switch (regno)
4512 case 0:
4513 return true;
4515 case FIRST_FLOAT_REG:
4516 if (TARGET_64BIT_MS_ABI)
4517 return false;
4518 return TARGET_FLOAT_RETURNS_IN_80387;
4520 case FIRST_SSE_REG:
4521 return TARGET_SSE;
4523 case FIRST_MMX_REG:
4524 if (TARGET_MACHO || TARGET_64BIT)
4525 return false;
4526 return TARGET_MMX;
4529 return false;
4532 /* Define how to find the value returned by a function.
4533 VALTYPE is the data type of the value (as a tree).
4534 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4535 otherwise, FUNC is 0. */
4537 static rtx
4538 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4539 const_tree fntype, const_tree fn)
4541 unsigned int regno;
4543 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4544 we normally prevent this case when mmx is not available. However
4545 some ABIs may require the result to be returned like DImode. */
4546 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4547 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4549 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4550 we prevent this case when sse is not available. However some ABIs
4551 may require the result to be returned like integer TImode. */
4552 else if (mode == TImode
4553 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4554 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4556 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4557 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4558 regno = FIRST_FLOAT_REG;
4559 else
4560 /* Most things go in %eax. */
4561 regno = 0;
4563 /* Override FP return register with %xmm0 for local functions when
4564 SSE math is enabled or for functions with sseregparm attribute. */
4565 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4567 int sse_level = ix86_function_sseregparm (fntype, fn);
4568 if ((sse_level >= 1 && mode == SFmode)
4569 || (sse_level == 2 && mode == DFmode))
4570 regno = FIRST_SSE_REG;
4573 return gen_rtx_REG (orig_mode, regno);
4576 static rtx
4577 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4578 const_tree valtype)
4580 rtx ret;
4582 /* Handle libcalls, which don't provide a type node. */
4583 if (valtype == NULL)
4585 switch (mode)
4587 case SFmode:
4588 case SCmode:
4589 case DFmode:
4590 case DCmode:
4591 case TFmode:
4592 case SDmode:
4593 case DDmode:
4594 case TDmode:
4595 return gen_rtx_REG (mode, FIRST_SSE_REG);
4596 case XFmode:
4597 case XCmode:
4598 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4599 case TCmode:
4600 return NULL;
4601 default:
4602 return gen_rtx_REG (mode, 0);
4606 ret = construct_container (mode, orig_mode, valtype, 1,
4607 REGPARM_MAX, SSE_REGPARM_MAX,
4608 x86_64_int_return_registers, 0);
4610 /* For zero sized structures, construct_container returns NULL, but we
4611 need to keep rest of compiler happy by returning meaningful value. */
4612 if (!ret)
4613 ret = gen_rtx_REG (orig_mode, 0);
4615 return ret;
4618 static rtx
4619 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4621 unsigned int regno = 0;
4623 if (TARGET_SSE)
4625 if (mode == SFmode || mode == DFmode)
4626 regno = FIRST_SSE_REG;
4627 else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
4628 regno = FIRST_SSE_REG;
4631 return gen_rtx_REG (orig_mode, regno);
4634 static rtx
4635 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4636 enum machine_mode orig_mode, enum machine_mode mode)
4638 const_tree fn, fntype;
4640 fn = NULL_TREE;
4641 if (fntype_or_decl && DECL_P (fntype_or_decl))
4642 fn = fntype_or_decl;
4643 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4645 if (TARGET_64BIT_MS_ABI)
4646 return function_value_ms_64 (orig_mode, mode);
4647 else if (TARGET_64BIT)
4648 return function_value_64 (orig_mode, mode, valtype);
4649 else
4650 return function_value_32 (orig_mode, mode, fntype, fn);
4653 static rtx
4654 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
4655 bool outgoing ATTRIBUTE_UNUSED)
4657 enum machine_mode mode, orig_mode;
4659 orig_mode = TYPE_MODE (valtype);
4660 mode = type_natural_mode (valtype);
4661 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4665 ix86_libcall_value (enum machine_mode mode)
4667 return ix86_function_value_1 (NULL, NULL, mode, mode);
4670 /* Return true iff type is returned in memory. */
4672 static int
4673 return_in_memory_32 (const_tree type, enum machine_mode mode)
4675 HOST_WIDE_INT size;
4677 if (mode == BLKmode)
4678 return 1;
4680 size = int_size_in_bytes (type);
4682 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4683 return 0;
4685 if (VECTOR_MODE_P (mode) || mode == TImode)
4687 /* User-created vectors small enough to fit in EAX. */
4688 if (size < 8)
4689 return 0;
4691 /* MMX/3dNow values are returned in MM0,
4692 except when it doesn't exits. */
4693 if (size == 8)
4694 return (TARGET_MMX ? 0 : 1);
4696 /* SSE values are returned in XMM0, except when it doesn't exist. */
4697 if (size == 16)
4698 return (TARGET_SSE ? 0 : 1);
4701 if (mode == XFmode)
4702 return 0;
4704 if (mode == TDmode)
4705 return 1;
4707 if (size > 12)
4708 return 1;
4709 return 0;
4712 static int
4713 return_in_memory_64 (const_tree type, enum machine_mode mode)
4715 int needed_intregs, needed_sseregs;
4716 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4719 static int
4720 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
4722 HOST_WIDE_INT size = int_size_in_bytes (type);
4724 /* __m128 and friends are returned in xmm0. */
4725 if (!COMPLEX_MODE_P (mode) && size == 16 && VECTOR_MODE_P (mode))
4726 return 0;
4728 /* Otherwise, the size must be exactly in [1248]. But not for complex. */
4729 return (size != 1 && size != 2 && size != 4 && size != 8)
4730 || COMPLEX_MODE_P (mode);
4734 ix86_return_in_memory (const_tree type)
4736 const enum machine_mode mode = type_natural_mode (type);
4738 if (TARGET_64BIT_MS_ABI)
4739 return return_in_memory_ms_64 (type, mode);
4740 else if (TARGET_64BIT)
4741 return return_in_memory_64 (type, mode);
4742 else
4743 return return_in_memory_32 (type, mode);
4746 /* Return false iff TYPE is returned in memory. This version is used
4747 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4748 but differs notably in that when MMX is available, 8-byte vectors
4749 are returned in memory, rather than in MMX registers. */
4752 ix86_sol10_return_in_memory (const_tree type)
4754 int size;
4755 enum machine_mode mode = type_natural_mode (type);
4757 if (TARGET_64BIT)
4758 return return_in_memory_64 (type, mode);
4760 if (mode == BLKmode)
4761 return 1;
4763 size = int_size_in_bytes (type);
4765 if (VECTOR_MODE_P (mode))
4767 /* Return in memory only if MMX registers *are* available. This
4768 seems backwards, but it is consistent with the existing
4769 Solaris x86 ABI. */
4770 if (size == 8)
4771 return TARGET_MMX;
4772 if (size == 16)
4773 return !TARGET_SSE;
4775 else if (mode == TImode)
4776 return !TARGET_SSE;
4777 else if (mode == XFmode)
4778 return 0;
4780 return size > 12;
4783 /* When returning SSE vector types, we have a choice of either
4784 (1) being abi incompatible with a -march switch, or
4785 (2) generating an error.
4786 Given no good solution, I think the safest thing is one warning.
4787 The user won't be able to use -Werror, but....
4789 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4790 called in response to actually generating a caller or callee that
4791 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4792 via aggregate_value_p for general type probing from tree-ssa. */
4794 static rtx
4795 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4797 static bool warnedsse, warnedmmx;
4799 if (!TARGET_64BIT && type)
4801 /* Look at the return type of the function, not the function type. */
4802 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4804 if (!TARGET_SSE && !warnedsse)
4806 if (mode == TImode
4807 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4809 warnedsse = true;
4810 warning (0, "SSE vector return without SSE enabled "
4811 "changes the ABI");
4815 if (!TARGET_MMX && !warnedmmx)
4817 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4819 warnedmmx = true;
4820 warning (0, "MMX vector return without MMX enabled "
4821 "changes the ABI");
4826 return NULL;
4830 /* Create the va_list data type. */
4832 static tree
4833 ix86_build_builtin_va_list (void)
4835 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4837 /* For i386 we use plain pointer to argument area. */
4838 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4839 return build_pointer_type (char_type_node);
4841 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4842 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4844 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4845 unsigned_type_node);
4846 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4847 unsigned_type_node);
4848 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4849 ptr_type_node);
4850 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4851 ptr_type_node);
4853 va_list_gpr_counter_field = f_gpr;
4854 va_list_fpr_counter_field = f_fpr;
4856 DECL_FIELD_CONTEXT (f_gpr) = record;
4857 DECL_FIELD_CONTEXT (f_fpr) = record;
4858 DECL_FIELD_CONTEXT (f_ovf) = record;
4859 DECL_FIELD_CONTEXT (f_sav) = record;
4861 TREE_CHAIN (record) = type_decl;
4862 TYPE_NAME (record) = type_decl;
4863 TYPE_FIELDS (record) = f_gpr;
4864 TREE_CHAIN (f_gpr) = f_fpr;
4865 TREE_CHAIN (f_fpr) = f_ovf;
4866 TREE_CHAIN (f_ovf) = f_sav;
4868 layout_type (record);
4870 /* The correct type is an array type of one element. */
4871 return build_array_type (record, build_index_type (size_zero_node));
4874 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4876 static void
4877 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4879 rtx save_area, mem;
4880 rtx label;
4881 rtx label_ref;
4882 rtx tmp_reg;
4883 rtx nsse_reg;
4884 alias_set_type set;
4885 int i;
4887 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4888 return;
4890 /* Indicate to allocate space on the stack for varargs save area. */
4891 ix86_save_varrargs_registers = 1;
4892 /* We need 16-byte stack alignment to save SSE registers. If user
4893 asked for lower preferred_stack_boundary, lets just hope that he knows
4894 what he is doing and won't varargs SSE values.
4896 We also may end up assuming that only 64bit values are stored in SSE
4897 register let some floating point program work. */
4898 if (ix86_preferred_stack_boundary >= 128)
4899 cfun->stack_alignment_needed = 128;
4901 save_area = frame_pointer_rtx;
4902 set = get_varargs_alias_set ();
4904 for (i = cum->regno;
4905 i < ix86_regparm
4906 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4907 i++)
4909 mem = gen_rtx_MEM (Pmode,
4910 plus_constant (save_area, i * UNITS_PER_WORD));
4911 MEM_NOTRAP_P (mem) = 1;
4912 set_mem_alias_set (mem, set);
4913 emit_move_insn (mem, gen_rtx_REG (Pmode,
4914 x86_64_int_parameter_registers[i]));
4917 if (cum->sse_nregs && cfun->va_list_fpr_size)
4919 /* Now emit code to save SSE registers. The AX parameter contains number
4920 of SSE parameter registers used to call this function. We use
4921 sse_prologue_save insn template that produces computed jump across
4922 SSE saves. We need some preparation work to get this working. */
4924 label = gen_label_rtx ();
4925 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4927 /* Compute address to jump to :
4928 label - 5*eax + nnamed_sse_arguments*5 */
4929 tmp_reg = gen_reg_rtx (Pmode);
4930 nsse_reg = gen_reg_rtx (Pmode);
4931 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4932 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4933 gen_rtx_MULT (Pmode, nsse_reg,
4934 GEN_INT (4))));
4935 if (cum->sse_regno)
4936 emit_move_insn
4937 (nsse_reg,
4938 gen_rtx_CONST (DImode,
4939 gen_rtx_PLUS (DImode,
4940 label_ref,
4941 GEN_INT (cum->sse_regno * 4))));
4942 else
4943 emit_move_insn (nsse_reg, label_ref);
4944 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4946 /* Compute address of memory block we save into. We always use pointer
4947 pointing 127 bytes after first byte to store - this is needed to keep
4948 instruction size limited by 4 bytes. */
4949 tmp_reg = gen_reg_rtx (Pmode);
4950 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4951 plus_constant (save_area,
4952 8 * REGPARM_MAX + 127)));
4953 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4954 MEM_NOTRAP_P (mem) = 1;
4955 set_mem_alias_set (mem, set);
4956 set_mem_align (mem, BITS_PER_WORD);
4958 /* And finally do the dirty job! */
4959 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4960 GEN_INT (cum->sse_regno), label));
4964 static void
4965 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4967 alias_set_type set = get_varargs_alias_set ();
4968 int i;
4970 for (i = cum->regno; i < REGPARM_MAX; i++)
4972 rtx reg, mem;
4974 mem = gen_rtx_MEM (Pmode,
4975 plus_constant (virtual_incoming_args_rtx,
4976 i * UNITS_PER_WORD));
4977 MEM_NOTRAP_P (mem) = 1;
4978 set_mem_alias_set (mem, set);
4980 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4981 emit_move_insn (mem, reg);
4985 static void
4986 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4987 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4988 int no_rtl)
4990 CUMULATIVE_ARGS next_cum;
4991 tree fntype;
4993 /* This argument doesn't appear to be used anymore. Which is good,
4994 because the old code here didn't suppress rtl generation. */
4995 gcc_assert (!no_rtl);
4997 if (!TARGET_64BIT)
4998 return;
5000 fntype = TREE_TYPE (current_function_decl);
5002 /* For varargs, we do not want to skip the dummy va_dcl argument.
5003 For stdargs, we do want to skip the last named argument. */
5004 next_cum = *cum;
5005 if (stdarg_p (fntype))
5006 function_arg_advance (&next_cum, mode, type, 1);
5008 if (TARGET_64BIT_MS_ABI)
5009 setup_incoming_varargs_ms_64 (&next_cum);
5010 else
5011 setup_incoming_varargs_64 (&next_cum);
5014 /* Implement va_start. */
5016 void
5017 ix86_va_start (tree valist, rtx nextarg)
5019 HOST_WIDE_INT words, n_gpr, n_fpr;
5020 tree f_gpr, f_fpr, f_ovf, f_sav;
5021 tree gpr, fpr, ovf, sav, t;
5022 tree type;
5024 /* Only 64bit target needs something special. */
5025 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5027 std_expand_builtin_va_start (valist, nextarg);
5028 return;
5031 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5032 f_fpr = TREE_CHAIN (f_gpr);
5033 f_ovf = TREE_CHAIN (f_fpr);
5034 f_sav = TREE_CHAIN (f_ovf);
5036 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
5037 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5038 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5039 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5040 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5042 /* Count number of gp and fp argument registers used. */
5043 words = current_function_args_info.words;
5044 n_gpr = current_function_args_info.regno;
5045 n_fpr = current_function_args_info.sse_regno;
5047 if (cfun->va_list_gpr_size)
5049 type = TREE_TYPE (gpr);
5050 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
5051 build_int_cst (type, n_gpr * 8));
5052 TREE_SIDE_EFFECTS (t) = 1;
5053 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5056 if (cfun->va_list_fpr_size)
5058 type = TREE_TYPE (fpr);
5059 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
5060 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
5061 TREE_SIDE_EFFECTS (t) = 1;
5062 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5065 /* Find the overflow area. */
5066 type = TREE_TYPE (ovf);
5067 t = make_tree (type, virtual_incoming_args_rtx);
5068 if (words != 0)
5069 t = build2 (POINTER_PLUS_EXPR, type, t,
5070 size_int (words * UNITS_PER_WORD));
5071 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
5072 TREE_SIDE_EFFECTS (t) = 1;
5073 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5075 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
5077 /* Find the register save area.
5078 Prologue of the function save it right above stack frame. */
5079 type = TREE_TYPE (sav);
5080 t = make_tree (type, frame_pointer_rtx);
5081 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
5082 TREE_SIDE_EFFECTS (t) = 1;
5083 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5087 /* Implement va_arg. */
5089 static tree
5090 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
5092 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
5093 tree f_gpr, f_fpr, f_ovf, f_sav;
5094 tree gpr, fpr, ovf, sav, t;
5095 int size, rsize;
5096 tree lab_false, lab_over = NULL_TREE;
5097 tree addr, t2;
5098 rtx container;
5099 int indirect_p = 0;
5100 tree ptrtype;
5101 enum machine_mode nat_mode;
5103 /* Only 64bit target needs something special. */
5104 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5105 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5107 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5108 f_fpr = TREE_CHAIN (f_gpr);
5109 f_ovf = TREE_CHAIN (f_fpr);
5110 f_sav = TREE_CHAIN (f_ovf);
5112 valist = build_va_arg_indirect_ref (valist);
5113 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5114 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5115 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5116 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5118 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5119 if (indirect_p)
5120 type = build_pointer_type (type);
5121 size = int_size_in_bytes (type);
5122 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5124 nat_mode = type_natural_mode (type);
5125 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
5126 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
5128 /* Pull the value out of the saved registers. */
5130 addr = create_tmp_var (ptr_type_node, "addr");
5131 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
5133 if (container)
5135 int needed_intregs, needed_sseregs;
5136 bool need_temp;
5137 tree int_addr, sse_addr;
5139 lab_false = create_artificial_label ();
5140 lab_over = create_artificial_label ();
5142 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
5144 need_temp = (!REG_P (container)
5145 && ((needed_intregs && TYPE_ALIGN (type) > 64)
5146 || TYPE_ALIGN (type) > 128));
5148 /* In case we are passing structure, verify that it is consecutive block
5149 on the register save area. If not we need to do moves. */
5150 if (!need_temp && !REG_P (container))
5152 /* Verify that all registers are strictly consecutive */
5153 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
5155 int i;
5157 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5159 rtx slot = XVECEXP (container, 0, i);
5160 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
5161 || INTVAL (XEXP (slot, 1)) != i * 16)
5162 need_temp = 1;
5165 else
5167 int i;
5169 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5171 rtx slot = XVECEXP (container, 0, i);
5172 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
5173 || INTVAL (XEXP (slot, 1)) != i * 8)
5174 need_temp = 1;
5178 if (!need_temp)
5180 int_addr = addr;
5181 sse_addr = addr;
5183 else
5185 int_addr = create_tmp_var (ptr_type_node, "int_addr");
5186 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
5187 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
5188 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
5191 /* First ensure that we fit completely in registers. */
5192 if (needed_intregs)
5194 t = build_int_cst (TREE_TYPE (gpr),
5195 (REGPARM_MAX - needed_intregs + 1) * 8);
5196 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
5197 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5198 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5199 gimplify_and_add (t, pre_p);
5201 if (needed_sseregs)
5203 t = build_int_cst (TREE_TYPE (fpr),
5204 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
5205 + REGPARM_MAX * 8);
5206 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
5207 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5208 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5209 gimplify_and_add (t, pre_p);
5212 /* Compute index to start of area used for integer regs. */
5213 if (needed_intregs)
5215 /* int_addr = gpr + sav; */
5216 t = fold_convert (sizetype, gpr);
5217 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5218 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
5219 gimplify_and_add (t, pre_p);
5221 if (needed_sseregs)
5223 /* sse_addr = fpr + sav; */
5224 t = fold_convert (sizetype, fpr);
5225 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5226 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
5227 gimplify_and_add (t, pre_p);
5229 if (need_temp)
5231 int i;
5232 tree temp = create_tmp_var (type, "va_arg_tmp");
5234 /* addr = &temp; */
5235 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5236 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5237 gimplify_and_add (t, pre_p);
5239 for (i = 0; i < XVECLEN (container, 0); i++)
5241 rtx slot = XVECEXP (container, 0, i);
5242 rtx reg = XEXP (slot, 0);
5243 enum machine_mode mode = GET_MODE (reg);
5244 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5245 tree addr_type = build_pointer_type (piece_type);
5246 tree src_addr, src;
5247 int src_offset;
5248 tree dest_addr, dest;
5250 if (SSE_REGNO_P (REGNO (reg)))
5252 src_addr = sse_addr;
5253 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5255 else
5257 src_addr = int_addr;
5258 src_offset = REGNO (reg) * 8;
5260 src_addr = fold_convert (addr_type, src_addr);
5261 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
5262 size_int (src_offset));
5263 src = build_va_arg_indirect_ref (src_addr);
5265 dest_addr = fold_convert (addr_type, addr);
5266 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
5267 size_int (INTVAL (XEXP (slot, 1))));
5268 dest = build_va_arg_indirect_ref (dest_addr);
5270 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5271 gimplify_and_add (t, pre_p);
5275 if (needed_intregs)
5277 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5278 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5279 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5280 gimplify_and_add (t, pre_p);
5282 if (needed_sseregs)
5284 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5285 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5286 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5287 gimplify_and_add (t, pre_p);
5290 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5291 gimplify_and_add (t, pre_p);
5293 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5294 append_to_statement_list (t, pre_p);
5297 /* ... otherwise out of the overflow area. */
5299 /* Care for on-stack alignment if needed. */
5300 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5301 || integer_zerop (TYPE_SIZE (type)))
5302 t = ovf;
5303 else
5305 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5306 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
5307 size_int (align - 1));
5308 t = fold_convert (sizetype, t);
5309 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5310 size_int (-align));
5311 t = fold_convert (TREE_TYPE (ovf), t);
5313 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5315 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5316 gimplify_and_add (t2, pre_p);
5318 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
5319 size_int (rsize * UNITS_PER_WORD));
5320 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5321 gimplify_and_add (t, pre_p);
5323 if (container)
5325 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5326 append_to_statement_list (t, pre_p);
5329 ptrtype = build_pointer_type (type);
5330 addr = fold_convert (ptrtype, addr);
5332 if (indirect_p)
5333 addr = build_va_arg_indirect_ref (addr);
5334 return build_va_arg_indirect_ref (addr);
5337 /* Return nonzero if OPNUM's MEM should be matched
5338 in movabs* patterns. */
5341 ix86_check_movabs (rtx insn, int opnum)
5343 rtx set, mem;
5345 set = PATTERN (insn);
5346 if (GET_CODE (set) == PARALLEL)
5347 set = XVECEXP (set, 0, 0);
5348 gcc_assert (GET_CODE (set) == SET);
5349 mem = XEXP (set, opnum);
5350 while (GET_CODE (mem) == SUBREG)
5351 mem = SUBREG_REG (mem);
5352 gcc_assert (MEM_P (mem));
5353 return (volatile_ok || !MEM_VOLATILE_P (mem));
5356 /* Initialize the table of extra 80387 mathematical constants. */
5358 static void
5359 init_ext_80387_constants (void)
5361 static const char * cst[5] =
5363 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5364 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5365 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5366 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5367 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5369 int i;
5371 for (i = 0; i < 5; i++)
5373 real_from_string (&ext_80387_constants_table[i], cst[i]);
5374 /* Ensure each constant is rounded to XFmode precision. */
5375 real_convert (&ext_80387_constants_table[i],
5376 XFmode, &ext_80387_constants_table[i]);
5379 ext_80387_constants_init = 1;
5382 /* Return true if the constant is something that can be loaded with
5383 a special instruction. */
5386 standard_80387_constant_p (rtx x)
5388 enum machine_mode mode = GET_MODE (x);
5390 REAL_VALUE_TYPE r;
5392 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5393 return -1;
5395 if (x == CONST0_RTX (mode))
5396 return 1;
5397 if (x == CONST1_RTX (mode))
5398 return 2;
5400 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5402 /* For XFmode constants, try to find a special 80387 instruction when
5403 optimizing for size or on those CPUs that benefit from them. */
5404 if (mode == XFmode
5405 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5407 int i;
5409 if (! ext_80387_constants_init)
5410 init_ext_80387_constants ();
5412 for (i = 0; i < 5; i++)
5413 if (real_identical (&r, &ext_80387_constants_table[i]))
5414 return i + 3;
5417 /* Load of the constant -0.0 or -1.0 will be split as
5418 fldz;fchs or fld1;fchs sequence. */
5419 if (real_isnegzero (&r))
5420 return 8;
5421 if (real_identical (&r, &dconstm1))
5422 return 9;
5424 return 0;
5427 /* Return the opcode of the special instruction to be used to load
5428 the constant X. */
5430 const char *
5431 standard_80387_constant_opcode (rtx x)
5433 switch (standard_80387_constant_p (x))
5435 case 1:
5436 return "fldz";
5437 case 2:
5438 return "fld1";
5439 case 3:
5440 return "fldlg2";
5441 case 4:
5442 return "fldln2";
5443 case 5:
5444 return "fldl2e";
5445 case 6:
5446 return "fldl2t";
5447 case 7:
5448 return "fldpi";
5449 case 8:
5450 case 9:
5451 return "#";
5452 default:
5453 gcc_unreachable ();
5457 /* Return the CONST_DOUBLE representing the 80387 constant that is
5458 loaded by the specified special instruction. The argument IDX
5459 matches the return value from standard_80387_constant_p. */
5462 standard_80387_constant_rtx (int idx)
5464 int i;
5466 if (! ext_80387_constants_init)
5467 init_ext_80387_constants ();
5469 switch (idx)
5471 case 3:
5472 case 4:
5473 case 5:
5474 case 6:
5475 case 7:
5476 i = idx - 3;
5477 break;
5479 default:
5480 gcc_unreachable ();
5483 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5484 XFmode);
5487 /* Return 1 if mode is a valid mode for sse. */
5488 static int
5489 standard_sse_mode_p (enum machine_mode mode)
5491 switch (mode)
5493 case V16QImode:
5494 case V8HImode:
5495 case V4SImode:
5496 case V2DImode:
5497 case V4SFmode:
5498 case V2DFmode:
5499 return 1;
5501 default:
5502 return 0;
5506 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5509 standard_sse_constant_p (rtx x)
5511 enum machine_mode mode = GET_MODE (x);
5513 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5514 return 1;
5515 if (vector_all_ones_operand (x, mode)
5516 && standard_sse_mode_p (mode))
5517 return TARGET_SSE2 ? 2 : -1;
5519 return 0;
5522 /* Return the opcode of the special instruction to be used to load
5523 the constant X. */
5525 const char *
5526 standard_sse_constant_opcode (rtx insn, rtx x)
5528 switch (standard_sse_constant_p (x))
5530 case 1:
5531 if (get_attr_mode (insn) == MODE_V4SF)
5532 return "xorps\t%0, %0";
5533 else if (get_attr_mode (insn) == MODE_V2DF)
5534 return "xorpd\t%0, %0";
5535 else
5536 return "pxor\t%0, %0";
5537 case 2:
5538 return "pcmpeqd\t%0, %0";
5540 gcc_unreachable ();
5543 /* Returns 1 if OP contains a symbol reference */
5546 symbolic_reference_mentioned_p (rtx op)
5548 const char *fmt;
5549 int i;
5551 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5552 return 1;
5554 fmt = GET_RTX_FORMAT (GET_CODE (op));
5555 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5557 if (fmt[i] == 'E')
5559 int j;
5561 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5562 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5563 return 1;
5566 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5567 return 1;
5570 return 0;
5573 /* Return 1 if it is appropriate to emit `ret' instructions in the
5574 body of a function. Do this only if the epilogue is simple, needing a
5575 couple of insns. Prior to reloading, we can't tell how many registers
5576 must be saved, so return 0 then. Return 0 if there is no frame
5577 marker to de-allocate. */
5580 ix86_can_use_return_insn_p (void)
5582 struct ix86_frame frame;
5584 if (! reload_completed || frame_pointer_needed)
5585 return 0;
5587 /* Don't allow more than 32 pop, since that's all we can do
5588 with one instruction. */
5589 if (current_function_pops_args
5590 && current_function_args_size >= 32768)
5591 return 0;
5593 ix86_compute_frame_layout (&frame);
5594 return frame.to_allocate == 0 && frame.nregs == 0;
5597 /* Value should be nonzero if functions must have frame pointers.
5598 Zero means the frame pointer need not be set up (and parms may
5599 be accessed via the stack pointer) in functions that seem suitable. */
5602 ix86_frame_pointer_required (void)
5604 /* If we accessed previous frames, then the generated code expects
5605 to be able to access the saved ebp value in our frame. */
5606 if (cfun->machine->accesses_prev_frame)
5607 return 1;
5609 /* Several x86 os'es need a frame pointer for other reasons,
5610 usually pertaining to setjmp. */
5611 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5612 return 1;
5614 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5615 the frame pointer by default. Turn it back on now if we've not
5616 got a leaf function. */
5617 if (TARGET_OMIT_LEAF_FRAME_POINTER
5618 && (!current_function_is_leaf
5619 || ix86_current_function_calls_tls_descriptor))
5620 return 1;
5622 if (current_function_profile)
5623 return 1;
5625 return 0;
5628 /* Record that the current function accesses previous call frames. */
5630 void
5631 ix86_setup_frame_addresses (void)
5633 cfun->machine->accesses_prev_frame = 1;
5636 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5637 # define USE_HIDDEN_LINKONCE 1
5638 #else
5639 # define USE_HIDDEN_LINKONCE 0
5640 #endif
5642 static int pic_labels_used;
5644 /* Fills in the label name that should be used for a pc thunk for
5645 the given register. */
5647 static void
5648 get_pc_thunk_name (char name[32], unsigned int regno)
5650 gcc_assert (!TARGET_64BIT);
5652 if (USE_HIDDEN_LINKONCE)
5653 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5654 else
5655 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5659 /* This function generates code for -fpic that loads %ebx with
5660 the return address of the caller and then returns. */
5662 void
5663 ix86_file_end (void)
5665 rtx xops[2];
5666 int regno;
5668 for (regno = 0; regno < 8; ++regno)
5670 char name[32];
5672 if (! ((pic_labels_used >> regno) & 1))
5673 continue;
5675 get_pc_thunk_name (name, regno);
5677 #if TARGET_MACHO
5678 if (TARGET_MACHO)
5680 switch_to_section (darwin_sections[text_coal_section]);
5681 fputs ("\t.weak_definition\t", asm_out_file);
5682 assemble_name (asm_out_file, name);
5683 fputs ("\n\t.private_extern\t", asm_out_file);
5684 assemble_name (asm_out_file, name);
5685 fputs ("\n", asm_out_file);
5686 ASM_OUTPUT_LABEL (asm_out_file, name);
5688 else
5689 #endif
5690 if (USE_HIDDEN_LINKONCE)
5692 tree decl;
5694 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5695 error_mark_node);
5696 TREE_PUBLIC (decl) = 1;
5697 TREE_STATIC (decl) = 1;
5698 DECL_ONE_ONLY (decl) = 1;
5700 (*targetm.asm_out.unique_section) (decl, 0);
5701 switch_to_section (get_named_section (decl, NULL, 0));
5703 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5704 fputs ("\t.hidden\t", asm_out_file);
5705 assemble_name (asm_out_file, name);
5706 fputc ('\n', asm_out_file);
5707 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5709 else
5711 switch_to_section (text_section);
5712 ASM_OUTPUT_LABEL (asm_out_file, name);
5715 xops[0] = gen_rtx_REG (SImode, regno);
5716 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5717 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5718 output_asm_insn ("ret", xops);
5721 if (NEED_INDICATE_EXEC_STACK)
5722 file_end_indicate_exec_stack ();
5725 /* Emit code for the SET_GOT patterns. */
5727 const char *
5728 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5730 rtx xops[3];
5732 xops[0] = dest;
5734 if (TARGET_VXWORKS_RTP && flag_pic)
5736 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5737 xops[2] = gen_rtx_MEM (Pmode,
5738 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5739 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5741 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5742 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5743 an unadorned address. */
5744 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5745 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5746 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5747 return "";
5750 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5752 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5754 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5756 if (!flag_pic)
5757 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5758 else
5759 output_asm_insn ("call\t%a2", xops);
5761 #if TARGET_MACHO
5762 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5763 is what will be referenced by the Mach-O PIC subsystem. */
5764 if (!label)
5765 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5766 #endif
5768 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5769 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5771 if (flag_pic)
5772 output_asm_insn ("pop{l}\t%0", xops);
5774 else
5776 char name[32];
5777 get_pc_thunk_name (name, REGNO (dest));
5778 pic_labels_used |= 1 << REGNO (dest);
5780 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5781 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5782 output_asm_insn ("call\t%X2", xops);
5783 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5784 is what will be referenced by the Mach-O PIC subsystem. */
5785 #if TARGET_MACHO
5786 if (!label)
5787 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5788 else
5789 targetm.asm_out.internal_label (asm_out_file, "L",
5790 CODE_LABEL_NUMBER (label));
5791 #endif
5794 if (TARGET_MACHO)
5795 return "";
5797 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5798 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5799 else
5800 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5802 return "";
5805 /* Generate an "push" pattern for input ARG. */
5807 static rtx
5808 gen_push (rtx arg)
5810 return gen_rtx_SET (VOIDmode,
5811 gen_rtx_MEM (Pmode,
5812 gen_rtx_PRE_DEC (Pmode,
5813 stack_pointer_rtx)),
5814 arg);
5817 /* Return >= 0 if there is an unused call-clobbered register available
5818 for the entire function. */
5820 static unsigned int
5821 ix86_select_alt_pic_regnum (void)
5823 if (current_function_is_leaf && !current_function_profile
5824 && !ix86_current_function_calls_tls_descriptor)
5826 int i;
5827 for (i = 2; i >= 0; --i)
5828 if (!df_regs_ever_live_p (i))
5829 return i;
5832 return INVALID_REGNUM;
5835 /* Return 1 if we need to save REGNO. */
5836 static int
5837 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5839 if (pic_offset_table_rtx
5840 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5841 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
5842 || current_function_profile
5843 || current_function_calls_eh_return
5844 || current_function_uses_const_pool))
5846 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5847 return 0;
5848 return 1;
5851 if (current_function_calls_eh_return && maybe_eh_return)
5853 unsigned i;
5854 for (i = 0; ; i++)
5856 unsigned test = EH_RETURN_DATA_REGNO (i);
5857 if (test == INVALID_REGNUM)
5858 break;
5859 if (test == regno)
5860 return 1;
5864 if (cfun->machine->force_align_arg_pointer
5865 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5866 return 1;
5868 return (df_regs_ever_live_p (regno)
5869 && !call_used_regs[regno]
5870 && !fixed_regs[regno]
5871 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5874 /* Return number of registers to be saved on the stack. */
5876 static int
5877 ix86_nsaved_regs (void)
5879 int nregs = 0;
5880 int regno;
5882 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5883 if (ix86_save_reg (regno, true))
5884 nregs++;
5885 return nregs;
5888 /* Return the offset between two registers, one to be eliminated, and the other
5889 its replacement, at the start of a routine. */
5891 HOST_WIDE_INT
5892 ix86_initial_elimination_offset (int from, int to)
5894 struct ix86_frame frame;
5895 ix86_compute_frame_layout (&frame);
5897 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5898 return frame.hard_frame_pointer_offset;
5899 else if (from == FRAME_POINTER_REGNUM
5900 && to == HARD_FRAME_POINTER_REGNUM)
5901 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5902 else
5904 gcc_assert (to == STACK_POINTER_REGNUM);
5906 if (from == ARG_POINTER_REGNUM)
5907 return frame.stack_pointer_offset;
5909 gcc_assert (from == FRAME_POINTER_REGNUM);
5910 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5914 /* Fill structure ix86_frame about frame of currently computed function. */
5916 static void
5917 ix86_compute_frame_layout (struct ix86_frame *frame)
5919 HOST_WIDE_INT total_size;
5920 unsigned int stack_alignment_needed;
5921 HOST_WIDE_INT offset;
5922 unsigned int preferred_alignment;
5923 HOST_WIDE_INT size = get_frame_size ();
5925 frame->nregs = ix86_nsaved_regs ();
5926 total_size = size;
5928 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5929 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5931 /* During reload iteration the amount of registers saved can change.
5932 Recompute the value as needed. Do not recompute when amount of registers
5933 didn't change as reload does multiple calls to the function and does not
5934 expect the decision to change within single iteration. */
5935 if (!optimize_size
5936 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5938 int count = frame->nregs;
5940 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5941 /* The fast prologue uses move instead of push to save registers. This
5942 is significantly longer, but also executes faster as modern hardware
5943 can execute the moves in parallel, but can't do that for push/pop.
5945 Be careful about choosing what prologue to emit: When function takes
5946 many instructions to execute we may use slow version as well as in
5947 case function is known to be outside hot spot (this is known with
5948 feedback only). Weight the size of function by number of registers
5949 to save as it is cheap to use one or two push instructions but very
5950 slow to use many of them. */
5951 if (count)
5952 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5953 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5954 || (flag_branch_probabilities
5955 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5956 cfun->machine->use_fast_prologue_epilogue = false;
5957 else
5958 cfun->machine->use_fast_prologue_epilogue
5959 = !expensive_function_p (count);
5961 if (TARGET_PROLOGUE_USING_MOVE
5962 && cfun->machine->use_fast_prologue_epilogue)
5963 frame->save_regs_using_mov = true;
5964 else
5965 frame->save_regs_using_mov = false;
5968 /* Skip return address and saved base pointer. */
5969 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5971 frame->hard_frame_pointer_offset = offset;
5973 /* Do some sanity checking of stack_alignment_needed and
5974 preferred_alignment, since i386 port is the only using those features
5975 that may break easily. */
5977 gcc_assert (!size || stack_alignment_needed);
5978 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5979 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5980 gcc_assert (stack_alignment_needed
5981 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5983 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5984 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5986 /* Register save area */
5987 offset += frame->nregs * UNITS_PER_WORD;
5989 /* Va-arg area */
5990 if (ix86_save_varrargs_registers)
5992 offset += X86_64_VARARGS_SIZE;
5993 frame->va_arg_size = X86_64_VARARGS_SIZE;
5995 else
5996 frame->va_arg_size = 0;
5998 /* Align start of frame for local function. */
5999 frame->padding1 = ((offset + stack_alignment_needed - 1)
6000 & -stack_alignment_needed) - offset;
6002 offset += frame->padding1;
6004 /* Frame pointer points here. */
6005 frame->frame_pointer_offset = offset;
6007 offset += size;
6009 /* Add outgoing arguments area. Can be skipped if we eliminated
6010 all the function calls as dead code.
6011 Skipping is however impossible when function calls alloca. Alloca
6012 expander assumes that last current_function_outgoing_args_size
6013 of stack frame are unused. */
6014 if (ACCUMULATE_OUTGOING_ARGS
6015 && (!current_function_is_leaf || current_function_calls_alloca
6016 || ix86_current_function_calls_tls_descriptor))
6018 offset += current_function_outgoing_args_size;
6019 frame->outgoing_arguments_size = current_function_outgoing_args_size;
6021 else
6022 frame->outgoing_arguments_size = 0;
6024 /* Align stack boundary. Only needed if we're calling another function
6025 or using alloca. */
6026 if (!current_function_is_leaf || current_function_calls_alloca
6027 || ix86_current_function_calls_tls_descriptor)
6028 frame->padding2 = ((offset + preferred_alignment - 1)
6029 & -preferred_alignment) - offset;
6030 else
6031 frame->padding2 = 0;
6033 offset += frame->padding2;
6035 /* We've reached end of stack frame. */
6036 frame->stack_pointer_offset = offset;
6038 /* Size prologue needs to allocate. */
6039 frame->to_allocate =
6040 (size + frame->padding1 + frame->padding2
6041 + frame->outgoing_arguments_size + frame->va_arg_size);
6043 if ((!frame->to_allocate && frame->nregs <= 1)
6044 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
6045 frame->save_regs_using_mov = false;
6047 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
6048 && current_function_is_leaf
6049 && !ix86_current_function_calls_tls_descriptor)
6051 frame->red_zone_size = frame->to_allocate;
6052 if (frame->save_regs_using_mov)
6053 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6054 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6055 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6057 else
6058 frame->red_zone_size = 0;
6059 frame->to_allocate -= frame->red_zone_size;
6060 frame->stack_pointer_offset -= frame->red_zone_size;
6061 #if 0
6062 fprintf (stderr, "\n");
6063 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
6064 fprintf (stderr, "size: %ld\n", (long)size);
6065 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
6066 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
6067 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
6068 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
6069 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
6070 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
6071 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
6072 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
6073 (long)frame->hard_frame_pointer_offset);
6074 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
6075 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
6076 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
6077 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
6078 #endif
6081 /* Emit code to save registers in the prologue. */
6083 static void
6084 ix86_emit_save_regs (void)
6086 unsigned int regno;
6087 rtx insn;
6089 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
6090 if (ix86_save_reg (regno, true))
6092 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
6093 RTX_FRAME_RELATED_P (insn) = 1;
6097 /* Emit code to save registers using MOV insns. First register
6098 is restored from POINTER + OFFSET. */
6099 static void
6100 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
6102 unsigned int regno;
6103 rtx insn;
6105 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6106 if (ix86_save_reg (regno, true))
6108 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
6109 Pmode, offset),
6110 gen_rtx_REG (Pmode, regno));
6111 RTX_FRAME_RELATED_P (insn) = 1;
6112 offset += UNITS_PER_WORD;
6116 /* Expand prologue or epilogue stack adjustment.
6117 The pattern exist to put a dependency on all ebp-based memory accesses.
6118 STYLE should be negative if instructions should be marked as frame related,
6119 zero if %r11 register is live and cannot be freely used and positive
6120 otherwise. */
6122 static void
6123 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
6125 rtx insn;
6127 if (! TARGET_64BIT)
6128 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
6129 else if (x86_64_immediate_operand (offset, DImode))
6130 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
6131 else
6133 rtx r11;
6134 /* r11 is used by indirect sibcall return as well, set before the
6135 epilogue and used after the epilogue. ATM indirect sibcall
6136 shouldn't be used together with huge frame sizes in one
6137 function because of the frame_size check in sibcall.c. */
6138 gcc_assert (style);
6139 r11 = gen_rtx_REG (DImode, R11_REG);
6140 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
6141 if (style < 0)
6142 RTX_FRAME_RELATED_P (insn) = 1;
6143 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
6144 offset));
6146 if (style < 0)
6147 RTX_FRAME_RELATED_P (insn) = 1;
6150 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
6152 static rtx
6153 ix86_internal_arg_pointer (void)
6155 bool has_force_align_arg_pointer =
6156 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
6157 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
6158 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
6159 && DECL_NAME (current_function_decl)
6160 && MAIN_NAME_P (DECL_NAME (current_function_decl))
6161 && DECL_FILE_SCOPE_P (current_function_decl))
6162 || ix86_force_align_arg_pointer
6163 || has_force_align_arg_pointer)
6165 /* Nested functions can't realign the stack due to a register
6166 conflict. */
6167 if (DECL_CONTEXT (current_function_decl)
6168 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
6170 if (ix86_force_align_arg_pointer)
6171 warning (0, "-mstackrealign ignored for nested functions");
6172 if (has_force_align_arg_pointer)
6173 error ("%s not supported for nested functions",
6174 ix86_force_align_arg_pointer_string);
6175 return virtual_incoming_args_rtx;
6177 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
6178 return copy_to_reg (cfun->machine->force_align_arg_pointer);
6180 else
6181 return virtual_incoming_args_rtx;
6184 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6185 This is called from dwarf2out.c to emit call frame instructions
6186 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
6187 static void
6188 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
6190 rtx unspec = SET_SRC (pattern);
6191 gcc_assert (GET_CODE (unspec) == UNSPEC);
6193 switch (index)
6195 case UNSPEC_REG_SAVE:
6196 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
6197 SET_DEST (pattern));
6198 break;
6199 case UNSPEC_DEF_CFA:
6200 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
6201 INTVAL (XVECEXP (unspec, 0, 0)));
6202 break;
6203 default:
6204 gcc_unreachable ();
6208 /* Expand the prologue into a bunch of separate insns. */
6210 void
6211 ix86_expand_prologue (void)
6213 rtx insn;
6214 bool pic_reg_used;
6215 struct ix86_frame frame;
6216 HOST_WIDE_INT allocate;
6218 ix86_compute_frame_layout (&frame);
6220 if (cfun->machine->force_align_arg_pointer)
6222 rtx x, y;
6224 /* Grab the argument pointer. */
6225 x = plus_constant (stack_pointer_rtx, 4);
6226 y = cfun->machine->force_align_arg_pointer;
6227 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
6228 RTX_FRAME_RELATED_P (insn) = 1;
6230 /* The unwind info consists of two parts: install the fafp as the cfa,
6231 and record the fafp as the "save register" of the stack pointer.
6232 The later is there in order that the unwinder can see where it
6233 should restore the stack pointer across the and insn. */
6234 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
6235 x = gen_rtx_SET (VOIDmode, y, x);
6236 RTX_FRAME_RELATED_P (x) = 1;
6237 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6238 UNSPEC_REG_SAVE);
6239 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6240 RTX_FRAME_RELATED_P (y) = 1;
6241 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6242 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6243 REG_NOTES (insn) = x;
6245 /* Align the stack. */
6246 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6247 GEN_INT (-16)));
6249 /* And here we cheat like madmen with the unwind info. We force the
6250 cfa register back to sp+4, which is exactly what it was at the
6251 start of the function. Re-pushing the return address results in
6252 the return at the same spot relative to the cfa, and thus is
6253 correct wrt the unwind info. */
6254 x = cfun->machine->force_align_arg_pointer;
6255 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6256 insn = emit_insn (gen_push (x));
6257 RTX_FRAME_RELATED_P (insn) = 1;
6259 x = GEN_INT (4);
6260 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6261 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6262 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6263 REG_NOTES (insn) = x;
6266 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6267 slower on all targets. Also sdb doesn't like it. */
6269 if (frame_pointer_needed)
6271 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6272 RTX_FRAME_RELATED_P (insn) = 1;
6274 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6275 RTX_FRAME_RELATED_P (insn) = 1;
6278 allocate = frame.to_allocate;
6280 if (!frame.save_regs_using_mov)
6281 ix86_emit_save_regs ();
6282 else
6283 allocate += frame.nregs * UNITS_PER_WORD;
6285 /* When using red zone we may start register saving before allocating
6286 the stack frame saving one cycle of the prologue. */
6287 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
6288 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6289 : stack_pointer_rtx,
6290 -frame.nregs * UNITS_PER_WORD);
6292 if (allocate == 0)
6294 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6295 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6296 GEN_INT (-allocate), -1);
6297 else
6299 /* Only valid for Win32. */
6300 rtx eax = gen_rtx_REG (Pmode, 0);
6301 bool eax_live;
6302 rtx t;
6304 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
6306 if (TARGET_64BIT_MS_ABI)
6307 eax_live = false;
6308 else
6309 eax_live = ix86_eax_live_at_start_p ();
6311 if (eax_live)
6313 emit_insn (gen_push (eax));
6314 allocate -= UNITS_PER_WORD;
6317 emit_move_insn (eax, GEN_INT (allocate));
6319 if (TARGET_64BIT)
6320 insn = gen_allocate_stack_worker_64 (eax);
6321 else
6322 insn = gen_allocate_stack_worker_32 (eax);
6323 insn = emit_insn (insn);
6324 RTX_FRAME_RELATED_P (insn) = 1;
6325 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6326 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6327 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6328 t, REG_NOTES (insn));
6330 if (eax_live)
6332 if (frame_pointer_needed)
6333 t = plus_constant (hard_frame_pointer_rtx,
6334 allocate
6335 - frame.to_allocate
6336 - frame.nregs * UNITS_PER_WORD);
6337 else
6338 t = plus_constant (stack_pointer_rtx, allocate);
6339 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
6343 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
6345 if (!frame_pointer_needed || !frame.to_allocate)
6346 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6347 else
6348 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6349 -frame.nregs * UNITS_PER_WORD);
6352 pic_reg_used = false;
6353 if (pic_offset_table_rtx
6354 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6355 || current_function_profile))
6357 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6359 if (alt_pic_reg_used != INVALID_REGNUM)
6360 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
6362 pic_reg_used = true;
6365 if (pic_reg_used)
6367 if (TARGET_64BIT)
6369 if (ix86_cmodel == CM_LARGE_PIC)
6371 rtx tmp_reg = gen_rtx_REG (DImode,
6372 FIRST_REX_INT_REG + 3 /* R11 */);
6373 rtx label = gen_label_rtx ();
6374 emit_label (label);
6375 LABEL_PRESERVE_P (label) = 1;
6376 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6377 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6378 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6379 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6380 pic_offset_table_rtx, tmp_reg));
6382 else
6383 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6385 else
6386 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6389 /* Prevent function calls from being scheduled before the call to mcount.
6390 In the pic_reg_used case, make sure that the got load isn't deleted. */
6391 if (current_function_profile)
6393 if (pic_reg_used)
6394 emit_insn (gen_prologue_use (pic_offset_table_rtx));
6395 emit_insn (gen_blockage ());
6399 /* Emit code to restore saved registers using MOV insns. First register
6400 is restored from POINTER + OFFSET. */
6401 static void
6402 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6403 int maybe_eh_return)
6405 int regno;
6406 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6408 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6409 if (ix86_save_reg (regno, maybe_eh_return))
6411 /* Ensure that adjust_address won't be forced to produce pointer
6412 out of range allowed by x86-64 instruction set. */
6413 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6415 rtx r11;
6417 r11 = gen_rtx_REG (DImode, R11_REG);
6418 emit_move_insn (r11, GEN_INT (offset));
6419 emit_insn (gen_adddi3 (r11, r11, pointer));
6420 base_address = gen_rtx_MEM (Pmode, r11);
6421 offset = 0;
6423 emit_move_insn (gen_rtx_REG (Pmode, regno),
6424 adjust_address (base_address, Pmode, offset));
6425 offset += UNITS_PER_WORD;
6429 /* Restore function stack, frame, and registers. */
6431 void
6432 ix86_expand_epilogue (int style)
6434 int regno;
6435 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6436 struct ix86_frame frame;
6437 HOST_WIDE_INT offset;
6439 ix86_compute_frame_layout (&frame);
6441 /* Calculate start of saved registers relative to ebp. Special care
6442 must be taken for the normal return case of a function using
6443 eh_return: the eax and edx registers are marked as saved, but not
6444 restored along this path. */
6445 offset = frame.nregs;
6446 if (current_function_calls_eh_return && style != 2)
6447 offset -= 2;
6448 offset *= -UNITS_PER_WORD;
6450 /* If we're only restoring one register and sp is not valid then
6451 using a move instruction to restore the register since it's
6452 less work than reloading sp and popping the register.
6454 The default code result in stack adjustment using add/lea instruction,
6455 while this code results in LEAVE instruction (or discrete equivalent),
6456 so it is profitable in some other cases as well. Especially when there
6457 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6458 and there is exactly one register to pop. This heuristic may need some
6459 tuning in future. */
6460 if ((!sp_valid && frame.nregs <= 1)
6461 || (TARGET_EPILOGUE_USING_MOVE
6462 && cfun->machine->use_fast_prologue_epilogue
6463 && (frame.nregs > 1 || frame.to_allocate))
6464 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6465 || (frame_pointer_needed && TARGET_USE_LEAVE
6466 && cfun->machine->use_fast_prologue_epilogue
6467 && frame.nregs == 1)
6468 || current_function_calls_eh_return)
6470 /* Restore registers. We can use ebp or esp to address the memory
6471 locations. If both are available, default to ebp, since offsets
6472 are known to be small. Only exception is esp pointing directly to the
6473 end of block of saved registers, where we may simplify addressing
6474 mode. */
6476 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6477 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6478 frame.to_allocate, style == 2);
6479 else
6480 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6481 offset, style == 2);
6483 /* eh_return epilogues need %ecx added to the stack pointer. */
6484 if (style == 2)
6486 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6488 if (frame_pointer_needed)
6490 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6491 tmp = plus_constant (tmp, UNITS_PER_WORD);
6492 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6494 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6495 emit_move_insn (hard_frame_pointer_rtx, tmp);
6497 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6498 const0_rtx, style);
6500 else
6502 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6503 tmp = plus_constant (tmp, (frame.to_allocate
6504 + frame.nregs * UNITS_PER_WORD));
6505 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6508 else if (!frame_pointer_needed)
6509 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6510 GEN_INT (frame.to_allocate
6511 + frame.nregs * UNITS_PER_WORD),
6512 style);
6513 /* If not an i386, mov & pop is faster than "leave". */
6514 else if (TARGET_USE_LEAVE || optimize_size
6515 || !cfun->machine->use_fast_prologue_epilogue)
6516 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6517 else
6519 pro_epilogue_adjust_stack (stack_pointer_rtx,
6520 hard_frame_pointer_rtx,
6521 const0_rtx, style);
6522 if (TARGET_64BIT)
6523 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6524 else
6525 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6528 else
6530 /* First step is to deallocate the stack frame so that we can
6531 pop the registers. */
6532 if (!sp_valid)
6534 gcc_assert (frame_pointer_needed);
6535 pro_epilogue_adjust_stack (stack_pointer_rtx,
6536 hard_frame_pointer_rtx,
6537 GEN_INT (offset), style);
6539 else if (frame.to_allocate)
6540 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6541 GEN_INT (frame.to_allocate), style);
6543 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6544 if (ix86_save_reg (regno, false))
6546 if (TARGET_64BIT)
6547 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6548 else
6549 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6551 if (frame_pointer_needed)
6553 /* Leave results in shorter dependency chains on CPUs that are
6554 able to grok it fast. */
6555 if (TARGET_USE_LEAVE)
6556 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6557 else if (TARGET_64BIT)
6558 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6559 else
6560 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6564 if (cfun->machine->force_align_arg_pointer)
6566 emit_insn (gen_addsi3 (stack_pointer_rtx,
6567 cfun->machine->force_align_arg_pointer,
6568 GEN_INT (-4)));
6571 /* Sibcall epilogues don't want a return instruction. */
6572 if (style == 0)
6573 return;
6575 if (current_function_pops_args && current_function_args_size)
6577 rtx popc = GEN_INT (current_function_pops_args);
6579 /* i386 can only pop 64K bytes. If asked to pop more, pop
6580 return address, do explicit add, and jump indirectly to the
6581 caller. */
6583 if (current_function_pops_args >= 65536)
6585 rtx ecx = gen_rtx_REG (SImode, 2);
6587 /* There is no "pascal" calling convention in any 64bit ABI. */
6588 gcc_assert (!TARGET_64BIT);
6590 emit_insn (gen_popsi1 (ecx));
6591 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6592 emit_jump_insn (gen_return_indirect_internal (ecx));
6594 else
6595 emit_jump_insn (gen_return_pop_internal (popc));
6597 else
6598 emit_jump_insn (gen_return_internal ());
6601 /* Reset from the function's potential modifications. */
6603 static void
6604 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6605 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6607 if (pic_offset_table_rtx)
6608 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
6609 #if TARGET_MACHO
6610 /* Mach-O doesn't support labels at the end of objects, so if
6611 it looks like we might want one, insert a NOP. */
6613 rtx insn = get_last_insn ();
6614 while (insn
6615 && NOTE_P (insn)
6616 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6617 insn = PREV_INSN (insn);
6618 if (insn
6619 && (LABEL_P (insn)
6620 || (NOTE_P (insn)
6621 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6622 fputs ("\tnop\n", file);
6624 #endif
6628 /* Extract the parts of an RTL expression that is a valid memory address
6629 for an instruction. Return 0 if the structure of the address is
6630 grossly off. Return -1 if the address contains ASHIFT, so it is not
6631 strictly valid, but still used for computing length of lea instruction. */
6634 ix86_decompose_address (rtx addr, struct ix86_address *out)
6636 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6637 rtx base_reg, index_reg;
6638 HOST_WIDE_INT scale = 1;
6639 rtx scale_rtx = NULL_RTX;
6640 int retval = 1;
6641 enum ix86_address_seg seg = SEG_DEFAULT;
6643 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6644 base = addr;
6645 else if (GET_CODE (addr) == PLUS)
6647 rtx addends[4], op;
6648 int n = 0, i;
6650 op = addr;
6653 if (n >= 4)
6654 return 0;
6655 addends[n++] = XEXP (op, 1);
6656 op = XEXP (op, 0);
6658 while (GET_CODE (op) == PLUS);
6659 if (n >= 4)
6660 return 0;
6661 addends[n] = op;
6663 for (i = n; i >= 0; --i)
6665 op = addends[i];
6666 switch (GET_CODE (op))
6668 case MULT:
6669 if (index)
6670 return 0;
6671 index = XEXP (op, 0);
6672 scale_rtx = XEXP (op, 1);
6673 break;
6675 case UNSPEC:
6676 if (XINT (op, 1) == UNSPEC_TP
6677 && TARGET_TLS_DIRECT_SEG_REFS
6678 && seg == SEG_DEFAULT)
6679 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6680 else
6681 return 0;
6682 break;
6684 case REG:
6685 case SUBREG:
6686 if (!base)
6687 base = op;
6688 else if (!index)
6689 index = op;
6690 else
6691 return 0;
6692 break;
6694 case CONST:
6695 case CONST_INT:
6696 case SYMBOL_REF:
6697 case LABEL_REF:
6698 if (disp)
6699 return 0;
6700 disp = op;
6701 break;
6703 default:
6704 return 0;
6708 else if (GET_CODE (addr) == MULT)
6710 index = XEXP (addr, 0); /* index*scale */
6711 scale_rtx = XEXP (addr, 1);
6713 else if (GET_CODE (addr) == ASHIFT)
6715 rtx tmp;
6717 /* We're called for lea too, which implements ashift on occasion. */
6718 index = XEXP (addr, 0);
6719 tmp = XEXP (addr, 1);
6720 if (!CONST_INT_P (tmp))
6721 return 0;
6722 scale = INTVAL (tmp);
6723 if ((unsigned HOST_WIDE_INT) scale > 3)
6724 return 0;
6725 scale = 1 << scale;
6726 retval = -1;
6728 else
6729 disp = addr; /* displacement */
6731 /* Extract the integral value of scale. */
6732 if (scale_rtx)
6734 if (!CONST_INT_P (scale_rtx))
6735 return 0;
6736 scale = INTVAL (scale_rtx);
6739 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6740 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6742 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6743 if (base_reg && index_reg && scale == 1
6744 && (index_reg == arg_pointer_rtx
6745 || index_reg == frame_pointer_rtx
6746 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6748 rtx tmp;
6749 tmp = base, base = index, index = tmp;
6750 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6753 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6754 if ((base_reg == hard_frame_pointer_rtx
6755 || base_reg == frame_pointer_rtx
6756 || base_reg == arg_pointer_rtx) && !disp)
6757 disp = const0_rtx;
6759 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6760 Avoid this by transforming to [%esi+0]. */
6761 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6762 && base_reg && !index_reg && !disp
6763 && REG_P (base_reg)
6764 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6765 disp = const0_rtx;
6767 /* Special case: encode reg+reg instead of reg*2. */
6768 if (!base && index && scale && scale == 2)
6769 base = index, base_reg = index_reg, scale = 1;
6771 /* Special case: scaling cannot be encoded without base or displacement. */
6772 if (!base && !disp && index && scale != 1)
6773 disp = const0_rtx;
6775 out->base = base;
6776 out->index = index;
6777 out->disp = disp;
6778 out->scale = scale;
6779 out->seg = seg;
6781 return retval;
6784 /* Return cost of the memory address x.
6785 For i386, it is better to use a complex address than let gcc copy
6786 the address into a reg and make a new pseudo. But not if the address
6787 requires to two regs - that would mean more pseudos with longer
6788 lifetimes. */
6789 static int
6790 ix86_address_cost (rtx x)
6792 struct ix86_address parts;
6793 int cost = 1;
6794 int ok = ix86_decompose_address (x, &parts);
6796 gcc_assert (ok);
6798 if (parts.base && GET_CODE (parts.base) == SUBREG)
6799 parts.base = SUBREG_REG (parts.base);
6800 if (parts.index && GET_CODE (parts.index) == SUBREG)
6801 parts.index = SUBREG_REG (parts.index);
6803 /* Attempt to minimize number of registers in the address. */
6804 if ((parts.base
6805 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6806 || (parts.index
6807 && (!REG_P (parts.index)
6808 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6809 cost++;
6811 if (parts.base
6812 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6813 && parts.index
6814 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6815 && parts.base != parts.index)
6816 cost++;
6818 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6819 since it's predecode logic can't detect the length of instructions
6820 and it degenerates to vector decoded. Increase cost of such
6821 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6822 to split such addresses or even refuse such addresses at all.
6824 Following addressing modes are affected:
6825 [base+scale*index]
6826 [scale*index+disp]
6827 [base+index]
6829 The first and last case may be avoidable by explicitly coding the zero in
6830 memory address, but I don't have AMD-K6 machine handy to check this
6831 theory. */
6833 if (TARGET_K6
6834 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6835 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6836 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6837 cost += 10;
6839 return cost;
6842 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6843 this is used for to form addresses to local data when -fPIC is in
6844 use. */
6846 static bool
6847 darwin_local_data_pic (rtx disp)
6849 if (GET_CODE (disp) == MINUS)
6851 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6852 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6853 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6855 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6856 if (! strcmp (sym_name, "<pic base>"))
6857 return true;
6861 return false;
6864 /* Determine if a given RTX is a valid constant. We already know this
6865 satisfies CONSTANT_P. */
6867 bool
6868 legitimate_constant_p (rtx x)
6870 switch (GET_CODE (x))
6872 case CONST:
6873 x = XEXP (x, 0);
6875 if (GET_CODE (x) == PLUS)
6877 if (!CONST_INT_P (XEXP (x, 1)))
6878 return false;
6879 x = XEXP (x, 0);
6882 if (TARGET_MACHO && darwin_local_data_pic (x))
6883 return true;
6885 /* Only some unspecs are valid as "constants". */
6886 if (GET_CODE (x) == UNSPEC)
6887 switch (XINT (x, 1))
6889 case UNSPEC_GOT:
6890 case UNSPEC_GOTOFF:
6891 case UNSPEC_PLTOFF:
6892 return TARGET_64BIT;
6893 case UNSPEC_TPOFF:
6894 case UNSPEC_NTPOFF:
6895 x = XVECEXP (x, 0, 0);
6896 return (GET_CODE (x) == SYMBOL_REF
6897 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6898 case UNSPEC_DTPOFF:
6899 x = XVECEXP (x, 0, 0);
6900 return (GET_CODE (x) == SYMBOL_REF
6901 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6902 default:
6903 return false;
6906 /* We must have drilled down to a symbol. */
6907 if (GET_CODE (x) == LABEL_REF)
6908 return true;
6909 if (GET_CODE (x) != SYMBOL_REF)
6910 return false;
6911 /* FALLTHRU */
6913 case SYMBOL_REF:
6914 /* TLS symbols are never valid. */
6915 if (SYMBOL_REF_TLS_MODEL (x))
6916 return false;
6918 /* DLLIMPORT symbols are never valid. */
6919 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6920 && SYMBOL_REF_DLLIMPORT_P (x))
6921 return false;
6922 break;
6924 case CONST_DOUBLE:
6925 if (GET_MODE (x) == TImode
6926 && x != CONST0_RTX (TImode)
6927 && !TARGET_64BIT)
6928 return false;
6929 break;
6931 case CONST_VECTOR:
6932 if (x == CONST0_RTX (GET_MODE (x)))
6933 return true;
6934 return false;
6936 default:
6937 break;
6940 /* Otherwise we handle everything else in the move patterns. */
6941 return true;
6944 /* Determine if it's legal to put X into the constant pool. This
6945 is not possible for the address of thread-local symbols, which
6946 is checked above. */
6948 static bool
6949 ix86_cannot_force_const_mem (rtx x)
6951 /* We can always put integral constants and vectors in memory. */
6952 switch (GET_CODE (x))
6954 case CONST_INT:
6955 case CONST_DOUBLE:
6956 case CONST_VECTOR:
6957 return false;
6959 default:
6960 break;
6962 return !legitimate_constant_p (x);
6965 /* Determine if a given RTX is a valid constant address. */
6967 bool
6968 constant_address_p (rtx x)
6970 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6973 /* Nonzero if the constant value X is a legitimate general operand
6974 when generating PIC code. It is given that flag_pic is on and
6975 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6977 bool
6978 legitimate_pic_operand_p (rtx x)
6980 rtx inner;
6982 switch (GET_CODE (x))
6984 case CONST:
6985 inner = XEXP (x, 0);
6986 if (GET_CODE (inner) == PLUS
6987 && CONST_INT_P (XEXP (inner, 1)))
6988 inner = XEXP (inner, 0);
6990 /* Only some unspecs are valid as "constants". */
6991 if (GET_CODE (inner) == UNSPEC)
6992 switch (XINT (inner, 1))
6994 case UNSPEC_GOT:
6995 case UNSPEC_GOTOFF:
6996 case UNSPEC_PLTOFF:
6997 return TARGET_64BIT;
6998 case UNSPEC_TPOFF:
6999 x = XVECEXP (inner, 0, 0);
7000 return (GET_CODE (x) == SYMBOL_REF
7001 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
7002 default:
7003 return false;
7005 /* FALLTHRU */
7007 case SYMBOL_REF:
7008 case LABEL_REF:
7009 return legitimate_pic_address_disp_p (x);
7011 default:
7012 return true;
7016 /* Determine if a given CONST RTX is a valid memory displacement
7017 in PIC mode. */
7020 legitimate_pic_address_disp_p (rtx disp)
7022 bool saw_plus;
7024 /* In 64bit mode we can allow direct addresses of symbols and labels
7025 when they are not dynamic symbols. */
7026 if (TARGET_64BIT)
7028 rtx op0 = disp, op1;
7030 switch (GET_CODE (disp))
7032 case LABEL_REF:
7033 return true;
7035 case CONST:
7036 if (GET_CODE (XEXP (disp, 0)) != PLUS)
7037 break;
7038 op0 = XEXP (XEXP (disp, 0), 0);
7039 op1 = XEXP (XEXP (disp, 0), 1);
7040 if (!CONST_INT_P (op1)
7041 || INTVAL (op1) >= 16*1024*1024
7042 || INTVAL (op1) < -16*1024*1024)
7043 break;
7044 if (GET_CODE (op0) == LABEL_REF)
7045 return true;
7046 if (GET_CODE (op0) != SYMBOL_REF)
7047 break;
7048 /* FALLTHRU */
7050 case SYMBOL_REF:
7051 /* TLS references should always be enclosed in UNSPEC. */
7052 if (SYMBOL_REF_TLS_MODEL (op0))
7053 return false;
7054 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
7055 && ix86_cmodel != CM_LARGE_PIC)
7056 return true;
7057 break;
7059 default:
7060 break;
7063 if (GET_CODE (disp) != CONST)
7064 return 0;
7065 disp = XEXP (disp, 0);
7067 if (TARGET_64BIT)
7069 /* We are unsafe to allow PLUS expressions. This limit allowed distance
7070 of GOT tables. We should not need these anyway. */
7071 if (GET_CODE (disp) != UNSPEC
7072 || (XINT (disp, 1) != UNSPEC_GOTPCREL
7073 && XINT (disp, 1) != UNSPEC_GOTOFF
7074 && XINT (disp, 1) != UNSPEC_PLTOFF))
7075 return 0;
7077 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
7078 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
7079 return 0;
7080 return 1;
7083 saw_plus = false;
7084 if (GET_CODE (disp) == PLUS)
7086 if (!CONST_INT_P (XEXP (disp, 1)))
7087 return 0;
7088 disp = XEXP (disp, 0);
7089 saw_plus = true;
7092 if (TARGET_MACHO && darwin_local_data_pic (disp))
7093 return 1;
7095 if (GET_CODE (disp) != UNSPEC)
7096 return 0;
7098 switch (XINT (disp, 1))
7100 case UNSPEC_GOT:
7101 if (saw_plus)
7102 return false;
7103 /* We need to check for both symbols and labels because VxWorks loads
7104 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
7105 details. */
7106 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7107 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
7108 case UNSPEC_GOTOFF:
7109 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
7110 While ABI specify also 32bit relocation but we don't produce it in
7111 small PIC model at all. */
7112 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7113 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
7114 && !TARGET_64BIT)
7115 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
7116 return false;
7117 case UNSPEC_GOTTPOFF:
7118 case UNSPEC_GOTNTPOFF:
7119 case UNSPEC_INDNTPOFF:
7120 if (saw_plus)
7121 return false;
7122 disp = XVECEXP (disp, 0, 0);
7123 return (GET_CODE (disp) == SYMBOL_REF
7124 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
7125 case UNSPEC_NTPOFF:
7126 disp = XVECEXP (disp, 0, 0);
7127 return (GET_CODE (disp) == SYMBOL_REF
7128 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
7129 case UNSPEC_DTPOFF:
7130 disp = XVECEXP (disp, 0, 0);
7131 return (GET_CODE (disp) == SYMBOL_REF
7132 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
7135 return 0;
7138 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
7139 memory address for an instruction. The MODE argument is the machine mode
7140 for the MEM expression that wants to use this address.
7142 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
7143 convert common non-canonical forms to canonical form so that they will
7144 be recognized. */
7147 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
7148 rtx addr, int strict)
7150 struct ix86_address parts;
7151 rtx base, index, disp;
7152 HOST_WIDE_INT scale;
7153 const char *reason = NULL;
7154 rtx reason_rtx = NULL_RTX;
7156 if (ix86_decompose_address (addr, &parts) <= 0)
7158 reason = "decomposition failed";
7159 goto report_error;
7162 base = parts.base;
7163 index = parts.index;
7164 disp = parts.disp;
7165 scale = parts.scale;
7167 /* Validate base register.
7169 Don't allow SUBREG's that span more than a word here. It can lead to spill
7170 failures when the base is one word out of a two word structure, which is
7171 represented internally as a DImode int. */
7173 if (base)
7175 rtx reg;
7176 reason_rtx = base;
7178 if (REG_P (base))
7179 reg = base;
7180 else if (GET_CODE (base) == SUBREG
7181 && REG_P (SUBREG_REG (base))
7182 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
7183 <= UNITS_PER_WORD)
7184 reg = SUBREG_REG (base);
7185 else
7187 reason = "base is not a register";
7188 goto report_error;
7191 if (GET_MODE (base) != Pmode)
7193 reason = "base is not in Pmode";
7194 goto report_error;
7197 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
7198 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
7200 reason = "base is not valid";
7201 goto report_error;
7205 /* Validate index register.
7207 Don't allow SUBREG's that span more than a word here -- same as above. */
7209 if (index)
7211 rtx reg;
7212 reason_rtx = index;
7214 if (REG_P (index))
7215 reg = index;
7216 else if (GET_CODE (index) == SUBREG
7217 && REG_P (SUBREG_REG (index))
7218 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
7219 <= UNITS_PER_WORD)
7220 reg = SUBREG_REG (index);
7221 else
7223 reason = "index is not a register";
7224 goto report_error;
7227 if (GET_MODE (index) != Pmode)
7229 reason = "index is not in Pmode";
7230 goto report_error;
7233 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7234 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
7236 reason = "index is not valid";
7237 goto report_error;
7241 /* Validate scale factor. */
7242 if (scale != 1)
7244 reason_rtx = GEN_INT (scale);
7245 if (!index)
7247 reason = "scale without index";
7248 goto report_error;
7251 if (scale != 2 && scale != 4 && scale != 8)
7253 reason = "scale is not a valid multiplier";
7254 goto report_error;
7258 /* Validate displacement. */
7259 if (disp)
7261 reason_rtx = disp;
7263 if (GET_CODE (disp) == CONST
7264 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7265 switch (XINT (XEXP (disp, 0), 1))
7267 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7268 used. While ABI specify also 32bit relocations, we don't produce
7269 them at all and use IP relative instead. */
7270 case UNSPEC_GOT:
7271 case UNSPEC_GOTOFF:
7272 gcc_assert (flag_pic);
7273 if (!TARGET_64BIT)
7274 goto is_legitimate_pic;
7275 reason = "64bit address unspec";
7276 goto report_error;
7278 case UNSPEC_GOTPCREL:
7279 gcc_assert (flag_pic);
7280 goto is_legitimate_pic;
7282 case UNSPEC_GOTTPOFF:
7283 case UNSPEC_GOTNTPOFF:
7284 case UNSPEC_INDNTPOFF:
7285 case UNSPEC_NTPOFF:
7286 case UNSPEC_DTPOFF:
7287 break;
7289 default:
7290 reason = "invalid address unspec";
7291 goto report_error;
7294 else if (SYMBOLIC_CONST (disp)
7295 && (flag_pic
7296 || (TARGET_MACHO
7297 #if TARGET_MACHO
7298 && MACHOPIC_INDIRECT
7299 && !machopic_operand_p (disp)
7300 #endif
7304 is_legitimate_pic:
7305 if (TARGET_64BIT && (index || base))
7307 /* foo@dtpoff(%rX) is ok. */
7308 if (GET_CODE (disp) != CONST
7309 || GET_CODE (XEXP (disp, 0)) != PLUS
7310 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7311 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7312 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7313 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7315 reason = "non-constant pic memory reference";
7316 goto report_error;
7319 else if (! legitimate_pic_address_disp_p (disp))
7321 reason = "displacement is an invalid pic construct";
7322 goto report_error;
7325 /* This code used to verify that a symbolic pic displacement
7326 includes the pic_offset_table_rtx register.
7328 While this is good idea, unfortunately these constructs may
7329 be created by "adds using lea" optimization for incorrect
7330 code like:
7332 int a;
7333 int foo(int i)
7335 return *(&a+i);
7338 This code is nonsensical, but results in addressing
7339 GOT table with pic_offset_table_rtx base. We can't
7340 just refuse it easily, since it gets matched by
7341 "addsi3" pattern, that later gets split to lea in the
7342 case output register differs from input. While this
7343 can be handled by separate addsi pattern for this case
7344 that never results in lea, this seems to be easier and
7345 correct fix for crash to disable this test. */
7347 else if (GET_CODE (disp) != LABEL_REF
7348 && !CONST_INT_P (disp)
7349 && (GET_CODE (disp) != CONST
7350 || !legitimate_constant_p (disp))
7351 && (GET_CODE (disp) != SYMBOL_REF
7352 || !legitimate_constant_p (disp)))
7354 reason = "displacement is not constant";
7355 goto report_error;
7357 else if (TARGET_64BIT
7358 && !x86_64_immediate_operand (disp, VOIDmode))
7360 reason = "displacement is out of range";
7361 goto report_error;
7365 /* Everything looks valid. */
7366 return TRUE;
7368 report_error:
7369 return FALSE;
7372 /* Return a unique alias set for the GOT. */
7374 static alias_set_type
7375 ix86_GOT_alias_set (void)
7377 static alias_set_type set = -1;
7378 if (set == -1)
7379 set = new_alias_set ();
7380 return set;
7383 /* Return a legitimate reference for ORIG (an address) using the
7384 register REG. If REG is 0, a new pseudo is generated.
7386 There are two types of references that must be handled:
7388 1. Global data references must load the address from the GOT, via
7389 the PIC reg. An insn is emitted to do this load, and the reg is
7390 returned.
7392 2. Static data references, constant pool addresses, and code labels
7393 compute the address as an offset from the GOT, whose base is in
7394 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7395 differentiate them from global data objects. The returned
7396 address is the PIC reg + an unspec constant.
7398 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7399 reg also appears in the address. */
7401 static rtx
7402 legitimize_pic_address (rtx orig, rtx reg)
7404 rtx addr = orig;
7405 rtx new_rtx = orig;
7406 rtx base;
7408 #if TARGET_MACHO
7409 if (TARGET_MACHO && !TARGET_64BIT)
7411 if (reg == 0)
7412 reg = gen_reg_rtx (Pmode);
7413 /* Use the generic Mach-O PIC machinery. */
7414 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7416 #endif
7418 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7419 new_rtx = addr;
7420 else if (TARGET_64BIT
7421 && ix86_cmodel != CM_SMALL_PIC
7422 && gotoff_operand (addr, Pmode))
7424 rtx tmpreg;
7425 /* This symbol may be referenced via a displacement from the PIC
7426 base address (@GOTOFF). */
7428 if (reload_in_progress)
7429 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7430 if (GET_CODE (addr) == CONST)
7431 addr = XEXP (addr, 0);
7432 if (GET_CODE (addr) == PLUS)
7434 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7435 UNSPEC_GOTOFF);
7436 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7438 else
7439 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7440 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7441 if (!reg)
7442 tmpreg = gen_reg_rtx (Pmode);
7443 else
7444 tmpreg = reg;
7445 emit_move_insn (tmpreg, new_rtx);
7447 if (reg != 0)
7449 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7450 tmpreg, 1, OPTAB_DIRECT);
7451 new_rtx = reg;
7453 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7455 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7457 /* This symbol may be referenced via a displacement from the PIC
7458 base address (@GOTOFF). */
7460 if (reload_in_progress)
7461 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7462 if (GET_CODE (addr) == CONST)
7463 addr = XEXP (addr, 0);
7464 if (GET_CODE (addr) == PLUS)
7466 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7467 UNSPEC_GOTOFF);
7468 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7470 else
7471 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7472 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7473 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7475 if (reg != 0)
7477 emit_move_insn (reg, new_rtx);
7478 new_rtx = reg;
7481 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7482 /* We can't use @GOTOFF for text labels on VxWorks;
7483 see gotoff_operand. */
7484 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7486 /* Given that we've already handled dllimport variables separately
7487 in legitimize_address, and all other variables should satisfy
7488 legitimate_pic_address_disp_p, we should never arrive here. */
7489 gcc_assert (!TARGET_64BIT_MS_ABI);
7491 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7493 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7494 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7495 new_rtx = gen_const_mem (Pmode, new_rtx);
7496 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7498 if (reg == 0)
7499 reg = gen_reg_rtx (Pmode);
7500 /* Use directly gen_movsi, otherwise the address is loaded
7501 into register for CSE. We don't want to CSE this addresses,
7502 instead we CSE addresses from the GOT table, so skip this. */
7503 emit_insn (gen_movsi (reg, new_rtx));
7504 new_rtx = reg;
7506 else
7508 /* This symbol must be referenced via a load from the
7509 Global Offset Table (@GOT). */
7511 if (reload_in_progress)
7512 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7513 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7514 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7515 if (TARGET_64BIT)
7516 new_rtx = force_reg (Pmode, new_rtx);
7517 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7518 new_rtx = gen_const_mem (Pmode, new_rtx);
7519 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7521 if (reg == 0)
7522 reg = gen_reg_rtx (Pmode);
7523 emit_move_insn (reg, new_rtx);
7524 new_rtx = reg;
7527 else
7529 if (CONST_INT_P (addr)
7530 && !x86_64_immediate_operand (addr, VOIDmode))
7532 if (reg)
7534 emit_move_insn (reg, addr);
7535 new_rtx = reg;
7537 else
7538 new_rtx = force_reg (Pmode, addr);
7540 else if (GET_CODE (addr) == CONST)
7542 addr = XEXP (addr, 0);
7544 /* We must match stuff we generate before. Assume the only
7545 unspecs that can get here are ours. Not that we could do
7546 anything with them anyway.... */
7547 if (GET_CODE (addr) == UNSPEC
7548 || (GET_CODE (addr) == PLUS
7549 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7550 return orig;
7551 gcc_assert (GET_CODE (addr) == PLUS);
7553 if (GET_CODE (addr) == PLUS)
7555 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7557 /* Check first to see if this is a constant offset from a @GOTOFF
7558 symbol reference. */
7559 if (gotoff_operand (op0, Pmode)
7560 && CONST_INT_P (op1))
7562 if (!TARGET_64BIT)
7564 if (reload_in_progress)
7565 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7566 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7567 UNSPEC_GOTOFF);
7568 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7569 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7570 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7572 if (reg != 0)
7574 emit_move_insn (reg, new_rtx);
7575 new_rtx = reg;
7578 else
7580 if (INTVAL (op1) < -16*1024*1024
7581 || INTVAL (op1) >= 16*1024*1024)
7583 if (!x86_64_immediate_operand (op1, Pmode))
7584 op1 = force_reg (Pmode, op1);
7585 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7589 else
7591 base = legitimize_pic_address (XEXP (addr, 0), reg);
7592 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7593 base == reg ? NULL_RTX : reg);
7595 if (CONST_INT_P (new_rtx))
7596 new_rtx = plus_constant (base, INTVAL (new_rtx));
7597 else
7599 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
7601 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7602 new_rtx = XEXP (new_rtx, 1);
7604 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
7609 return new_rtx;
7612 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7614 static rtx
7615 get_thread_pointer (int to_reg)
7617 rtx tp, reg, insn;
7619 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7620 if (!to_reg)
7621 return tp;
7623 reg = gen_reg_rtx (Pmode);
7624 insn = gen_rtx_SET (VOIDmode, reg, tp);
7625 insn = emit_insn (insn);
7627 return reg;
7630 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7631 false if we expect this to be used for a memory address and true if
7632 we expect to load the address into a register. */
7634 static rtx
7635 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7637 rtx dest, base, off, pic, tp;
7638 int type;
7640 switch (model)
7642 case TLS_MODEL_GLOBAL_DYNAMIC:
7643 dest = gen_reg_rtx (Pmode);
7644 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7646 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7648 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7650 start_sequence ();
7651 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7652 insns = get_insns ();
7653 end_sequence ();
7655 CONST_OR_PURE_CALL_P (insns) = 1;
7656 emit_libcall_block (insns, dest, rax, x);
7658 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7659 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7660 else
7661 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7663 if (TARGET_GNU2_TLS)
7665 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7667 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7669 break;
7671 case TLS_MODEL_LOCAL_DYNAMIC:
7672 base = gen_reg_rtx (Pmode);
7673 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7675 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7677 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7679 start_sequence ();
7680 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7681 insns = get_insns ();
7682 end_sequence ();
7684 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7685 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7686 CONST_OR_PURE_CALL_P (insns) = 1;
7687 emit_libcall_block (insns, base, rax, note);
7689 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7690 emit_insn (gen_tls_local_dynamic_base_64 (base));
7691 else
7692 emit_insn (gen_tls_local_dynamic_base_32 (base));
7694 if (TARGET_GNU2_TLS)
7696 rtx x = ix86_tls_module_base ();
7698 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7699 gen_rtx_MINUS (Pmode, x, tp));
7702 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7703 off = gen_rtx_CONST (Pmode, off);
7705 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7707 if (TARGET_GNU2_TLS)
7709 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7711 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7714 break;
7716 case TLS_MODEL_INITIAL_EXEC:
7717 if (TARGET_64BIT)
7719 pic = NULL;
7720 type = UNSPEC_GOTNTPOFF;
7722 else if (flag_pic)
7724 if (reload_in_progress)
7725 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7726 pic = pic_offset_table_rtx;
7727 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7729 else if (!TARGET_ANY_GNU_TLS)
7731 pic = gen_reg_rtx (Pmode);
7732 emit_insn (gen_set_got (pic));
7733 type = UNSPEC_GOTTPOFF;
7735 else
7737 pic = NULL;
7738 type = UNSPEC_INDNTPOFF;
7741 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7742 off = gen_rtx_CONST (Pmode, off);
7743 if (pic)
7744 off = gen_rtx_PLUS (Pmode, pic, off);
7745 off = gen_const_mem (Pmode, off);
7746 set_mem_alias_set (off, ix86_GOT_alias_set ());
7748 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7750 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7751 off = force_reg (Pmode, off);
7752 return gen_rtx_PLUS (Pmode, base, off);
7754 else
7756 base = get_thread_pointer (true);
7757 dest = gen_reg_rtx (Pmode);
7758 emit_insn (gen_subsi3 (dest, base, off));
7760 break;
7762 case TLS_MODEL_LOCAL_EXEC:
7763 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7764 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7765 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7766 off = gen_rtx_CONST (Pmode, off);
7768 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7770 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7771 return gen_rtx_PLUS (Pmode, base, off);
7773 else
7775 base = get_thread_pointer (true);
7776 dest = gen_reg_rtx (Pmode);
7777 emit_insn (gen_subsi3 (dest, base, off));
7779 break;
7781 default:
7782 gcc_unreachable ();
7785 return dest;
7788 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7789 to symbol DECL. */
7791 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7792 htab_t dllimport_map;
7794 static tree
7795 get_dllimport_decl (tree decl)
7797 struct tree_map *h, in;
7798 void **loc;
7799 const char *name;
7800 const char *prefix;
7801 size_t namelen, prefixlen;
7802 char *imp_name;
7803 tree to;
7804 rtx rtl;
7806 if (!dllimport_map)
7807 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7809 in.hash = htab_hash_pointer (decl);
7810 in.base.from = decl;
7811 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7812 h = (struct tree_map *) *loc;
7813 if (h)
7814 return h->to;
7816 *loc = h = GGC_NEW (struct tree_map);
7817 h->hash = in.hash;
7818 h->base.from = decl;
7819 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
7820 DECL_ARTIFICIAL (to) = 1;
7821 DECL_IGNORED_P (to) = 1;
7822 DECL_EXTERNAL (to) = 1;
7823 TREE_READONLY (to) = 1;
7825 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7826 name = targetm.strip_name_encoding (name);
7827 prefix = name[0] == FASTCALL_PREFIX ? "*__imp_": "*__imp__";
7828 namelen = strlen (name);
7829 prefixlen = strlen (prefix);
7830 imp_name = (char *) alloca (namelen + prefixlen + 1);
7831 memcpy (imp_name, prefix, prefixlen);
7832 memcpy (imp_name + prefixlen, name, namelen + 1);
7834 name = ggc_alloc_string (imp_name, namelen + prefixlen);
7835 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
7836 SET_SYMBOL_REF_DECL (rtl, to);
7837 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
7839 rtl = gen_const_mem (Pmode, rtl);
7840 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
7842 SET_DECL_RTL (to, rtl);
7843 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
7845 return to;
7848 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7849 true if we require the result be a register. */
7851 static rtx
7852 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7854 tree imp_decl;
7855 rtx x;
7857 gcc_assert (SYMBOL_REF_DECL (symbol));
7858 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
7860 x = DECL_RTL (imp_decl);
7861 if (want_reg)
7862 x = force_reg (Pmode, x);
7863 return x;
7866 /* Try machine-dependent ways of modifying an illegitimate address
7867 to be legitimate. If we find one, return the new, valid address.
7868 This macro is used in only one place: `memory_address' in explow.c.
7870 OLDX is the address as it was before break_out_memory_refs was called.
7871 In some cases it is useful to look at this to decide what needs to be done.
7873 MODE and WIN are passed so that this macro can use
7874 GO_IF_LEGITIMATE_ADDRESS.
7876 It is always safe for this macro to do nothing. It exists to recognize
7877 opportunities to optimize the output.
7879 For the 80386, we handle X+REG by loading X into a register R and
7880 using R+REG. R will go in a general reg and indexing will be used.
7881 However, if REG is a broken-out memory address or multiplication,
7882 nothing needs to be done because REG can certainly go in a general reg.
7884 When -fpic is used, special handling is needed for symbolic references.
7885 See comments by legitimize_pic_address in i386.c for details. */
7888 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7890 int changed = 0;
7891 unsigned log;
7893 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7894 if (log)
7895 return legitimize_tls_address (x, (enum tls_model) log, false);
7896 if (GET_CODE (x) == CONST
7897 && GET_CODE (XEXP (x, 0)) == PLUS
7898 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7899 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7901 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
7902 (enum tls_model) log, false);
7903 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7906 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7908 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
7909 return legitimize_dllimport_symbol (x, true);
7910 if (GET_CODE (x) == CONST
7911 && GET_CODE (XEXP (x, 0)) == PLUS
7912 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7913 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
7915 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
7916 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7920 if (flag_pic && SYMBOLIC_CONST (x))
7921 return legitimize_pic_address (x, 0);
7923 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7924 if (GET_CODE (x) == ASHIFT
7925 && CONST_INT_P (XEXP (x, 1))
7926 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7928 changed = 1;
7929 log = INTVAL (XEXP (x, 1));
7930 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7931 GEN_INT (1 << log));
7934 if (GET_CODE (x) == PLUS)
7936 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7938 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7939 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7940 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7942 changed = 1;
7943 log = INTVAL (XEXP (XEXP (x, 0), 1));
7944 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7945 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7946 GEN_INT (1 << log));
7949 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7950 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7951 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7953 changed = 1;
7954 log = INTVAL (XEXP (XEXP (x, 1), 1));
7955 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7956 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7957 GEN_INT (1 << log));
7960 /* Put multiply first if it isn't already. */
7961 if (GET_CODE (XEXP (x, 1)) == MULT)
7963 rtx tmp = XEXP (x, 0);
7964 XEXP (x, 0) = XEXP (x, 1);
7965 XEXP (x, 1) = tmp;
7966 changed = 1;
7969 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7970 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7971 created by virtual register instantiation, register elimination, and
7972 similar optimizations. */
7973 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7975 changed = 1;
7976 x = gen_rtx_PLUS (Pmode,
7977 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7978 XEXP (XEXP (x, 1), 0)),
7979 XEXP (XEXP (x, 1), 1));
7982 /* Canonicalize
7983 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7984 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7985 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7986 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7987 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7988 && CONSTANT_P (XEXP (x, 1)))
7990 rtx constant;
7991 rtx other = NULL_RTX;
7993 if (CONST_INT_P (XEXP (x, 1)))
7995 constant = XEXP (x, 1);
7996 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7998 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
8000 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
8001 other = XEXP (x, 1);
8003 else
8004 constant = 0;
8006 if (constant)
8008 changed = 1;
8009 x = gen_rtx_PLUS (Pmode,
8010 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
8011 XEXP (XEXP (XEXP (x, 0), 1), 0)),
8012 plus_constant (other, INTVAL (constant)));
8016 if (changed && legitimate_address_p (mode, x, FALSE))
8017 return x;
8019 if (GET_CODE (XEXP (x, 0)) == MULT)
8021 changed = 1;
8022 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
8025 if (GET_CODE (XEXP (x, 1)) == MULT)
8027 changed = 1;
8028 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
8031 if (changed
8032 && REG_P (XEXP (x, 1))
8033 && REG_P (XEXP (x, 0)))
8034 return x;
8036 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
8038 changed = 1;
8039 x = legitimize_pic_address (x, 0);
8042 if (changed && legitimate_address_p (mode, x, FALSE))
8043 return x;
8045 if (REG_P (XEXP (x, 0)))
8047 rtx temp = gen_reg_rtx (Pmode);
8048 rtx val = force_operand (XEXP (x, 1), temp);
8049 if (val != temp)
8050 emit_move_insn (temp, val);
8052 XEXP (x, 1) = temp;
8053 return x;
8056 else if (REG_P (XEXP (x, 1)))
8058 rtx temp = gen_reg_rtx (Pmode);
8059 rtx val = force_operand (XEXP (x, 0), temp);
8060 if (val != temp)
8061 emit_move_insn (temp, val);
8063 XEXP (x, 0) = temp;
8064 return x;
8068 return x;
8071 /* Print an integer constant expression in assembler syntax. Addition
8072 and subtraction are the only arithmetic that may appear in these
8073 expressions. FILE is the stdio stream to write to, X is the rtx, and
8074 CODE is the operand print code from the output string. */
8076 static void
8077 output_pic_addr_const (FILE *file, rtx x, int code)
8079 char buf[256];
8081 switch (GET_CODE (x))
8083 case PC:
8084 gcc_assert (flag_pic);
8085 putc ('.', file);
8086 break;
8088 case SYMBOL_REF:
8089 if (! TARGET_MACHO || TARGET_64BIT)
8090 output_addr_const (file, x);
8091 else
8093 const char *name = XSTR (x, 0);
8095 /* Mark the decl as referenced so that cgraph will
8096 output the function. */
8097 if (SYMBOL_REF_DECL (x))
8098 mark_decl_referenced (SYMBOL_REF_DECL (x));
8100 #if TARGET_MACHO
8101 if (MACHOPIC_INDIRECT
8102 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
8103 name = machopic_indirection_name (x, /*stub_p=*/true);
8104 #endif
8105 assemble_name (file, name);
8107 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
8108 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
8109 fputs ("@PLT", file);
8110 break;
8112 case LABEL_REF:
8113 x = XEXP (x, 0);
8114 /* FALLTHRU */
8115 case CODE_LABEL:
8116 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
8117 assemble_name (asm_out_file, buf);
8118 break;
8120 case CONST_INT:
8121 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8122 break;
8124 case CONST:
8125 /* This used to output parentheses around the expression,
8126 but that does not work on the 386 (either ATT or BSD assembler). */
8127 output_pic_addr_const (file, XEXP (x, 0), code);
8128 break;
8130 case CONST_DOUBLE:
8131 if (GET_MODE (x) == VOIDmode)
8133 /* We can use %d if the number is <32 bits and positive. */
8134 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
8135 fprintf (file, "0x%lx%08lx",
8136 (unsigned long) CONST_DOUBLE_HIGH (x),
8137 (unsigned long) CONST_DOUBLE_LOW (x));
8138 else
8139 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
8141 else
8142 /* We can't handle floating point constants;
8143 PRINT_OPERAND must handle them. */
8144 output_operand_lossage ("floating constant misused");
8145 break;
8147 case PLUS:
8148 /* Some assemblers need integer constants to appear first. */
8149 if (CONST_INT_P (XEXP (x, 0)))
8151 output_pic_addr_const (file, XEXP (x, 0), code);
8152 putc ('+', file);
8153 output_pic_addr_const (file, XEXP (x, 1), code);
8155 else
8157 gcc_assert (CONST_INT_P (XEXP (x, 1)));
8158 output_pic_addr_const (file, XEXP (x, 1), code);
8159 putc ('+', file);
8160 output_pic_addr_const (file, XEXP (x, 0), code);
8162 break;
8164 case MINUS:
8165 if (!TARGET_MACHO)
8166 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
8167 output_pic_addr_const (file, XEXP (x, 0), code);
8168 putc ('-', file);
8169 output_pic_addr_const (file, XEXP (x, 1), code);
8170 if (!TARGET_MACHO)
8171 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
8172 break;
8174 case UNSPEC:
8175 gcc_assert (XVECLEN (x, 0) == 1);
8176 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
8177 switch (XINT (x, 1))
8179 case UNSPEC_GOT:
8180 fputs ("@GOT", file);
8181 break;
8182 case UNSPEC_GOTOFF:
8183 fputs ("@GOTOFF", file);
8184 break;
8185 case UNSPEC_PLTOFF:
8186 fputs ("@PLTOFF", file);
8187 break;
8188 case UNSPEC_GOTPCREL:
8189 fputs ("@GOTPCREL(%rip)", file);
8190 break;
8191 case UNSPEC_GOTTPOFF:
8192 /* FIXME: This might be @TPOFF in Sun ld too. */
8193 fputs ("@GOTTPOFF", file);
8194 break;
8195 case UNSPEC_TPOFF:
8196 fputs ("@TPOFF", file);
8197 break;
8198 case UNSPEC_NTPOFF:
8199 if (TARGET_64BIT)
8200 fputs ("@TPOFF", file);
8201 else
8202 fputs ("@NTPOFF", file);
8203 break;
8204 case UNSPEC_DTPOFF:
8205 fputs ("@DTPOFF", file);
8206 break;
8207 case UNSPEC_GOTNTPOFF:
8208 if (TARGET_64BIT)
8209 fputs ("@GOTTPOFF(%rip)", file);
8210 else
8211 fputs ("@GOTNTPOFF", file);
8212 break;
8213 case UNSPEC_INDNTPOFF:
8214 fputs ("@INDNTPOFF", file);
8215 break;
8216 default:
8217 output_operand_lossage ("invalid UNSPEC as operand");
8218 break;
8220 break;
8222 default:
8223 output_operand_lossage ("invalid expression as operand");
8227 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8228 We need to emit DTP-relative relocations. */
8230 static void ATTRIBUTE_UNUSED
8231 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
8233 fputs (ASM_LONG, file);
8234 output_addr_const (file, x);
8235 fputs ("@DTPOFF", file);
8236 switch (size)
8238 case 4:
8239 break;
8240 case 8:
8241 fputs (", 0", file);
8242 break;
8243 default:
8244 gcc_unreachable ();
8248 /* In the name of slightly smaller debug output, and to cater to
8249 general assembler lossage, recognize PIC+GOTOFF and turn it back
8250 into a direct symbol reference.
8252 On Darwin, this is necessary to avoid a crash, because Darwin
8253 has a different PIC label for each routine but the DWARF debugging
8254 information is not associated with any particular routine, so it's
8255 necessary to remove references to the PIC label from RTL stored by
8256 the DWARF output code. */
8258 static rtx
8259 ix86_delegitimize_address (rtx orig_x)
8261 rtx x = orig_x;
8262 /* reg_addend is NULL or a multiple of some register. */
8263 rtx reg_addend = NULL_RTX;
8264 /* const_addend is NULL or a const_int. */
8265 rtx const_addend = NULL_RTX;
8266 /* This is the result, or NULL. */
8267 rtx result = NULL_RTX;
8269 if (MEM_P (x))
8270 x = XEXP (x, 0);
8272 if (TARGET_64BIT)
8274 if (GET_CODE (x) != CONST
8275 || GET_CODE (XEXP (x, 0)) != UNSPEC
8276 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8277 || !MEM_P (orig_x))
8278 return orig_x;
8279 return XVECEXP (XEXP (x, 0), 0, 0);
8282 if (GET_CODE (x) != PLUS
8283 || GET_CODE (XEXP (x, 1)) != CONST)
8284 return orig_x;
8286 if (REG_P (XEXP (x, 0))
8287 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8288 /* %ebx + GOT/GOTOFF */
8290 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8292 /* %ebx + %reg * scale + GOT/GOTOFF */
8293 reg_addend = XEXP (x, 0);
8294 if (REG_P (XEXP (reg_addend, 0))
8295 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8296 reg_addend = XEXP (reg_addend, 1);
8297 else if (REG_P (XEXP (reg_addend, 1))
8298 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8299 reg_addend = XEXP (reg_addend, 0);
8300 else
8301 return orig_x;
8302 if (!REG_P (reg_addend)
8303 && GET_CODE (reg_addend) != MULT
8304 && GET_CODE (reg_addend) != ASHIFT)
8305 return orig_x;
8307 else
8308 return orig_x;
8310 x = XEXP (XEXP (x, 1), 0);
8311 if (GET_CODE (x) == PLUS
8312 && CONST_INT_P (XEXP (x, 1)))
8314 const_addend = XEXP (x, 1);
8315 x = XEXP (x, 0);
8318 if (GET_CODE (x) == UNSPEC
8319 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8320 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8321 result = XVECEXP (x, 0, 0);
8323 if (TARGET_MACHO && darwin_local_data_pic (x)
8324 && !MEM_P (orig_x))
8325 result = XEXP (x, 0);
8327 if (! result)
8328 return orig_x;
8330 if (const_addend)
8331 result = gen_rtx_PLUS (Pmode, result, const_addend);
8332 if (reg_addend)
8333 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8334 return result;
8337 /* If X is a machine specific address (i.e. a symbol or label being
8338 referenced as a displacement from the GOT implemented using an
8339 UNSPEC), then return the base term. Otherwise return X. */
8342 ix86_find_base_term (rtx x)
8344 rtx term;
8346 if (TARGET_64BIT)
8348 if (GET_CODE (x) != CONST)
8349 return x;
8350 term = XEXP (x, 0);
8351 if (GET_CODE (term) == PLUS
8352 && (CONST_INT_P (XEXP (term, 1))
8353 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8354 term = XEXP (term, 0);
8355 if (GET_CODE (term) != UNSPEC
8356 || XINT (term, 1) != UNSPEC_GOTPCREL)
8357 return x;
8359 term = XVECEXP (term, 0, 0);
8361 if (GET_CODE (term) != SYMBOL_REF
8362 && GET_CODE (term) != LABEL_REF)
8363 return x;
8365 return term;
8368 term = ix86_delegitimize_address (x);
8370 if (GET_CODE (term) != SYMBOL_REF
8371 && GET_CODE (term) != LABEL_REF)
8372 return x;
8374 return term;
8377 static void
8378 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8379 int fp, FILE *file)
8381 const char *suffix;
8383 if (mode == CCFPmode || mode == CCFPUmode)
8385 enum rtx_code second_code, bypass_code;
8386 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8387 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8388 code = ix86_fp_compare_code_to_integer (code);
8389 mode = CCmode;
8391 if (reverse)
8392 code = reverse_condition (code);
8394 switch (code)
8396 case EQ:
8397 switch (mode)
8399 case CCAmode:
8400 suffix = "a";
8401 break;
8403 case CCCmode:
8404 suffix = "c";
8405 break;
8407 case CCOmode:
8408 suffix = "o";
8409 break;
8411 case CCSmode:
8412 suffix = "s";
8413 break;
8415 default:
8416 suffix = "e";
8418 break;
8419 case NE:
8420 switch (mode)
8422 case CCAmode:
8423 suffix = "na";
8424 break;
8426 case CCCmode:
8427 suffix = "nc";
8428 break;
8430 case CCOmode:
8431 suffix = "no";
8432 break;
8434 case CCSmode:
8435 suffix = "ns";
8436 break;
8438 default:
8439 suffix = "ne";
8441 break;
8442 case GT:
8443 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8444 suffix = "g";
8445 break;
8446 case GTU:
8447 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8448 Those same assemblers have the same but opposite lossage on cmov. */
8449 if (mode == CCmode)
8450 suffix = fp ? "nbe" : "a";
8451 else if (mode == CCCmode)
8452 suffix = "b";
8453 else
8454 gcc_unreachable ();
8455 break;
8456 case LT:
8457 switch (mode)
8459 case CCNOmode:
8460 case CCGOCmode:
8461 suffix = "s";
8462 break;
8464 case CCmode:
8465 case CCGCmode:
8466 suffix = "l";
8467 break;
8469 default:
8470 gcc_unreachable ();
8472 break;
8473 case LTU:
8474 gcc_assert (mode == CCmode || mode == CCCmode);
8475 suffix = "b";
8476 break;
8477 case GE:
8478 switch (mode)
8480 case CCNOmode:
8481 case CCGOCmode:
8482 suffix = "ns";
8483 break;
8485 case CCmode:
8486 case CCGCmode:
8487 suffix = "ge";
8488 break;
8490 default:
8491 gcc_unreachable ();
8493 break;
8494 case GEU:
8495 /* ??? As above. */
8496 gcc_assert (mode == CCmode || mode == CCCmode);
8497 suffix = fp ? "nb" : "ae";
8498 break;
8499 case LE:
8500 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8501 suffix = "le";
8502 break;
8503 case LEU:
8504 /* ??? As above. */
8505 if (mode == CCmode)
8506 suffix = "be";
8507 else if (mode == CCCmode)
8508 suffix = fp ? "nb" : "ae";
8509 else
8510 gcc_unreachable ();
8511 break;
8512 case UNORDERED:
8513 suffix = fp ? "u" : "p";
8514 break;
8515 case ORDERED:
8516 suffix = fp ? "nu" : "np";
8517 break;
8518 default:
8519 gcc_unreachable ();
8521 fputs (suffix, file);
8524 /* Print the name of register X to FILE based on its machine mode and number.
8525 If CODE is 'w', pretend the mode is HImode.
8526 If CODE is 'b', pretend the mode is QImode.
8527 If CODE is 'k', pretend the mode is SImode.
8528 If CODE is 'q', pretend the mode is DImode.
8529 If CODE is 'h', pretend the reg is the 'high' byte register.
8530 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8532 void
8533 print_reg (rtx x, int code, FILE *file)
8535 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
8536 && REGNO (x) != FRAME_POINTER_REGNUM
8537 && REGNO (x) != FLAGS_REG
8538 && REGNO (x) != FPSR_REG
8539 && REGNO (x) != FPCR_REG);
8541 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
8542 putc ('%', file);
8544 if (code == 'w' || MMX_REG_P (x))
8545 code = 2;
8546 else if (code == 'b')
8547 code = 1;
8548 else if (code == 'k')
8549 code = 4;
8550 else if (code == 'q')
8551 code = 8;
8552 else if (code == 'y')
8553 code = 3;
8554 else if (code == 'h')
8555 code = 0;
8556 else
8557 code = GET_MODE_SIZE (GET_MODE (x));
8559 /* Irritatingly, AMD extended registers use different naming convention
8560 from the normal registers. */
8561 if (REX_INT_REG_P (x))
8563 gcc_assert (TARGET_64BIT);
8564 switch (code)
8566 case 0:
8567 error ("extended registers have no high halves");
8568 break;
8569 case 1:
8570 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8571 break;
8572 case 2:
8573 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8574 break;
8575 case 4:
8576 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8577 break;
8578 case 8:
8579 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8580 break;
8581 default:
8582 error ("unsupported operand size for extended register");
8583 break;
8585 return;
8587 switch (code)
8589 case 3:
8590 if (STACK_TOP_P (x))
8592 fputs ("st(0)", file);
8593 break;
8595 /* FALLTHRU */
8596 case 8:
8597 case 4:
8598 case 12:
8599 if (! ANY_FP_REG_P (x))
8600 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8601 /* FALLTHRU */
8602 case 16:
8603 case 2:
8604 normal:
8605 fputs (hi_reg_name[REGNO (x)], file);
8606 break;
8607 case 1:
8608 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8609 goto normal;
8610 fputs (qi_reg_name[REGNO (x)], file);
8611 break;
8612 case 0:
8613 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8614 goto normal;
8615 fputs (qi_high_reg_name[REGNO (x)], file);
8616 break;
8617 default:
8618 gcc_unreachable ();
8622 /* Locate some local-dynamic symbol still in use by this function
8623 so that we can print its name in some tls_local_dynamic_base
8624 pattern. */
8626 static int
8627 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8629 rtx x = *px;
8631 if (GET_CODE (x) == SYMBOL_REF
8632 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8634 cfun->machine->some_ld_name = XSTR (x, 0);
8635 return 1;
8638 return 0;
8641 static const char *
8642 get_some_local_dynamic_name (void)
8644 rtx insn;
8646 if (cfun->machine->some_ld_name)
8647 return cfun->machine->some_ld_name;
8649 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8650 if (INSN_P (insn)
8651 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8652 return cfun->machine->some_ld_name;
8654 gcc_unreachable ();
8657 /* Meaning of CODE:
8658 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8659 C -- print opcode suffix for set/cmov insn.
8660 c -- like C, but print reversed condition
8661 F,f -- likewise, but for floating-point.
8662 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8663 otherwise nothing
8664 R -- print the prefix for register names.
8665 z -- print the opcode suffix for the size of the current operand.
8666 * -- print a star (in certain assembler syntax)
8667 A -- print an absolute memory reference.
8668 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8669 s -- print a shift double count, followed by the assemblers argument
8670 delimiter.
8671 b -- print the QImode name of the register for the indicated operand.
8672 %b0 would print %al if operands[0] is reg 0.
8673 w -- likewise, print the HImode name of the register.
8674 k -- likewise, print the SImode name of the register.
8675 q -- likewise, print the DImode name of the register.
8676 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8677 y -- print "st(0)" instead of "st" as a register.
8678 D -- print condition for SSE cmp instruction.
8679 P -- if PIC, print an @PLT suffix.
8680 X -- don't print any sort of PIC '@' suffix for a symbol.
8681 & -- print some in-use local-dynamic symbol name.
8682 H -- print a memory address offset by 8; used for sse high-parts
8683 Y -- print condition for SSE5 com* instruction.
8684 + -- print a branch hint as 'cs' or 'ds' prefix
8685 ; -- print a semicolon (after prefixes due to bug in older gas).
8688 void
8689 print_operand (FILE *file, rtx x, int code)
8691 if (code)
8693 switch (code)
8695 case '*':
8696 if (ASSEMBLER_DIALECT == ASM_ATT)
8697 putc ('*', file);
8698 return;
8700 case '&':
8701 assemble_name (file, get_some_local_dynamic_name ());
8702 return;
8704 case 'A':
8705 switch (ASSEMBLER_DIALECT)
8707 case ASM_ATT:
8708 putc ('*', file);
8709 break;
8711 case ASM_INTEL:
8712 /* Intel syntax. For absolute addresses, registers should not
8713 be surrounded by braces. */
8714 if (!REG_P (x))
8716 putc ('[', file);
8717 PRINT_OPERAND (file, x, 0);
8718 putc (']', file);
8719 return;
8721 break;
8723 default:
8724 gcc_unreachable ();
8727 PRINT_OPERAND (file, x, 0);
8728 return;
8731 case 'L':
8732 if (ASSEMBLER_DIALECT == ASM_ATT)
8733 putc ('l', file);
8734 return;
8736 case 'W':
8737 if (ASSEMBLER_DIALECT == ASM_ATT)
8738 putc ('w', file);
8739 return;
8741 case 'B':
8742 if (ASSEMBLER_DIALECT == ASM_ATT)
8743 putc ('b', file);
8744 return;
8746 case 'Q':
8747 if (ASSEMBLER_DIALECT == ASM_ATT)
8748 putc ('l', file);
8749 return;
8751 case 'S':
8752 if (ASSEMBLER_DIALECT == ASM_ATT)
8753 putc ('s', file);
8754 return;
8756 case 'T':
8757 if (ASSEMBLER_DIALECT == ASM_ATT)
8758 putc ('t', file);
8759 return;
8761 case 'z':
8762 /* 387 opcodes don't get size suffixes if the operands are
8763 registers. */
8764 if (STACK_REG_P (x))
8765 return;
8767 /* Likewise if using Intel opcodes. */
8768 if (ASSEMBLER_DIALECT == ASM_INTEL)
8769 return;
8771 /* This is the size of op from size of operand. */
8772 switch (GET_MODE_SIZE (GET_MODE (x)))
8774 case 1:
8775 putc ('b', file);
8776 return;
8778 case 2:
8779 if (MEM_P (x))
8781 #ifdef HAVE_GAS_FILDS_FISTS
8782 putc ('s', file);
8783 #endif
8784 return;
8786 else
8787 putc ('w', file);
8788 return;
8790 case 4:
8791 if (GET_MODE (x) == SFmode)
8793 putc ('s', file);
8794 return;
8796 else
8797 putc ('l', file);
8798 return;
8800 case 12:
8801 case 16:
8802 putc ('t', file);
8803 return;
8805 case 8:
8806 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8808 #ifdef GAS_MNEMONICS
8809 putc ('q', file);
8810 #else
8811 putc ('l', file);
8812 putc ('l', file);
8813 #endif
8815 else
8816 putc ('l', file);
8817 return;
8819 default:
8820 gcc_unreachable ();
8823 case 'b':
8824 case 'w':
8825 case 'k':
8826 case 'q':
8827 case 'h':
8828 case 'y':
8829 case 'X':
8830 case 'P':
8831 break;
8833 case 's':
8834 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8836 PRINT_OPERAND (file, x, 0);
8837 putc (',', file);
8839 return;
8841 case 'D':
8842 /* Little bit of braindamage here. The SSE compare instructions
8843 does use completely different names for the comparisons that the
8844 fp conditional moves. */
8845 switch (GET_CODE (x))
8847 case EQ:
8848 case UNEQ:
8849 fputs ("eq", file);
8850 break;
8851 case LT:
8852 case UNLT:
8853 fputs ("lt", file);
8854 break;
8855 case LE:
8856 case UNLE:
8857 fputs ("le", file);
8858 break;
8859 case UNORDERED:
8860 fputs ("unord", file);
8861 break;
8862 case NE:
8863 case LTGT:
8864 fputs ("neq", file);
8865 break;
8866 case UNGE:
8867 case GE:
8868 fputs ("nlt", file);
8869 break;
8870 case UNGT:
8871 case GT:
8872 fputs ("nle", file);
8873 break;
8874 case ORDERED:
8875 fputs ("ord", file);
8876 break;
8877 default:
8878 gcc_unreachable ();
8880 return;
8881 case 'O':
8882 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8883 if (ASSEMBLER_DIALECT == ASM_ATT)
8885 switch (GET_MODE (x))
8887 case HImode: putc ('w', file); break;
8888 case SImode:
8889 case SFmode: putc ('l', file); break;
8890 case DImode:
8891 case DFmode: putc ('q', file); break;
8892 default: gcc_unreachable ();
8894 putc ('.', file);
8896 #endif
8897 return;
8898 case 'C':
8899 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8900 return;
8901 case 'F':
8902 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8903 if (ASSEMBLER_DIALECT == ASM_ATT)
8904 putc ('.', file);
8905 #endif
8906 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8907 return;
8909 /* Like above, but reverse condition */
8910 case 'c':
8911 /* Check to see if argument to %c is really a constant
8912 and not a condition code which needs to be reversed. */
8913 if (!COMPARISON_P (x))
8915 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8916 return;
8918 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8919 return;
8920 case 'f':
8921 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8922 if (ASSEMBLER_DIALECT == ASM_ATT)
8923 putc ('.', file);
8924 #endif
8925 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8926 return;
8928 case 'H':
8929 /* It doesn't actually matter what mode we use here, as we're
8930 only going to use this for printing. */
8931 x = adjust_address_nv (x, DImode, 8);
8932 break;
8934 case '+':
8936 rtx x;
8938 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8939 return;
8941 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8942 if (x)
8944 int pred_val = INTVAL (XEXP (x, 0));
8946 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8947 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8949 int taken = pred_val > REG_BR_PROB_BASE / 2;
8950 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8952 /* Emit hints only in the case default branch prediction
8953 heuristics would fail. */
8954 if (taken != cputaken)
8956 /* We use 3e (DS) prefix for taken branches and
8957 2e (CS) prefix for not taken branches. */
8958 if (taken)
8959 fputs ("ds ; ", file);
8960 else
8961 fputs ("cs ; ", file);
8965 return;
8968 case 'Y':
8969 switch (GET_CODE (x))
8971 case NE:
8972 fputs ("neq", file);
8973 break;
8974 case EQ:
8975 fputs ("eq", file);
8976 break;
8977 case GE:
8978 case GEU:
8979 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
8980 break;
8981 case GT:
8982 case GTU:
8983 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
8984 break;
8985 case LE:
8986 case LEU:
8987 fputs ("le", file);
8988 break;
8989 case LT:
8990 case LTU:
8991 fputs ("lt", file);
8992 break;
8993 case UNORDERED:
8994 fputs ("unord", file);
8995 break;
8996 case ORDERED:
8997 fputs ("ord", file);
8998 break;
8999 case UNEQ:
9000 fputs ("ueq", file);
9001 break;
9002 case UNGE:
9003 fputs ("nlt", file);
9004 break;
9005 case UNGT:
9006 fputs ("nle", file);
9007 break;
9008 case UNLE:
9009 fputs ("ule", file);
9010 break;
9011 case UNLT:
9012 fputs ("ult", file);
9013 break;
9014 case LTGT:
9015 fputs ("une", file);
9016 break;
9017 default:
9018 gcc_unreachable ();
9020 return;
9022 case ';':
9023 #if TARGET_MACHO
9024 fputs (" ; ", file);
9025 #else
9026 fputc (' ', file);
9027 #endif
9028 return;
9030 default:
9031 output_operand_lossage ("invalid operand code '%c'", code);
9035 if (REG_P (x))
9036 print_reg (x, code, file);
9038 else if (MEM_P (x))
9040 /* No `byte ptr' prefix for call instructions. */
9041 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
9043 const char * size;
9044 switch (GET_MODE_SIZE (GET_MODE (x)))
9046 case 1: size = "BYTE"; break;
9047 case 2: size = "WORD"; break;
9048 case 4: size = "DWORD"; break;
9049 case 8: size = "QWORD"; break;
9050 case 12: size = "XWORD"; break;
9051 case 16: size = "XMMWORD"; break;
9052 default:
9053 gcc_unreachable ();
9056 /* Check for explicit size override (codes 'b', 'w' and 'k') */
9057 if (code == 'b')
9058 size = "BYTE";
9059 else if (code == 'w')
9060 size = "WORD";
9061 else if (code == 'k')
9062 size = "DWORD";
9064 fputs (size, file);
9065 fputs (" PTR ", file);
9068 x = XEXP (x, 0);
9069 /* Avoid (%rip) for call operands. */
9070 if (CONSTANT_ADDRESS_P (x) && code == 'P'
9071 && !CONST_INT_P (x))
9072 output_addr_const (file, x);
9073 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
9074 output_operand_lossage ("invalid constraints for operand");
9075 else
9076 output_address (x);
9079 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
9081 REAL_VALUE_TYPE r;
9082 long l;
9084 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9085 REAL_VALUE_TO_TARGET_SINGLE (r, l);
9087 if (ASSEMBLER_DIALECT == ASM_ATT)
9088 putc ('$', file);
9089 fprintf (file, "0x%08lx", l);
9092 /* These float cases don't actually occur as immediate operands. */
9093 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
9095 char dstr[30];
9097 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9098 fprintf (file, "%s", dstr);
9101 else if (GET_CODE (x) == CONST_DOUBLE
9102 && GET_MODE (x) == XFmode)
9104 char dstr[30];
9106 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9107 fprintf (file, "%s", dstr);
9110 else
9112 /* We have patterns that allow zero sets of memory, for instance.
9113 In 64-bit mode, we should probably support all 8-byte vectors,
9114 since we can in fact encode that into an immediate. */
9115 if (GET_CODE (x) == CONST_VECTOR)
9117 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
9118 x = const0_rtx;
9121 if (code != 'P')
9123 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
9125 if (ASSEMBLER_DIALECT == ASM_ATT)
9126 putc ('$', file);
9128 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
9129 || GET_CODE (x) == LABEL_REF)
9131 if (ASSEMBLER_DIALECT == ASM_ATT)
9132 putc ('$', file);
9133 else
9134 fputs ("OFFSET FLAT:", file);
9137 if (CONST_INT_P (x))
9138 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9139 else if (flag_pic)
9140 output_pic_addr_const (file, x, code);
9141 else
9142 output_addr_const (file, x);
9146 /* Print a memory operand whose address is ADDR. */
9148 void
9149 print_operand_address (FILE *file, rtx addr)
9151 struct ix86_address parts;
9152 rtx base, index, disp;
9153 int scale;
9154 int ok = ix86_decompose_address (addr, &parts);
9156 gcc_assert (ok);
9158 base = parts.base;
9159 index = parts.index;
9160 disp = parts.disp;
9161 scale = parts.scale;
9163 switch (parts.seg)
9165 case SEG_DEFAULT:
9166 break;
9167 case SEG_FS:
9168 case SEG_GS:
9169 if (USER_LABEL_PREFIX[0] == 0)
9170 putc ('%', file);
9171 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
9172 break;
9173 default:
9174 gcc_unreachable ();
9177 if (!base && !index)
9179 /* Displacement only requires special attention. */
9181 if (CONST_INT_P (disp))
9183 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
9185 if (USER_LABEL_PREFIX[0] == 0)
9186 putc ('%', file);
9187 fputs ("ds:", file);
9189 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
9191 else if (flag_pic)
9192 output_pic_addr_const (file, disp, 0);
9193 else
9194 output_addr_const (file, disp);
9196 /* Use one byte shorter RIP relative addressing for 64bit mode. */
9197 if (TARGET_64BIT)
9199 if (GET_CODE (disp) == CONST
9200 && GET_CODE (XEXP (disp, 0)) == PLUS
9201 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9202 disp = XEXP (XEXP (disp, 0), 0);
9203 if (GET_CODE (disp) == LABEL_REF
9204 || (GET_CODE (disp) == SYMBOL_REF
9205 && SYMBOL_REF_TLS_MODEL (disp) == 0))
9206 fputs ("(%rip)", file);
9209 else
9211 if (ASSEMBLER_DIALECT == ASM_ATT)
9213 if (disp)
9215 if (flag_pic)
9216 output_pic_addr_const (file, disp, 0);
9217 else if (GET_CODE (disp) == LABEL_REF)
9218 output_asm_label (disp);
9219 else
9220 output_addr_const (file, disp);
9223 putc ('(', file);
9224 if (base)
9225 print_reg (base, 0, file);
9226 if (index)
9228 putc (',', file);
9229 print_reg (index, 0, file);
9230 if (scale != 1)
9231 fprintf (file, ",%d", scale);
9233 putc (')', file);
9235 else
9237 rtx offset = NULL_RTX;
9239 if (disp)
9241 /* Pull out the offset of a symbol; print any symbol itself. */
9242 if (GET_CODE (disp) == CONST
9243 && GET_CODE (XEXP (disp, 0)) == PLUS
9244 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9246 offset = XEXP (XEXP (disp, 0), 1);
9247 disp = gen_rtx_CONST (VOIDmode,
9248 XEXP (XEXP (disp, 0), 0));
9251 if (flag_pic)
9252 output_pic_addr_const (file, disp, 0);
9253 else if (GET_CODE (disp) == LABEL_REF)
9254 output_asm_label (disp);
9255 else if (CONST_INT_P (disp))
9256 offset = disp;
9257 else
9258 output_addr_const (file, disp);
9261 putc ('[', file);
9262 if (base)
9264 print_reg (base, 0, file);
9265 if (offset)
9267 if (INTVAL (offset) >= 0)
9268 putc ('+', file);
9269 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9272 else if (offset)
9273 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9274 else
9275 putc ('0', file);
9277 if (index)
9279 putc ('+', file);
9280 print_reg (index, 0, file);
9281 if (scale != 1)
9282 fprintf (file, "*%d", scale);
9284 putc (']', file);
9289 bool
9290 output_addr_const_extra (FILE *file, rtx x)
9292 rtx op;
9294 if (GET_CODE (x) != UNSPEC)
9295 return false;
9297 op = XVECEXP (x, 0, 0);
9298 switch (XINT (x, 1))
9300 case UNSPEC_GOTTPOFF:
9301 output_addr_const (file, op);
9302 /* FIXME: This might be @TPOFF in Sun ld. */
9303 fputs ("@GOTTPOFF", file);
9304 break;
9305 case UNSPEC_TPOFF:
9306 output_addr_const (file, op);
9307 fputs ("@TPOFF", file);
9308 break;
9309 case UNSPEC_NTPOFF:
9310 output_addr_const (file, op);
9311 if (TARGET_64BIT)
9312 fputs ("@TPOFF", file);
9313 else
9314 fputs ("@NTPOFF", file);
9315 break;
9316 case UNSPEC_DTPOFF:
9317 output_addr_const (file, op);
9318 fputs ("@DTPOFF", file);
9319 break;
9320 case UNSPEC_GOTNTPOFF:
9321 output_addr_const (file, op);
9322 if (TARGET_64BIT)
9323 fputs ("@GOTTPOFF(%rip)", file);
9324 else
9325 fputs ("@GOTNTPOFF", file);
9326 break;
9327 case UNSPEC_INDNTPOFF:
9328 output_addr_const (file, op);
9329 fputs ("@INDNTPOFF", file);
9330 break;
9332 default:
9333 return false;
9336 return true;
9339 /* Split one or more DImode RTL references into pairs of SImode
9340 references. The RTL can be REG, offsettable MEM, integer constant, or
9341 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9342 split and "num" is its length. lo_half and hi_half are output arrays
9343 that parallel "operands". */
9345 void
9346 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9348 while (num--)
9350 rtx op = operands[num];
9352 /* simplify_subreg refuse to split volatile memory addresses,
9353 but we still have to handle it. */
9354 if (MEM_P (op))
9356 lo_half[num] = adjust_address (op, SImode, 0);
9357 hi_half[num] = adjust_address (op, SImode, 4);
9359 else
9361 lo_half[num] = simplify_gen_subreg (SImode, op,
9362 GET_MODE (op) == VOIDmode
9363 ? DImode : GET_MODE (op), 0);
9364 hi_half[num] = simplify_gen_subreg (SImode, op,
9365 GET_MODE (op) == VOIDmode
9366 ? DImode : GET_MODE (op), 4);
9370 /* Split one or more TImode RTL references into pairs of DImode
9371 references. The RTL can be REG, offsettable MEM, integer constant, or
9372 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9373 split and "num" is its length. lo_half and hi_half are output arrays
9374 that parallel "operands". */
9376 void
9377 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9379 while (num--)
9381 rtx op = operands[num];
9383 /* simplify_subreg refuse to split volatile memory addresses, but we
9384 still have to handle it. */
9385 if (MEM_P (op))
9387 lo_half[num] = adjust_address (op, DImode, 0);
9388 hi_half[num] = adjust_address (op, DImode, 8);
9390 else
9392 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9393 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9398 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9399 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9400 is the expression of the binary operation. The output may either be
9401 emitted here, or returned to the caller, like all output_* functions.
9403 There is no guarantee that the operands are the same mode, as they
9404 might be within FLOAT or FLOAT_EXTEND expressions. */
9406 #ifndef SYSV386_COMPAT
9407 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9408 wants to fix the assemblers because that causes incompatibility
9409 with gcc. No-one wants to fix gcc because that causes
9410 incompatibility with assemblers... You can use the option of
9411 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9412 #define SYSV386_COMPAT 1
9413 #endif
9415 const char *
9416 output_387_binary_op (rtx insn, rtx *operands)
9418 static char buf[30];
9419 const char *p;
9420 const char *ssep;
9421 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9423 #ifdef ENABLE_CHECKING
9424 /* Even if we do not want to check the inputs, this documents input
9425 constraints. Which helps in understanding the following code. */
9426 if (STACK_REG_P (operands[0])
9427 && ((REG_P (operands[1])
9428 && REGNO (operands[0]) == REGNO (operands[1])
9429 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9430 || (REG_P (operands[2])
9431 && REGNO (operands[0]) == REGNO (operands[2])
9432 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9433 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9434 ; /* ok */
9435 else
9436 gcc_assert (is_sse);
9437 #endif
9439 switch (GET_CODE (operands[3]))
9441 case PLUS:
9442 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9443 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9444 p = "fiadd";
9445 else
9446 p = "fadd";
9447 ssep = "add";
9448 break;
9450 case MINUS:
9451 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9452 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9453 p = "fisub";
9454 else
9455 p = "fsub";
9456 ssep = "sub";
9457 break;
9459 case MULT:
9460 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9461 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9462 p = "fimul";
9463 else
9464 p = "fmul";
9465 ssep = "mul";
9466 break;
9468 case DIV:
9469 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9470 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9471 p = "fidiv";
9472 else
9473 p = "fdiv";
9474 ssep = "div";
9475 break;
9477 default:
9478 gcc_unreachable ();
9481 if (is_sse)
9483 strcpy (buf, ssep);
9484 if (GET_MODE (operands[0]) == SFmode)
9485 strcat (buf, "ss\t{%2, %0|%0, %2}");
9486 else
9487 strcat (buf, "sd\t{%2, %0|%0, %2}");
9488 return buf;
9490 strcpy (buf, p);
9492 switch (GET_CODE (operands[3]))
9494 case MULT:
9495 case PLUS:
9496 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9498 rtx temp = operands[2];
9499 operands[2] = operands[1];
9500 operands[1] = temp;
9503 /* know operands[0] == operands[1]. */
9505 if (MEM_P (operands[2]))
9507 p = "%z2\t%2";
9508 break;
9511 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9513 if (STACK_TOP_P (operands[0]))
9514 /* How is it that we are storing to a dead operand[2]?
9515 Well, presumably operands[1] is dead too. We can't
9516 store the result to st(0) as st(0) gets popped on this
9517 instruction. Instead store to operands[2] (which I
9518 think has to be st(1)). st(1) will be popped later.
9519 gcc <= 2.8.1 didn't have this check and generated
9520 assembly code that the Unixware assembler rejected. */
9521 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9522 else
9523 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9524 break;
9527 if (STACK_TOP_P (operands[0]))
9528 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9529 else
9530 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9531 break;
9533 case MINUS:
9534 case DIV:
9535 if (MEM_P (operands[1]))
9537 p = "r%z1\t%1";
9538 break;
9541 if (MEM_P (operands[2]))
9543 p = "%z2\t%2";
9544 break;
9547 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9549 #if SYSV386_COMPAT
9550 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9551 derived assemblers, confusingly reverse the direction of
9552 the operation for fsub{r} and fdiv{r} when the
9553 destination register is not st(0). The Intel assembler
9554 doesn't have this brain damage. Read !SYSV386_COMPAT to
9555 figure out what the hardware really does. */
9556 if (STACK_TOP_P (operands[0]))
9557 p = "{p\t%0, %2|rp\t%2, %0}";
9558 else
9559 p = "{rp\t%2, %0|p\t%0, %2}";
9560 #else
9561 if (STACK_TOP_P (operands[0]))
9562 /* As above for fmul/fadd, we can't store to st(0). */
9563 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9564 else
9565 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9566 #endif
9567 break;
9570 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9572 #if SYSV386_COMPAT
9573 if (STACK_TOP_P (operands[0]))
9574 p = "{rp\t%0, %1|p\t%1, %0}";
9575 else
9576 p = "{p\t%1, %0|rp\t%0, %1}";
9577 #else
9578 if (STACK_TOP_P (operands[0]))
9579 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9580 else
9581 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9582 #endif
9583 break;
9586 if (STACK_TOP_P (operands[0]))
9588 if (STACK_TOP_P (operands[1]))
9589 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9590 else
9591 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9592 break;
9594 else if (STACK_TOP_P (operands[1]))
9596 #if SYSV386_COMPAT
9597 p = "{\t%1, %0|r\t%0, %1}";
9598 #else
9599 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9600 #endif
9602 else
9604 #if SYSV386_COMPAT
9605 p = "{r\t%2, %0|\t%0, %2}";
9606 #else
9607 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9608 #endif
9610 break;
9612 default:
9613 gcc_unreachable ();
9616 strcat (buf, p);
9617 return buf;
9620 /* Return needed mode for entity in optimize_mode_switching pass. */
9623 ix86_mode_needed (int entity, rtx insn)
9625 enum attr_i387_cw mode;
9627 /* The mode UNINITIALIZED is used to store control word after a
9628 function call or ASM pattern. The mode ANY specify that function
9629 has no requirements on the control word and make no changes in the
9630 bits we are interested in. */
9632 if (CALL_P (insn)
9633 || (NONJUMP_INSN_P (insn)
9634 && (asm_noperands (PATTERN (insn)) >= 0
9635 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9636 return I387_CW_UNINITIALIZED;
9638 if (recog_memoized (insn) < 0)
9639 return I387_CW_ANY;
9641 mode = get_attr_i387_cw (insn);
9643 switch (entity)
9645 case I387_TRUNC:
9646 if (mode == I387_CW_TRUNC)
9647 return mode;
9648 break;
9650 case I387_FLOOR:
9651 if (mode == I387_CW_FLOOR)
9652 return mode;
9653 break;
9655 case I387_CEIL:
9656 if (mode == I387_CW_CEIL)
9657 return mode;
9658 break;
9660 case I387_MASK_PM:
9661 if (mode == I387_CW_MASK_PM)
9662 return mode;
9663 break;
9665 default:
9666 gcc_unreachable ();
9669 return I387_CW_ANY;
9672 /* Output code to initialize control word copies used by trunc?f?i and
9673 rounding patterns. CURRENT_MODE is set to current control word,
9674 while NEW_MODE is set to new control word. */
9676 void
9677 emit_i387_cw_initialization (int mode)
9679 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9680 rtx new_mode;
9682 enum ix86_stack_slot slot;
9684 rtx reg = gen_reg_rtx (HImode);
9686 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9687 emit_move_insn (reg, copy_rtx (stored_mode));
9689 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9691 switch (mode)
9693 case I387_CW_TRUNC:
9694 /* round toward zero (truncate) */
9695 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9696 slot = SLOT_CW_TRUNC;
9697 break;
9699 case I387_CW_FLOOR:
9700 /* round down toward -oo */
9701 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9702 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9703 slot = SLOT_CW_FLOOR;
9704 break;
9706 case I387_CW_CEIL:
9707 /* round up toward +oo */
9708 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9709 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9710 slot = SLOT_CW_CEIL;
9711 break;
9713 case I387_CW_MASK_PM:
9714 /* mask precision exception for nearbyint() */
9715 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9716 slot = SLOT_CW_MASK_PM;
9717 break;
9719 default:
9720 gcc_unreachable ();
9723 else
9725 switch (mode)
9727 case I387_CW_TRUNC:
9728 /* round toward zero (truncate) */
9729 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9730 slot = SLOT_CW_TRUNC;
9731 break;
9733 case I387_CW_FLOOR:
9734 /* round down toward -oo */
9735 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9736 slot = SLOT_CW_FLOOR;
9737 break;
9739 case I387_CW_CEIL:
9740 /* round up toward +oo */
9741 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9742 slot = SLOT_CW_CEIL;
9743 break;
9745 case I387_CW_MASK_PM:
9746 /* mask precision exception for nearbyint() */
9747 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9748 slot = SLOT_CW_MASK_PM;
9749 break;
9751 default:
9752 gcc_unreachable ();
9756 gcc_assert (slot < MAX_386_STACK_LOCALS);
9758 new_mode = assign_386_stack_local (HImode, slot);
9759 emit_move_insn (new_mode, reg);
9762 /* Output code for INSN to convert a float to a signed int. OPERANDS
9763 are the insn operands. The output may be [HSD]Imode and the input
9764 operand may be [SDX]Fmode. */
9766 const char *
9767 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9769 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9770 int dimode_p = GET_MODE (operands[0]) == DImode;
9771 int round_mode = get_attr_i387_cw (insn);
9773 /* Jump through a hoop or two for DImode, since the hardware has no
9774 non-popping instruction. We used to do this a different way, but
9775 that was somewhat fragile and broke with post-reload splitters. */
9776 if ((dimode_p || fisttp) && !stack_top_dies)
9777 output_asm_insn ("fld\t%y1", operands);
9779 gcc_assert (STACK_TOP_P (operands[1]));
9780 gcc_assert (MEM_P (operands[0]));
9781 gcc_assert (GET_MODE (operands[1]) != TFmode);
9783 if (fisttp)
9784 output_asm_insn ("fisttp%z0\t%0", operands);
9785 else
9787 if (round_mode != I387_CW_ANY)
9788 output_asm_insn ("fldcw\t%3", operands);
9789 if (stack_top_dies || dimode_p)
9790 output_asm_insn ("fistp%z0\t%0", operands);
9791 else
9792 output_asm_insn ("fist%z0\t%0", operands);
9793 if (round_mode != I387_CW_ANY)
9794 output_asm_insn ("fldcw\t%2", operands);
9797 return "";
9800 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9801 have the values zero or one, indicates the ffreep insn's operand
9802 from the OPERANDS array. */
9804 static const char *
9805 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9807 if (TARGET_USE_FFREEP)
9808 #if HAVE_AS_IX86_FFREEP
9809 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9810 #else
9812 static char retval[] = ".word\t0xc_df";
9813 int regno = REGNO (operands[opno]);
9815 gcc_assert (FP_REGNO_P (regno));
9817 retval[9] = '0' + (regno - FIRST_STACK_REG);
9818 return retval;
9820 #endif
9822 return opno ? "fstp\t%y1" : "fstp\t%y0";
9826 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9827 should be used. UNORDERED_P is true when fucom should be used. */
9829 const char *
9830 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9832 int stack_top_dies;
9833 rtx cmp_op0, cmp_op1;
9834 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9836 if (eflags_p)
9838 cmp_op0 = operands[0];
9839 cmp_op1 = operands[1];
9841 else
9843 cmp_op0 = operands[1];
9844 cmp_op1 = operands[2];
9847 if (is_sse)
9849 if (GET_MODE (operands[0]) == SFmode)
9850 if (unordered_p)
9851 return "ucomiss\t{%1, %0|%0, %1}";
9852 else
9853 return "comiss\t{%1, %0|%0, %1}";
9854 else
9855 if (unordered_p)
9856 return "ucomisd\t{%1, %0|%0, %1}";
9857 else
9858 return "comisd\t{%1, %0|%0, %1}";
9861 gcc_assert (STACK_TOP_P (cmp_op0));
9863 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9865 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9867 if (stack_top_dies)
9869 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9870 return output_387_ffreep (operands, 1);
9872 else
9873 return "ftst\n\tfnstsw\t%0";
9876 if (STACK_REG_P (cmp_op1)
9877 && stack_top_dies
9878 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9879 && REGNO (cmp_op1) != FIRST_STACK_REG)
9881 /* If both the top of the 387 stack dies, and the other operand
9882 is also a stack register that dies, then this must be a
9883 `fcompp' float compare */
9885 if (eflags_p)
9887 /* There is no double popping fcomi variant. Fortunately,
9888 eflags is immune from the fstp's cc clobbering. */
9889 if (unordered_p)
9890 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9891 else
9892 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9893 return output_387_ffreep (operands, 0);
9895 else
9897 if (unordered_p)
9898 return "fucompp\n\tfnstsw\t%0";
9899 else
9900 return "fcompp\n\tfnstsw\t%0";
9903 else
9905 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9907 static const char * const alt[16] =
9909 "fcom%z2\t%y2\n\tfnstsw\t%0",
9910 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9911 "fucom%z2\t%y2\n\tfnstsw\t%0",
9912 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9914 "ficom%z2\t%y2\n\tfnstsw\t%0",
9915 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9916 NULL,
9917 NULL,
9919 "fcomi\t{%y1, %0|%0, %y1}",
9920 "fcomip\t{%y1, %0|%0, %y1}",
9921 "fucomi\t{%y1, %0|%0, %y1}",
9922 "fucomip\t{%y1, %0|%0, %y1}",
9924 NULL,
9925 NULL,
9926 NULL,
9927 NULL
9930 int mask;
9931 const char *ret;
9933 mask = eflags_p << 3;
9934 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9935 mask |= unordered_p << 1;
9936 mask |= stack_top_dies;
9938 gcc_assert (mask < 16);
9939 ret = alt[mask];
9940 gcc_assert (ret);
9942 return ret;
9946 void
9947 ix86_output_addr_vec_elt (FILE *file, int value)
9949 const char *directive = ASM_LONG;
9951 #ifdef ASM_QUAD
9952 if (TARGET_64BIT)
9953 directive = ASM_QUAD;
9954 #else
9955 gcc_assert (!TARGET_64BIT);
9956 #endif
9958 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9961 void
9962 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9964 const char *directive = ASM_LONG;
9966 #ifdef ASM_QUAD
9967 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9968 directive = ASM_QUAD;
9969 #else
9970 gcc_assert (!TARGET_64BIT);
9971 #endif
9972 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9973 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
9974 fprintf (file, "%s%s%d-%s%d\n",
9975 directive, LPREFIX, value, LPREFIX, rel);
9976 else if (HAVE_AS_GOTOFF_IN_DATA)
9977 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9978 #if TARGET_MACHO
9979 else if (TARGET_MACHO)
9981 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9982 machopic_output_function_base_name (file);
9983 fprintf(file, "\n");
9985 #endif
9986 else
9987 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9988 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9991 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9992 for the target. */
9994 void
9995 ix86_expand_clear (rtx dest)
9997 rtx tmp;
9999 /* We play register width games, which are only valid after reload. */
10000 gcc_assert (reload_completed);
10002 /* Avoid HImode and its attendant prefix byte. */
10003 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
10004 dest = gen_rtx_REG (SImode, REGNO (dest));
10005 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
10007 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
10008 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
10010 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10011 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
10014 emit_insn (tmp);
10017 /* X is an unchanging MEM. If it is a constant pool reference, return
10018 the constant pool rtx, else NULL. */
10021 maybe_get_pool_constant (rtx x)
10023 x = ix86_delegitimize_address (XEXP (x, 0));
10025 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
10026 return get_pool_constant (x);
10028 return NULL_RTX;
10031 void
10032 ix86_expand_move (enum machine_mode mode, rtx operands[])
10034 int strict = (reload_in_progress || reload_completed);
10035 rtx op0, op1;
10036 enum tls_model model;
10038 op0 = operands[0];
10039 op1 = operands[1];
10041 if (GET_CODE (op1) == SYMBOL_REF)
10043 model = SYMBOL_REF_TLS_MODEL (op1);
10044 if (model)
10046 op1 = legitimize_tls_address (op1, model, true);
10047 op1 = force_operand (op1, op0);
10048 if (op1 == op0)
10049 return;
10051 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10052 && SYMBOL_REF_DLLIMPORT_P (op1))
10053 op1 = legitimize_dllimport_symbol (op1, false);
10055 else if (GET_CODE (op1) == CONST
10056 && GET_CODE (XEXP (op1, 0)) == PLUS
10057 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
10059 rtx addend = XEXP (XEXP (op1, 0), 1);
10060 rtx symbol = XEXP (XEXP (op1, 0), 0);
10061 rtx tmp = NULL;
10063 model = SYMBOL_REF_TLS_MODEL (symbol);
10064 if (model)
10065 tmp = legitimize_tls_address (symbol, model, true);
10066 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10067 && SYMBOL_REF_DLLIMPORT_P (symbol))
10068 tmp = legitimize_dllimport_symbol (symbol, true);
10070 if (tmp)
10072 tmp = force_operand (tmp, NULL);
10073 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
10074 op0, 1, OPTAB_DIRECT);
10075 if (tmp == op0)
10076 return;
10080 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
10082 if (TARGET_MACHO && !TARGET_64BIT)
10084 #if TARGET_MACHO
10085 if (MACHOPIC_PURE)
10087 rtx temp = ((reload_in_progress
10088 || ((op0 && REG_P (op0))
10089 && mode == Pmode))
10090 ? op0 : gen_reg_rtx (Pmode));
10091 op1 = machopic_indirect_data_reference (op1, temp);
10092 op1 = machopic_legitimize_pic_address (op1, mode,
10093 temp == op1 ? 0 : temp);
10095 else if (MACHOPIC_INDIRECT)
10096 op1 = machopic_indirect_data_reference (op1, 0);
10097 if (op0 == op1)
10098 return;
10099 #endif
10101 else
10103 if (MEM_P (op0))
10104 op1 = force_reg (Pmode, op1);
10105 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
10107 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
10108 op1 = legitimize_pic_address (op1, reg);
10109 if (op0 == op1)
10110 return;
10114 else
10116 if (MEM_P (op0)
10117 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
10118 || !push_operand (op0, mode))
10119 && MEM_P (op1))
10120 op1 = force_reg (mode, op1);
10122 if (push_operand (op0, mode)
10123 && ! general_no_elim_operand (op1, mode))
10124 op1 = copy_to_mode_reg (mode, op1);
10126 /* Force large constants in 64bit compilation into register
10127 to get them CSEed. */
10128 if (TARGET_64BIT && mode == DImode
10129 && immediate_operand (op1, mode)
10130 && !x86_64_zext_immediate_operand (op1, VOIDmode)
10131 && !register_operand (op0, mode)
10132 && optimize && !reload_completed && !reload_in_progress)
10133 op1 = copy_to_mode_reg (mode, op1);
10135 if (FLOAT_MODE_P (mode))
10137 /* If we are loading a floating point constant to a register,
10138 force the value to memory now, since we'll get better code
10139 out the back end. */
10141 if (strict)
10143 else if (GET_CODE (op1) == CONST_DOUBLE)
10145 op1 = validize_mem (force_const_mem (mode, op1));
10146 if (!register_operand (op0, mode))
10148 rtx temp = gen_reg_rtx (mode);
10149 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
10150 emit_move_insn (op0, temp);
10151 return;
10157 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10160 void
10161 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
10163 rtx op0 = operands[0], op1 = operands[1];
10164 unsigned int align = GET_MODE_ALIGNMENT (mode);
10166 /* Force constants other than zero into memory. We do not know how
10167 the instructions used to build constants modify the upper 64 bits
10168 of the register, once we have that information we may be able
10169 to handle some of them more efficiently. */
10170 if ((reload_in_progress | reload_completed) == 0
10171 && register_operand (op0, mode)
10172 && (CONSTANT_P (op1)
10173 || (GET_CODE (op1) == SUBREG
10174 && CONSTANT_P (SUBREG_REG (op1))))
10175 && standard_sse_constant_p (op1) <= 0)
10176 op1 = validize_mem (force_const_mem (mode, op1));
10178 /* TDmode values are passed as TImode on the stack. Timode values
10179 are moved via xmm registers, and moving them to stack can result in
10180 unaligned memory access. Use ix86_expand_vector_move_misalign()
10181 if memory operand is not aligned correctly. */
10182 if (can_create_pseudo_p ()
10183 && (mode == TImode) && !TARGET_64BIT
10184 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
10185 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
10187 rtx tmp[2];
10189 /* ix86_expand_vector_move_misalign() does not like constants ... */
10190 if (CONSTANT_P (op1)
10191 || (GET_CODE (op1) == SUBREG
10192 && CONSTANT_P (SUBREG_REG (op1))))
10193 op1 = validize_mem (force_const_mem (mode, op1));
10195 /* ... nor both arguments in memory. */
10196 if (!register_operand (op0, mode)
10197 && !register_operand (op1, mode))
10198 op1 = force_reg (mode, op1);
10200 tmp[0] = op0; tmp[1] = op1;
10201 ix86_expand_vector_move_misalign (mode, tmp);
10202 return;
10205 /* Make operand1 a register if it isn't already. */
10206 if (can_create_pseudo_p ()
10207 && !register_operand (op0, mode)
10208 && !register_operand (op1, mode))
10210 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
10211 return;
10214 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10217 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
10218 straight to ix86_expand_vector_move. */
10219 /* Code generation for scalar reg-reg moves of single and double precision data:
10220 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
10221 movaps reg, reg
10222 else
10223 movss reg, reg
10224 if (x86_sse_partial_reg_dependency == true)
10225 movapd reg, reg
10226 else
10227 movsd reg, reg
10229 Code generation for scalar loads of double precision data:
10230 if (x86_sse_split_regs == true)
10231 movlpd mem, reg (gas syntax)
10232 else
10233 movsd mem, reg
10235 Code generation for unaligned packed loads of single precision data
10236 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
10237 if (x86_sse_unaligned_move_optimal)
10238 movups mem, reg
10240 if (x86_sse_partial_reg_dependency == true)
10242 xorps reg, reg
10243 movlps mem, reg
10244 movhps mem+8, reg
10246 else
10248 movlps mem, reg
10249 movhps mem+8, reg
10252 Code generation for unaligned packed loads of double precision data
10253 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
10254 if (x86_sse_unaligned_move_optimal)
10255 movupd mem, reg
10257 if (x86_sse_split_regs == true)
10259 movlpd mem, reg
10260 movhpd mem+8, reg
10262 else
10264 movsd mem, reg
10265 movhpd mem+8, reg
10269 void
10270 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
10272 rtx op0, op1, m;
10274 op0 = operands[0];
10275 op1 = operands[1];
10277 if (MEM_P (op1))
10279 /* If we're optimizing for size, movups is the smallest. */
10280 if (optimize_size)
10282 op0 = gen_lowpart (V4SFmode, op0);
10283 op1 = gen_lowpart (V4SFmode, op1);
10284 emit_insn (gen_sse_movups (op0, op1));
10285 return;
10288 /* ??? If we have typed data, then it would appear that using
10289 movdqu is the only way to get unaligned data loaded with
10290 integer type. */
10291 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10293 op0 = gen_lowpart (V16QImode, op0);
10294 op1 = gen_lowpart (V16QImode, op1);
10295 emit_insn (gen_sse2_movdqu (op0, op1));
10296 return;
10299 if (TARGET_SSE2 && mode == V2DFmode)
10301 rtx zero;
10303 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10305 op0 = gen_lowpart (V2DFmode, op0);
10306 op1 = gen_lowpart (V2DFmode, op1);
10307 emit_insn (gen_sse2_movupd (op0, op1));
10308 return;
10311 /* When SSE registers are split into halves, we can avoid
10312 writing to the top half twice. */
10313 if (TARGET_SSE_SPLIT_REGS)
10315 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10316 zero = op0;
10318 else
10320 /* ??? Not sure about the best option for the Intel chips.
10321 The following would seem to satisfy; the register is
10322 entirely cleared, breaking the dependency chain. We
10323 then store to the upper half, with a dependency depth
10324 of one. A rumor has it that Intel recommends two movsd
10325 followed by an unpacklpd, but this is unconfirmed. And
10326 given that the dependency depth of the unpacklpd would
10327 still be one, I'm not sure why this would be better. */
10328 zero = CONST0_RTX (V2DFmode);
10331 m = adjust_address (op1, DFmode, 0);
10332 emit_insn (gen_sse2_loadlpd (op0, zero, m));
10333 m = adjust_address (op1, DFmode, 8);
10334 emit_insn (gen_sse2_loadhpd (op0, op0, m));
10336 else
10338 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10340 op0 = gen_lowpart (V4SFmode, op0);
10341 op1 = gen_lowpart (V4SFmode, op1);
10342 emit_insn (gen_sse_movups (op0, op1));
10343 return;
10346 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10347 emit_move_insn (op0, CONST0_RTX (mode));
10348 else
10349 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10351 if (mode != V4SFmode)
10352 op0 = gen_lowpart (V4SFmode, op0);
10353 m = adjust_address (op1, V2SFmode, 0);
10354 emit_insn (gen_sse_loadlps (op0, op0, m));
10355 m = adjust_address (op1, V2SFmode, 8);
10356 emit_insn (gen_sse_loadhps (op0, op0, m));
10359 else if (MEM_P (op0))
10361 /* If we're optimizing for size, movups is the smallest. */
10362 if (optimize_size)
10364 op0 = gen_lowpart (V4SFmode, op0);
10365 op1 = gen_lowpart (V4SFmode, op1);
10366 emit_insn (gen_sse_movups (op0, op1));
10367 return;
10370 /* ??? Similar to above, only less clear because of quote
10371 typeless stores unquote. */
10372 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10373 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10375 op0 = gen_lowpart (V16QImode, op0);
10376 op1 = gen_lowpart (V16QImode, op1);
10377 emit_insn (gen_sse2_movdqu (op0, op1));
10378 return;
10381 if (TARGET_SSE2 && mode == V2DFmode)
10383 m = adjust_address (op0, DFmode, 0);
10384 emit_insn (gen_sse2_storelpd (m, op1));
10385 m = adjust_address (op0, DFmode, 8);
10386 emit_insn (gen_sse2_storehpd (m, op1));
10388 else
10390 if (mode != V4SFmode)
10391 op1 = gen_lowpart (V4SFmode, op1);
10392 m = adjust_address (op0, V2SFmode, 0);
10393 emit_insn (gen_sse_storelps (m, op1));
10394 m = adjust_address (op0, V2SFmode, 8);
10395 emit_insn (gen_sse_storehps (m, op1));
10398 else
10399 gcc_unreachable ();
10402 /* Expand a push in MODE. This is some mode for which we do not support
10403 proper push instructions, at least from the registers that we expect
10404 the value to live in. */
10406 void
10407 ix86_expand_push (enum machine_mode mode, rtx x)
10409 rtx tmp;
10411 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10412 GEN_INT (-GET_MODE_SIZE (mode)),
10413 stack_pointer_rtx, 1, OPTAB_DIRECT);
10414 if (tmp != stack_pointer_rtx)
10415 emit_move_insn (stack_pointer_rtx, tmp);
10417 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10418 emit_move_insn (tmp, x);
10421 /* Helper function of ix86_fixup_binary_operands to canonicalize
10422 operand order. Returns true if the operands should be swapped. */
10424 static bool
10425 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10426 rtx operands[])
10428 rtx dst = operands[0];
10429 rtx src1 = operands[1];
10430 rtx src2 = operands[2];
10432 /* If the operation is not commutative, we can't do anything. */
10433 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10434 return false;
10436 /* Highest priority is that src1 should match dst. */
10437 if (rtx_equal_p (dst, src1))
10438 return false;
10439 if (rtx_equal_p (dst, src2))
10440 return true;
10442 /* Next highest priority is that immediate constants come second. */
10443 if (immediate_operand (src2, mode))
10444 return false;
10445 if (immediate_operand (src1, mode))
10446 return true;
10448 /* Lowest priority is that memory references should come second. */
10449 if (MEM_P (src2))
10450 return false;
10451 if (MEM_P (src1))
10452 return true;
10454 return false;
10458 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10459 destination to use for the operation. If different from the true
10460 destination in operands[0], a copy operation will be required. */
10463 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10464 rtx operands[])
10466 rtx dst = operands[0];
10467 rtx src1 = operands[1];
10468 rtx src2 = operands[2];
10470 /* Canonicalize operand order. */
10471 if (ix86_swap_binary_operands_p (code, mode, operands))
10473 rtx temp = src1;
10474 src1 = src2;
10475 src2 = temp;
10478 /* Both source operands cannot be in memory. */
10479 if (MEM_P (src1) && MEM_P (src2))
10481 /* Optimization: Only read from memory once. */
10482 if (rtx_equal_p (src1, src2))
10484 src2 = force_reg (mode, src2);
10485 src1 = src2;
10487 else
10488 src2 = force_reg (mode, src2);
10491 /* If the destination is memory, and we do not have matching source
10492 operands, do things in registers. */
10493 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10494 dst = gen_reg_rtx (mode);
10496 /* Source 1 cannot be a constant. */
10497 if (CONSTANT_P (src1))
10498 src1 = force_reg (mode, src1);
10500 /* Source 1 cannot be a non-matching memory. */
10501 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10502 src1 = force_reg (mode, src1);
10504 operands[1] = src1;
10505 operands[2] = src2;
10506 return dst;
10509 /* Similarly, but assume that the destination has already been
10510 set up properly. */
10512 void
10513 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10514 enum machine_mode mode, rtx operands[])
10516 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10517 gcc_assert (dst == operands[0]);
10520 /* Attempt to expand a binary operator. Make the expansion closer to the
10521 actual machine, then just general_operand, which will allow 3 separate
10522 memory references (one output, two input) in a single insn. */
10524 void
10525 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10526 rtx operands[])
10528 rtx src1, src2, dst, op, clob;
10530 dst = ix86_fixup_binary_operands (code, mode, operands);
10531 src1 = operands[1];
10532 src2 = operands[2];
10534 /* Emit the instruction. */
10536 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10537 if (reload_in_progress)
10539 /* Reload doesn't know about the flags register, and doesn't know that
10540 it doesn't want to clobber it. We can only do this with PLUS. */
10541 gcc_assert (code == PLUS);
10542 emit_insn (op);
10544 else
10546 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10547 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10550 /* Fix up the destination if needed. */
10551 if (dst != operands[0])
10552 emit_move_insn (operands[0], dst);
10555 /* Return TRUE or FALSE depending on whether the binary operator meets the
10556 appropriate constraints. */
10559 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10560 rtx operands[3])
10562 rtx dst = operands[0];
10563 rtx src1 = operands[1];
10564 rtx src2 = operands[2];
10566 /* Both source operands cannot be in memory. */
10567 if (MEM_P (src1) && MEM_P (src2))
10568 return 0;
10570 /* Canonicalize operand order for commutative operators. */
10571 if (ix86_swap_binary_operands_p (code, mode, operands))
10573 rtx temp = src1;
10574 src1 = src2;
10575 src2 = temp;
10578 /* If the destination is memory, we must have a matching source operand. */
10579 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10580 return 0;
10582 /* Source 1 cannot be a constant. */
10583 if (CONSTANT_P (src1))
10584 return 0;
10586 /* Source 1 cannot be a non-matching memory. */
10587 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10588 return 0;
10590 return 1;
10593 /* Attempt to expand a unary operator. Make the expansion closer to the
10594 actual machine, then just general_operand, which will allow 2 separate
10595 memory references (one output, one input) in a single insn. */
10597 void
10598 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10599 rtx operands[])
10601 int matching_memory;
10602 rtx src, dst, op, clob;
10604 dst = operands[0];
10605 src = operands[1];
10607 /* If the destination is memory, and we do not have matching source
10608 operands, do things in registers. */
10609 matching_memory = 0;
10610 if (MEM_P (dst))
10612 if (rtx_equal_p (dst, src))
10613 matching_memory = 1;
10614 else
10615 dst = gen_reg_rtx (mode);
10618 /* When source operand is memory, destination must match. */
10619 if (MEM_P (src) && !matching_memory)
10620 src = force_reg (mode, src);
10622 /* Emit the instruction. */
10624 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10625 if (reload_in_progress || code == NOT)
10627 /* Reload doesn't know about the flags register, and doesn't know that
10628 it doesn't want to clobber it. */
10629 gcc_assert (code == NOT);
10630 emit_insn (op);
10632 else
10634 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10635 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10638 /* Fix up the destination if needed. */
10639 if (dst != operands[0])
10640 emit_move_insn (operands[0], dst);
10643 /* Return TRUE or FALSE depending on whether the unary operator meets the
10644 appropriate constraints. */
10647 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10648 enum machine_mode mode ATTRIBUTE_UNUSED,
10649 rtx operands[2] ATTRIBUTE_UNUSED)
10651 /* If one of operands is memory, source and destination must match. */
10652 if ((MEM_P (operands[0])
10653 || MEM_P (operands[1]))
10654 && ! rtx_equal_p (operands[0], operands[1]))
10655 return FALSE;
10656 return TRUE;
10659 /* Post-reload splitter for converting an SF or DFmode value in an
10660 SSE register into an unsigned SImode. */
10662 void
10663 ix86_split_convert_uns_si_sse (rtx operands[])
10665 enum machine_mode vecmode;
10666 rtx value, large, zero_or_two31, input, two31, x;
10668 large = operands[1];
10669 zero_or_two31 = operands[2];
10670 input = operands[3];
10671 two31 = operands[4];
10672 vecmode = GET_MODE (large);
10673 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10675 /* Load up the value into the low element. We must ensure that the other
10676 elements are valid floats -- zero is the easiest such value. */
10677 if (MEM_P (input))
10679 if (vecmode == V4SFmode)
10680 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10681 else
10682 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10684 else
10686 input = gen_rtx_REG (vecmode, REGNO (input));
10687 emit_move_insn (value, CONST0_RTX (vecmode));
10688 if (vecmode == V4SFmode)
10689 emit_insn (gen_sse_movss (value, value, input));
10690 else
10691 emit_insn (gen_sse2_movsd (value, value, input));
10694 emit_move_insn (large, two31);
10695 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10697 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10698 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10700 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10701 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10703 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10704 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10706 large = gen_rtx_REG (V4SImode, REGNO (large));
10707 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10709 x = gen_rtx_REG (V4SImode, REGNO (value));
10710 if (vecmode == V4SFmode)
10711 emit_insn (gen_sse2_cvttps2dq (x, value));
10712 else
10713 emit_insn (gen_sse2_cvttpd2dq (x, value));
10714 value = x;
10716 emit_insn (gen_xorv4si3 (value, value, large));
10719 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10720 Expects the 64-bit DImode to be supplied in a pair of integral
10721 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10722 -mfpmath=sse, !optimize_size only. */
10724 void
10725 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10727 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10728 rtx int_xmm, fp_xmm;
10729 rtx biases, exponents;
10730 rtx x;
10732 int_xmm = gen_reg_rtx (V4SImode);
10733 if (TARGET_INTER_UNIT_MOVES)
10734 emit_insn (gen_movdi_to_sse (int_xmm, input));
10735 else if (TARGET_SSE_SPLIT_REGS)
10737 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10738 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10740 else
10742 x = gen_reg_rtx (V2DImode);
10743 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10744 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10747 x = gen_rtx_CONST_VECTOR (V4SImode,
10748 gen_rtvec (4, GEN_INT (0x43300000UL),
10749 GEN_INT (0x45300000UL),
10750 const0_rtx, const0_rtx));
10751 exponents = validize_mem (force_const_mem (V4SImode, x));
10753 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10754 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10756 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10757 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10758 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10759 (0x1.0p84 + double(fp_value_hi_xmm)).
10760 Note these exponents differ by 32. */
10762 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10764 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10765 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10766 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10767 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10768 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10769 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10770 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10771 biases = validize_mem (force_const_mem (V2DFmode, biases));
10772 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10774 /* Add the upper and lower DFmode values together. */
10775 if (TARGET_SSE3)
10776 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10777 else
10779 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10780 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10781 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10784 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10787 /* Convert an unsigned SImode value into a DFmode. Only currently used
10788 for SSE, but applicable anywhere. */
10790 void
10791 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10793 REAL_VALUE_TYPE TWO31r;
10794 rtx x, fp;
10796 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10797 NULL, 1, OPTAB_DIRECT);
10799 fp = gen_reg_rtx (DFmode);
10800 emit_insn (gen_floatsidf2 (fp, x));
10802 real_ldexp (&TWO31r, &dconst1, 31);
10803 x = const_double_from_real_value (TWO31r, DFmode);
10805 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10806 if (x != target)
10807 emit_move_insn (target, x);
10810 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10811 32-bit mode; otherwise we have a direct convert instruction. */
10813 void
10814 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10816 REAL_VALUE_TYPE TWO32r;
10817 rtx fp_lo, fp_hi, x;
10819 fp_lo = gen_reg_rtx (DFmode);
10820 fp_hi = gen_reg_rtx (DFmode);
10822 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10824 real_ldexp (&TWO32r, &dconst1, 32);
10825 x = const_double_from_real_value (TWO32r, DFmode);
10826 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10828 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10830 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10831 0, OPTAB_DIRECT);
10832 if (x != target)
10833 emit_move_insn (target, x);
10836 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10837 For x86_32, -mfpmath=sse, !optimize_size only. */
10838 void
10839 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10841 REAL_VALUE_TYPE ONE16r;
10842 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10844 real_ldexp (&ONE16r, &dconst1, 16);
10845 x = const_double_from_real_value (ONE16r, SFmode);
10846 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10847 NULL, 0, OPTAB_DIRECT);
10848 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10849 NULL, 0, OPTAB_DIRECT);
10850 fp_hi = gen_reg_rtx (SFmode);
10851 fp_lo = gen_reg_rtx (SFmode);
10852 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10853 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10854 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10855 0, OPTAB_DIRECT);
10856 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10857 0, OPTAB_DIRECT);
10858 if (!rtx_equal_p (target, fp_hi))
10859 emit_move_insn (target, fp_hi);
10862 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10863 then replicate the value for all elements of the vector
10864 register. */
10867 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10869 rtvec v;
10870 switch (mode)
10872 case SImode:
10873 gcc_assert (vect);
10874 v = gen_rtvec (4, value, value, value, value);
10875 return gen_rtx_CONST_VECTOR (V4SImode, v);
10877 case DImode:
10878 gcc_assert (vect);
10879 v = gen_rtvec (2, value, value);
10880 return gen_rtx_CONST_VECTOR (V2DImode, v);
10882 case SFmode:
10883 if (vect)
10884 v = gen_rtvec (4, value, value, value, value);
10885 else
10886 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10887 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10888 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10890 case DFmode:
10891 if (vect)
10892 v = gen_rtvec (2, value, value);
10893 else
10894 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10895 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10897 default:
10898 gcc_unreachable ();
10902 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
10903 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
10904 for an SSE register. If VECT is true, then replicate the mask for
10905 all elements of the vector register. If INVERT is true, then create
10906 a mask excluding the sign bit. */
10909 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10911 enum machine_mode vec_mode, imode;
10912 HOST_WIDE_INT hi, lo;
10913 int shift = 63;
10914 rtx v;
10915 rtx mask;
10917 /* Find the sign bit, sign extended to 2*HWI. */
10918 switch (mode)
10920 case SImode:
10921 case SFmode:
10922 imode = SImode;
10923 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
10924 lo = 0x80000000, hi = lo < 0;
10925 break;
10927 case DImode:
10928 case DFmode:
10929 imode = DImode;
10930 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
10931 if (HOST_BITS_PER_WIDE_INT >= 64)
10932 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10933 else
10934 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10935 break;
10937 case TImode:
10938 case TFmode:
10939 imode = TImode;
10940 vec_mode = VOIDmode;
10941 gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
10942 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
10943 break;
10945 default:
10946 gcc_unreachable ();
10949 if (invert)
10950 lo = ~lo, hi = ~hi;
10952 /* Force this value into the low part of a fp vector constant. */
10953 mask = immed_double_const (lo, hi, imode);
10954 mask = gen_lowpart (mode, mask);
10956 if (vec_mode == VOIDmode)
10957 return force_reg (mode, mask);
10959 v = ix86_build_const_vector (mode, vect, mask);
10960 return force_reg (vec_mode, v);
10963 /* Generate code for floating point ABS or NEG. */
10965 void
10966 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10967 rtx operands[])
10969 rtx mask, set, use, clob, dst, src;
10970 bool matching_memory;
10971 bool use_sse = false;
10972 bool vector_mode = VECTOR_MODE_P (mode);
10973 enum machine_mode elt_mode = mode;
10975 if (vector_mode)
10977 elt_mode = GET_MODE_INNER (mode);
10978 use_sse = true;
10980 else if (mode == TFmode)
10981 use_sse = true;
10982 else if (TARGET_SSE_MATH)
10983 use_sse = SSE_FLOAT_MODE_P (mode);
10985 /* NEG and ABS performed with SSE use bitwise mask operations.
10986 Create the appropriate mask now. */
10987 if (use_sse)
10988 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10989 else
10990 mask = NULL_RTX;
10992 dst = operands[0];
10993 src = operands[1];
10995 /* If the destination is memory, and we don't have matching source
10996 operands or we're using the x87, do things in registers. */
10997 matching_memory = false;
10998 if (MEM_P (dst))
11000 if (use_sse && rtx_equal_p (dst, src))
11001 matching_memory = true;
11002 else
11003 dst = gen_reg_rtx (mode);
11005 if (MEM_P (src) && !matching_memory)
11006 src = force_reg (mode, src);
11008 if (vector_mode)
11010 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
11011 set = gen_rtx_SET (VOIDmode, dst, set);
11012 emit_insn (set);
11014 else
11016 set = gen_rtx_fmt_e (code, mode, src);
11017 set = gen_rtx_SET (VOIDmode, dst, set);
11018 if (mask)
11020 use = gen_rtx_USE (VOIDmode, mask);
11021 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11022 emit_insn (gen_rtx_PARALLEL (VOIDmode,
11023 gen_rtvec (3, set, use, clob)));
11025 else
11026 emit_insn (set);
11029 if (dst != operands[0])
11030 emit_move_insn (operands[0], dst);
11033 /* Expand a copysign operation. Special case operand 0 being a constant. */
11035 void
11036 ix86_expand_copysign (rtx operands[])
11038 enum machine_mode mode, vmode;
11039 rtx dest, op0, op1, mask, nmask;
11041 dest = operands[0];
11042 op0 = operands[1];
11043 op1 = operands[2];
11045 mode = GET_MODE (dest);
11046 vmode = mode == SFmode ? V4SFmode : V2DFmode;
11048 if (GET_CODE (op0) == CONST_DOUBLE)
11050 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
11052 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
11053 op0 = simplify_unary_operation (ABS, mode, op0, mode);
11055 if (mode == SFmode || mode == DFmode)
11057 if (op0 == CONST0_RTX (mode))
11058 op0 = CONST0_RTX (vmode);
11059 else
11061 rtvec v;
11063 if (mode == SFmode)
11064 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
11065 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11066 else
11067 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
11068 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
11072 mask = ix86_build_signbit_mask (mode, 0, 0);
11074 if (mode == SFmode)
11075 copysign_insn = gen_copysignsf3_const;
11076 else if (mode == DFmode)
11077 copysign_insn = gen_copysigndf3_const;
11078 else
11079 copysign_insn = gen_copysigntf3_const;
11081 emit_insn (copysign_insn (dest, op0, op1, mask));
11083 else
11085 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
11087 nmask = ix86_build_signbit_mask (mode, 0, 1);
11088 mask = ix86_build_signbit_mask (mode, 0, 0);
11090 if (mode == SFmode)
11091 copysign_insn = gen_copysignsf3_var;
11092 else if (mode == DFmode)
11093 copysign_insn = gen_copysigndf3_var;
11094 else
11095 copysign_insn = gen_copysigntf3_var;
11097 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
11101 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
11102 be a constant, and so has already been expanded into a vector constant. */
11104 void
11105 ix86_split_copysign_const (rtx operands[])
11107 enum machine_mode mode, vmode;
11108 rtx dest, op0, op1, mask, x;
11110 dest = operands[0];
11111 op0 = operands[1];
11112 op1 = operands[2];
11113 mask = operands[3];
11115 mode = GET_MODE (dest);
11116 vmode = GET_MODE (mask);
11118 dest = simplify_gen_subreg (vmode, dest, mode, 0);
11119 x = gen_rtx_AND (vmode, dest, mask);
11120 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11122 if (op0 != CONST0_RTX (vmode))
11124 x = gen_rtx_IOR (vmode, dest, op0);
11125 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11129 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
11130 so we have to do two masks. */
11132 void
11133 ix86_split_copysign_var (rtx operands[])
11135 enum machine_mode mode, vmode;
11136 rtx dest, scratch, op0, op1, mask, nmask, x;
11138 dest = operands[0];
11139 scratch = operands[1];
11140 op0 = operands[2];
11141 op1 = operands[3];
11142 nmask = operands[4];
11143 mask = operands[5];
11145 mode = GET_MODE (dest);
11146 vmode = GET_MODE (mask);
11148 if (rtx_equal_p (op0, op1))
11150 /* Shouldn't happen often (it's useless, obviously), but when it does
11151 we'd generate incorrect code if we continue below. */
11152 emit_move_insn (dest, op0);
11153 return;
11156 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
11158 gcc_assert (REGNO (op1) == REGNO (scratch));
11160 x = gen_rtx_AND (vmode, scratch, mask);
11161 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11163 dest = mask;
11164 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11165 x = gen_rtx_NOT (vmode, dest);
11166 x = gen_rtx_AND (vmode, x, op0);
11167 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11169 else
11171 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
11173 x = gen_rtx_AND (vmode, scratch, mask);
11175 else /* alternative 2,4 */
11177 gcc_assert (REGNO (mask) == REGNO (scratch));
11178 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
11179 x = gen_rtx_AND (vmode, scratch, op1);
11181 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11183 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
11185 dest = simplify_gen_subreg (vmode, op0, mode, 0);
11186 x = gen_rtx_AND (vmode, dest, nmask);
11188 else /* alternative 3,4 */
11190 gcc_assert (REGNO (nmask) == REGNO (dest));
11191 dest = nmask;
11192 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11193 x = gen_rtx_AND (vmode, dest, op0);
11195 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11198 x = gen_rtx_IOR (vmode, dest, scratch);
11199 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11202 /* Return TRUE or FALSE depending on whether the first SET in INSN
11203 has source and destination with matching CC modes, and that the
11204 CC mode is at least as constrained as REQ_MODE. */
11207 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
11209 rtx set;
11210 enum machine_mode set_mode;
11212 set = PATTERN (insn);
11213 if (GET_CODE (set) == PARALLEL)
11214 set = XVECEXP (set, 0, 0);
11215 gcc_assert (GET_CODE (set) == SET);
11216 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
11218 set_mode = GET_MODE (SET_DEST (set));
11219 switch (set_mode)
11221 case CCNOmode:
11222 if (req_mode != CCNOmode
11223 && (req_mode != CCmode
11224 || XEXP (SET_SRC (set), 1) != const0_rtx))
11225 return 0;
11226 break;
11227 case CCmode:
11228 if (req_mode == CCGCmode)
11229 return 0;
11230 /* FALLTHRU */
11231 case CCGCmode:
11232 if (req_mode == CCGOCmode || req_mode == CCNOmode)
11233 return 0;
11234 /* FALLTHRU */
11235 case CCGOCmode:
11236 if (req_mode == CCZmode)
11237 return 0;
11238 /* FALLTHRU */
11239 case CCZmode:
11240 break;
11242 default:
11243 gcc_unreachable ();
11246 return (GET_MODE (SET_SRC (set)) == set_mode);
11249 /* Generate insn patterns to do an integer compare of OPERANDS. */
11251 static rtx
11252 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
11254 enum machine_mode cmpmode;
11255 rtx tmp, flags;
11257 cmpmode = SELECT_CC_MODE (code, op0, op1);
11258 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
11260 /* This is very simple, but making the interface the same as in the
11261 FP case makes the rest of the code easier. */
11262 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
11263 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
11265 /* Return the test that should be put into the flags user, i.e.
11266 the bcc, scc, or cmov instruction. */
11267 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
11270 /* Figure out whether to use ordered or unordered fp comparisons.
11271 Return the appropriate mode to use. */
11273 enum machine_mode
11274 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
11276 /* ??? In order to make all comparisons reversible, we do all comparisons
11277 non-trapping when compiling for IEEE. Once gcc is able to distinguish
11278 all forms trapping and nontrapping comparisons, we can make inequality
11279 comparisons trapping again, since it results in better code when using
11280 FCOM based compares. */
11281 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
11284 enum machine_mode
11285 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
11287 enum machine_mode mode = GET_MODE (op0);
11289 if (SCALAR_FLOAT_MODE_P (mode))
11291 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11292 return ix86_fp_compare_mode (code);
11295 switch (code)
11297 /* Only zero flag is needed. */
11298 case EQ: /* ZF=0 */
11299 case NE: /* ZF!=0 */
11300 return CCZmode;
11301 /* Codes needing carry flag. */
11302 case GEU: /* CF=0 */
11303 case LTU: /* CF=1 */
11304 /* Detect overflow checks. They need just the carry flag. */
11305 if (GET_CODE (op0) == PLUS
11306 && rtx_equal_p (op1, XEXP (op0, 0)))
11307 return CCCmode;
11308 else
11309 return CCmode;
11310 case GTU: /* CF=0 & ZF=0 */
11311 case LEU: /* CF=1 | ZF=1 */
11312 /* Detect overflow checks. They need just the carry flag. */
11313 if (GET_CODE (op0) == MINUS
11314 && rtx_equal_p (op1, XEXP (op0, 0)))
11315 return CCCmode;
11316 else
11317 return CCmode;
11318 /* Codes possibly doable only with sign flag when
11319 comparing against zero. */
11320 case GE: /* SF=OF or SF=0 */
11321 case LT: /* SF<>OF or SF=1 */
11322 if (op1 == const0_rtx)
11323 return CCGOCmode;
11324 else
11325 /* For other cases Carry flag is not required. */
11326 return CCGCmode;
11327 /* Codes doable only with sign flag when comparing
11328 against zero, but we miss jump instruction for it
11329 so we need to use relational tests against overflow
11330 that thus needs to be zero. */
11331 case GT: /* ZF=0 & SF=OF */
11332 case LE: /* ZF=1 | SF<>OF */
11333 if (op1 == const0_rtx)
11334 return CCNOmode;
11335 else
11336 return CCGCmode;
11337 /* strcmp pattern do (use flags) and combine may ask us for proper
11338 mode. */
11339 case USE:
11340 return CCmode;
11341 default:
11342 gcc_unreachable ();
11346 /* Return the fixed registers used for condition codes. */
11348 static bool
11349 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11351 *p1 = FLAGS_REG;
11352 *p2 = FPSR_REG;
11353 return true;
11356 /* If two condition code modes are compatible, return a condition code
11357 mode which is compatible with both. Otherwise, return
11358 VOIDmode. */
11360 static enum machine_mode
11361 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
11363 if (m1 == m2)
11364 return m1;
11366 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
11367 return VOIDmode;
11369 if ((m1 == CCGCmode && m2 == CCGOCmode)
11370 || (m1 == CCGOCmode && m2 == CCGCmode))
11371 return CCGCmode;
11373 switch (m1)
11375 default:
11376 gcc_unreachable ();
11378 case CCmode:
11379 case CCGCmode:
11380 case CCGOCmode:
11381 case CCNOmode:
11382 case CCAmode:
11383 case CCCmode:
11384 case CCOmode:
11385 case CCSmode:
11386 case CCZmode:
11387 switch (m2)
11389 default:
11390 return VOIDmode;
11392 case CCmode:
11393 case CCGCmode:
11394 case CCGOCmode:
11395 case CCNOmode:
11396 case CCAmode:
11397 case CCCmode:
11398 case CCOmode:
11399 case CCSmode:
11400 case CCZmode:
11401 return CCmode;
11404 case CCFPmode:
11405 case CCFPUmode:
11406 /* These are only compatible with themselves, which we already
11407 checked above. */
11408 return VOIDmode;
11412 /* Split comparison code CODE into comparisons we can do using branch
11413 instructions. BYPASS_CODE is comparison code for branch that will
11414 branch around FIRST_CODE and SECOND_CODE. If some of branches
11415 is not required, set value to UNKNOWN.
11416 We never require more than two branches. */
11418 void
11419 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11420 enum rtx_code *first_code,
11421 enum rtx_code *second_code)
11423 *first_code = code;
11424 *bypass_code = UNKNOWN;
11425 *second_code = UNKNOWN;
11427 /* The fcomi comparison sets flags as follows:
11429 cmp ZF PF CF
11430 > 0 0 0
11431 < 0 0 1
11432 = 1 0 0
11433 un 1 1 1 */
11435 switch (code)
11437 case GT: /* GTU - CF=0 & ZF=0 */
11438 case GE: /* GEU - CF=0 */
11439 case ORDERED: /* PF=0 */
11440 case UNORDERED: /* PF=1 */
11441 case UNEQ: /* EQ - ZF=1 */
11442 case UNLT: /* LTU - CF=1 */
11443 case UNLE: /* LEU - CF=1 | ZF=1 */
11444 case LTGT: /* EQ - ZF=0 */
11445 break;
11446 case LT: /* LTU - CF=1 - fails on unordered */
11447 *first_code = UNLT;
11448 *bypass_code = UNORDERED;
11449 break;
11450 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11451 *first_code = UNLE;
11452 *bypass_code = UNORDERED;
11453 break;
11454 case EQ: /* EQ - ZF=1 - fails on unordered */
11455 *first_code = UNEQ;
11456 *bypass_code = UNORDERED;
11457 break;
11458 case NE: /* NE - ZF=0 - fails on unordered */
11459 *first_code = LTGT;
11460 *second_code = UNORDERED;
11461 break;
11462 case UNGE: /* GEU - CF=0 - fails on unordered */
11463 *first_code = GE;
11464 *second_code = UNORDERED;
11465 break;
11466 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11467 *first_code = GT;
11468 *second_code = UNORDERED;
11469 break;
11470 default:
11471 gcc_unreachable ();
11473 if (!TARGET_IEEE_FP)
11475 *second_code = UNKNOWN;
11476 *bypass_code = UNKNOWN;
11480 /* Return cost of comparison done fcom + arithmetics operations on AX.
11481 All following functions do use number of instructions as a cost metrics.
11482 In future this should be tweaked to compute bytes for optimize_size and
11483 take into account performance of various instructions on various CPUs. */
11484 static int
11485 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
11487 if (!TARGET_IEEE_FP)
11488 return 4;
11489 /* The cost of code output by ix86_expand_fp_compare. */
11490 switch (code)
11492 case UNLE:
11493 case UNLT:
11494 case LTGT:
11495 case GT:
11496 case GE:
11497 case UNORDERED:
11498 case ORDERED:
11499 case UNEQ:
11500 return 4;
11501 break;
11502 case LT:
11503 case NE:
11504 case EQ:
11505 case UNGE:
11506 return 5;
11507 break;
11508 case LE:
11509 case UNGT:
11510 return 6;
11511 break;
11512 default:
11513 gcc_unreachable ();
11517 /* Return cost of comparison done using fcomi operation.
11518 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11519 static int
11520 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11522 enum rtx_code bypass_code, first_code, second_code;
11523 /* Return arbitrarily high cost when instruction is not supported - this
11524 prevents gcc from using it. */
11525 if (!TARGET_CMOVE)
11526 return 1024;
11527 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11528 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11531 /* Return cost of comparison done using sahf operation.
11532 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11533 static int
11534 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11536 enum rtx_code bypass_code, first_code, second_code;
11537 /* Return arbitrarily high cost when instruction is not preferred - this
11538 avoids gcc from using it. */
11539 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11540 return 1024;
11541 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11542 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11545 /* Compute cost of the comparison done using any method.
11546 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11547 static int
11548 ix86_fp_comparison_cost (enum rtx_code code)
11550 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11551 int min;
11553 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11554 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11556 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11557 if (min > sahf_cost)
11558 min = sahf_cost;
11559 if (min > fcomi_cost)
11560 min = fcomi_cost;
11561 return min;
11564 /* Return true if we should use an FCOMI instruction for this
11565 fp comparison. */
11568 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11570 enum rtx_code swapped_code = swap_condition (code);
11572 return ((ix86_fp_comparison_cost (code)
11573 == ix86_fp_comparison_fcomi_cost (code))
11574 || (ix86_fp_comparison_cost (swapped_code)
11575 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11578 /* Swap, force into registers, or otherwise massage the two operands
11579 to a fp comparison. The operands are updated in place; the new
11580 comparison code is returned. */
11582 static enum rtx_code
11583 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11585 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11586 rtx op0 = *pop0, op1 = *pop1;
11587 enum machine_mode op_mode = GET_MODE (op0);
11588 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11590 /* All of the unordered compare instructions only work on registers.
11591 The same is true of the fcomi compare instructions. The XFmode
11592 compare instructions require registers except when comparing
11593 against zero or when converting operand 1 from fixed point to
11594 floating point. */
11596 if (!is_sse
11597 && (fpcmp_mode == CCFPUmode
11598 || (op_mode == XFmode
11599 && ! (standard_80387_constant_p (op0) == 1
11600 || standard_80387_constant_p (op1) == 1)
11601 && GET_CODE (op1) != FLOAT)
11602 || ix86_use_fcomi_compare (code)))
11604 op0 = force_reg (op_mode, op0);
11605 op1 = force_reg (op_mode, op1);
11607 else
11609 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11610 things around if they appear profitable, otherwise force op0
11611 into a register. */
11613 if (standard_80387_constant_p (op0) == 0
11614 || (MEM_P (op0)
11615 && ! (standard_80387_constant_p (op1) == 0
11616 || MEM_P (op1))))
11618 rtx tmp;
11619 tmp = op0, op0 = op1, op1 = tmp;
11620 code = swap_condition (code);
11623 if (!REG_P (op0))
11624 op0 = force_reg (op_mode, op0);
11626 if (CONSTANT_P (op1))
11628 int tmp = standard_80387_constant_p (op1);
11629 if (tmp == 0)
11630 op1 = validize_mem (force_const_mem (op_mode, op1));
11631 else if (tmp == 1)
11633 if (TARGET_CMOVE)
11634 op1 = force_reg (op_mode, op1);
11636 else
11637 op1 = force_reg (op_mode, op1);
11641 /* Try to rearrange the comparison to make it cheaper. */
11642 if (ix86_fp_comparison_cost (code)
11643 > ix86_fp_comparison_cost (swap_condition (code))
11644 && (REG_P (op1) || can_create_pseudo_p ()))
11646 rtx tmp;
11647 tmp = op0, op0 = op1, op1 = tmp;
11648 code = swap_condition (code);
11649 if (!REG_P (op0))
11650 op0 = force_reg (op_mode, op0);
11653 *pop0 = op0;
11654 *pop1 = op1;
11655 return code;
11658 /* Convert comparison codes we use to represent FP comparison to integer
11659 code that will result in proper branch. Return UNKNOWN if no such code
11660 is available. */
11662 enum rtx_code
11663 ix86_fp_compare_code_to_integer (enum rtx_code code)
11665 switch (code)
11667 case GT:
11668 return GTU;
11669 case GE:
11670 return GEU;
11671 case ORDERED:
11672 case UNORDERED:
11673 return code;
11674 break;
11675 case UNEQ:
11676 return EQ;
11677 break;
11678 case UNLT:
11679 return LTU;
11680 break;
11681 case UNLE:
11682 return LEU;
11683 break;
11684 case LTGT:
11685 return NE;
11686 break;
11687 default:
11688 return UNKNOWN;
11692 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11694 static rtx
11695 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11696 rtx *second_test, rtx *bypass_test)
11698 enum machine_mode fpcmp_mode, intcmp_mode;
11699 rtx tmp, tmp2;
11700 int cost = ix86_fp_comparison_cost (code);
11701 enum rtx_code bypass_code, first_code, second_code;
11703 fpcmp_mode = ix86_fp_compare_mode (code);
11704 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11706 if (second_test)
11707 *second_test = NULL_RTX;
11708 if (bypass_test)
11709 *bypass_test = NULL_RTX;
11711 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11713 /* Do fcomi/sahf based test when profitable. */
11714 if (ix86_fp_comparison_arithmetics_cost (code) > cost
11715 && (bypass_code == UNKNOWN || bypass_test)
11716 && (second_code == UNKNOWN || second_test))
11718 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11719 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11720 tmp);
11721 if (TARGET_CMOVE)
11722 emit_insn (tmp);
11723 else
11725 gcc_assert (TARGET_SAHF);
11727 if (!scratch)
11728 scratch = gen_reg_rtx (HImode);
11729 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
11731 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
11734 /* The FP codes work out to act like unsigned. */
11735 intcmp_mode = fpcmp_mode;
11736 code = first_code;
11737 if (bypass_code != UNKNOWN)
11738 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11739 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11740 const0_rtx);
11741 if (second_code != UNKNOWN)
11742 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11743 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11744 const0_rtx);
11746 else
11748 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11749 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11750 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11751 if (!scratch)
11752 scratch = gen_reg_rtx (HImode);
11753 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11755 /* In the unordered case, we have to check C2 for NaN's, which
11756 doesn't happen to work out to anything nice combination-wise.
11757 So do some bit twiddling on the value we've got in AH to come
11758 up with an appropriate set of condition codes. */
11760 intcmp_mode = CCNOmode;
11761 switch (code)
11763 case GT:
11764 case UNGT:
11765 if (code == GT || !TARGET_IEEE_FP)
11767 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11768 code = EQ;
11770 else
11772 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11773 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11774 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11775 intcmp_mode = CCmode;
11776 code = GEU;
11778 break;
11779 case LT:
11780 case UNLT:
11781 if (code == LT && TARGET_IEEE_FP)
11783 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11784 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11785 intcmp_mode = CCmode;
11786 code = EQ;
11788 else
11790 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11791 code = NE;
11793 break;
11794 case GE:
11795 case UNGE:
11796 if (code == GE || !TARGET_IEEE_FP)
11798 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11799 code = EQ;
11801 else
11803 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11804 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11805 GEN_INT (0x01)));
11806 code = NE;
11808 break;
11809 case LE:
11810 case UNLE:
11811 if (code == LE && TARGET_IEEE_FP)
11813 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11814 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11815 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11816 intcmp_mode = CCmode;
11817 code = LTU;
11819 else
11821 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11822 code = NE;
11824 break;
11825 case EQ:
11826 case UNEQ:
11827 if (code == EQ && TARGET_IEEE_FP)
11829 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11830 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11831 intcmp_mode = CCmode;
11832 code = EQ;
11834 else
11836 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11837 code = NE;
11838 break;
11840 break;
11841 case NE:
11842 case LTGT:
11843 if (code == NE && TARGET_IEEE_FP)
11845 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11846 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11847 GEN_INT (0x40)));
11848 code = NE;
11850 else
11852 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11853 code = EQ;
11855 break;
11857 case UNORDERED:
11858 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11859 code = NE;
11860 break;
11861 case ORDERED:
11862 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11863 code = EQ;
11864 break;
11866 default:
11867 gcc_unreachable ();
11871 /* Return the test that should be put into the flags user, i.e.
11872 the bcc, scc, or cmov instruction. */
11873 return gen_rtx_fmt_ee (code, VOIDmode,
11874 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11875 const0_rtx);
11879 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11881 rtx op0, op1, ret;
11882 op0 = ix86_compare_op0;
11883 op1 = ix86_compare_op1;
11885 if (second_test)
11886 *second_test = NULL_RTX;
11887 if (bypass_test)
11888 *bypass_test = NULL_RTX;
11890 if (ix86_compare_emitted)
11892 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11893 ix86_compare_emitted = NULL_RTX;
11895 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11897 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
11898 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11899 second_test, bypass_test);
11901 else
11902 ret = ix86_expand_int_compare (code, op0, op1);
11904 return ret;
11907 /* Return true if the CODE will result in nontrivial jump sequence. */
11908 bool
11909 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11911 enum rtx_code bypass_code, first_code, second_code;
11912 if (!TARGET_CMOVE)
11913 return true;
11914 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11915 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11918 void
11919 ix86_expand_branch (enum rtx_code code, rtx label)
11921 rtx tmp;
11923 /* If we have emitted a compare insn, go straight to simple.
11924 ix86_expand_compare won't emit anything if ix86_compare_emitted
11925 is non NULL. */
11926 if (ix86_compare_emitted)
11927 goto simple;
11929 switch (GET_MODE (ix86_compare_op0))
11931 case QImode:
11932 case HImode:
11933 case SImode:
11934 simple:
11935 tmp = ix86_expand_compare (code, NULL, NULL);
11936 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11937 gen_rtx_LABEL_REF (VOIDmode, label),
11938 pc_rtx);
11939 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11940 return;
11942 case SFmode:
11943 case DFmode:
11944 case XFmode:
11946 rtvec vec;
11947 int use_fcomi;
11948 enum rtx_code bypass_code, first_code, second_code;
11950 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11951 &ix86_compare_op1);
11953 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11955 /* Check whether we will use the natural sequence with one jump. If
11956 so, we can expand jump early. Otherwise delay expansion by
11957 creating compound insn to not confuse optimizers. */
11958 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
11960 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11961 gen_rtx_LABEL_REF (VOIDmode, label),
11962 pc_rtx, NULL_RTX, NULL_RTX);
11964 else
11966 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11967 ix86_compare_op0, ix86_compare_op1);
11968 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11969 gen_rtx_LABEL_REF (VOIDmode, label),
11970 pc_rtx);
11971 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11973 use_fcomi = ix86_use_fcomi_compare (code);
11974 vec = rtvec_alloc (3 + !use_fcomi);
11975 RTVEC_ELT (vec, 0) = tmp;
11976 RTVEC_ELT (vec, 1)
11977 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
11978 RTVEC_ELT (vec, 2)
11979 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
11980 if (! use_fcomi)
11981 RTVEC_ELT (vec, 3)
11982 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11984 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11986 return;
11989 case DImode:
11990 if (TARGET_64BIT)
11991 goto simple;
11992 case TImode:
11993 /* Expand DImode branch into multiple compare+branch. */
11995 rtx lo[2], hi[2], label2;
11996 enum rtx_code code1, code2, code3;
11997 enum machine_mode submode;
11999 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
12001 tmp = ix86_compare_op0;
12002 ix86_compare_op0 = ix86_compare_op1;
12003 ix86_compare_op1 = tmp;
12004 code = swap_condition (code);
12006 if (GET_MODE (ix86_compare_op0) == DImode)
12008 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
12009 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
12010 submode = SImode;
12012 else
12014 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
12015 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
12016 submode = DImode;
12019 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
12020 avoid two branches. This costs one extra insn, so disable when
12021 optimizing for size. */
12023 if ((code == EQ || code == NE)
12024 && (!optimize_size
12025 || hi[1] == const0_rtx || lo[1] == const0_rtx))
12027 rtx xor0, xor1;
12029 xor1 = hi[0];
12030 if (hi[1] != const0_rtx)
12031 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
12032 NULL_RTX, 0, OPTAB_WIDEN);
12034 xor0 = lo[0];
12035 if (lo[1] != const0_rtx)
12036 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
12037 NULL_RTX, 0, OPTAB_WIDEN);
12039 tmp = expand_binop (submode, ior_optab, xor1, xor0,
12040 NULL_RTX, 0, OPTAB_WIDEN);
12042 ix86_compare_op0 = tmp;
12043 ix86_compare_op1 = const0_rtx;
12044 ix86_expand_branch (code, label);
12045 return;
12048 /* Otherwise, if we are doing less-than or greater-or-equal-than,
12049 op1 is a constant and the low word is zero, then we can just
12050 examine the high word. */
12052 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
12053 switch (code)
12055 case LT: case LTU: case GE: case GEU:
12056 ix86_compare_op0 = hi[0];
12057 ix86_compare_op1 = hi[1];
12058 ix86_expand_branch (code, label);
12059 return;
12060 default:
12061 break;
12064 /* Otherwise, we need two or three jumps. */
12066 label2 = gen_label_rtx ();
12068 code1 = code;
12069 code2 = swap_condition (code);
12070 code3 = unsigned_condition (code);
12072 switch (code)
12074 case LT: case GT: case LTU: case GTU:
12075 break;
12077 case LE: code1 = LT; code2 = GT; break;
12078 case GE: code1 = GT; code2 = LT; break;
12079 case LEU: code1 = LTU; code2 = GTU; break;
12080 case GEU: code1 = GTU; code2 = LTU; break;
12082 case EQ: code1 = UNKNOWN; code2 = NE; break;
12083 case NE: code2 = UNKNOWN; break;
12085 default:
12086 gcc_unreachable ();
12090 * a < b =>
12091 * if (hi(a) < hi(b)) goto true;
12092 * if (hi(a) > hi(b)) goto false;
12093 * if (lo(a) < lo(b)) goto true;
12094 * false:
12097 ix86_compare_op0 = hi[0];
12098 ix86_compare_op1 = hi[1];
12100 if (code1 != UNKNOWN)
12101 ix86_expand_branch (code1, label);
12102 if (code2 != UNKNOWN)
12103 ix86_expand_branch (code2, label2);
12105 ix86_compare_op0 = lo[0];
12106 ix86_compare_op1 = lo[1];
12107 ix86_expand_branch (code3, label);
12109 if (code2 != UNKNOWN)
12110 emit_label (label2);
12111 return;
12114 default:
12115 gcc_unreachable ();
12119 /* Split branch based on floating point condition. */
12120 void
12121 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
12122 rtx target1, rtx target2, rtx tmp, rtx pushed)
12124 rtx second, bypass;
12125 rtx label = NULL_RTX;
12126 rtx condition;
12127 int bypass_probability = -1, second_probability = -1, probability = -1;
12128 rtx i;
12130 if (target2 != pc_rtx)
12132 rtx tmp = target2;
12133 code = reverse_condition_maybe_unordered (code);
12134 target2 = target1;
12135 target1 = tmp;
12138 condition = ix86_expand_fp_compare (code, op1, op2,
12139 tmp, &second, &bypass);
12141 /* Remove pushed operand from stack. */
12142 if (pushed)
12143 ix86_free_from_memory (GET_MODE (pushed));
12145 if (split_branch_probability >= 0)
12147 /* Distribute the probabilities across the jumps.
12148 Assume the BYPASS and SECOND to be always test
12149 for UNORDERED. */
12150 probability = split_branch_probability;
12152 /* Value of 1 is low enough to make no need for probability
12153 to be updated. Later we may run some experiments and see
12154 if unordered values are more frequent in practice. */
12155 if (bypass)
12156 bypass_probability = 1;
12157 if (second)
12158 second_probability = 1;
12160 if (bypass != NULL_RTX)
12162 label = gen_label_rtx ();
12163 i = emit_jump_insn (gen_rtx_SET
12164 (VOIDmode, pc_rtx,
12165 gen_rtx_IF_THEN_ELSE (VOIDmode,
12166 bypass,
12167 gen_rtx_LABEL_REF (VOIDmode,
12168 label),
12169 pc_rtx)));
12170 if (bypass_probability >= 0)
12171 REG_NOTES (i)
12172 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12173 GEN_INT (bypass_probability),
12174 REG_NOTES (i));
12176 i = emit_jump_insn (gen_rtx_SET
12177 (VOIDmode, pc_rtx,
12178 gen_rtx_IF_THEN_ELSE (VOIDmode,
12179 condition, target1, target2)));
12180 if (probability >= 0)
12181 REG_NOTES (i)
12182 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12183 GEN_INT (probability),
12184 REG_NOTES (i));
12185 if (second != NULL_RTX)
12187 i = emit_jump_insn (gen_rtx_SET
12188 (VOIDmode, pc_rtx,
12189 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
12190 target2)));
12191 if (second_probability >= 0)
12192 REG_NOTES (i)
12193 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12194 GEN_INT (second_probability),
12195 REG_NOTES (i));
12197 if (label != NULL_RTX)
12198 emit_label (label);
12202 ix86_expand_setcc (enum rtx_code code, rtx dest)
12204 rtx ret, tmp, tmpreg, equiv;
12205 rtx second_test, bypass_test;
12207 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
12208 return 0; /* FAIL */
12210 gcc_assert (GET_MODE (dest) == QImode);
12212 ret = ix86_expand_compare (code, &second_test, &bypass_test);
12213 PUT_MODE (ret, QImode);
12215 tmp = dest;
12216 tmpreg = dest;
12218 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
12219 if (bypass_test || second_test)
12221 rtx test = second_test;
12222 int bypass = 0;
12223 rtx tmp2 = gen_reg_rtx (QImode);
12224 if (bypass_test)
12226 gcc_assert (!second_test);
12227 test = bypass_test;
12228 bypass = 1;
12229 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
12231 PUT_MODE (test, QImode);
12232 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
12234 if (bypass)
12235 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
12236 else
12237 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
12240 /* Attach a REG_EQUAL note describing the comparison result. */
12241 if (ix86_compare_op0 && ix86_compare_op1)
12243 equiv = simplify_gen_relational (code, QImode,
12244 GET_MODE (ix86_compare_op0),
12245 ix86_compare_op0, ix86_compare_op1);
12246 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
12249 return 1; /* DONE */
12252 /* Expand comparison setting or clearing carry flag. Return true when
12253 successful and set pop for the operation. */
12254 static bool
12255 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
12257 enum machine_mode mode =
12258 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
12260 /* Do not handle DImode compares that go through special path. */
12261 if (mode == (TARGET_64BIT ? TImode : DImode))
12262 return false;
12264 if (SCALAR_FLOAT_MODE_P (mode))
12266 rtx second_test = NULL, bypass_test = NULL;
12267 rtx compare_op, compare_seq;
12269 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
12271 /* Shortcut: following common codes never translate
12272 into carry flag compares. */
12273 if (code == EQ || code == NE || code == UNEQ || code == LTGT
12274 || code == ORDERED || code == UNORDERED)
12275 return false;
12277 /* These comparisons require zero flag; swap operands so they won't. */
12278 if ((code == GT || code == UNLE || code == LE || code == UNGT)
12279 && !TARGET_IEEE_FP)
12281 rtx tmp = op0;
12282 op0 = op1;
12283 op1 = tmp;
12284 code = swap_condition (code);
12287 /* Try to expand the comparison and verify that we end up with
12288 carry flag based comparison. This fails to be true only when
12289 we decide to expand comparison using arithmetic that is not
12290 too common scenario. */
12291 start_sequence ();
12292 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12293 &second_test, &bypass_test);
12294 compare_seq = get_insns ();
12295 end_sequence ();
12297 if (second_test || bypass_test)
12298 return false;
12300 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12301 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12302 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
12303 else
12304 code = GET_CODE (compare_op);
12306 if (code != LTU && code != GEU)
12307 return false;
12309 emit_insn (compare_seq);
12310 *pop = compare_op;
12311 return true;
12314 if (!INTEGRAL_MODE_P (mode))
12315 return false;
12317 switch (code)
12319 case LTU:
12320 case GEU:
12321 break;
12323 /* Convert a==0 into (unsigned)a<1. */
12324 case EQ:
12325 case NE:
12326 if (op1 != const0_rtx)
12327 return false;
12328 op1 = const1_rtx;
12329 code = (code == EQ ? LTU : GEU);
12330 break;
12332 /* Convert a>b into b<a or a>=b-1. */
12333 case GTU:
12334 case LEU:
12335 if (CONST_INT_P (op1))
12337 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12338 /* Bail out on overflow. We still can swap operands but that
12339 would force loading of the constant into register. */
12340 if (op1 == const0_rtx
12341 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12342 return false;
12343 code = (code == GTU ? GEU : LTU);
12345 else
12347 rtx tmp = op1;
12348 op1 = op0;
12349 op0 = tmp;
12350 code = (code == GTU ? LTU : GEU);
12352 break;
12354 /* Convert a>=0 into (unsigned)a<0x80000000. */
12355 case LT:
12356 case GE:
12357 if (mode == DImode || op1 != const0_rtx)
12358 return false;
12359 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12360 code = (code == LT ? GEU : LTU);
12361 break;
12362 case LE:
12363 case GT:
12364 if (mode == DImode || op1 != constm1_rtx)
12365 return false;
12366 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12367 code = (code == LE ? GEU : LTU);
12368 break;
12370 default:
12371 return false;
12373 /* Swapping operands may cause constant to appear as first operand. */
12374 if (!nonimmediate_operand (op0, VOIDmode))
12376 if (!can_create_pseudo_p ())
12377 return false;
12378 op0 = force_reg (mode, op0);
12380 ix86_compare_op0 = op0;
12381 ix86_compare_op1 = op1;
12382 *pop = ix86_expand_compare (code, NULL, NULL);
12383 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
12384 return true;
12388 ix86_expand_int_movcc (rtx operands[])
12390 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12391 rtx compare_seq, compare_op;
12392 rtx second_test, bypass_test;
12393 enum machine_mode mode = GET_MODE (operands[0]);
12394 bool sign_bit_compare_p = false;;
12396 start_sequence ();
12397 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12398 compare_seq = get_insns ();
12399 end_sequence ();
12401 compare_code = GET_CODE (compare_op);
12403 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12404 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12405 sign_bit_compare_p = true;
12407 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12408 HImode insns, we'd be swallowed in word prefix ops. */
12410 if ((mode != HImode || TARGET_FAST_PREFIX)
12411 && (mode != (TARGET_64BIT ? TImode : DImode))
12412 && CONST_INT_P (operands[2])
12413 && CONST_INT_P (operands[3]))
12415 rtx out = operands[0];
12416 HOST_WIDE_INT ct = INTVAL (operands[2]);
12417 HOST_WIDE_INT cf = INTVAL (operands[3]);
12418 HOST_WIDE_INT diff;
12420 diff = ct - cf;
12421 /* Sign bit compares are better done using shifts than we do by using
12422 sbb. */
12423 if (sign_bit_compare_p
12424 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12425 ix86_compare_op1, &compare_op))
12427 /* Detect overlap between destination and compare sources. */
12428 rtx tmp = out;
12430 if (!sign_bit_compare_p)
12432 bool fpcmp = false;
12434 compare_code = GET_CODE (compare_op);
12436 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12437 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12439 fpcmp = true;
12440 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12443 /* To simplify rest of code, restrict to the GEU case. */
12444 if (compare_code == LTU)
12446 HOST_WIDE_INT tmp = ct;
12447 ct = cf;
12448 cf = tmp;
12449 compare_code = reverse_condition (compare_code);
12450 code = reverse_condition (code);
12452 else
12454 if (fpcmp)
12455 PUT_CODE (compare_op,
12456 reverse_condition_maybe_unordered
12457 (GET_CODE (compare_op)));
12458 else
12459 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12461 diff = ct - cf;
12463 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12464 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12465 tmp = gen_reg_rtx (mode);
12467 if (mode == DImode)
12468 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12469 else
12470 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12472 else
12474 if (code == GT || code == GE)
12475 code = reverse_condition (code);
12476 else
12478 HOST_WIDE_INT tmp = ct;
12479 ct = cf;
12480 cf = tmp;
12481 diff = ct - cf;
12483 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12484 ix86_compare_op1, VOIDmode, 0, -1);
12487 if (diff == 1)
12490 * cmpl op0,op1
12491 * sbbl dest,dest
12492 * [addl dest, ct]
12494 * Size 5 - 8.
12496 if (ct)
12497 tmp = expand_simple_binop (mode, PLUS,
12498 tmp, GEN_INT (ct),
12499 copy_rtx (tmp), 1, OPTAB_DIRECT);
12501 else if (cf == -1)
12504 * cmpl op0,op1
12505 * sbbl dest,dest
12506 * orl $ct, dest
12508 * Size 8.
12510 tmp = expand_simple_binop (mode, IOR,
12511 tmp, GEN_INT (ct),
12512 copy_rtx (tmp), 1, OPTAB_DIRECT);
12514 else if (diff == -1 && ct)
12517 * cmpl op0,op1
12518 * sbbl dest,dest
12519 * notl dest
12520 * [addl dest, cf]
12522 * Size 8 - 11.
12524 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12525 if (cf)
12526 tmp = expand_simple_binop (mode, PLUS,
12527 copy_rtx (tmp), GEN_INT (cf),
12528 copy_rtx (tmp), 1, OPTAB_DIRECT);
12530 else
12533 * cmpl op0,op1
12534 * sbbl dest,dest
12535 * [notl dest]
12536 * andl cf - ct, dest
12537 * [addl dest, ct]
12539 * Size 8 - 11.
12542 if (cf == 0)
12544 cf = ct;
12545 ct = 0;
12546 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12549 tmp = expand_simple_binop (mode, AND,
12550 copy_rtx (tmp),
12551 gen_int_mode (cf - ct, mode),
12552 copy_rtx (tmp), 1, OPTAB_DIRECT);
12553 if (ct)
12554 tmp = expand_simple_binop (mode, PLUS,
12555 copy_rtx (tmp), GEN_INT (ct),
12556 copy_rtx (tmp), 1, OPTAB_DIRECT);
12559 if (!rtx_equal_p (tmp, out))
12560 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12562 return 1; /* DONE */
12565 if (diff < 0)
12567 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12569 HOST_WIDE_INT tmp;
12570 tmp = ct, ct = cf, cf = tmp;
12571 diff = -diff;
12573 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12575 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12577 /* We may be reversing unordered compare to normal compare, that
12578 is not valid in general (we may convert non-trapping condition
12579 to trapping one), however on i386 we currently emit all
12580 comparisons unordered. */
12581 compare_code = reverse_condition_maybe_unordered (compare_code);
12582 code = reverse_condition_maybe_unordered (code);
12584 else
12586 compare_code = reverse_condition (compare_code);
12587 code = reverse_condition (code);
12591 compare_code = UNKNOWN;
12592 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12593 && CONST_INT_P (ix86_compare_op1))
12595 if (ix86_compare_op1 == const0_rtx
12596 && (code == LT || code == GE))
12597 compare_code = code;
12598 else if (ix86_compare_op1 == constm1_rtx)
12600 if (code == LE)
12601 compare_code = LT;
12602 else if (code == GT)
12603 compare_code = GE;
12607 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12608 if (compare_code != UNKNOWN
12609 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12610 && (cf == -1 || ct == -1))
12612 /* If lea code below could be used, only optimize
12613 if it results in a 2 insn sequence. */
12615 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12616 || diff == 3 || diff == 5 || diff == 9)
12617 || (compare_code == LT && ct == -1)
12618 || (compare_code == GE && cf == -1))
12621 * notl op1 (if necessary)
12622 * sarl $31, op1
12623 * orl cf, op1
12625 if (ct != -1)
12627 cf = ct;
12628 ct = -1;
12629 code = reverse_condition (code);
12632 out = emit_store_flag (out, code, ix86_compare_op0,
12633 ix86_compare_op1, VOIDmode, 0, -1);
12635 out = expand_simple_binop (mode, IOR,
12636 out, GEN_INT (cf),
12637 out, 1, OPTAB_DIRECT);
12638 if (out != operands[0])
12639 emit_move_insn (operands[0], out);
12641 return 1; /* DONE */
12646 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12647 || diff == 3 || diff == 5 || diff == 9)
12648 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12649 && (mode != DImode
12650 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12653 * xorl dest,dest
12654 * cmpl op1,op2
12655 * setcc dest
12656 * lea cf(dest*(ct-cf)),dest
12658 * Size 14.
12660 * This also catches the degenerate setcc-only case.
12663 rtx tmp;
12664 int nops;
12666 out = emit_store_flag (out, code, ix86_compare_op0,
12667 ix86_compare_op1, VOIDmode, 0, 1);
12669 nops = 0;
12670 /* On x86_64 the lea instruction operates on Pmode, so we need
12671 to get arithmetics done in proper mode to match. */
12672 if (diff == 1)
12673 tmp = copy_rtx (out);
12674 else
12676 rtx out1;
12677 out1 = copy_rtx (out);
12678 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12679 nops++;
12680 if (diff & 1)
12682 tmp = gen_rtx_PLUS (mode, tmp, out1);
12683 nops++;
12686 if (cf != 0)
12688 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12689 nops++;
12691 if (!rtx_equal_p (tmp, out))
12693 if (nops == 1)
12694 out = force_operand (tmp, copy_rtx (out));
12695 else
12696 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12698 if (!rtx_equal_p (out, operands[0]))
12699 emit_move_insn (operands[0], copy_rtx (out));
12701 return 1; /* DONE */
12705 * General case: Jumpful:
12706 * xorl dest,dest cmpl op1, op2
12707 * cmpl op1, op2 movl ct, dest
12708 * setcc dest jcc 1f
12709 * decl dest movl cf, dest
12710 * andl (cf-ct),dest 1:
12711 * addl ct,dest
12713 * Size 20. Size 14.
12715 * This is reasonably steep, but branch mispredict costs are
12716 * high on modern cpus, so consider failing only if optimizing
12717 * for space.
12720 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12721 && BRANCH_COST >= 2)
12723 if (cf == 0)
12725 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12727 cf = ct;
12728 ct = 0;
12730 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12732 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12734 /* We may be reversing unordered compare to normal compare,
12735 that is not valid in general (we may convert non-trapping
12736 condition to trapping one), however on i386 we currently
12737 emit all comparisons unordered. */
12738 code = reverse_condition_maybe_unordered (code);
12740 else
12742 code = reverse_condition (code);
12743 if (compare_code != UNKNOWN)
12744 compare_code = reverse_condition (compare_code);
12748 if (compare_code != UNKNOWN)
12750 /* notl op1 (if needed)
12751 sarl $31, op1
12752 andl (cf-ct), op1
12753 addl ct, op1
12755 For x < 0 (resp. x <= -1) there will be no notl,
12756 so if possible swap the constants to get rid of the
12757 complement.
12758 True/false will be -1/0 while code below (store flag
12759 followed by decrement) is 0/-1, so the constants need
12760 to be exchanged once more. */
12762 if (compare_code == GE || !cf)
12764 code = reverse_condition (code);
12765 compare_code = LT;
12767 else
12769 HOST_WIDE_INT tmp = cf;
12770 cf = ct;
12771 ct = tmp;
12774 out = emit_store_flag (out, code, ix86_compare_op0,
12775 ix86_compare_op1, VOIDmode, 0, -1);
12777 else
12779 out = emit_store_flag (out, code, ix86_compare_op0,
12780 ix86_compare_op1, VOIDmode, 0, 1);
12782 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12783 copy_rtx (out), 1, OPTAB_DIRECT);
12786 out = expand_simple_binop (mode, AND, copy_rtx (out),
12787 gen_int_mode (cf - ct, mode),
12788 copy_rtx (out), 1, OPTAB_DIRECT);
12789 if (ct)
12790 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12791 copy_rtx (out), 1, OPTAB_DIRECT);
12792 if (!rtx_equal_p (out, operands[0]))
12793 emit_move_insn (operands[0], copy_rtx (out));
12795 return 1; /* DONE */
12799 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12801 /* Try a few things more with specific constants and a variable. */
12803 optab op;
12804 rtx var, orig_out, out, tmp;
12806 if (BRANCH_COST <= 2)
12807 return 0; /* FAIL */
12809 /* If one of the two operands is an interesting constant, load a
12810 constant with the above and mask it in with a logical operation. */
12812 if (CONST_INT_P (operands[2]))
12814 var = operands[3];
12815 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12816 operands[3] = constm1_rtx, op = and_optab;
12817 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12818 operands[3] = const0_rtx, op = ior_optab;
12819 else
12820 return 0; /* FAIL */
12822 else if (CONST_INT_P (operands[3]))
12824 var = operands[2];
12825 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12826 operands[2] = constm1_rtx, op = and_optab;
12827 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
12828 operands[2] = const0_rtx, op = ior_optab;
12829 else
12830 return 0; /* FAIL */
12832 else
12833 return 0; /* FAIL */
12835 orig_out = operands[0];
12836 tmp = gen_reg_rtx (mode);
12837 operands[0] = tmp;
12839 /* Recurse to get the constant loaded. */
12840 if (ix86_expand_int_movcc (operands) == 0)
12841 return 0; /* FAIL */
12843 /* Mask in the interesting variable. */
12844 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12845 OPTAB_WIDEN);
12846 if (!rtx_equal_p (out, orig_out))
12847 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12849 return 1; /* DONE */
12853 * For comparison with above,
12855 * movl cf,dest
12856 * movl ct,tmp
12857 * cmpl op1,op2
12858 * cmovcc tmp,dest
12860 * Size 15.
12863 if (! nonimmediate_operand (operands[2], mode))
12864 operands[2] = force_reg (mode, operands[2]);
12865 if (! nonimmediate_operand (operands[3], mode))
12866 operands[3] = force_reg (mode, operands[3]);
12868 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12870 rtx tmp = gen_reg_rtx (mode);
12871 emit_move_insn (tmp, operands[3]);
12872 operands[3] = tmp;
12874 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12876 rtx tmp = gen_reg_rtx (mode);
12877 emit_move_insn (tmp, operands[2]);
12878 operands[2] = tmp;
12881 if (! register_operand (operands[2], VOIDmode)
12882 && (mode == QImode
12883 || ! register_operand (operands[3], VOIDmode)))
12884 operands[2] = force_reg (mode, operands[2]);
12886 if (mode == QImode
12887 && ! register_operand (operands[3], VOIDmode))
12888 operands[3] = force_reg (mode, operands[3]);
12890 emit_insn (compare_seq);
12891 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12892 gen_rtx_IF_THEN_ELSE (mode,
12893 compare_op, operands[2],
12894 operands[3])));
12895 if (bypass_test)
12896 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12897 gen_rtx_IF_THEN_ELSE (mode,
12898 bypass_test,
12899 copy_rtx (operands[3]),
12900 copy_rtx (operands[0]))));
12901 if (second_test)
12902 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12903 gen_rtx_IF_THEN_ELSE (mode,
12904 second_test,
12905 copy_rtx (operands[2]),
12906 copy_rtx (operands[0]))));
12908 return 1; /* DONE */
12911 /* Swap, force into registers, or otherwise massage the two operands
12912 to an sse comparison with a mask result. Thus we differ a bit from
12913 ix86_prepare_fp_compare_args which expects to produce a flags result.
12915 The DEST operand exists to help determine whether to commute commutative
12916 operators. The POP0/POP1 operands are updated in place. The new
12917 comparison code is returned, or UNKNOWN if not implementable. */
12919 static enum rtx_code
12920 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12921 rtx *pop0, rtx *pop1)
12923 rtx tmp;
12925 switch (code)
12927 case LTGT:
12928 case UNEQ:
12929 /* We have no LTGT as an operator. We could implement it with
12930 NE & ORDERED, but this requires an extra temporary. It's
12931 not clear that it's worth it. */
12932 return UNKNOWN;
12934 case LT:
12935 case LE:
12936 case UNGT:
12937 case UNGE:
12938 /* These are supported directly. */
12939 break;
12941 case EQ:
12942 case NE:
12943 case UNORDERED:
12944 case ORDERED:
12945 /* For commutative operators, try to canonicalize the destination
12946 operand to be first in the comparison - this helps reload to
12947 avoid extra moves. */
12948 if (!dest || !rtx_equal_p (dest, *pop1))
12949 break;
12950 /* FALLTHRU */
12952 case GE:
12953 case GT:
12954 case UNLE:
12955 case UNLT:
12956 /* These are not supported directly. Swap the comparison operands
12957 to transform into something that is supported. */
12958 tmp = *pop0;
12959 *pop0 = *pop1;
12960 *pop1 = tmp;
12961 code = swap_condition (code);
12962 break;
12964 default:
12965 gcc_unreachable ();
12968 return code;
12971 /* Detect conditional moves that exactly match min/max operational
12972 semantics. Note that this is IEEE safe, as long as we don't
12973 interchange the operands.
12975 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12976 and TRUE if the operation is successful and instructions are emitted. */
12978 static bool
12979 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12980 rtx cmp_op1, rtx if_true, rtx if_false)
12982 enum machine_mode mode;
12983 bool is_min;
12984 rtx tmp;
12986 if (code == LT)
12988 else if (code == UNGE)
12990 tmp = if_true;
12991 if_true = if_false;
12992 if_false = tmp;
12994 else
12995 return false;
12997 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12998 is_min = true;
12999 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
13000 is_min = false;
13001 else
13002 return false;
13004 mode = GET_MODE (dest);
13006 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
13007 but MODE may be a vector mode and thus not appropriate. */
13008 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
13010 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
13011 rtvec v;
13013 if_true = force_reg (mode, if_true);
13014 v = gen_rtvec (2, if_true, if_false);
13015 tmp = gen_rtx_UNSPEC (mode, v, u);
13017 else
13019 code = is_min ? SMIN : SMAX;
13020 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
13023 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
13024 return true;
13027 /* Expand an sse vector comparison. Return the register with the result. */
13029 static rtx
13030 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
13031 rtx op_true, rtx op_false)
13033 enum machine_mode mode = GET_MODE (dest);
13034 rtx x;
13036 cmp_op0 = force_reg (mode, cmp_op0);
13037 if (!nonimmediate_operand (cmp_op1, mode))
13038 cmp_op1 = force_reg (mode, cmp_op1);
13040 if (optimize
13041 || reg_overlap_mentioned_p (dest, op_true)
13042 || reg_overlap_mentioned_p (dest, op_false))
13043 dest = gen_reg_rtx (mode);
13045 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
13046 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13048 return dest;
13051 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
13052 operations. This is used for both scalar and vector conditional moves. */
13054 static void
13055 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
13057 enum machine_mode mode = GET_MODE (dest);
13058 rtx t2, t3, x;
13060 if (TARGET_SSE5)
13062 rtx pcmov = gen_rtx_SET (mode, dest,
13063 gen_rtx_IF_THEN_ELSE (mode, cmp,
13064 op_true,
13065 op_false));
13066 emit_insn (pcmov);
13068 else if (op_false == CONST0_RTX (mode))
13070 op_true = force_reg (mode, op_true);
13071 x = gen_rtx_AND (mode, cmp, op_true);
13072 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13074 else if (op_true == CONST0_RTX (mode))
13076 op_false = force_reg (mode, op_false);
13077 x = gen_rtx_NOT (mode, cmp);
13078 x = gen_rtx_AND (mode, x, op_false);
13079 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13081 else
13083 op_true = force_reg (mode, op_true);
13084 op_false = force_reg (mode, op_false);
13086 t2 = gen_reg_rtx (mode);
13087 if (optimize)
13088 t3 = gen_reg_rtx (mode);
13089 else
13090 t3 = dest;
13092 x = gen_rtx_AND (mode, op_true, cmp);
13093 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
13095 x = gen_rtx_NOT (mode, cmp);
13096 x = gen_rtx_AND (mode, x, op_false);
13097 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
13099 x = gen_rtx_IOR (mode, t3, t2);
13100 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13104 /* Expand a floating-point conditional move. Return true if successful. */
13107 ix86_expand_fp_movcc (rtx operands[])
13109 enum machine_mode mode = GET_MODE (operands[0]);
13110 enum rtx_code code = GET_CODE (operands[1]);
13111 rtx tmp, compare_op, second_test, bypass_test;
13113 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
13115 enum machine_mode cmode;
13117 /* Since we've no cmove for sse registers, don't force bad register
13118 allocation just to gain access to it. Deny movcc when the
13119 comparison mode doesn't match the move mode. */
13120 cmode = GET_MODE (ix86_compare_op0);
13121 if (cmode == VOIDmode)
13122 cmode = GET_MODE (ix86_compare_op1);
13123 if (cmode != mode)
13124 return 0;
13126 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13127 &ix86_compare_op0,
13128 &ix86_compare_op1);
13129 if (code == UNKNOWN)
13130 return 0;
13132 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
13133 ix86_compare_op1, operands[2],
13134 operands[3]))
13135 return 1;
13137 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
13138 ix86_compare_op1, operands[2], operands[3]);
13139 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
13140 return 1;
13143 /* The floating point conditional move instructions don't directly
13144 support conditions resulting from a signed integer comparison. */
13146 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13148 /* The floating point conditional move instructions don't directly
13149 support signed integer comparisons. */
13151 if (!fcmov_comparison_operator (compare_op, VOIDmode))
13153 gcc_assert (!second_test && !bypass_test);
13154 tmp = gen_reg_rtx (QImode);
13155 ix86_expand_setcc (code, tmp);
13156 code = NE;
13157 ix86_compare_op0 = tmp;
13158 ix86_compare_op1 = const0_rtx;
13159 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13161 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13163 tmp = gen_reg_rtx (mode);
13164 emit_move_insn (tmp, operands[3]);
13165 operands[3] = tmp;
13167 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13169 tmp = gen_reg_rtx (mode);
13170 emit_move_insn (tmp, operands[2]);
13171 operands[2] = tmp;
13174 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13175 gen_rtx_IF_THEN_ELSE (mode, compare_op,
13176 operands[2], operands[3])));
13177 if (bypass_test)
13178 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13179 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
13180 operands[3], operands[0])));
13181 if (second_test)
13182 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13183 gen_rtx_IF_THEN_ELSE (mode, second_test,
13184 operands[2], operands[0])));
13186 return 1;
13189 /* Expand a floating-point vector conditional move; a vcond operation
13190 rather than a movcc operation. */
13192 bool
13193 ix86_expand_fp_vcond (rtx operands[])
13195 enum rtx_code code = GET_CODE (operands[3]);
13196 rtx cmp;
13198 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13199 &operands[4], &operands[5]);
13200 if (code == UNKNOWN)
13201 return false;
13203 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
13204 operands[5], operands[1], operands[2]))
13205 return true;
13207 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
13208 operands[1], operands[2]);
13209 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
13210 return true;
13213 /* Expand a signed/unsigned integral vector conditional move. */
13215 bool
13216 ix86_expand_int_vcond (rtx operands[])
13218 enum machine_mode mode = GET_MODE (operands[0]);
13219 enum rtx_code code = GET_CODE (operands[3]);
13220 bool negate = false;
13221 rtx x, cop0, cop1;
13223 cop0 = operands[4];
13224 cop1 = operands[5];
13226 /* Canonicalize the comparison to EQ, GT, GTU. */
13227 switch (code)
13229 case EQ:
13230 case GT:
13231 case GTU:
13232 break;
13234 case NE:
13235 case LE:
13236 case LEU:
13237 code = reverse_condition (code);
13238 negate = true;
13239 break;
13241 case GE:
13242 case GEU:
13243 code = reverse_condition (code);
13244 negate = true;
13245 /* FALLTHRU */
13247 case LT:
13248 case LTU:
13249 code = swap_condition (code);
13250 x = cop0, cop0 = cop1, cop1 = x;
13251 break;
13253 default:
13254 gcc_unreachable ();
13257 /* Only SSE4.1/SSE4.2 supports V2DImode. */
13258 if (mode == V2DImode)
13260 switch (code)
13262 case EQ:
13263 /* SSE4.1 supports EQ. */
13264 if (!TARGET_SSE4_1)
13265 return false;
13266 break;
13268 case GT:
13269 case GTU:
13270 /* SSE4.2 supports GT/GTU. */
13271 if (!TARGET_SSE4_2)
13272 return false;
13273 break;
13275 default:
13276 gcc_unreachable ();
13280 /* Unsigned parallel compare is not supported by the hardware. Play some
13281 tricks to turn this into a signed comparison against 0. */
13282 if (code == GTU)
13284 cop0 = force_reg (mode, cop0);
13286 switch (mode)
13288 case V4SImode:
13289 case V2DImode:
13291 rtx t1, t2, mask;
13293 /* Perform a parallel modulo subtraction. */
13294 t1 = gen_reg_rtx (mode);
13295 emit_insn ((mode == V4SImode
13296 ? gen_subv4si3
13297 : gen_subv2di3) (t1, cop0, cop1));
13299 /* Extract the original sign bit of op0. */
13300 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
13301 true, false);
13302 t2 = gen_reg_rtx (mode);
13303 emit_insn ((mode == V4SImode
13304 ? gen_andv4si3
13305 : gen_andv2di3) (t2, cop0, mask));
13307 /* XOR it back into the result of the subtraction. This results
13308 in the sign bit set iff we saw unsigned underflow. */
13309 x = gen_reg_rtx (mode);
13310 emit_insn ((mode == V4SImode
13311 ? gen_xorv4si3
13312 : gen_xorv2di3) (x, t1, t2));
13314 code = GT;
13316 break;
13318 case V16QImode:
13319 case V8HImode:
13320 /* Perform a parallel unsigned saturating subtraction. */
13321 x = gen_reg_rtx (mode);
13322 emit_insn (gen_rtx_SET (VOIDmode, x,
13323 gen_rtx_US_MINUS (mode, cop0, cop1)));
13325 code = EQ;
13326 negate = !negate;
13327 break;
13329 default:
13330 gcc_unreachable ();
13333 cop0 = x;
13334 cop1 = CONST0_RTX (mode);
13337 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13338 operands[1+negate], operands[2-negate]);
13340 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13341 operands[2-negate]);
13342 return true;
13345 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13346 true if we should do zero extension, else sign extension. HIGH_P is
13347 true if we want the N/2 high elements, else the low elements. */
13349 void
13350 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13352 enum machine_mode imode = GET_MODE (operands[1]);
13353 rtx (*unpack)(rtx, rtx, rtx);
13354 rtx se, dest;
13356 switch (imode)
13358 case V16QImode:
13359 if (high_p)
13360 unpack = gen_vec_interleave_highv16qi;
13361 else
13362 unpack = gen_vec_interleave_lowv16qi;
13363 break;
13364 case V8HImode:
13365 if (high_p)
13366 unpack = gen_vec_interleave_highv8hi;
13367 else
13368 unpack = gen_vec_interleave_lowv8hi;
13369 break;
13370 case V4SImode:
13371 if (high_p)
13372 unpack = gen_vec_interleave_highv4si;
13373 else
13374 unpack = gen_vec_interleave_lowv4si;
13375 break;
13376 default:
13377 gcc_unreachable ();
13380 dest = gen_lowpart (imode, operands[0]);
13382 if (unsigned_p)
13383 se = force_reg (imode, CONST0_RTX (imode));
13384 else
13385 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13386 operands[1], pc_rtx, pc_rtx);
13388 emit_insn (unpack (dest, operands[1], se));
13391 /* This function performs the same task as ix86_expand_sse_unpack,
13392 but with SSE4.1 instructions. */
13394 void
13395 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13397 enum machine_mode imode = GET_MODE (operands[1]);
13398 rtx (*unpack)(rtx, rtx);
13399 rtx src, dest;
13401 switch (imode)
13403 case V16QImode:
13404 if (unsigned_p)
13405 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13406 else
13407 unpack = gen_sse4_1_extendv8qiv8hi2;
13408 break;
13409 case V8HImode:
13410 if (unsigned_p)
13411 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13412 else
13413 unpack = gen_sse4_1_extendv4hiv4si2;
13414 break;
13415 case V4SImode:
13416 if (unsigned_p)
13417 unpack = gen_sse4_1_zero_extendv2siv2di2;
13418 else
13419 unpack = gen_sse4_1_extendv2siv2di2;
13420 break;
13421 default:
13422 gcc_unreachable ();
13425 dest = operands[0];
13426 if (high_p)
13428 /* Shift higher 8 bytes to lower 8 bytes. */
13429 src = gen_reg_rtx (imode);
13430 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13431 gen_lowpart (TImode, operands[1]),
13432 GEN_INT (64)));
13434 else
13435 src = operands[1];
13437 emit_insn (unpack (dest, src));
13440 /* This function performs the same task as ix86_expand_sse_unpack,
13441 but with amdfam15 instructions. */
13443 #define PPERM_SRC 0x00 /* copy source */
13444 #define PPERM_INVERT 0x20 /* invert source */
13445 #define PPERM_REVERSE 0x40 /* bit reverse source */
13446 #define PPERM_REV_INV 0x60 /* bit reverse & invert src */
13447 #define PPERM_ZERO 0x80 /* all 0's */
13448 #define PPERM_ONES 0xa0 /* all 1's */
13449 #define PPERM_SIGN 0xc0 /* propigate sign bit */
13450 #define PPERM_INV_SIGN 0xe0 /* invert & propigate sign */
13452 #define PPERM_SRC1 0x00 /* use first source byte */
13453 #define PPERM_SRC2 0x10 /* use second source byte */
13455 void
13456 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13458 enum machine_mode imode = GET_MODE (operands[1]);
13459 int pperm_bytes[16];
13460 int i;
13461 int h = (high_p) ? 8 : 0;
13462 int h2;
13463 int sign_extend;
13464 rtvec v = rtvec_alloc (16);
13465 rtvec vs;
13466 rtx x, p;
13467 rtx op0 = operands[0], op1 = operands[1];
13469 switch (imode)
13471 case V16QImode:
13472 vs = rtvec_alloc (8);
13473 h2 = (high_p) ? 8 : 0;
13474 for (i = 0; i < 8; i++)
13476 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
13477 pperm_bytes[2*i+1] = ((unsigned_p)
13478 ? PPERM_ZERO
13479 : PPERM_SIGN | PPERM_SRC2 | i | h);
13482 for (i = 0; i < 16; i++)
13483 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13485 for (i = 0; i < 8; i++)
13486 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13488 p = gen_rtx_PARALLEL (VOIDmode, vs);
13489 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13490 if (unsigned_p)
13491 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
13492 else
13493 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
13494 break;
13496 case V8HImode:
13497 vs = rtvec_alloc (4);
13498 h2 = (high_p) ? 4 : 0;
13499 for (i = 0; i < 4; i++)
13501 sign_extend = ((unsigned_p)
13502 ? PPERM_ZERO
13503 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
13504 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
13505 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
13506 pperm_bytes[4*i+2] = sign_extend;
13507 pperm_bytes[4*i+3] = sign_extend;
13510 for (i = 0; i < 16; i++)
13511 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13513 for (i = 0; i < 4; i++)
13514 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13516 p = gen_rtx_PARALLEL (VOIDmode, vs);
13517 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13518 if (unsigned_p)
13519 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
13520 else
13521 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
13522 break;
13524 case V4SImode:
13525 vs = rtvec_alloc (2);
13526 h2 = (high_p) ? 2 : 0;
13527 for (i = 0; i < 2; i++)
13529 sign_extend = ((unsigned_p)
13530 ? PPERM_ZERO
13531 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
13532 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
13533 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
13534 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
13535 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
13536 pperm_bytes[8*i+4] = sign_extend;
13537 pperm_bytes[8*i+5] = sign_extend;
13538 pperm_bytes[8*i+6] = sign_extend;
13539 pperm_bytes[8*i+7] = sign_extend;
13542 for (i = 0; i < 16; i++)
13543 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13545 for (i = 0; i < 2; i++)
13546 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13548 p = gen_rtx_PARALLEL (VOIDmode, vs);
13549 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13550 if (unsigned_p)
13551 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
13552 else
13553 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
13554 break;
13556 default:
13557 gcc_unreachable ();
13560 return;
13563 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
13564 next narrower integer vector type */
13565 void
13566 ix86_expand_sse5_pack (rtx operands[3])
13568 enum machine_mode imode = GET_MODE (operands[0]);
13569 int pperm_bytes[16];
13570 int i;
13571 rtvec v = rtvec_alloc (16);
13572 rtx x;
13573 rtx op0 = operands[0];
13574 rtx op1 = operands[1];
13575 rtx op2 = operands[2];
13577 switch (imode)
13579 case V16QImode:
13580 for (i = 0; i < 8; i++)
13582 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
13583 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
13586 for (i = 0; i < 16; i++)
13587 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13589 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13590 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
13591 break;
13593 case V8HImode:
13594 for (i = 0; i < 4; i++)
13596 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
13597 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
13598 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
13599 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
13602 for (i = 0; i < 16; i++)
13603 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13605 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13606 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
13607 break;
13609 case V4SImode:
13610 for (i = 0; i < 2; i++)
13612 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
13613 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
13614 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
13615 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
13616 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
13617 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
13618 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
13619 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
13622 for (i = 0; i < 16; i++)
13623 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13625 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13626 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
13627 break;
13629 default:
13630 gcc_unreachable ();
13633 return;
13636 /* Expand conditional increment or decrement using adb/sbb instructions.
13637 The default case using setcc followed by the conditional move can be
13638 done by generic code. */
13640 ix86_expand_int_addcc (rtx operands[])
13642 enum rtx_code code = GET_CODE (operands[1]);
13643 rtx compare_op;
13644 rtx val = const0_rtx;
13645 bool fpcmp = false;
13646 enum machine_mode mode = GET_MODE (operands[0]);
13648 if (operands[3] != const1_rtx
13649 && operands[3] != constm1_rtx)
13650 return 0;
13651 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
13652 ix86_compare_op1, &compare_op))
13653 return 0;
13654 code = GET_CODE (compare_op);
13656 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13657 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13659 fpcmp = true;
13660 code = ix86_fp_compare_code_to_integer (code);
13663 if (code != LTU)
13665 val = constm1_rtx;
13666 if (fpcmp)
13667 PUT_CODE (compare_op,
13668 reverse_condition_maybe_unordered
13669 (GET_CODE (compare_op)));
13670 else
13671 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
13673 PUT_MODE (compare_op, mode);
13675 /* Construct either adc or sbb insn. */
13676 if ((code == LTU) == (operands[3] == constm1_rtx))
13678 switch (GET_MODE (operands[0]))
13680 case QImode:
13681 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
13682 break;
13683 case HImode:
13684 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
13685 break;
13686 case SImode:
13687 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
13688 break;
13689 case DImode:
13690 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13691 break;
13692 default:
13693 gcc_unreachable ();
13696 else
13698 switch (GET_MODE (operands[0]))
13700 case QImode:
13701 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
13702 break;
13703 case HImode:
13704 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
13705 break;
13706 case SImode:
13707 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
13708 break;
13709 case DImode:
13710 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13711 break;
13712 default:
13713 gcc_unreachable ();
13716 return 1; /* DONE */
13720 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
13721 works for floating pointer parameters and nonoffsetable memories.
13722 For pushes, it returns just stack offsets; the values will be saved
13723 in the right order. Maximally three parts are generated. */
13725 static int
13726 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
13728 int size;
13730 if (!TARGET_64BIT)
13731 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
13732 else
13733 size = (GET_MODE_SIZE (mode) + 4) / 8;
13735 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
13736 gcc_assert (size >= 2 && size <= 3);
13738 /* Optimize constant pool reference to immediates. This is used by fp
13739 moves, that force all constants to memory to allow combining. */
13740 if (MEM_P (operand) && MEM_READONLY_P (operand))
13742 rtx tmp = maybe_get_pool_constant (operand);
13743 if (tmp)
13744 operand = tmp;
13747 if (MEM_P (operand) && !offsettable_memref_p (operand))
13749 /* The only non-offsetable memories we handle are pushes. */
13750 int ok = push_operand (operand, VOIDmode);
13752 gcc_assert (ok);
13754 operand = copy_rtx (operand);
13755 PUT_MODE (operand, Pmode);
13756 parts[0] = parts[1] = parts[2] = operand;
13757 return size;
13760 if (GET_CODE (operand) == CONST_VECTOR)
13762 enum machine_mode imode = int_mode_for_mode (mode);
13763 /* Caution: if we looked through a constant pool memory above,
13764 the operand may actually have a different mode now. That's
13765 ok, since we want to pun this all the way back to an integer. */
13766 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
13767 gcc_assert (operand != NULL);
13768 mode = imode;
13771 if (!TARGET_64BIT)
13773 if (mode == DImode)
13774 split_di (&operand, 1, &parts[0], &parts[1]);
13775 else
13777 if (REG_P (operand))
13779 gcc_assert (reload_completed);
13780 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
13781 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
13782 if (size == 3)
13783 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
13785 else if (offsettable_memref_p (operand))
13787 operand = adjust_address (operand, SImode, 0);
13788 parts[0] = operand;
13789 parts[1] = adjust_address (operand, SImode, 4);
13790 if (size == 3)
13791 parts[2] = adjust_address (operand, SImode, 8);
13793 else if (GET_CODE (operand) == CONST_DOUBLE)
13795 REAL_VALUE_TYPE r;
13796 long l[4];
13798 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13799 switch (mode)
13801 case XFmode:
13802 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
13803 parts[2] = gen_int_mode (l[2], SImode);
13804 break;
13805 case DFmode:
13806 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
13807 break;
13808 default:
13809 gcc_unreachable ();
13811 parts[1] = gen_int_mode (l[1], SImode);
13812 parts[0] = gen_int_mode (l[0], SImode);
13814 else
13815 gcc_unreachable ();
13818 else
13820 if (mode == TImode)
13821 split_ti (&operand, 1, &parts[0], &parts[1]);
13822 if (mode == XFmode || mode == TFmode)
13824 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
13825 if (REG_P (operand))
13827 gcc_assert (reload_completed);
13828 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
13829 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
13831 else if (offsettable_memref_p (operand))
13833 operand = adjust_address (operand, DImode, 0);
13834 parts[0] = operand;
13835 parts[1] = adjust_address (operand, upper_mode, 8);
13837 else if (GET_CODE (operand) == CONST_DOUBLE)
13839 REAL_VALUE_TYPE r;
13840 long l[4];
13842 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13843 real_to_target (l, &r, mode);
13845 /* Do not use shift by 32 to avoid warning on 32bit systems. */
13846 if (HOST_BITS_PER_WIDE_INT >= 64)
13847 parts[0]
13848 = gen_int_mode
13849 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
13850 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
13851 DImode);
13852 else
13853 parts[0] = immed_double_const (l[0], l[1], DImode);
13855 if (upper_mode == SImode)
13856 parts[1] = gen_int_mode (l[2], SImode);
13857 else if (HOST_BITS_PER_WIDE_INT >= 64)
13858 parts[1]
13859 = gen_int_mode
13860 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
13861 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
13862 DImode);
13863 else
13864 parts[1] = immed_double_const (l[2], l[3], DImode);
13866 else
13867 gcc_unreachable ();
13871 return size;
13874 /* Emit insns to perform a move or push of DI, DF, and XF values.
13875 Return false when normal moves are needed; true when all required
13876 insns have been emitted. Operands 2-4 contain the input values
13877 int the correct order; operands 5-7 contain the output values. */
13879 void
13880 ix86_split_long_move (rtx operands[])
13882 rtx part[2][3];
13883 int nparts;
13884 int push = 0;
13885 int collisions = 0;
13886 enum machine_mode mode = GET_MODE (operands[0]);
13888 /* The DFmode expanders may ask us to move double.
13889 For 64bit target this is single move. By hiding the fact
13890 here we simplify i386.md splitters. */
13891 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
13893 /* Optimize constant pool reference to immediates. This is used by
13894 fp moves, that force all constants to memory to allow combining. */
13896 if (MEM_P (operands[1])
13897 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
13898 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
13899 operands[1] = get_pool_constant (XEXP (operands[1], 0));
13900 if (push_operand (operands[0], VOIDmode))
13902 operands[0] = copy_rtx (operands[0]);
13903 PUT_MODE (operands[0], Pmode);
13905 else
13906 operands[0] = gen_lowpart (DImode, operands[0]);
13907 operands[1] = gen_lowpart (DImode, operands[1]);
13908 emit_move_insn (operands[0], operands[1]);
13909 return;
13912 /* The only non-offsettable memory we handle is push. */
13913 if (push_operand (operands[0], VOIDmode))
13914 push = 1;
13915 else
13916 gcc_assert (!MEM_P (operands[0])
13917 || offsettable_memref_p (operands[0]));
13919 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
13920 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
13922 /* When emitting push, take care for source operands on the stack. */
13923 if (push && MEM_P (operands[1])
13924 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
13926 if (nparts == 3)
13927 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
13928 XEXP (part[1][2], 0));
13929 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
13930 XEXP (part[1][1], 0));
13933 /* We need to do copy in the right order in case an address register
13934 of the source overlaps the destination. */
13935 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
13937 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
13938 collisions++;
13939 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13940 collisions++;
13941 if (nparts == 3
13942 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
13943 collisions++;
13945 /* Collision in the middle part can be handled by reordering. */
13946 if (collisions == 1 && nparts == 3
13947 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13949 rtx tmp;
13950 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
13951 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
13954 /* If there are more collisions, we can't handle it by reordering.
13955 Do an lea to the last part and use only one colliding move. */
13956 else if (collisions > 1)
13958 rtx base;
13960 collisions = 1;
13962 base = part[0][nparts - 1];
13964 /* Handle the case when the last part isn't valid for lea.
13965 Happens in 64-bit mode storing the 12-byte XFmode. */
13966 if (GET_MODE (base) != Pmode)
13967 base = gen_rtx_REG (Pmode, REGNO (base));
13969 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
13970 part[1][0] = replace_equiv_address (part[1][0], base);
13971 part[1][1] = replace_equiv_address (part[1][1],
13972 plus_constant (base, UNITS_PER_WORD));
13973 if (nparts == 3)
13974 part[1][2] = replace_equiv_address (part[1][2],
13975 plus_constant (base, 8));
13979 if (push)
13981 if (!TARGET_64BIT)
13983 if (nparts == 3)
13985 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
13986 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
13987 emit_move_insn (part[0][2], part[1][2]);
13990 else
13992 /* In 64bit mode we don't have 32bit push available. In case this is
13993 register, it is OK - we will just use larger counterpart. We also
13994 retype memory - these comes from attempt to avoid REX prefix on
13995 moving of second half of TFmode value. */
13996 if (GET_MODE (part[1][1]) == SImode)
13998 switch (GET_CODE (part[1][1]))
14000 case MEM:
14001 part[1][1] = adjust_address (part[1][1], DImode, 0);
14002 break;
14004 case REG:
14005 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
14006 break;
14008 default:
14009 gcc_unreachable ();
14012 if (GET_MODE (part[1][0]) == SImode)
14013 part[1][0] = part[1][1];
14016 emit_move_insn (part[0][1], part[1][1]);
14017 emit_move_insn (part[0][0], part[1][0]);
14018 return;
14021 /* Choose correct order to not overwrite the source before it is copied. */
14022 if ((REG_P (part[0][0])
14023 && REG_P (part[1][1])
14024 && (REGNO (part[0][0]) == REGNO (part[1][1])
14025 || (nparts == 3
14026 && REGNO (part[0][0]) == REGNO (part[1][2]))))
14027 || (collisions > 0
14028 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
14030 if (nparts == 3)
14032 operands[2] = part[0][2];
14033 operands[3] = part[0][1];
14034 operands[4] = part[0][0];
14035 operands[5] = part[1][2];
14036 operands[6] = part[1][1];
14037 operands[7] = part[1][0];
14039 else
14041 operands[2] = part[0][1];
14042 operands[3] = part[0][0];
14043 operands[5] = part[1][1];
14044 operands[6] = part[1][0];
14047 else
14049 if (nparts == 3)
14051 operands[2] = part[0][0];
14052 operands[3] = part[0][1];
14053 operands[4] = part[0][2];
14054 operands[5] = part[1][0];
14055 operands[6] = part[1][1];
14056 operands[7] = part[1][2];
14058 else
14060 operands[2] = part[0][0];
14061 operands[3] = part[0][1];
14062 operands[5] = part[1][0];
14063 operands[6] = part[1][1];
14067 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
14068 if (optimize_size)
14070 if (CONST_INT_P (operands[5])
14071 && operands[5] != const0_rtx
14072 && REG_P (operands[2]))
14074 if (CONST_INT_P (operands[6])
14075 && INTVAL (operands[6]) == INTVAL (operands[5]))
14076 operands[6] = operands[2];
14078 if (nparts == 3
14079 && CONST_INT_P (operands[7])
14080 && INTVAL (operands[7]) == INTVAL (operands[5]))
14081 operands[7] = operands[2];
14084 if (nparts == 3
14085 && CONST_INT_P (operands[6])
14086 && operands[6] != const0_rtx
14087 && REG_P (operands[3])
14088 && CONST_INT_P (operands[7])
14089 && INTVAL (operands[7]) == INTVAL (operands[6]))
14090 operands[7] = operands[3];
14093 emit_move_insn (operands[2], operands[5]);
14094 emit_move_insn (operands[3], operands[6]);
14095 if (nparts == 3)
14096 emit_move_insn (operands[4], operands[7]);
14098 return;
14101 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
14102 left shift by a constant, either using a single shift or
14103 a sequence of add instructions. */
14105 static void
14106 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
14108 if (count == 1)
14110 emit_insn ((mode == DImode
14111 ? gen_addsi3
14112 : gen_adddi3) (operand, operand, operand));
14114 else if (!optimize_size
14115 && count * ix86_cost->add <= ix86_cost->shift_const)
14117 int i;
14118 for (i=0; i<count; i++)
14120 emit_insn ((mode == DImode
14121 ? gen_addsi3
14122 : gen_adddi3) (operand, operand, operand));
14125 else
14126 emit_insn ((mode == DImode
14127 ? gen_ashlsi3
14128 : gen_ashldi3) (operand, operand, GEN_INT (count)));
14131 void
14132 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
14134 rtx low[2], high[2];
14135 int count;
14136 const int single_width = mode == DImode ? 32 : 64;
14138 if (CONST_INT_P (operands[2]))
14140 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14141 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14143 if (count >= single_width)
14145 emit_move_insn (high[0], low[1]);
14146 emit_move_insn (low[0], const0_rtx);
14148 if (count > single_width)
14149 ix86_expand_ashl_const (high[0], count - single_width, mode);
14151 else
14153 if (!rtx_equal_p (operands[0], operands[1]))
14154 emit_move_insn (operands[0], operands[1]);
14155 emit_insn ((mode == DImode
14156 ? gen_x86_shld_1
14157 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
14158 ix86_expand_ashl_const (low[0], count, mode);
14160 return;
14163 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14165 if (operands[1] == const1_rtx)
14167 /* Assuming we've chosen a QImode capable registers, then 1 << N
14168 can be done with two 32/64-bit shifts, no branches, no cmoves. */
14169 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
14171 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
14173 ix86_expand_clear (low[0]);
14174 ix86_expand_clear (high[0]);
14175 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
14177 d = gen_lowpart (QImode, low[0]);
14178 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14179 s = gen_rtx_EQ (QImode, flags, const0_rtx);
14180 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14182 d = gen_lowpart (QImode, high[0]);
14183 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14184 s = gen_rtx_NE (QImode, flags, const0_rtx);
14185 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14188 /* Otherwise, we can get the same results by manually performing
14189 a bit extract operation on bit 5/6, and then performing the two
14190 shifts. The two methods of getting 0/1 into low/high are exactly
14191 the same size. Avoiding the shift in the bit extract case helps
14192 pentium4 a bit; no one else seems to care much either way. */
14193 else
14195 rtx x;
14197 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
14198 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
14199 else
14200 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
14201 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
14203 emit_insn ((mode == DImode
14204 ? gen_lshrsi3
14205 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
14206 emit_insn ((mode == DImode
14207 ? gen_andsi3
14208 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
14209 emit_move_insn (low[0], high[0]);
14210 emit_insn ((mode == DImode
14211 ? gen_xorsi3
14212 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
14215 emit_insn ((mode == DImode
14216 ? gen_ashlsi3
14217 : gen_ashldi3) (low[0], low[0], operands[2]));
14218 emit_insn ((mode == DImode
14219 ? gen_ashlsi3
14220 : gen_ashldi3) (high[0], high[0], operands[2]));
14221 return;
14224 if (operands[1] == constm1_rtx)
14226 /* For -1 << N, we can avoid the shld instruction, because we
14227 know that we're shifting 0...31/63 ones into a -1. */
14228 emit_move_insn (low[0], constm1_rtx);
14229 if (optimize_size)
14230 emit_move_insn (high[0], low[0]);
14231 else
14232 emit_move_insn (high[0], constm1_rtx);
14234 else
14236 if (!rtx_equal_p (operands[0], operands[1]))
14237 emit_move_insn (operands[0], operands[1]);
14239 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14240 emit_insn ((mode == DImode
14241 ? gen_x86_shld_1
14242 : gen_x86_64_shld) (high[0], low[0], operands[2]));
14245 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
14247 if (TARGET_CMOVE && scratch)
14249 ix86_expand_clear (scratch);
14250 emit_insn ((mode == DImode
14251 ? gen_x86_shift_adj_1
14252 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
14254 else
14255 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
14258 void
14259 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
14261 rtx low[2], high[2];
14262 int count;
14263 const int single_width = mode == DImode ? 32 : 64;
14265 if (CONST_INT_P (operands[2]))
14267 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14268 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14270 if (count == single_width * 2 - 1)
14272 emit_move_insn (high[0], high[1]);
14273 emit_insn ((mode == DImode
14274 ? gen_ashrsi3
14275 : gen_ashrdi3) (high[0], high[0],
14276 GEN_INT (single_width - 1)));
14277 emit_move_insn (low[0], high[0]);
14280 else if (count >= single_width)
14282 emit_move_insn (low[0], high[1]);
14283 emit_move_insn (high[0], low[0]);
14284 emit_insn ((mode == DImode
14285 ? gen_ashrsi3
14286 : gen_ashrdi3) (high[0], high[0],
14287 GEN_INT (single_width - 1)));
14288 if (count > single_width)
14289 emit_insn ((mode == DImode
14290 ? gen_ashrsi3
14291 : gen_ashrdi3) (low[0], low[0],
14292 GEN_INT (count - single_width)));
14294 else
14296 if (!rtx_equal_p (operands[0], operands[1]))
14297 emit_move_insn (operands[0], operands[1]);
14298 emit_insn ((mode == DImode
14299 ? gen_x86_shrd_1
14300 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14301 emit_insn ((mode == DImode
14302 ? gen_ashrsi3
14303 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
14306 else
14308 if (!rtx_equal_p (operands[0], operands[1]))
14309 emit_move_insn (operands[0], operands[1]);
14311 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14313 emit_insn ((mode == DImode
14314 ? gen_x86_shrd_1
14315 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14316 emit_insn ((mode == DImode
14317 ? gen_ashrsi3
14318 : gen_ashrdi3) (high[0], high[0], operands[2]));
14320 if (TARGET_CMOVE && scratch)
14322 emit_move_insn (scratch, high[0]);
14323 emit_insn ((mode == DImode
14324 ? gen_ashrsi3
14325 : gen_ashrdi3) (scratch, scratch,
14326 GEN_INT (single_width - 1)));
14327 emit_insn ((mode == DImode
14328 ? gen_x86_shift_adj_1
14329 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14330 scratch));
14332 else
14333 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
14337 void
14338 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
14340 rtx low[2], high[2];
14341 int count;
14342 const int single_width = mode == DImode ? 32 : 64;
14344 if (CONST_INT_P (operands[2]))
14346 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14347 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14349 if (count >= single_width)
14351 emit_move_insn (low[0], high[1]);
14352 ix86_expand_clear (high[0]);
14354 if (count > single_width)
14355 emit_insn ((mode == DImode
14356 ? gen_lshrsi3
14357 : gen_lshrdi3) (low[0], low[0],
14358 GEN_INT (count - single_width)));
14360 else
14362 if (!rtx_equal_p (operands[0], operands[1]))
14363 emit_move_insn (operands[0], operands[1]);
14364 emit_insn ((mode == DImode
14365 ? gen_x86_shrd_1
14366 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14367 emit_insn ((mode == DImode
14368 ? gen_lshrsi3
14369 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
14372 else
14374 if (!rtx_equal_p (operands[0], operands[1]))
14375 emit_move_insn (operands[0], operands[1]);
14377 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14379 emit_insn ((mode == DImode
14380 ? gen_x86_shrd_1
14381 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14382 emit_insn ((mode == DImode
14383 ? gen_lshrsi3
14384 : gen_lshrdi3) (high[0], high[0], operands[2]));
14386 /* Heh. By reversing the arguments, we can reuse this pattern. */
14387 if (TARGET_CMOVE && scratch)
14389 ix86_expand_clear (scratch);
14390 emit_insn ((mode == DImode
14391 ? gen_x86_shift_adj_1
14392 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14393 scratch));
14395 else
14396 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
14400 /* Predict just emitted jump instruction to be taken with probability PROB. */
14401 static void
14402 predict_jump (int prob)
14404 rtx insn = get_last_insn ();
14405 gcc_assert (JUMP_P (insn));
14406 REG_NOTES (insn)
14407 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14408 GEN_INT (prob),
14409 REG_NOTES (insn));
14412 /* Helper function for the string operations below. Dest VARIABLE whether
14413 it is aligned to VALUE bytes. If true, jump to the label. */
14414 static rtx
14415 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
14417 rtx label = gen_label_rtx ();
14418 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
14419 if (GET_MODE (variable) == DImode)
14420 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
14421 else
14422 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
14423 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
14424 1, label);
14425 if (epilogue)
14426 predict_jump (REG_BR_PROB_BASE * 50 / 100);
14427 else
14428 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14429 return label;
14432 /* Adjust COUNTER by the VALUE. */
14433 static void
14434 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
14436 if (GET_MODE (countreg) == DImode)
14437 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
14438 else
14439 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
14442 /* Zero extend possibly SImode EXP to Pmode register. */
14444 ix86_zero_extend_to_Pmode (rtx exp)
14446 rtx r;
14447 if (GET_MODE (exp) == VOIDmode)
14448 return force_reg (Pmode, exp);
14449 if (GET_MODE (exp) == Pmode)
14450 return copy_to_mode_reg (Pmode, exp);
14451 r = gen_reg_rtx (Pmode);
14452 emit_insn (gen_zero_extendsidi2 (r, exp));
14453 return r;
14456 /* Divide COUNTREG by SCALE. */
14457 static rtx
14458 scale_counter (rtx countreg, int scale)
14460 rtx sc;
14461 rtx piece_size_mask;
14463 if (scale == 1)
14464 return countreg;
14465 if (CONST_INT_P (countreg))
14466 return GEN_INT (INTVAL (countreg) / scale);
14467 gcc_assert (REG_P (countreg));
14469 piece_size_mask = GEN_INT (scale - 1);
14470 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
14471 GEN_INT (exact_log2 (scale)),
14472 NULL, 1, OPTAB_DIRECT);
14473 return sc;
14476 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
14477 DImode for constant loop counts. */
14479 static enum machine_mode
14480 counter_mode (rtx count_exp)
14482 if (GET_MODE (count_exp) != VOIDmode)
14483 return GET_MODE (count_exp);
14484 if (GET_CODE (count_exp) != CONST_INT)
14485 return Pmode;
14486 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
14487 return DImode;
14488 return SImode;
14491 /* When SRCPTR is non-NULL, output simple loop to move memory
14492 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
14493 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
14494 equivalent loop to set memory by VALUE (supposed to be in MODE).
14496 The size is rounded down to whole number of chunk size moved at once.
14497 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
14500 static void
14501 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
14502 rtx destptr, rtx srcptr, rtx value,
14503 rtx count, enum machine_mode mode, int unroll,
14504 int expected_size)
14506 rtx out_label, top_label, iter, tmp;
14507 enum machine_mode iter_mode = counter_mode (count);
14508 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
14509 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
14510 rtx size;
14511 rtx x_addr;
14512 rtx y_addr;
14513 int i;
14515 top_label = gen_label_rtx ();
14516 out_label = gen_label_rtx ();
14517 iter = gen_reg_rtx (iter_mode);
14519 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
14520 NULL, 1, OPTAB_DIRECT);
14521 /* Those two should combine. */
14522 if (piece_size == const1_rtx)
14524 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
14525 true, out_label);
14526 predict_jump (REG_BR_PROB_BASE * 10 / 100);
14528 emit_move_insn (iter, const0_rtx);
14530 emit_label (top_label);
14532 tmp = convert_modes (Pmode, iter_mode, iter, true);
14533 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
14534 destmem = change_address (destmem, mode, x_addr);
14536 if (srcmem)
14538 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
14539 srcmem = change_address (srcmem, mode, y_addr);
14541 /* When unrolling for chips that reorder memory reads and writes,
14542 we can save registers by using single temporary.
14543 Also using 4 temporaries is overkill in 32bit mode. */
14544 if (!TARGET_64BIT && 0)
14546 for (i = 0; i < unroll; i++)
14548 if (i)
14550 destmem =
14551 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14552 srcmem =
14553 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14555 emit_move_insn (destmem, srcmem);
14558 else
14560 rtx tmpreg[4];
14561 gcc_assert (unroll <= 4);
14562 for (i = 0; i < unroll; i++)
14564 tmpreg[i] = gen_reg_rtx (mode);
14565 if (i)
14567 srcmem =
14568 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14570 emit_move_insn (tmpreg[i], srcmem);
14572 for (i = 0; i < unroll; i++)
14574 if (i)
14576 destmem =
14577 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14579 emit_move_insn (destmem, tmpreg[i]);
14583 else
14584 for (i = 0; i < unroll; i++)
14586 if (i)
14587 destmem =
14588 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14589 emit_move_insn (destmem, value);
14592 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14593 true, OPTAB_LIB_WIDEN);
14594 if (tmp != iter)
14595 emit_move_insn (iter, tmp);
14597 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
14598 true, top_label);
14599 if (expected_size != -1)
14601 expected_size /= GET_MODE_SIZE (mode) * unroll;
14602 if (expected_size == 0)
14603 predict_jump (0);
14604 else if (expected_size > REG_BR_PROB_BASE)
14605 predict_jump (REG_BR_PROB_BASE - 1);
14606 else
14607 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
14609 else
14610 predict_jump (REG_BR_PROB_BASE * 80 / 100);
14611 iter = ix86_zero_extend_to_Pmode (iter);
14612 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
14613 true, OPTAB_LIB_WIDEN);
14614 if (tmp != destptr)
14615 emit_move_insn (destptr, tmp);
14616 if (srcptr)
14618 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
14619 true, OPTAB_LIB_WIDEN);
14620 if (tmp != srcptr)
14621 emit_move_insn (srcptr, tmp);
14623 emit_label (out_label);
14626 /* Output "rep; mov" instruction.
14627 Arguments have same meaning as for previous function */
14628 static void
14629 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
14630 rtx destptr, rtx srcptr,
14631 rtx count,
14632 enum machine_mode mode)
14634 rtx destexp;
14635 rtx srcexp;
14636 rtx countreg;
14638 /* If the size is known, it is shorter to use rep movs. */
14639 if (mode == QImode && CONST_INT_P (count)
14640 && !(INTVAL (count) & 3))
14641 mode = SImode;
14643 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14644 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14645 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
14646 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
14647 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14648 if (mode != QImode)
14650 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14651 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14652 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14653 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
14654 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14655 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
14657 else
14659 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14660 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
14662 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
14663 destexp, srcexp));
14666 /* Output "rep; stos" instruction.
14667 Arguments have same meaning as for previous function */
14668 static void
14669 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
14670 rtx count,
14671 enum machine_mode mode)
14673 rtx destexp;
14674 rtx countreg;
14676 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14677 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14678 value = force_reg (mode, gen_lowpart (mode, value));
14679 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14680 if (mode != QImode)
14682 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14683 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14684 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14686 else
14687 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14688 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
14691 static void
14692 emit_strmov (rtx destmem, rtx srcmem,
14693 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
14695 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
14696 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
14697 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14700 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
14701 static void
14702 expand_movmem_epilogue (rtx destmem, rtx srcmem,
14703 rtx destptr, rtx srcptr, rtx count, int max_size)
14705 rtx src, dest;
14706 if (CONST_INT_P (count))
14708 HOST_WIDE_INT countval = INTVAL (count);
14709 int offset = 0;
14711 if ((countval & 0x10) && max_size > 16)
14713 if (TARGET_64BIT)
14715 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14716 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
14718 else
14719 gcc_unreachable ();
14720 offset += 16;
14722 if ((countval & 0x08) && max_size > 8)
14724 if (TARGET_64BIT)
14725 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14726 else
14728 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14729 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
14731 offset += 8;
14733 if ((countval & 0x04) && max_size > 4)
14735 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14736 offset += 4;
14738 if ((countval & 0x02) && max_size > 2)
14740 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
14741 offset += 2;
14743 if ((countval & 0x01) && max_size > 1)
14745 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
14746 offset += 1;
14748 return;
14750 if (max_size > 8)
14752 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
14753 count, 1, OPTAB_DIRECT);
14754 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
14755 count, QImode, 1, 4);
14756 return;
14759 /* When there are stringops, we can cheaply increase dest and src pointers.
14760 Otherwise we save code size by maintaining offset (zero is readily
14761 available from preceding rep operation) and using x86 addressing modes.
14763 if (TARGET_SINGLE_STRINGOP)
14765 if (max_size > 4)
14767 rtx label = ix86_expand_aligntest (count, 4, true);
14768 src = change_address (srcmem, SImode, srcptr);
14769 dest = change_address (destmem, SImode, destptr);
14770 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14771 emit_label (label);
14772 LABEL_NUSES (label) = 1;
14774 if (max_size > 2)
14776 rtx label = ix86_expand_aligntest (count, 2, true);
14777 src = change_address (srcmem, HImode, srcptr);
14778 dest = change_address (destmem, HImode, destptr);
14779 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14780 emit_label (label);
14781 LABEL_NUSES (label) = 1;
14783 if (max_size > 1)
14785 rtx label = ix86_expand_aligntest (count, 1, true);
14786 src = change_address (srcmem, QImode, srcptr);
14787 dest = change_address (destmem, QImode, destptr);
14788 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14789 emit_label (label);
14790 LABEL_NUSES (label) = 1;
14793 else
14795 rtx offset = force_reg (Pmode, const0_rtx);
14796 rtx tmp;
14798 if (max_size > 4)
14800 rtx label = ix86_expand_aligntest (count, 4, true);
14801 src = change_address (srcmem, SImode, srcptr);
14802 dest = change_address (destmem, SImode, destptr);
14803 emit_move_insn (dest, src);
14804 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
14805 true, OPTAB_LIB_WIDEN);
14806 if (tmp != offset)
14807 emit_move_insn (offset, tmp);
14808 emit_label (label);
14809 LABEL_NUSES (label) = 1;
14811 if (max_size > 2)
14813 rtx label = ix86_expand_aligntest (count, 2, true);
14814 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14815 src = change_address (srcmem, HImode, tmp);
14816 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14817 dest = change_address (destmem, HImode, tmp);
14818 emit_move_insn (dest, src);
14819 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
14820 true, OPTAB_LIB_WIDEN);
14821 if (tmp != offset)
14822 emit_move_insn (offset, tmp);
14823 emit_label (label);
14824 LABEL_NUSES (label) = 1;
14826 if (max_size > 1)
14828 rtx label = ix86_expand_aligntest (count, 1, true);
14829 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14830 src = change_address (srcmem, QImode, tmp);
14831 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14832 dest = change_address (destmem, QImode, tmp);
14833 emit_move_insn (dest, src);
14834 emit_label (label);
14835 LABEL_NUSES (label) = 1;
14840 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14841 static void
14842 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
14843 rtx count, int max_size)
14845 count =
14846 expand_simple_binop (counter_mode (count), AND, count,
14847 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
14848 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
14849 gen_lowpart (QImode, value), count, QImode,
14850 1, max_size / 2);
14853 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14854 static void
14855 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
14857 rtx dest;
14859 if (CONST_INT_P (count))
14861 HOST_WIDE_INT countval = INTVAL (count);
14862 int offset = 0;
14864 if ((countval & 0x10) && max_size > 16)
14866 if (TARGET_64BIT)
14868 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14869 emit_insn (gen_strset (destptr, dest, value));
14870 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
14871 emit_insn (gen_strset (destptr, dest, value));
14873 else
14874 gcc_unreachable ();
14875 offset += 16;
14877 if ((countval & 0x08) && max_size > 8)
14879 if (TARGET_64BIT)
14881 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14882 emit_insn (gen_strset (destptr, dest, value));
14884 else
14886 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14887 emit_insn (gen_strset (destptr, dest, value));
14888 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
14889 emit_insn (gen_strset (destptr, dest, value));
14891 offset += 8;
14893 if ((countval & 0x04) && max_size > 4)
14895 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14896 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14897 offset += 4;
14899 if ((countval & 0x02) && max_size > 2)
14901 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
14902 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14903 offset += 2;
14905 if ((countval & 0x01) && max_size > 1)
14907 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
14908 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14909 offset += 1;
14911 return;
14913 if (max_size > 32)
14915 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
14916 return;
14918 if (max_size > 16)
14920 rtx label = ix86_expand_aligntest (count, 16, true);
14921 if (TARGET_64BIT)
14923 dest = change_address (destmem, DImode, destptr);
14924 emit_insn (gen_strset (destptr, dest, value));
14925 emit_insn (gen_strset (destptr, dest, value));
14927 else
14929 dest = change_address (destmem, SImode, destptr);
14930 emit_insn (gen_strset (destptr, dest, value));
14931 emit_insn (gen_strset (destptr, dest, value));
14932 emit_insn (gen_strset (destptr, dest, value));
14933 emit_insn (gen_strset (destptr, dest, value));
14935 emit_label (label);
14936 LABEL_NUSES (label) = 1;
14938 if (max_size > 8)
14940 rtx label = ix86_expand_aligntest (count, 8, true);
14941 if (TARGET_64BIT)
14943 dest = change_address (destmem, DImode, destptr);
14944 emit_insn (gen_strset (destptr, dest, value));
14946 else
14948 dest = change_address (destmem, SImode, destptr);
14949 emit_insn (gen_strset (destptr, dest, value));
14950 emit_insn (gen_strset (destptr, dest, value));
14952 emit_label (label);
14953 LABEL_NUSES (label) = 1;
14955 if (max_size > 4)
14957 rtx label = ix86_expand_aligntest (count, 4, true);
14958 dest = change_address (destmem, SImode, destptr);
14959 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14960 emit_label (label);
14961 LABEL_NUSES (label) = 1;
14963 if (max_size > 2)
14965 rtx label = ix86_expand_aligntest (count, 2, true);
14966 dest = change_address (destmem, HImode, destptr);
14967 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14968 emit_label (label);
14969 LABEL_NUSES (label) = 1;
14971 if (max_size > 1)
14973 rtx label = ix86_expand_aligntest (count, 1, true);
14974 dest = change_address (destmem, QImode, destptr);
14975 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14976 emit_label (label);
14977 LABEL_NUSES (label) = 1;
14981 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
14982 DESIRED_ALIGNMENT. */
14983 static void
14984 expand_movmem_prologue (rtx destmem, rtx srcmem,
14985 rtx destptr, rtx srcptr, rtx count,
14986 int align, int desired_alignment)
14988 if (align <= 1 && desired_alignment > 1)
14990 rtx label = ix86_expand_aligntest (destptr, 1, false);
14991 srcmem = change_address (srcmem, QImode, srcptr);
14992 destmem = change_address (destmem, QImode, destptr);
14993 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14994 ix86_adjust_counter (count, 1);
14995 emit_label (label);
14996 LABEL_NUSES (label) = 1;
14998 if (align <= 2 && desired_alignment > 2)
15000 rtx label = ix86_expand_aligntest (destptr, 2, false);
15001 srcmem = change_address (srcmem, HImode, srcptr);
15002 destmem = change_address (destmem, HImode, destptr);
15003 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15004 ix86_adjust_counter (count, 2);
15005 emit_label (label);
15006 LABEL_NUSES (label) = 1;
15008 if (align <= 4 && desired_alignment > 4)
15010 rtx label = ix86_expand_aligntest (destptr, 4, false);
15011 srcmem = change_address (srcmem, SImode, srcptr);
15012 destmem = change_address (destmem, SImode, destptr);
15013 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15014 ix86_adjust_counter (count, 4);
15015 emit_label (label);
15016 LABEL_NUSES (label) = 1;
15018 gcc_assert (desired_alignment <= 8);
15021 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
15022 DESIRED_ALIGNMENT. */
15023 static void
15024 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
15025 int align, int desired_alignment)
15027 if (align <= 1 && desired_alignment > 1)
15029 rtx label = ix86_expand_aligntest (destptr, 1, false);
15030 destmem = change_address (destmem, QImode, destptr);
15031 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
15032 ix86_adjust_counter (count, 1);
15033 emit_label (label);
15034 LABEL_NUSES (label) = 1;
15036 if (align <= 2 && desired_alignment > 2)
15038 rtx label = ix86_expand_aligntest (destptr, 2, false);
15039 destmem = change_address (destmem, HImode, destptr);
15040 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
15041 ix86_adjust_counter (count, 2);
15042 emit_label (label);
15043 LABEL_NUSES (label) = 1;
15045 if (align <= 4 && desired_alignment > 4)
15047 rtx label = ix86_expand_aligntest (destptr, 4, false);
15048 destmem = change_address (destmem, SImode, destptr);
15049 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
15050 ix86_adjust_counter (count, 4);
15051 emit_label (label);
15052 LABEL_NUSES (label) = 1;
15054 gcc_assert (desired_alignment <= 8);
15057 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
15058 static enum stringop_alg
15059 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
15060 int *dynamic_check)
15062 const struct stringop_algs * algs;
15064 *dynamic_check = -1;
15065 if (memset)
15066 algs = &ix86_cost->memset[TARGET_64BIT != 0];
15067 else
15068 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
15069 if (stringop_alg != no_stringop)
15070 return stringop_alg;
15071 /* rep; movq or rep; movl is the smallest variant. */
15072 else if (optimize_size)
15074 if (!count || (count & 3))
15075 return rep_prefix_1_byte;
15076 else
15077 return rep_prefix_4_byte;
15079 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
15081 else if (expected_size != -1 && expected_size < 4)
15082 return loop_1_byte;
15083 else if (expected_size != -1)
15085 unsigned int i;
15086 enum stringop_alg alg = libcall;
15087 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
15089 gcc_assert (algs->size[i].max);
15090 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
15092 if (algs->size[i].alg != libcall)
15093 alg = algs->size[i].alg;
15094 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
15095 last non-libcall inline algorithm. */
15096 if (TARGET_INLINE_ALL_STRINGOPS)
15098 /* When the current size is best to be copied by a libcall,
15099 but we are still forced to inline, run the heuristic bellow
15100 that will pick code for medium sized blocks. */
15101 if (alg != libcall)
15102 return alg;
15103 break;
15105 else
15106 return algs->size[i].alg;
15109 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
15111 /* When asked to inline the call anyway, try to pick meaningful choice.
15112 We look for maximal size of block that is faster to copy by hand and
15113 take blocks of at most of that size guessing that average size will
15114 be roughly half of the block.
15116 If this turns out to be bad, we might simply specify the preferred
15117 choice in ix86_costs. */
15118 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15119 && algs->unknown_size == libcall)
15121 int max = -1;
15122 enum stringop_alg alg;
15123 int i;
15125 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
15126 if (algs->size[i].alg != libcall && algs->size[i].alg)
15127 max = algs->size[i].max;
15128 if (max == -1)
15129 max = 4096;
15130 alg = decide_alg (count, max / 2, memset, dynamic_check);
15131 gcc_assert (*dynamic_check == -1);
15132 gcc_assert (alg != libcall);
15133 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15134 *dynamic_check = max;
15135 return alg;
15137 return algs->unknown_size;
15140 /* Decide on alignment. We know that the operand is already aligned to ALIGN
15141 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
15142 static int
15143 decide_alignment (int align,
15144 enum stringop_alg alg,
15145 int expected_size)
15147 int desired_align = 0;
15148 switch (alg)
15150 case no_stringop:
15151 gcc_unreachable ();
15152 case loop:
15153 case unrolled_loop:
15154 desired_align = GET_MODE_SIZE (Pmode);
15155 break;
15156 case rep_prefix_8_byte:
15157 desired_align = 8;
15158 break;
15159 case rep_prefix_4_byte:
15160 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15161 copying whole cacheline at once. */
15162 if (TARGET_PENTIUMPRO)
15163 desired_align = 8;
15164 else
15165 desired_align = 4;
15166 break;
15167 case rep_prefix_1_byte:
15168 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15169 copying whole cacheline at once. */
15170 if (TARGET_PENTIUMPRO)
15171 desired_align = 8;
15172 else
15173 desired_align = 1;
15174 break;
15175 case loop_1_byte:
15176 desired_align = 1;
15177 break;
15178 case libcall:
15179 return 0;
15182 if (optimize_size)
15183 desired_align = 1;
15184 if (desired_align < align)
15185 desired_align = align;
15186 if (expected_size != -1 && expected_size < 4)
15187 desired_align = align;
15188 return desired_align;
15191 /* Return the smallest power of 2 greater than VAL. */
15192 static int
15193 smallest_pow2_greater_than (int val)
15195 int ret = 1;
15196 while (ret <= val)
15197 ret <<= 1;
15198 return ret;
15201 /* Expand string move (memcpy) operation. Use i386 string operations when
15202 profitable. expand_clrmem contains similar code. The code depends upon
15203 architecture, block size and alignment, but always has the same
15204 overall structure:
15206 1) Prologue guard: Conditional that jumps up to epilogues for small
15207 blocks that can be handled by epilogue alone. This is faster but
15208 also needed for correctness, since prologue assume the block is larger
15209 than the desired alignment.
15211 Optional dynamic check for size and libcall for large
15212 blocks is emitted here too, with -minline-stringops-dynamically.
15214 2) Prologue: copy first few bytes in order to get destination aligned
15215 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
15216 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
15217 We emit either a jump tree on power of two sized blocks, or a byte loop.
15219 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
15220 with specified algorithm.
15222 4) Epilogue: code copying tail of the block that is too small to be
15223 handled by main body (or up to size guarded by prologue guard). */
15226 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
15227 rtx expected_align_exp, rtx expected_size_exp)
15229 rtx destreg;
15230 rtx srcreg;
15231 rtx label = NULL;
15232 rtx tmp;
15233 rtx jump_around_label = NULL;
15234 HOST_WIDE_INT align = 1;
15235 unsigned HOST_WIDE_INT count = 0;
15236 HOST_WIDE_INT expected_size = -1;
15237 int size_needed = 0, epilogue_size_needed;
15238 int desired_align = 0;
15239 enum stringop_alg alg;
15240 int dynamic_check;
15242 if (CONST_INT_P (align_exp))
15243 align = INTVAL (align_exp);
15244 /* i386 can do misaligned access on reasonably increased cost. */
15245 if (CONST_INT_P (expected_align_exp)
15246 && INTVAL (expected_align_exp) > align)
15247 align = INTVAL (expected_align_exp);
15248 if (CONST_INT_P (count_exp))
15249 count = expected_size = INTVAL (count_exp);
15250 if (CONST_INT_P (expected_size_exp) && count == 0)
15251 expected_size = INTVAL (expected_size_exp);
15253 /* Step 0: Decide on preferred algorithm, desired alignment and
15254 size of chunks to be copied by main loop. */
15256 alg = decide_alg (count, expected_size, false, &dynamic_check);
15257 desired_align = decide_alignment (align, alg, expected_size);
15259 if (!TARGET_ALIGN_STRINGOPS)
15260 align = desired_align;
15262 if (alg == libcall)
15263 return 0;
15264 gcc_assert (alg != no_stringop);
15265 if (!count)
15266 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
15267 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15268 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
15269 switch (alg)
15271 case libcall:
15272 case no_stringop:
15273 gcc_unreachable ();
15274 case loop:
15275 size_needed = GET_MODE_SIZE (Pmode);
15276 break;
15277 case unrolled_loop:
15278 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
15279 break;
15280 case rep_prefix_8_byte:
15281 size_needed = 8;
15282 break;
15283 case rep_prefix_4_byte:
15284 size_needed = 4;
15285 break;
15286 case rep_prefix_1_byte:
15287 case loop_1_byte:
15288 size_needed = 1;
15289 break;
15292 epilogue_size_needed = size_needed;
15294 /* Step 1: Prologue guard. */
15296 /* Alignment code needs count to be in register. */
15297 if (CONST_INT_P (count_exp) && desired_align > align)
15299 enum machine_mode mode = SImode;
15300 if (TARGET_64BIT && (count & ~0xffffffff))
15301 mode = DImode;
15302 count_exp = force_reg (mode, count_exp);
15304 gcc_assert (desired_align >= 1 && align >= 1);
15306 /* Ensure that alignment prologue won't copy past end of block. */
15307 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15309 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15310 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15311 Make sure it is power of 2. */
15312 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15314 label = gen_label_rtx ();
15315 emit_cmp_and_jump_insns (count_exp,
15316 GEN_INT (epilogue_size_needed),
15317 LTU, 0, counter_mode (count_exp), 1, label);
15318 if (GET_CODE (count_exp) == CONST_INT)
15320 else if (expected_size == -1 || expected_size < epilogue_size_needed)
15321 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15322 else
15323 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15325 /* Emit code to decide on runtime whether library call or inline should be
15326 used. */
15327 if (dynamic_check != -1)
15329 rtx hot_label = gen_label_rtx ();
15330 jump_around_label = gen_label_rtx ();
15331 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15332 LEU, 0, GET_MODE (count_exp), 1, hot_label);
15333 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15334 emit_block_move_via_libcall (dst, src, count_exp, false);
15335 emit_jump (jump_around_label);
15336 emit_label (hot_label);
15339 /* Step 2: Alignment prologue. */
15341 if (desired_align > align)
15343 /* Except for the first move in epilogue, we no longer know
15344 constant offset in aliasing info. It don't seems to worth
15345 the pain to maintain it for the first move, so throw away
15346 the info early. */
15347 src = change_address (src, BLKmode, srcreg);
15348 dst = change_address (dst, BLKmode, destreg);
15349 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
15350 desired_align);
15352 if (label && size_needed == 1)
15354 emit_label (label);
15355 LABEL_NUSES (label) = 1;
15356 label = NULL;
15359 /* Step 3: Main loop. */
15361 switch (alg)
15363 case libcall:
15364 case no_stringop:
15365 gcc_unreachable ();
15366 case loop_1_byte:
15367 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15368 count_exp, QImode, 1, expected_size);
15369 break;
15370 case loop:
15371 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15372 count_exp, Pmode, 1, expected_size);
15373 break;
15374 case unrolled_loop:
15375 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
15376 registers for 4 temporaries anyway. */
15377 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15378 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
15379 expected_size);
15380 break;
15381 case rep_prefix_8_byte:
15382 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15383 DImode);
15384 break;
15385 case rep_prefix_4_byte:
15386 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15387 SImode);
15388 break;
15389 case rep_prefix_1_byte:
15390 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15391 QImode);
15392 break;
15394 /* Adjust properly the offset of src and dest memory for aliasing. */
15395 if (CONST_INT_P (count_exp))
15397 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
15398 (count / size_needed) * size_needed);
15399 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15400 (count / size_needed) * size_needed);
15402 else
15404 src = change_address (src, BLKmode, srcreg);
15405 dst = change_address (dst, BLKmode, destreg);
15408 /* Step 4: Epilogue to copy the remaining bytes. */
15410 if (label)
15412 /* When the main loop is done, COUNT_EXP might hold original count,
15413 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15414 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15415 bytes. Compensate if needed. */
15417 if (size_needed < epilogue_size_needed)
15419 tmp =
15420 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15421 GEN_INT (size_needed - 1), count_exp, 1,
15422 OPTAB_DIRECT);
15423 if (tmp != count_exp)
15424 emit_move_insn (count_exp, tmp);
15426 emit_label (label);
15427 LABEL_NUSES (label) = 1;
15430 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15431 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
15432 epilogue_size_needed);
15433 if (jump_around_label)
15434 emit_label (jump_around_label);
15435 return 1;
15438 /* Helper function for memcpy. For QImode value 0xXY produce
15439 0xXYXYXYXY of wide specified by MODE. This is essentially
15440 a * 0x10101010, but we can do slightly better than
15441 synth_mult by unwinding the sequence by hand on CPUs with
15442 slow multiply. */
15443 static rtx
15444 promote_duplicated_reg (enum machine_mode mode, rtx val)
15446 enum machine_mode valmode = GET_MODE (val);
15447 rtx tmp;
15448 int nops = mode == DImode ? 3 : 2;
15450 gcc_assert (mode == SImode || mode == DImode);
15451 if (val == const0_rtx)
15452 return copy_to_mode_reg (mode, const0_rtx);
15453 if (CONST_INT_P (val))
15455 HOST_WIDE_INT v = INTVAL (val) & 255;
15457 v |= v << 8;
15458 v |= v << 16;
15459 if (mode == DImode)
15460 v |= (v << 16) << 16;
15461 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
15464 if (valmode == VOIDmode)
15465 valmode = QImode;
15466 if (valmode != QImode)
15467 val = gen_lowpart (QImode, val);
15468 if (mode == QImode)
15469 return val;
15470 if (!TARGET_PARTIAL_REG_STALL)
15471 nops--;
15472 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
15473 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
15474 <= (ix86_cost->shift_const + ix86_cost->add) * nops
15475 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
15477 rtx reg = convert_modes (mode, QImode, val, true);
15478 tmp = promote_duplicated_reg (mode, const1_rtx);
15479 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
15480 OPTAB_DIRECT);
15482 else
15484 rtx reg = convert_modes (mode, QImode, val, true);
15486 if (!TARGET_PARTIAL_REG_STALL)
15487 if (mode == SImode)
15488 emit_insn (gen_movsi_insv_1 (reg, reg));
15489 else
15490 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
15491 else
15493 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
15494 NULL, 1, OPTAB_DIRECT);
15495 reg =
15496 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15498 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
15499 NULL, 1, OPTAB_DIRECT);
15500 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15501 if (mode == SImode)
15502 return reg;
15503 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
15504 NULL, 1, OPTAB_DIRECT);
15505 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15506 return reg;
15510 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
15511 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
15512 alignment from ALIGN to DESIRED_ALIGN. */
15513 static rtx
15514 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
15516 rtx promoted_val;
15518 if (TARGET_64BIT
15519 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
15520 promoted_val = promote_duplicated_reg (DImode, val);
15521 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15522 promoted_val = promote_duplicated_reg (SImode, val);
15523 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15524 promoted_val = promote_duplicated_reg (HImode, val);
15525 else
15526 promoted_val = val;
15528 return promoted_val;
15531 /* Expand string clear operation (bzero). Use i386 string operations when
15532 profitable. See expand_movmem comment for explanation of individual
15533 steps performed. */
15535 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15536 rtx expected_align_exp, rtx expected_size_exp)
15538 rtx destreg;
15539 rtx label = NULL;
15540 rtx tmp;
15541 rtx jump_around_label = NULL;
15542 HOST_WIDE_INT align = 1;
15543 unsigned HOST_WIDE_INT count = 0;
15544 HOST_WIDE_INT expected_size = -1;
15545 int size_needed = 0, epilogue_size_needed;
15546 int desired_align = 0;
15547 enum stringop_alg alg;
15548 rtx promoted_val = NULL;
15549 bool force_loopy_epilogue = false;
15550 int dynamic_check;
15552 if (CONST_INT_P (align_exp))
15553 align = INTVAL (align_exp);
15554 /* i386 can do misaligned access on reasonably increased cost. */
15555 if (CONST_INT_P (expected_align_exp)
15556 && INTVAL (expected_align_exp) > align)
15557 align = INTVAL (expected_align_exp);
15558 if (CONST_INT_P (count_exp))
15559 count = expected_size = INTVAL (count_exp);
15560 if (CONST_INT_P (expected_size_exp) && count == 0)
15561 expected_size = INTVAL (expected_size_exp);
15563 /* Step 0: Decide on preferred algorithm, desired alignment and
15564 size of chunks to be copied by main loop. */
15566 alg = decide_alg (count, expected_size, true, &dynamic_check);
15567 desired_align = decide_alignment (align, alg, expected_size);
15569 if (!TARGET_ALIGN_STRINGOPS)
15570 align = desired_align;
15572 if (alg == libcall)
15573 return 0;
15574 gcc_assert (alg != no_stringop);
15575 if (!count)
15576 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
15577 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15578 switch (alg)
15580 case libcall:
15581 case no_stringop:
15582 gcc_unreachable ();
15583 case loop:
15584 size_needed = GET_MODE_SIZE (Pmode);
15585 break;
15586 case unrolled_loop:
15587 size_needed = GET_MODE_SIZE (Pmode) * 4;
15588 break;
15589 case rep_prefix_8_byte:
15590 size_needed = 8;
15591 break;
15592 case rep_prefix_4_byte:
15593 size_needed = 4;
15594 break;
15595 case rep_prefix_1_byte:
15596 case loop_1_byte:
15597 size_needed = 1;
15598 break;
15600 epilogue_size_needed = size_needed;
15602 /* Step 1: Prologue guard. */
15604 /* Alignment code needs count to be in register. */
15605 if (CONST_INT_P (count_exp) && desired_align > align)
15607 enum machine_mode mode = SImode;
15608 if (TARGET_64BIT && (count & ~0xffffffff))
15609 mode = DImode;
15610 count_exp = force_reg (mode, count_exp);
15612 /* Do the cheap promotion to allow better CSE across the
15613 main loop and epilogue (ie one load of the big constant in the
15614 front of all code. */
15615 if (CONST_INT_P (val_exp))
15616 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15617 desired_align, align);
15618 /* Ensure that alignment prologue won't copy past end of block. */
15619 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15621 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15622 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15623 Make sure it is power of 2. */
15624 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15626 /* To improve performance of small blocks, we jump around the VAL
15627 promoting mode. This mean that if the promoted VAL is not constant,
15628 we might not use it in the epilogue and have to use byte
15629 loop variant. */
15630 if (epilogue_size_needed > 2 && !promoted_val)
15631 force_loopy_epilogue = true;
15632 label = gen_label_rtx ();
15633 emit_cmp_and_jump_insns (count_exp,
15634 GEN_INT (epilogue_size_needed),
15635 LTU, 0, counter_mode (count_exp), 1, label);
15636 if (GET_CODE (count_exp) == CONST_INT)
15638 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
15639 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15640 else
15641 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15643 if (dynamic_check != -1)
15645 rtx hot_label = gen_label_rtx ();
15646 jump_around_label = gen_label_rtx ();
15647 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15648 LEU, 0, counter_mode (count_exp), 1, hot_label);
15649 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15650 set_storage_via_libcall (dst, count_exp, val_exp, false);
15651 emit_jump (jump_around_label);
15652 emit_label (hot_label);
15655 /* Step 2: Alignment prologue. */
15657 /* Do the expensive promotion once we branched off the small blocks. */
15658 if (!promoted_val)
15659 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15660 desired_align, align);
15661 gcc_assert (desired_align >= 1 && align >= 1);
15663 if (desired_align > align)
15665 /* Except for the first move in epilogue, we no longer know
15666 constant offset in aliasing info. It don't seems to worth
15667 the pain to maintain it for the first move, so throw away
15668 the info early. */
15669 dst = change_address (dst, BLKmode, destreg);
15670 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
15671 desired_align);
15673 if (label && size_needed == 1)
15675 emit_label (label);
15676 LABEL_NUSES (label) = 1;
15677 label = NULL;
15680 /* Step 3: Main loop. */
15682 switch (alg)
15684 case libcall:
15685 case no_stringop:
15686 gcc_unreachable ();
15687 case loop_1_byte:
15688 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15689 count_exp, QImode, 1, expected_size);
15690 break;
15691 case loop:
15692 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15693 count_exp, Pmode, 1, expected_size);
15694 break;
15695 case unrolled_loop:
15696 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15697 count_exp, Pmode, 4, expected_size);
15698 break;
15699 case rep_prefix_8_byte:
15700 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15701 DImode);
15702 break;
15703 case rep_prefix_4_byte:
15704 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15705 SImode);
15706 break;
15707 case rep_prefix_1_byte:
15708 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15709 QImode);
15710 break;
15712 /* Adjust properly the offset of src and dest memory for aliasing. */
15713 if (CONST_INT_P (count_exp))
15714 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15715 (count / size_needed) * size_needed);
15716 else
15717 dst = change_address (dst, BLKmode, destreg);
15719 /* Step 4: Epilogue to copy the remaining bytes. */
15721 if (label)
15723 /* When the main loop is done, COUNT_EXP might hold original count,
15724 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15725 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15726 bytes. Compensate if needed. */
15728 if (size_needed < desired_align - align)
15730 tmp =
15731 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15732 GEN_INT (size_needed - 1), count_exp, 1,
15733 OPTAB_DIRECT);
15734 size_needed = desired_align - align + 1;
15735 if (tmp != count_exp)
15736 emit_move_insn (count_exp, tmp);
15738 emit_label (label);
15739 LABEL_NUSES (label) = 1;
15741 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15743 if (force_loopy_epilogue)
15744 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
15745 size_needed);
15746 else
15747 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
15748 size_needed);
15750 if (jump_around_label)
15751 emit_label (jump_around_label);
15752 return 1;
15755 /* Expand the appropriate insns for doing strlen if not just doing
15756 repnz; scasb
15758 out = result, initialized with the start address
15759 align_rtx = alignment of the address.
15760 scratch = scratch register, initialized with the startaddress when
15761 not aligned, otherwise undefined
15763 This is just the body. It needs the initializations mentioned above and
15764 some address computing at the end. These things are done in i386.md. */
15766 static void
15767 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
15769 int align;
15770 rtx tmp;
15771 rtx align_2_label = NULL_RTX;
15772 rtx align_3_label = NULL_RTX;
15773 rtx align_4_label = gen_label_rtx ();
15774 rtx end_0_label = gen_label_rtx ();
15775 rtx mem;
15776 rtx tmpreg = gen_reg_rtx (SImode);
15777 rtx scratch = gen_reg_rtx (SImode);
15778 rtx cmp;
15780 align = 0;
15781 if (CONST_INT_P (align_rtx))
15782 align = INTVAL (align_rtx);
15784 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
15786 /* Is there a known alignment and is it less than 4? */
15787 if (align < 4)
15789 rtx scratch1 = gen_reg_rtx (Pmode);
15790 emit_move_insn (scratch1, out);
15791 /* Is there a known alignment and is it not 2? */
15792 if (align != 2)
15794 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
15795 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
15797 /* Leave just the 3 lower bits. */
15798 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
15799 NULL_RTX, 0, OPTAB_WIDEN);
15801 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15802 Pmode, 1, align_4_label);
15803 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
15804 Pmode, 1, align_2_label);
15805 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
15806 Pmode, 1, align_3_label);
15808 else
15810 /* Since the alignment is 2, we have to check 2 or 0 bytes;
15811 check if is aligned to 4 - byte. */
15813 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
15814 NULL_RTX, 0, OPTAB_WIDEN);
15816 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15817 Pmode, 1, align_4_label);
15820 mem = change_address (src, QImode, out);
15822 /* Now compare the bytes. */
15824 /* Compare the first n unaligned byte on a byte per byte basis. */
15825 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
15826 QImode, 1, end_0_label);
15828 /* Increment the address. */
15829 if (TARGET_64BIT)
15830 emit_insn (gen_adddi3 (out, out, const1_rtx));
15831 else
15832 emit_insn (gen_addsi3 (out, out, const1_rtx));
15834 /* Not needed with an alignment of 2 */
15835 if (align != 2)
15837 emit_label (align_2_label);
15839 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15840 end_0_label);
15842 if (TARGET_64BIT)
15843 emit_insn (gen_adddi3 (out, out, const1_rtx));
15844 else
15845 emit_insn (gen_addsi3 (out, out, const1_rtx));
15847 emit_label (align_3_label);
15850 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15851 end_0_label);
15853 if (TARGET_64BIT)
15854 emit_insn (gen_adddi3 (out, out, const1_rtx));
15855 else
15856 emit_insn (gen_addsi3 (out, out, const1_rtx));
15859 /* Generate loop to check 4 bytes at a time. It is not a good idea to
15860 align this loop. It gives only huge programs, but does not help to
15861 speed up. */
15862 emit_label (align_4_label);
15864 mem = change_address (src, SImode, out);
15865 emit_move_insn (scratch, mem);
15866 if (TARGET_64BIT)
15867 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
15868 else
15869 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
15871 /* This formula yields a nonzero result iff one of the bytes is zero.
15872 This saves three branches inside loop and many cycles. */
15874 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
15875 emit_insn (gen_one_cmplsi2 (scratch, scratch));
15876 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
15877 emit_insn (gen_andsi3 (tmpreg, tmpreg,
15878 gen_int_mode (0x80808080, SImode)));
15879 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
15880 align_4_label);
15882 if (TARGET_CMOVE)
15884 rtx reg = gen_reg_rtx (SImode);
15885 rtx reg2 = gen_reg_rtx (Pmode);
15886 emit_move_insn (reg, tmpreg);
15887 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
15889 /* If zero is not in the first two bytes, move two bytes forward. */
15890 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15891 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15892 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15893 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
15894 gen_rtx_IF_THEN_ELSE (SImode, tmp,
15895 reg,
15896 tmpreg)));
15897 /* Emit lea manually to avoid clobbering of flags. */
15898 emit_insn (gen_rtx_SET (SImode, reg2,
15899 gen_rtx_PLUS (Pmode, out, const2_rtx)));
15901 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15902 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15903 emit_insn (gen_rtx_SET (VOIDmode, out,
15904 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
15905 reg2,
15906 out)));
15909 else
15911 rtx end_2_label = gen_label_rtx ();
15912 /* Is zero in the first two bytes? */
15914 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15915 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15916 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
15917 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15918 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
15919 pc_rtx);
15920 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15921 JUMP_LABEL (tmp) = end_2_label;
15923 /* Not in the first two. Move two bytes forward. */
15924 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
15925 if (TARGET_64BIT)
15926 emit_insn (gen_adddi3 (out, out, const2_rtx));
15927 else
15928 emit_insn (gen_addsi3 (out, out, const2_rtx));
15930 emit_label (end_2_label);
15934 /* Avoid branch in fixing the byte. */
15935 tmpreg = gen_lowpart (QImode, tmpreg);
15936 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
15937 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
15938 if (TARGET_64BIT)
15939 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
15940 else
15941 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
15943 emit_label (end_0_label);
15946 /* Expand strlen. */
15949 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
15951 rtx addr, scratch1, scratch2, scratch3, scratch4;
15953 /* The generic case of strlen expander is long. Avoid it's
15954 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
15956 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15957 && !TARGET_INLINE_ALL_STRINGOPS
15958 && !optimize_size
15959 && (!CONST_INT_P (align) || INTVAL (align) < 4))
15960 return 0;
15962 addr = force_reg (Pmode, XEXP (src, 0));
15963 scratch1 = gen_reg_rtx (Pmode);
15965 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15966 && !optimize_size)
15968 /* Well it seems that some optimizer does not combine a call like
15969 foo(strlen(bar), strlen(bar));
15970 when the move and the subtraction is done here. It does calculate
15971 the length just once when these instructions are done inside of
15972 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
15973 often used and I use one fewer register for the lifetime of
15974 output_strlen_unroll() this is better. */
15976 emit_move_insn (out, addr);
15978 ix86_expand_strlensi_unroll_1 (out, src, align);
15980 /* strlensi_unroll_1 returns the address of the zero at the end of
15981 the string, like memchr(), so compute the length by subtracting
15982 the start address. */
15983 if (TARGET_64BIT)
15984 emit_insn (gen_subdi3 (out, out, addr));
15985 else
15986 emit_insn (gen_subsi3 (out, out, addr));
15988 else
15990 rtx unspec;
15991 scratch2 = gen_reg_rtx (Pmode);
15992 scratch3 = gen_reg_rtx (Pmode);
15993 scratch4 = force_reg (Pmode, constm1_rtx);
15995 emit_move_insn (scratch3, addr);
15996 eoschar = force_reg (QImode, eoschar);
15998 src = replace_equiv_address_nv (src, scratch3);
16000 /* If .md starts supporting :P, this can be done in .md. */
16001 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
16002 scratch4), UNSPEC_SCAS);
16003 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
16004 if (TARGET_64BIT)
16006 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
16007 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
16009 else
16011 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
16012 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
16015 return 1;
16018 /* For given symbol (function) construct code to compute address of it's PLT
16019 entry in large x86-64 PIC model. */
16021 construct_plt_address (rtx symbol)
16023 rtx tmp = gen_reg_rtx (Pmode);
16024 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
16026 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
16027 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
16029 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
16030 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
16031 return tmp;
16034 void
16035 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
16036 rtx callarg2 ATTRIBUTE_UNUSED,
16037 rtx pop, int sibcall)
16039 rtx use = NULL, call;
16041 if (pop == const0_rtx)
16042 pop = NULL;
16043 gcc_assert (!TARGET_64BIT || !pop);
16045 if (TARGET_MACHO && !TARGET_64BIT)
16047 #if TARGET_MACHO
16048 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
16049 fnaddr = machopic_indirect_call_target (fnaddr);
16050 #endif
16052 else
16054 /* Static functions and indirect calls don't need the pic register. */
16055 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
16056 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16057 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
16058 use_reg (&use, pic_offset_table_rtx);
16061 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
16063 rtx al = gen_rtx_REG (QImode, 0);
16064 emit_move_insn (al, callarg2);
16065 use_reg (&use, al);
16068 if (ix86_cmodel == CM_LARGE_PIC
16069 && GET_CODE (fnaddr) == MEM
16070 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16071 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
16072 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
16073 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
16075 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16076 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16078 if (sibcall && TARGET_64BIT
16079 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
16081 rtx addr;
16082 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16083 fnaddr = gen_rtx_REG (Pmode, R11_REG);
16084 emit_move_insn (fnaddr, addr);
16085 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16088 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
16089 if (retval)
16090 call = gen_rtx_SET (VOIDmode, retval, call);
16091 if (pop)
16093 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
16094 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
16095 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
16098 call = emit_call_insn (call);
16099 if (use)
16100 CALL_INSN_FUNCTION_USAGE (call) = use;
16104 /* Clear stack slot assignments remembered from previous functions.
16105 This is called from INIT_EXPANDERS once before RTL is emitted for each
16106 function. */
16108 static struct machine_function *
16109 ix86_init_machine_status (void)
16111 struct machine_function *f;
16113 f = GGC_CNEW (struct machine_function);
16114 f->use_fast_prologue_epilogue_nregs = -1;
16115 f->tls_descriptor_call_expanded_p = 0;
16117 return f;
16120 /* Return a MEM corresponding to a stack slot with mode MODE.
16121 Allocate a new slot if necessary.
16123 The RTL for a function can have several slots available: N is
16124 which slot to use. */
16127 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
16129 struct stack_local_entry *s;
16131 gcc_assert (n < MAX_386_STACK_LOCALS);
16133 /* Virtual slot is valid only before vregs are instantiated. */
16134 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
16136 for (s = ix86_stack_locals; s; s = s->next)
16137 if (s->mode == mode && s->n == n)
16138 return copy_rtx (s->rtl);
16140 s = (struct stack_local_entry *)
16141 ggc_alloc (sizeof (struct stack_local_entry));
16142 s->n = n;
16143 s->mode = mode;
16144 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16146 s->next = ix86_stack_locals;
16147 ix86_stack_locals = s;
16148 return s->rtl;
16151 /* Construct the SYMBOL_REF for the tls_get_addr function. */
16153 static GTY(()) rtx ix86_tls_symbol;
16155 ix86_tls_get_addr (void)
16158 if (!ix86_tls_symbol)
16160 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
16161 (TARGET_ANY_GNU_TLS
16162 && !TARGET_64BIT)
16163 ? "___tls_get_addr"
16164 : "__tls_get_addr");
16167 return ix86_tls_symbol;
16170 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
16172 static GTY(()) rtx ix86_tls_module_base_symbol;
16174 ix86_tls_module_base (void)
16177 if (!ix86_tls_module_base_symbol)
16179 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
16180 "_TLS_MODULE_BASE_");
16181 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
16182 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
16185 return ix86_tls_module_base_symbol;
16188 /* Calculate the length of the memory address in the instruction
16189 encoding. Does not include the one-byte modrm, opcode, or prefix. */
16192 memory_address_length (rtx addr)
16194 struct ix86_address parts;
16195 rtx base, index, disp;
16196 int len;
16197 int ok;
16199 if (GET_CODE (addr) == PRE_DEC
16200 || GET_CODE (addr) == POST_INC
16201 || GET_CODE (addr) == PRE_MODIFY
16202 || GET_CODE (addr) == POST_MODIFY)
16203 return 0;
16205 ok = ix86_decompose_address (addr, &parts);
16206 gcc_assert (ok);
16208 if (parts.base && GET_CODE (parts.base) == SUBREG)
16209 parts.base = SUBREG_REG (parts.base);
16210 if (parts.index && GET_CODE (parts.index) == SUBREG)
16211 parts.index = SUBREG_REG (parts.index);
16213 base = parts.base;
16214 index = parts.index;
16215 disp = parts.disp;
16216 len = 0;
16218 /* Rule of thumb:
16219 - esp as the base always wants an index,
16220 - ebp as the base always wants a displacement. */
16222 /* Register Indirect. */
16223 if (base && !index && !disp)
16225 /* esp (for its index) and ebp (for its displacement) need
16226 the two-byte modrm form. */
16227 if (addr == stack_pointer_rtx
16228 || addr == arg_pointer_rtx
16229 || addr == frame_pointer_rtx
16230 || addr == hard_frame_pointer_rtx)
16231 len = 1;
16234 /* Direct Addressing. */
16235 else if (disp && !base && !index)
16236 len = 4;
16238 else
16240 /* Find the length of the displacement constant. */
16241 if (disp)
16243 if (base && satisfies_constraint_K (disp))
16244 len = 1;
16245 else
16246 len = 4;
16248 /* ebp always wants a displacement. */
16249 else if (base == hard_frame_pointer_rtx)
16250 len = 1;
16252 /* An index requires the two-byte modrm form.... */
16253 if (index
16254 /* ...like esp, which always wants an index. */
16255 || base == stack_pointer_rtx
16256 || base == arg_pointer_rtx
16257 || base == frame_pointer_rtx)
16258 len += 1;
16261 return len;
16264 /* Compute default value for "length_immediate" attribute. When SHORTFORM
16265 is set, expect that insn have 8bit immediate alternative. */
16267 ix86_attr_length_immediate_default (rtx insn, int shortform)
16269 int len = 0;
16270 int i;
16271 extract_insn_cached (insn);
16272 for (i = recog_data.n_operands - 1; i >= 0; --i)
16273 if (CONSTANT_P (recog_data.operand[i]))
16275 gcc_assert (!len);
16276 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
16277 len = 1;
16278 else
16280 switch (get_attr_mode (insn))
16282 case MODE_QI:
16283 len+=1;
16284 break;
16285 case MODE_HI:
16286 len+=2;
16287 break;
16288 case MODE_SI:
16289 len+=4;
16290 break;
16291 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
16292 case MODE_DI:
16293 len+=4;
16294 break;
16295 default:
16296 fatal_insn ("unknown insn mode", insn);
16300 return len;
16302 /* Compute default value for "length_address" attribute. */
16304 ix86_attr_length_address_default (rtx insn)
16306 int i;
16308 if (get_attr_type (insn) == TYPE_LEA)
16310 rtx set = PATTERN (insn);
16312 if (GET_CODE (set) == PARALLEL)
16313 set = XVECEXP (set, 0, 0);
16315 gcc_assert (GET_CODE (set) == SET);
16317 return memory_address_length (SET_SRC (set));
16320 extract_insn_cached (insn);
16321 for (i = recog_data.n_operands - 1; i >= 0; --i)
16322 if (MEM_P (recog_data.operand[i]))
16324 return memory_address_length (XEXP (recog_data.operand[i], 0));
16325 break;
16327 return 0;
16330 /* Return the maximum number of instructions a cpu can issue. */
16332 static int
16333 ix86_issue_rate (void)
16335 switch (ix86_tune)
16337 case PROCESSOR_PENTIUM:
16338 case PROCESSOR_K6:
16339 return 2;
16341 case PROCESSOR_PENTIUMPRO:
16342 case PROCESSOR_PENTIUM4:
16343 case PROCESSOR_ATHLON:
16344 case PROCESSOR_K8:
16345 case PROCESSOR_AMDFAM10:
16346 case PROCESSOR_NOCONA:
16347 case PROCESSOR_GENERIC32:
16348 case PROCESSOR_GENERIC64:
16349 return 3;
16351 case PROCESSOR_CORE2:
16352 return 4;
16354 default:
16355 return 1;
16359 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
16360 by DEP_INSN and nothing set by DEP_INSN. */
16362 static int
16363 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16365 rtx set, set2;
16367 /* Simplify the test for uninteresting insns. */
16368 if (insn_type != TYPE_SETCC
16369 && insn_type != TYPE_ICMOV
16370 && insn_type != TYPE_FCMOV
16371 && insn_type != TYPE_IBR)
16372 return 0;
16374 if ((set = single_set (dep_insn)) != 0)
16376 set = SET_DEST (set);
16377 set2 = NULL_RTX;
16379 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
16380 && XVECLEN (PATTERN (dep_insn), 0) == 2
16381 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
16382 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
16384 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16385 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16387 else
16388 return 0;
16390 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
16391 return 0;
16393 /* This test is true if the dependent insn reads the flags but
16394 not any other potentially set register. */
16395 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
16396 return 0;
16398 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
16399 return 0;
16401 return 1;
16404 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
16405 address with operands set by DEP_INSN. */
16407 static int
16408 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16410 rtx addr;
16412 if (insn_type == TYPE_LEA
16413 && TARGET_PENTIUM)
16415 addr = PATTERN (insn);
16417 if (GET_CODE (addr) == PARALLEL)
16418 addr = XVECEXP (addr, 0, 0);
16420 gcc_assert (GET_CODE (addr) == SET);
16422 addr = SET_SRC (addr);
16424 else
16426 int i;
16427 extract_insn_cached (insn);
16428 for (i = recog_data.n_operands - 1; i >= 0; --i)
16429 if (MEM_P (recog_data.operand[i]))
16431 addr = XEXP (recog_data.operand[i], 0);
16432 goto found;
16434 return 0;
16435 found:;
16438 return modified_in_p (addr, dep_insn);
16441 static int
16442 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
16444 enum attr_type insn_type, dep_insn_type;
16445 enum attr_memory memory;
16446 rtx set, set2;
16447 int dep_insn_code_number;
16449 /* Anti and output dependencies have zero cost on all CPUs. */
16450 if (REG_NOTE_KIND (link) != 0)
16451 return 0;
16453 dep_insn_code_number = recog_memoized (dep_insn);
16455 /* If we can't recognize the insns, we can't really do anything. */
16456 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
16457 return cost;
16459 insn_type = get_attr_type (insn);
16460 dep_insn_type = get_attr_type (dep_insn);
16462 switch (ix86_tune)
16464 case PROCESSOR_PENTIUM:
16465 /* Address Generation Interlock adds a cycle of latency. */
16466 if (ix86_agi_dependent (insn, dep_insn, insn_type))
16467 cost += 1;
16469 /* ??? Compares pair with jump/setcc. */
16470 if (ix86_flags_dependent (insn, dep_insn, insn_type))
16471 cost = 0;
16473 /* Floating point stores require value to be ready one cycle earlier. */
16474 if (insn_type == TYPE_FMOV
16475 && get_attr_memory (insn) == MEMORY_STORE
16476 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16477 cost += 1;
16478 break;
16480 case PROCESSOR_PENTIUMPRO:
16481 memory = get_attr_memory (insn);
16483 /* INT->FP conversion is expensive. */
16484 if (get_attr_fp_int_src (dep_insn))
16485 cost += 5;
16487 /* There is one cycle extra latency between an FP op and a store. */
16488 if (insn_type == TYPE_FMOV
16489 && (set = single_set (dep_insn)) != NULL_RTX
16490 && (set2 = single_set (insn)) != NULL_RTX
16491 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
16492 && MEM_P (SET_DEST (set2)))
16493 cost += 1;
16495 /* Show ability of reorder buffer to hide latency of load by executing
16496 in parallel with previous instruction in case
16497 previous instruction is not needed to compute the address. */
16498 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16499 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16501 /* Claim moves to take one cycle, as core can issue one load
16502 at time and the next load can start cycle later. */
16503 if (dep_insn_type == TYPE_IMOV
16504 || dep_insn_type == TYPE_FMOV)
16505 cost = 1;
16506 else if (cost > 1)
16507 cost--;
16509 break;
16511 case PROCESSOR_K6:
16512 memory = get_attr_memory (insn);
16514 /* The esp dependency is resolved before the instruction is really
16515 finished. */
16516 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
16517 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
16518 return 1;
16520 /* INT->FP conversion is expensive. */
16521 if (get_attr_fp_int_src (dep_insn))
16522 cost += 5;
16524 /* Show ability of reorder buffer to hide latency of load by executing
16525 in parallel with previous instruction in case
16526 previous instruction is not needed to compute the address. */
16527 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16528 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16530 /* Claim moves to take one cycle, as core can issue one load
16531 at time and the next load can start cycle later. */
16532 if (dep_insn_type == TYPE_IMOV
16533 || dep_insn_type == TYPE_FMOV)
16534 cost = 1;
16535 else if (cost > 2)
16536 cost -= 2;
16537 else
16538 cost = 1;
16540 break;
16542 case PROCESSOR_ATHLON:
16543 case PROCESSOR_K8:
16544 case PROCESSOR_AMDFAM10:
16545 case PROCESSOR_GENERIC32:
16546 case PROCESSOR_GENERIC64:
16547 memory = get_attr_memory (insn);
16549 /* Show ability of reorder buffer to hide latency of load by executing
16550 in parallel with previous instruction in case
16551 previous instruction is not needed to compute the address. */
16552 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16553 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16555 enum attr_unit unit = get_attr_unit (insn);
16556 int loadcost = 3;
16558 /* Because of the difference between the length of integer and
16559 floating unit pipeline preparation stages, the memory operands
16560 for floating point are cheaper.
16562 ??? For Athlon it the difference is most probably 2. */
16563 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
16564 loadcost = 3;
16565 else
16566 loadcost = TARGET_ATHLON ? 2 : 0;
16568 if (cost >= loadcost)
16569 cost -= loadcost;
16570 else
16571 cost = 0;
16574 default:
16575 break;
16578 return cost;
16581 /* How many alternative schedules to try. This should be as wide as the
16582 scheduling freedom in the DFA, but no wider. Making this value too
16583 large results extra work for the scheduler. */
16585 static int
16586 ia32_multipass_dfa_lookahead (void)
16588 if (ix86_tune == PROCESSOR_PENTIUM)
16589 return 2;
16591 if (ix86_tune == PROCESSOR_PENTIUMPRO
16592 || ix86_tune == PROCESSOR_K6)
16593 return 1;
16595 else
16596 return 0;
16600 /* Compute the alignment given to a constant that is being placed in memory.
16601 EXP is the constant and ALIGN is the alignment that the object would
16602 ordinarily have.
16603 The value of this function is used instead of that alignment to align
16604 the object. */
16607 ix86_constant_alignment (tree exp, int align)
16609 if (TREE_CODE (exp) == REAL_CST)
16611 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
16612 return 64;
16613 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
16614 return 128;
16616 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16617 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16618 return BITS_PER_WORD;
16620 return align;
16623 /* Compute the alignment for a static variable.
16624 TYPE is the data type, and ALIGN is the alignment that
16625 the object would ordinarily have. The value of this function is used
16626 instead of that alignment to align the object. */
16629 ix86_data_alignment (tree type, int align)
16631 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
16633 if (AGGREGATE_TYPE_P (type)
16634 && TYPE_SIZE (type)
16635 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16636 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
16637 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
16638 && align < max_align)
16639 align = max_align;
16641 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16642 to 16byte boundary. */
16643 if (TARGET_64BIT)
16645 if (AGGREGATE_TYPE_P (type)
16646 && TYPE_SIZE (type)
16647 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16648 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
16649 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16650 return 128;
16653 if (TREE_CODE (type) == ARRAY_TYPE)
16655 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16656 return 64;
16657 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16658 return 128;
16660 else if (TREE_CODE (type) == COMPLEX_TYPE)
16663 if (TYPE_MODE (type) == DCmode && align < 64)
16664 return 64;
16665 if (TYPE_MODE (type) == XCmode && align < 128)
16666 return 128;
16668 else if ((TREE_CODE (type) == RECORD_TYPE
16669 || TREE_CODE (type) == UNION_TYPE
16670 || TREE_CODE (type) == QUAL_UNION_TYPE)
16671 && TYPE_FIELDS (type))
16673 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16674 return 64;
16675 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16676 return 128;
16678 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16679 || TREE_CODE (type) == INTEGER_TYPE)
16681 if (TYPE_MODE (type) == DFmode && align < 64)
16682 return 64;
16683 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16684 return 128;
16687 return align;
16690 /* Compute the alignment for a local variable.
16691 TYPE is the data type, and ALIGN is the alignment that
16692 the object would ordinarily have. The value of this macro is used
16693 instead of that alignment to align the object. */
16696 ix86_local_alignment (tree type, int align)
16698 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16699 to 16byte boundary. */
16700 if (TARGET_64BIT)
16702 if (AGGREGATE_TYPE_P (type)
16703 && TYPE_SIZE (type)
16704 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16705 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
16706 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16707 return 128;
16709 if (TREE_CODE (type) == ARRAY_TYPE)
16711 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16712 return 64;
16713 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16714 return 128;
16716 else if (TREE_CODE (type) == COMPLEX_TYPE)
16718 if (TYPE_MODE (type) == DCmode && align < 64)
16719 return 64;
16720 if (TYPE_MODE (type) == XCmode && align < 128)
16721 return 128;
16723 else if ((TREE_CODE (type) == RECORD_TYPE
16724 || TREE_CODE (type) == UNION_TYPE
16725 || TREE_CODE (type) == QUAL_UNION_TYPE)
16726 && TYPE_FIELDS (type))
16728 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16729 return 64;
16730 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16731 return 128;
16733 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16734 || TREE_CODE (type) == INTEGER_TYPE)
16737 if (TYPE_MODE (type) == DFmode && align < 64)
16738 return 64;
16739 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16740 return 128;
16742 return align;
16745 /* Emit RTL insns to initialize the variable parts of a trampoline.
16746 FNADDR is an RTX for the address of the function's pure code.
16747 CXT is an RTX for the static chain value for the function. */
16748 void
16749 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
16751 if (!TARGET_64BIT)
16753 /* Compute offset from the end of the jmp to the target function. */
16754 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
16755 plus_constant (tramp, 10),
16756 NULL_RTX, 1, OPTAB_DIRECT);
16757 emit_move_insn (gen_rtx_MEM (QImode, tramp),
16758 gen_int_mode (0xb9, QImode));
16759 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
16760 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
16761 gen_int_mode (0xe9, QImode));
16762 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
16764 else
16766 int offset = 0;
16767 /* Try to load address using shorter movl instead of movabs.
16768 We may want to support movq for kernel mode, but kernel does not use
16769 trampolines at the moment. */
16770 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
16772 fnaddr = copy_to_mode_reg (DImode, fnaddr);
16773 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16774 gen_int_mode (0xbb41, HImode));
16775 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
16776 gen_lowpart (SImode, fnaddr));
16777 offset += 6;
16779 else
16781 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16782 gen_int_mode (0xbb49, HImode));
16783 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16784 fnaddr);
16785 offset += 10;
16787 /* Load static chain using movabs to r10. */
16788 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16789 gen_int_mode (0xba49, HImode));
16790 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16791 cxt);
16792 offset += 10;
16793 /* Jump to the r11 */
16794 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16795 gen_int_mode (0xff49, HImode));
16796 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
16797 gen_int_mode (0xe3, QImode));
16798 offset += 3;
16799 gcc_assert (offset <= TRAMPOLINE_SIZE);
16802 #ifdef ENABLE_EXECUTE_STACK
16803 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
16804 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
16805 #endif
16808 /* Codes for all the SSE/MMX builtins. */
16809 enum ix86_builtins
16811 IX86_BUILTIN_ADDPS,
16812 IX86_BUILTIN_ADDSS,
16813 IX86_BUILTIN_DIVPS,
16814 IX86_BUILTIN_DIVSS,
16815 IX86_BUILTIN_MULPS,
16816 IX86_BUILTIN_MULSS,
16817 IX86_BUILTIN_SUBPS,
16818 IX86_BUILTIN_SUBSS,
16820 IX86_BUILTIN_CMPEQPS,
16821 IX86_BUILTIN_CMPLTPS,
16822 IX86_BUILTIN_CMPLEPS,
16823 IX86_BUILTIN_CMPGTPS,
16824 IX86_BUILTIN_CMPGEPS,
16825 IX86_BUILTIN_CMPNEQPS,
16826 IX86_BUILTIN_CMPNLTPS,
16827 IX86_BUILTIN_CMPNLEPS,
16828 IX86_BUILTIN_CMPNGTPS,
16829 IX86_BUILTIN_CMPNGEPS,
16830 IX86_BUILTIN_CMPORDPS,
16831 IX86_BUILTIN_CMPUNORDPS,
16832 IX86_BUILTIN_CMPEQSS,
16833 IX86_BUILTIN_CMPLTSS,
16834 IX86_BUILTIN_CMPLESS,
16835 IX86_BUILTIN_CMPNEQSS,
16836 IX86_BUILTIN_CMPNLTSS,
16837 IX86_BUILTIN_CMPNLESS,
16838 IX86_BUILTIN_CMPNGTSS,
16839 IX86_BUILTIN_CMPNGESS,
16840 IX86_BUILTIN_CMPORDSS,
16841 IX86_BUILTIN_CMPUNORDSS,
16843 IX86_BUILTIN_COMIEQSS,
16844 IX86_BUILTIN_COMILTSS,
16845 IX86_BUILTIN_COMILESS,
16846 IX86_BUILTIN_COMIGTSS,
16847 IX86_BUILTIN_COMIGESS,
16848 IX86_BUILTIN_COMINEQSS,
16849 IX86_BUILTIN_UCOMIEQSS,
16850 IX86_BUILTIN_UCOMILTSS,
16851 IX86_BUILTIN_UCOMILESS,
16852 IX86_BUILTIN_UCOMIGTSS,
16853 IX86_BUILTIN_UCOMIGESS,
16854 IX86_BUILTIN_UCOMINEQSS,
16856 IX86_BUILTIN_CVTPI2PS,
16857 IX86_BUILTIN_CVTPS2PI,
16858 IX86_BUILTIN_CVTSI2SS,
16859 IX86_BUILTIN_CVTSI642SS,
16860 IX86_BUILTIN_CVTSS2SI,
16861 IX86_BUILTIN_CVTSS2SI64,
16862 IX86_BUILTIN_CVTTPS2PI,
16863 IX86_BUILTIN_CVTTSS2SI,
16864 IX86_BUILTIN_CVTTSS2SI64,
16866 IX86_BUILTIN_MAXPS,
16867 IX86_BUILTIN_MAXSS,
16868 IX86_BUILTIN_MINPS,
16869 IX86_BUILTIN_MINSS,
16871 IX86_BUILTIN_LOADUPS,
16872 IX86_BUILTIN_STOREUPS,
16873 IX86_BUILTIN_MOVSS,
16875 IX86_BUILTIN_MOVHLPS,
16876 IX86_BUILTIN_MOVLHPS,
16877 IX86_BUILTIN_LOADHPS,
16878 IX86_BUILTIN_LOADLPS,
16879 IX86_BUILTIN_STOREHPS,
16880 IX86_BUILTIN_STORELPS,
16882 IX86_BUILTIN_MASKMOVQ,
16883 IX86_BUILTIN_MOVMSKPS,
16884 IX86_BUILTIN_PMOVMSKB,
16886 IX86_BUILTIN_MOVNTPS,
16887 IX86_BUILTIN_MOVNTQ,
16889 IX86_BUILTIN_LOADDQU,
16890 IX86_BUILTIN_STOREDQU,
16892 IX86_BUILTIN_PACKSSWB,
16893 IX86_BUILTIN_PACKSSDW,
16894 IX86_BUILTIN_PACKUSWB,
16896 IX86_BUILTIN_PADDB,
16897 IX86_BUILTIN_PADDW,
16898 IX86_BUILTIN_PADDD,
16899 IX86_BUILTIN_PADDQ,
16900 IX86_BUILTIN_PADDSB,
16901 IX86_BUILTIN_PADDSW,
16902 IX86_BUILTIN_PADDUSB,
16903 IX86_BUILTIN_PADDUSW,
16904 IX86_BUILTIN_PSUBB,
16905 IX86_BUILTIN_PSUBW,
16906 IX86_BUILTIN_PSUBD,
16907 IX86_BUILTIN_PSUBQ,
16908 IX86_BUILTIN_PSUBSB,
16909 IX86_BUILTIN_PSUBSW,
16910 IX86_BUILTIN_PSUBUSB,
16911 IX86_BUILTIN_PSUBUSW,
16913 IX86_BUILTIN_PAND,
16914 IX86_BUILTIN_PANDN,
16915 IX86_BUILTIN_POR,
16916 IX86_BUILTIN_PXOR,
16918 IX86_BUILTIN_PAVGB,
16919 IX86_BUILTIN_PAVGW,
16921 IX86_BUILTIN_PCMPEQB,
16922 IX86_BUILTIN_PCMPEQW,
16923 IX86_BUILTIN_PCMPEQD,
16924 IX86_BUILTIN_PCMPGTB,
16925 IX86_BUILTIN_PCMPGTW,
16926 IX86_BUILTIN_PCMPGTD,
16928 IX86_BUILTIN_PMADDWD,
16930 IX86_BUILTIN_PMAXSW,
16931 IX86_BUILTIN_PMAXUB,
16932 IX86_BUILTIN_PMINSW,
16933 IX86_BUILTIN_PMINUB,
16935 IX86_BUILTIN_PMULHUW,
16936 IX86_BUILTIN_PMULHW,
16937 IX86_BUILTIN_PMULLW,
16939 IX86_BUILTIN_PSADBW,
16940 IX86_BUILTIN_PSHUFW,
16942 IX86_BUILTIN_PSLLW,
16943 IX86_BUILTIN_PSLLD,
16944 IX86_BUILTIN_PSLLQ,
16945 IX86_BUILTIN_PSRAW,
16946 IX86_BUILTIN_PSRAD,
16947 IX86_BUILTIN_PSRLW,
16948 IX86_BUILTIN_PSRLD,
16949 IX86_BUILTIN_PSRLQ,
16950 IX86_BUILTIN_PSLLWI,
16951 IX86_BUILTIN_PSLLDI,
16952 IX86_BUILTIN_PSLLQI,
16953 IX86_BUILTIN_PSRAWI,
16954 IX86_BUILTIN_PSRADI,
16955 IX86_BUILTIN_PSRLWI,
16956 IX86_BUILTIN_PSRLDI,
16957 IX86_BUILTIN_PSRLQI,
16959 IX86_BUILTIN_PUNPCKHBW,
16960 IX86_BUILTIN_PUNPCKHWD,
16961 IX86_BUILTIN_PUNPCKHDQ,
16962 IX86_BUILTIN_PUNPCKLBW,
16963 IX86_BUILTIN_PUNPCKLWD,
16964 IX86_BUILTIN_PUNPCKLDQ,
16966 IX86_BUILTIN_SHUFPS,
16968 IX86_BUILTIN_RCPPS,
16969 IX86_BUILTIN_RCPSS,
16970 IX86_BUILTIN_RSQRTPS,
16971 IX86_BUILTIN_RSQRTSS,
16972 IX86_BUILTIN_RSQRTF,
16973 IX86_BUILTIN_SQRTPS,
16974 IX86_BUILTIN_SQRTSS,
16976 IX86_BUILTIN_UNPCKHPS,
16977 IX86_BUILTIN_UNPCKLPS,
16979 IX86_BUILTIN_ANDPS,
16980 IX86_BUILTIN_ANDNPS,
16981 IX86_BUILTIN_ORPS,
16982 IX86_BUILTIN_XORPS,
16984 IX86_BUILTIN_EMMS,
16985 IX86_BUILTIN_LDMXCSR,
16986 IX86_BUILTIN_STMXCSR,
16987 IX86_BUILTIN_SFENCE,
16989 /* 3DNow! Original */
16990 IX86_BUILTIN_FEMMS,
16991 IX86_BUILTIN_PAVGUSB,
16992 IX86_BUILTIN_PF2ID,
16993 IX86_BUILTIN_PFACC,
16994 IX86_BUILTIN_PFADD,
16995 IX86_BUILTIN_PFCMPEQ,
16996 IX86_BUILTIN_PFCMPGE,
16997 IX86_BUILTIN_PFCMPGT,
16998 IX86_BUILTIN_PFMAX,
16999 IX86_BUILTIN_PFMIN,
17000 IX86_BUILTIN_PFMUL,
17001 IX86_BUILTIN_PFRCP,
17002 IX86_BUILTIN_PFRCPIT1,
17003 IX86_BUILTIN_PFRCPIT2,
17004 IX86_BUILTIN_PFRSQIT1,
17005 IX86_BUILTIN_PFRSQRT,
17006 IX86_BUILTIN_PFSUB,
17007 IX86_BUILTIN_PFSUBR,
17008 IX86_BUILTIN_PI2FD,
17009 IX86_BUILTIN_PMULHRW,
17011 /* 3DNow! Athlon Extensions */
17012 IX86_BUILTIN_PF2IW,
17013 IX86_BUILTIN_PFNACC,
17014 IX86_BUILTIN_PFPNACC,
17015 IX86_BUILTIN_PI2FW,
17016 IX86_BUILTIN_PSWAPDSI,
17017 IX86_BUILTIN_PSWAPDSF,
17019 /* SSE2 */
17020 IX86_BUILTIN_ADDPD,
17021 IX86_BUILTIN_ADDSD,
17022 IX86_BUILTIN_DIVPD,
17023 IX86_BUILTIN_DIVSD,
17024 IX86_BUILTIN_MULPD,
17025 IX86_BUILTIN_MULSD,
17026 IX86_BUILTIN_SUBPD,
17027 IX86_BUILTIN_SUBSD,
17029 IX86_BUILTIN_CMPEQPD,
17030 IX86_BUILTIN_CMPLTPD,
17031 IX86_BUILTIN_CMPLEPD,
17032 IX86_BUILTIN_CMPGTPD,
17033 IX86_BUILTIN_CMPGEPD,
17034 IX86_BUILTIN_CMPNEQPD,
17035 IX86_BUILTIN_CMPNLTPD,
17036 IX86_BUILTIN_CMPNLEPD,
17037 IX86_BUILTIN_CMPNGTPD,
17038 IX86_BUILTIN_CMPNGEPD,
17039 IX86_BUILTIN_CMPORDPD,
17040 IX86_BUILTIN_CMPUNORDPD,
17041 IX86_BUILTIN_CMPEQSD,
17042 IX86_BUILTIN_CMPLTSD,
17043 IX86_BUILTIN_CMPLESD,
17044 IX86_BUILTIN_CMPNEQSD,
17045 IX86_BUILTIN_CMPNLTSD,
17046 IX86_BUILTIN_CMPNLESD,
17047 IX86_BUILTIN_CMPORDSD,
17048 IX86_BUILTIN_CMPUNORDSD,
17050 IX86_BUILTIN_COMIEQSD,
17051 IX86_BUILTIN_COMILTSD,
17052 IX86_BUILTIN_COMILESD,
17053 IX86_BUILTIN_COMIGTSD,
17054 IX86_BUILTIN_COMIGESD,
17055 IX86_BUILTIN_COMINEQSD,
17056 IX86_BUILTIN_UCOMIEQSD,
17057 IX86_BUILTIN_UCOMILTSD,
17058 IX86_BUILTIN_UCOMILESD,
17059 IX86_BUILTIN_UCOMIGTSD,
17060 IX86_BUILTIN_UCOMIGESD,
17061 IX86_BUILTIN_UCOMINEQSD,
17063 IX86_BUILTIN_MAXPD,
17064 IX86_BUILTIN_MAXSD,
17065 IX86_BUILTIN_MINPD,
17066 IX86_BUILTIN_MINSD,
17068 IX86_BUILTIN_ANDPD,
17069 IX86_BUILTIN_ANDNPD,
17070 IX86_BUILTIN_ORPD,
17071 IX86_BUILTIN_XORPD,
17073 IX86_BUILTIN_SQRTPD,
17074 IX86_BUILTIN_SQRTSD,
17076 IX86_BUILTIN_UNPCKHPD,
17077 IX86_BUILTIN_UNPCKLPD,
17079 IX86_BUILTIN_SHUFPD,
17081 IX86_BUILTIN_LOADUPD,
17082 IX86_BUILTIN_STOREUPD,
17083 IX86_BUILTIN_MOVSD,
17085 IX86_BUILTIN_LOADHPD,
17086 IX86_BUILTIN_LOADLPD,
17088 IX86_BUILTIN_CVTDQ2PD,
17089 IX86_BUILTIN_CVTDQ2PS,
17091 IX86_BUILTIN_CVTPD2DQ,
17092 IX86_BUILTIN_CVTPD2PI,
17093 IX86_BUILTIN_CVTPD2PS,
17094 IX86_BUILTIN_CVTTPD2DQ,
17095 IX86_BUILTIN_CVTTPD2PI,
17097 IX86_BUILTIN_CVTPI2PD,
17098 IX86_BUILTIN_CVTSI2SD,
17099 IX86_BUILTIN_CVTSI642SD,
17101 IX86_BUILTIN_CVTSD2SI,
17102 IX86_BUILTIN_CVTSD2SI64,
17103 IX86_BUILTIN_CVTSD2SS,
17104 IX86_BUILTIN_CVTSS2SD,
17105 IX86_BUILTIN_CVTTSD2SI,
17106 IX86_BUILTIN_CVTTSD2SI64,
17108 IX86_BUILTIN_CVTPS2DQ,
17109 IX86_BUILTIN_CVTPS2PD,
17110 IX86_BUILTIN_CVTTPS2DQ,
17112 IX86_BUILTIN_MOVNTI,
17113 IX86_BUILTIN_MOVNTPD,
17114 IX86_BUILTIN_MOVNTDQ,
17116 /* SSE2 MMX */
17117 IX86_BUILTIN_MASKMOVDQU,
17118 IX86_BUILTIN_MOVMSKPD,
17119 IX86_BUILTIN_PMOVMSKB128,
17121 IX86_BUILTIN_PACKSSWB128,
17122 IX86_BUILTIN_PACKSSDW128,
17123 IX86_BUILTIN_PACKUSWB128,
17125 IX86_BUILTIN_PADDB128,
17126 IX86_BUILTIN_PADDW128,
17127 IX86_BUILTIN_PADDD128,
17128 IX86_BUILTIN_PADDQ128,
17129 IX86_BUILTIN_PADDSB128,
17130 IX86_BUILTIN_PADDSW128,
17131 IX86_BUILTIN_PADDUSB128,
17132 IX86_BUILTIN_PADDUSW128,
17133 IX86_BUILTIN_PSUBB128,
17134 IX86_BUILTIN_PSUBW128,
17135 IX86_BUILTIN_PSUBD128,
17136 IX86_BUILTIN_PSUBQ128,
17137 IX86_BUILTIN_PSUBSB128,
17138 IX86_BUILTIN_PSUBSW128,
17139 IX86_BUILTIN_PSUBUSB128,
17140 IX86_BUILTIN_PSUBUSW128,
17142 IX86_BUILTIN_PAND128,
17143 IX86_BUILTIN_PANDN128,
17144 IX86_BUILTIN_POR128,
17145 IX86_BUILTIN_PXOR128,
17147 IX86_BUILTIN_PAVGB128,
17148 IX86_BUILTIN_PAVGW128,
17150 IX86_BUILTIN_PCMPEQB128,
17151 IX86_BUILTIN_PCMPEQW128,
17152 IX86_BUILTIN_PCMPEQD128,
17153 IX86_BUILTIN_PCMPGTB128,
17154 IX86_BUILTIN_PCMPGTW128,
17155 IX86_BUILTIN_PCMPGTD128,
17157 IX86_BUILTIN_PMADDWD128,
17159 IX86_BUILTIN_PMAXSW128,
17160 IX86_BUILTIN_PMAXUB128,
17161 IX86_BUILTIN_PMINSW128,
17162 IX86_BUILTIN_PMINUB128,
17164 IX86_BUILTIN_PMULUDQ,
17165 IX86_BUILTIN_PMULUDQ128,
17166 IX86_BUILTIN_PMULHUW128,
17167 IX86_BUILTIN_PMULHW128,
17168 IX86_BUILTIN_PMULLW128,
17170 IX86_BUILTIN_PSADBW128,
17171 IX86_BUILTIN_PSHUFHW,
17172 IX86_BUILTIN_PSHUFLW,
17173 IX86_BUILTIN_PSHUFD,
17175 IX86_BUILTIN_PSLLDQI128,
17176 IX86_BUILTIN_PSLLWI128,
17177 IX86_BUILTIN_PSLLDI128,
17178 IX86_BUILTIN_PSLLQI128,
17179 IX86_BUILTIN_PSRAWI128,
17180 IX86_BUILTIN_PSRADI128,
17181 IX86_BUILTIN_PSRLDQI128,
17182 IX86_BUILTIN_PSRLWI128,
17183 IX86_BUILTIN_PSRLDI128,
17184 IX86_BUILTIN_PSRLQI128,
17186 IX86_BUILTIN_PSLLDQ128,
17187 IX86_BUILTIN_PSLLW128,
17188 IX86_BUILTIN_PSLLD128,
17189 IX86_BUILTIN_PSLLQ128,
17190 IX86_BUILTIN_PSRAW128,
17191 IX86_BUILTIN_PSRAD128,
17192 IX86_BUILTIN_PSRLW128,
17193 IX86_BUILTIN_PSRLD128,
17194 IX86_BUILTIN_PSRLQ128,
17196 IX86_BUILTIN_PUNPCKHBW128,
17197 IX86_BUILTIN_PUNPCKHWD128,
17198 IX86_BUILTIN_PUNPCKHDQ128,
17199 IX86_BUILTIN_PUNPCKHQDQ128,
17200 IX86_BUILTIN_PUNPCKLBW128,
17201 IX86_BUILTIN_PUNPCKLWD128,
17202 IX86_BUILTIN_PUNPCKLDQ128,
17203 IX86_BUILTIN_PUNPCKLQDQ128,
17205 IX86_BUILTIN_CLFLUSH,
17206 IX86_BUILTIN_MFENCE,
17207 IX86_BUILTIN_LFENCE,
17209 /* Prescott New Instructions. */
17210 IX86_BUILTIN_ADDSUBPS,
17211 IX86_BUILTIN_HADDPS,
17212 IX86_BUILTIN_HSUBPS,
17213 IX86_BUILTIN_MOVSHDUP,
17214 IX86_BUILTIN_MOVSLDUP,
17215 IX86_BUILTIN_ADDSUBPD,
17216 IX86_BUILTIN_HADDPD,
17217 IX86_BUILTIN_HSUBPD,
17218 IX86_BUILTIN_LDDQU,
17220 IX86_BUILTIN_MONITOR,
17221 IX86_BUILTIN_MWAIT,
17223 /* SSSE3. */
17224 IX86_BUILTIN_PHADDW,
17225 IX86_BUILTIN_PHADDD,
17226 IX86_BUILTIN_PHADDSW,
17227 IX86_BUILTIN_PHSUBW,
17228 IX86_BUILTIN_PHSUBD,
17229 IX86_BUILTIN_PHSUBSW,
17230 IX86_BUILTIN_PMADDUBSW,
17231 IX86_BUILTIN_PMULHRSW,
17232 IX86_BUILTIN_PSHUFB,
17233 IX86_BUILTIN_PSIGNB,
17234 IX86_BUILTIN_PSIGNW,
17235 IX86_BUILTIN_PSIGND,
17236 IX86_BUILTIN_PALIGNR,
17237 IX86_BUILTIN_PABSB,
17238 IX86_BUILTIN_PABSW,
17239 IX86_BUILTIN_PABSD,
17241 IX86_BUILTIN_PHADDW128,
17242 IX86_BUILTIN_PHADDD128,
17243 IX86_BUILTIN_PHADDSW128,
17244 IX86_BUILTIN_PHSUBW128,
17245 IX86_BUILTIN_PHSUBD128,
17246 IX86_BUILTIN_PHSUBSW128,
17247 IX86_BUILTIN_PMADDUBSW128,
17248 IX86_BUILTIN_PMULHRSW128,
17249 IX86_BUILTIN_PSHUFB128,
17250 IX86_BUILTIN_PSIGNB128,
17251 IX86_BUILTIN_PSIGNW128,
17252 IX86_BUILTIN_PSIGND128,
17253 IX86_BUILTIN_PALIGNR128,
17254 IX86_BUILTIN_PABSB128,
17255 IX86_BUILTIN_PABSW128,
17256 IX86_BUILTIN_PABSD128,
17258 /* AMDFAM10 - SSE4A New Instructions. */
17259 IX86_BUILTIN_MOVNTSD,
17260 IX86_BUILTIN_MOVNTSS,
17261 IX86_BUILTIN_EXTRQI,
17262 IX86_BUILTIN_EXTRQ,
17263 IX86_BUILTIN_INSERTQI,
17264 IX86_BUILTIN_INSERTQ,
17266 /* SSE4.1. */
17267 IX86_BUILTIN_BLENDPD,
17268 IX86_BUILTIN_BLENDPS,
17269 IX86_BUILTIN_BLENDVPD,
17270 IX86_BUILTIN_BLENDVPS,
17271 IX86_BUILTIN_PBLENDVB128,
17272 IX86_BUILTIN_PBLENDW128,
17274 IX86_BUILTIN_DPPD,
17275 IX86_BUILTIN_DPPS,
17277 IX86_BUILTIN_INSERTPS128,
17279 IX86_BUILTIN_MOVNTDQA,
17280 IX86_BUILTIN_MPSADBW128,
17281 IX86_BUILTIN_PACKUSDW128,
17282 IX86_BUILTIN_PCMPEQQ,
17283 IX86_BUILTIN_PHMINPOSUW128,
17285 IX86_BUILTIN_PMAXSB128,
17286 IX86_BUILTIN_PMAXSD128,
17287 IX86_BUILTIN_PMAXUD128,
17288 IX86_BUILTIN_PMAXUW128,
17290 IX86_BUILTIN_PMINSB128,
17291 IX86_BUILTIN_PMINSD128,
17292 IX86_BUILTIN_PMINUD128,
17293 IX86_BUILTIN_PMINUW128,
17295 IX86_BUILTIN_PMOVSXBW128,
17296 IX86_BUILTIN_PMOVSXBD128,
17297 IX86_BUILTIN_PMOVSXBQ128,
17298 IX86_BUILTIN_PMOVSXWD128,
17299 IX86_BUILTIN_PMOVSXWQ128,
17300 IX86_BUILTIN_PMOVSXDQ128,
17302 IX86_BUILTIN_PMOVZXBW128,
17303 IX86_BUILTIN_PMOVZXBD128,
17304 IX86_BUILTIN_PMOVZXBQ128,
17305 IX86_BUILTIN_PMOVZXWD128,
17306 IX86_BUILTIN_PMOVZXWQ128,
17307 IX86_BUILTIN_PMOVZXDQ128,
17309 IX86_BUILTIN_PMULDQ128,
17310 IX86_BUILTIN_PMULLD128,
17312 IX86_BUILTIN_ROUNDPD,
17313 IX86_BUILTIN_ROUNDPS,
17314 IX86_BUILTIN_ROUNDSD,
17315 IX86_BUILTIN_ROUNDSS,
17317 IX86_BUILTIN_PTESTZ,
17318 IX86_BUILTIN_PTESTC,
17319 IX86_BUILTIN_PTESTNZC,
17321 IX86_BUILTIN_VEC_INIT_V2SI,
17322 IX86_BUILTIN_VEC_INIT_V4HI,
17323 IX86_BUILTIN_VEC_INIT_V8QI,
17324 IX86_BUILTIN_VEC_EXT_V2DF,
17325 IX86_BUILTIN_VEC_EXT_V2DI,
17326 IX86_BUILTIN_VEC_EXT_V4SF,
17327 IX86_BUILTIN_VEC_EXT_V4SI,
17328 IX86_BUILTIN_VEC_EXT_V8HI,
17329 IX86_BUILTIN_VEC_EXT_V2SI,
17330 IX86_BUILTIN_VEC_EXT_V4HI,
17331 IX86_BUILTIN_VEC_EXT_V16QI,
17332 IX86_BUILTIN_VEC_SET_V2DI,
17333 IX86_BUILTIN_VEC_SET_V4SF,
17334 IX86_BUILTIN_VEC_SET_V4SI,
17335 IX86_BUILTIN_VEC_SET_V8HI,
17336 IX86_BUILTIN_VEC_SET_V4HI,
17337 IX86_BUILTIN_VEC_SET_V16QI,
17339 IX86_BUILTIN_VEC_PACK_SFIX,
17341 /* SSE4.2. */
17342 IX86_BUILTIN_CRC32QI,
17343 IX86_BUILTIN_CRC32HI,
17344 IX86_BUILTIN_CRC32SI,
17345 IX86_BUILTIN_CRC32DI,
17347 IX86_BUILTIN_PCMPESTRI128,
17348 IX86_BUILTIN_PCMPESTRM128,
17349 IX86_BUILTIN_PCMPESTRA128,
17350 IX86_BUILTIN_PCMPESTRC128,
17351 IX86_BUILTIN_PCMPESTRO128,
17352 IX86_BUILTIN_PCMPESTRS128,
17353 IX86_BUILTIN_PCMPESTRZ128,
17354 IX86_BUILTIN_PCMPISTRI128,
17355 IX86_BUILTIN_PCMPISTRM128,
17356 IX86_BUILTIN_PCMPISTRA128,
17357 IX86_BUILTIN_PCMPISTRC128,
17358 IX86_BUILTIN_PCMPISTRO128,
17359 IX86_BUILTIN_PCMPISTRS128,
17360 IX86_BUILTIN_PCMPISTRZ128,
17362 IX86_BUILTIN_PCMPGTQ,
17364 /* TFmode support builtins. */
17365 IX86_BUILTIN_INFQ,
17366 IX86_BUILTIN_FABSQ,
17367 IX86_BUILTIN_COPYSIGNQ,
17369 /* SSE5 instructions */
17370 IX86_BUILTIN_FMADDSS,
17371 IX86_BUILTIN_FMADDSD,
17372 IX86_BUILTIN_FMADDPS,
17373 IX86_BUILTIN_FMADDPD,
17374 IX86_BUILTIN_FMSUBSS,
17375 IX86_BUILTIN_FMSUBSD,
17376 IX86_BUILTIN_FMSUBPS,
17377 IX86_BUILTIN_FMSUBPD,
17378 IX86_BUILTIN_FNMADDSS,
17379 IX86_BUILTIN_FNMADDSD,
17380 IX86_BUILTIN_FNMADDPS,
17381 IX86_BUILTIN_FNMADDPD,
17382 IX86_BUILTIN_FNMSUBSS,
17383 IX86_BUILTIN_FNMSUBSD,
17384 IX86_BUILTIN_FNMSUBPS,
17385 IX86_BUILTIN_FNMSUBPD,
17386 IX86_BUILTIN_PCMOV_V2DI,
17387 IX86_BUILTIN_PCMOV_V4SI,
17388 IX86_BUILTIN_PCMOV_V8HI,
17389 IX86_BUILTIN_PCMOV_V16QI,
17390 IX86_BUILTIN_PCMOV_V4SF,
17391 IX86_BUILTIN_PCMOV_V2DF,
17392 IX86_BUILTIN_PPERM,
17393 IX86_BUILTIN_PERMPS,
17394 IX86_BUILTIN_PERMPD,
17395 IX86_BUILTIN_PMACSSWW,
17396 IX86_BUILTIN_PMACSWW,
17397 IX86_BUILTIN_PMACSSWD,
17398 IX86_BUILTIN_PMACSWD,
17399 IX86_BUILTIN_PMACSSDD,
17400 IX86_BUILTIN_PMACSDD,
17401 IX86_BUILTIN_PMACSSDQL,
17402 IX86_BUILTIN_PMACSSDQH,
17403 IX86_BUILTIN_PMACSDQL,
17404 IX86_BUILTIN_PMACSDQH,
17405 IX86_BUILTIN_PMADCSSWD,
17406 IX86_BUILTIN_PMADCSWD,
17407 IX86_BUILTIN_PHADDBW,
17408 IX86_BUILTIN_PHADDBD,
17409 IX86_BUILTIN_PHADDBQ,
17410 IX86_BUILTIN_PHADDWD,
17411 IX86_BUILTIN_PHADDWQ,
17412 IX86_BUILTIN_PHADDDQ,
17413 IX86_BUILTIN_PHADDUBW,
17414 IX86_BUILTIN_PHADDUBD,
17415 IX86_BUILTIN_PHADDUBQ,
17416 IX86_BUILTIN_PHADDUWD,
17417 IX86_BUILTIN_PHADDUWQ,
17418 IX86_BUILTIN_PHADDUDQ,
17419 IX86_BUILTIN_PHSUBBW,
17420 IX86_BUILTIN_PHSUBWD,
17421 IX86_BUILTIN_PHSUBDQ,
17422 IX86_BUILTIN_PROTB,
17423 IX86_BUILTIN_PROTW,
17424 IX86_BUILTIN_PROTD,
17425 IX86_BUILTIN_PROTQ,
17426 IX86_BUILTIN_PROTB_IMM,
17427 IX86_BUILTIN_PROTW_IMM,
17428 IX86_BUILTIN_PROTD_IMM,
17429 IX86_BUILTIN_PROTQ_IMM,
17430 IX86_BUILTIN_PSHLB,
17431 IX86_BUILTIN_PSHLW,
17432 IX86_BUILTIN_PSHLD,
17433 IX86_BUILTIN_PSHLQ,
17434 IX86_BUILTIN_PSHAB,
17435 IX86_BUILTIN_PSHAW,
17436 IX86_BUILTIN_PSHAD,
17437 IX86_BUILTIN_PSHAQ,
17438 IX86_BUILTIN_FRCZSS,
17439 IX86_BUILTIN_FRCZSD,
17440 IX86_BUILTIN_FRCZPS,
17441 IX86_BUILTIN_FRCZPD,
17442 IX86_BUILTIN_CVTPH2PS,
17443 IX86_BUILTIN_CVTPS2PH,
17445 IX86_BUILTIN_COMEQSS,
17446 IX86_BUILTIN_COMNESS,
17447 IX86_BUILTIN_COMLTSS,
17448 IX86_BUILTIN_COMLESS,
17449 IX86_BUILTIN_COMGTSS,
17450 IX86_BUILTIN_COMGESS,
17451 IX86_BUILTIN_COMUEQSS,
17452 IX86_BUILTIN_COMUNESS,
17453 IX86_BUILTIN_COMULTSS,
17454 IX86_BUILTIN_COMULESS,
17455 IX86_BUILTIN_COMUGTSS,
17456 IX86_BUILTIN_COMUGESS,
17457 IX86_BUILTIN_COMORDSS,
17458 IX86_BUILTIN_COMUNORDSS,
17459 IX86_BUILTIN_COMFALSESS,
17460 IX86_BUILTIN_COMTRUESS,
17462 IX86_BUILTIN_COMEQSD,
17463 IX86_BUILTIN_COMNESD,
17464 IX86_BUILTIN_COMLTSD,
17465 IX86_BUILTIN_COMLESD,
17466 IX86_BUILTIN_COMGTSD,
17467 IX86_BUILTIN_COMGESD,
17468 IX86_BUILTIN_COMUEQSD,
17469 IX86_BUILTIN_COMUNESD,
17470 IX86_BUILTIN_COMULTSD,
17471 IX86_BUILTIN_COMULESD,
17472 IX86_BUILTIN_COMUGTSD,
17473 IX86_BUILTIN_COMUGESD,
17474 IX86_BUILTIN_COMORDSD,
17475 IX86_BUILTIN_COMUNORDSD,
17476 IX86_BUILTIN_COMFALSESD,
17477 IX86_BUILTIN_COMTRUESD,
17479 IX86_BUILTIN_COMEQPS,
17480 IX86_BUILTIN_COMNEPS,
17481 IX86_BUILTIN_COMLTPS,
17482 IX86_BUILTIN_COMLEPS,
17483 IX86_BUILTIN_COMGTPS,
17484 IX86_BUILTIN_COMGEPS,
17485 IX86_BUILTIN_COMUEQPS,
17486 IX86_BUILTIN_COMUNEPS,
17487 IX86_BUILTIN_COMULTPS,
17488 IX86_BUILTIN_COMULEPS,
17489 IX86_BUILTIN_COMUGTPS,
17490 IX86_BUILTIN_COMUGEPS,
17491 IX86_BUILTIN_COMORDPS,
17492 IX86_BUILTIN_COMUNORDPS,
17493 IX86_BUILTIN_COMFALSEPS,
17494 IX86_BUILTIN_COMTRUEPS,
17496 IX86_BUILTIN_COMEQPD,
17497 IX86_BUILTIN_COMNEPD,
17498 IX86_BUILTIN_COMLTPD,
17499 IX86_BUILTIN_COMLEPD,
17500 IX86_BUILTIN_COMGTPD,
17501 IX86_BUILTIN_COMGEPD,
17502 IX86_BUILTIN_COMUEQPD,
17503 IX86_BUILTIN_COMUNEPD,
17504 IX86_BUILTIN_COMULTPD,
17505 IX86_BUILTIN_COMULEPD,
17506 IX86_BUILTIN_COMUGTPD,
17507 IX86_BUILTIN_COMUGEPD,
17508 IX86_BUILTIN_COMORDPD,
17509 IX86_BUILTIN_COMUNORDPD,
17510 IX86_BUILTIN_COMFALSEPD,
17511 IX86_BUILTIN_COMTRUEPD,
17513 IX86_BUILTIN_PCOMEQUB,
17514 IX86_BUILTIN_PCOMNEUB,
17515 IX86_BUILTIN_PCOMLTUB,
17516 IX86_BUILTIN_PCOMLEUB,
17517 IX86_BUILTIN_PCOMGTUB,
17518 IX86_BUILTIN_PCOMGEUB,
17519 IX86_BUILTIN_PCOMFALSEUB,
17520 IX86_BUILTIN_PCOMTRUEUB,
17521 IX86_BUILTIN_PCOMEQUW,
17522 IX86_BUILTIN_PCOMNEUW,
17523 IX86_BUILTIN_PCOMLTUW,
17524 IX86_BUILTIN_PCOMLEUW,
17525 IX86_BUILTIN_PCOMGTUW,
17526 IX86_BUILTIN_PCOMGEUW,
17527 IX86_BUILTIN_PCOMFALSEUW,
17528 IX86_BUILTIN_PCOMTRUEUW,
17529 IX86_BUILTIN_PCOMEQUD,
17530 IX86_BUILTIN_PCOMNEUD,
17531 IX86_BUILTIN_PCOMLTUD,
17532 IX86_BUILTIN_PCOMLEUD,
17533 IX86_BUILTIN_PCOMGTUD,
17534 IX86_BUILTIN_PCOMGEUD,
17535 IX86_BUILTIN_PCOMFALSEUD,
17536 IX86_BUILTIN_PCOMTRUEUD,
17537 IX86_BUILTIN_PCOMEQUQ,
17538 IX86_BUILTIN_PCOMNEUQ,
17539 IX86_BUILTIN_PCOMLTUQ,
17540 IX86_BUILTIN_PCOMLEUQ,
17541 IX86_BUILTIN_PCOMGTUQ,
17542 IX86_BUILTIN_PCOMGEUQ,
17543 IX86_BUILTIN_PCOMFALSEUQ,
17544 IX86_BUILTIN_PCOMTRUEUQ,
17546 IX86_BUILTIN_PCOMEQB,
17547 IX86_BUILTIN_PCOMNEB,
17548 IX86_BUILTIN_PCOMLTB,
17549 IX86_BUILTIN_PCOMLEB,
17550 IX86_BUILTIN_PCOMGTB,
17551 IX86_BUILTIN_PCOMGEB,
17552 IX86_BUILTIN_PCOMFALSEB,
17553 IX86_BUILTIN_PCOMTRUEB,
17554 IX86_BUILTIN_PCOMEQW,
17555 IX86_BUILTIN_PCOMNEW,
17556 IX86_BUILTIN_PCOMLTW,
17557 IX86_BUILTIN_PCOMLEW,
17558 IX86_BUILTIN_PCOMGTW,
17559 IX86_BUILTIN_PCOMGEW,
17560 IX86_BUILTIN_PCOMFALSEW,
17561 IX86_BUILTIN_PCOMTRUEW,
17562 IX86_BUILTIN_PCOMEQD,
17563 IX86_BUILTIN_PCOMNED,
17564 IX86_BUILTIN_PCOMLTD,
17565 IX86_BUILTIN_PCOMLED,
17566 IX86_BUILTIN_PCOMGTD,
17567 IX86_BUILTIN_PCOMGED,
17568 IX86_BUILTIN_PCOMFALSED,
17569 IX86_BUILTIN_PCOMTRUED,
17570 IX86_BUILTIN_PCOMEQQ,
17571 IX86_BUILTIN_PCOMNEQ,
17572 IX86_BUILTIN_PCOMLTQ,
17573 IX86_BUILTIN_PCOMLEQ,
17574 IX86_BUILTIN_PCOMGTQ,
17575 IX86_BUILTIN_PCOMGEQ,
17576 IX86_BUILTIN_PCOMFALSEQ,
17577 IX86_BUILTIN_PCOMTRUEQ,
17579 IX86_BUILTIN_MAX
17582 /* Table for the ix86 builtin decls. */
17583 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
17585 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so,
17586 * if the target_flags include one of MASK. Stores the function decl
17587 * in the ix86_builtins array.
17588 * Returns the function decl or NULL_TREE, if the builtin was not added. */
17590 static inline tree
17591 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
17593 tree decl = NULL_TREE;
17595 if (mask & ix86_isa_flags
17596 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
17598 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
17599 NULL, NULL_TREE);
17600 ix86_builtins[(int) code] = decl;
17603 return decl;
17606 /* Like def_builtin, but also marks the function decl "const". */
17608 static inline tree
17609 def_builtin_const (int mask, const char *name, tree type,
17610 enum ix86_builtins code)
17612 tree decl = def_builtin (mask, name, type, code);
17613 if (decl)
17614 TREE_READONLY (decl) = 1;
17615 return decl;
17618 /* Bits for builtin_description.flag. */
17620 /* Set when we don't support the comparison natively, and should
17621 swap_comparison in order to support it. */
17622 #define BUILTIN_DESC_SWAP_OPERANDS 1
17624 struct builtin_description
17626 const unsigned int mask;
17627 const enum insn_code icode;
17628 const char *const name;
17629 const enum ix86_builtins code;
17630 const enum rtx_code comparison;
17631 const int flag;
17634 static const struct builtin_description bdesc_comi[] =
17636 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
17637 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
17638 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
17639 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
17640 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
17641 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
17642 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
17643 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
17644 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
17645 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
17646 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
17647 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
17648 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
17649 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
17650 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
17651 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
17652 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
17653 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
17654 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
17655 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
17656 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
17657 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
17658 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
17659 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
17662 static const struct builtin_description bdesc_ptest[] =
17664 /* SSE4.1 */
17665 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
17666 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
17667 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
17670 static const struct builtin_description bdesc_pcmpestr[] =
17672 /* SSE4.2 */
17673 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
17674 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
17675 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
17676 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
17677 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
17678 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
17679 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
17682 static const struct builtin_description bdesc_pcmpistr[] =
17684 /* SSE4.2 */
17685 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
17686 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
17687 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
17688 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
17689 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
17690 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
17691 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
17694 static const struct builtin_description bdesc_crc32[] =
17696 /* SSE4.2 */
17697 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, UNKNOWN, 0 },
17698 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, UNKNOWN, 0 },
17699 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, UNKNOWN, 0 },
17700 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, UNKNOWN, 0 },
17703 /* SSE builtins with 3 arguments and the last argument must be an immediate or xmm0. */
17704 static const struct builtin_description bdesc_sse_3arg[] =
17706 /* SSE4.1 */
17707 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, 0 },
17708 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, 0 },
17709 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, 0 },
17710 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, 0 },
17711 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, 0 },
17712 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, 0 },
17713 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, 0 },
17714 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, 0 },
17715 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, 0 },
17716 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
17717 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
17718 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
17721 static const struct builtin_description bdesc_2arg[] =
17723 /* SSE */
17724 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, 0 },
17725 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, 0 },
17726 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, 0 },
17727 { OPTION_MASK_ISA_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, 0 },
17728 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, 0 },
17729 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, 0 },
17730 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, 0 },
17731 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, 0 },
17733 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
17734 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
17735 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
17736 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, BUILTIN_DESC_SWAP_OPERANDS },
17737 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, BUILTIN_DESC_SWAP_OPERANDS },
17738 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
17739 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
17740 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
17741 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
17742 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17743 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17744 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
17745 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
17746 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
17747 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
17748 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
17749 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
17750 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
17751 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
17752 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17753 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17754 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
17756 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, 0 },
17757 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, 0 },
17758 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, 0 },
17759 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, 0 },
17761 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, 0 },
17762 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, 0 },
17763 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, 0 },
17764 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, 0 },
17766 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, 0 },
17767 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, 0 },
17768 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, 0 },
17769 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, 0 },
17770 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, 0 },
17772 /* MMX */
17773 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 },
17774 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 },
17775 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 },
17776 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
17777 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 },
17778 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 },
17779 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 },
17780 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },
17782 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 },
17783 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 },
17784 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, 0 },
17785 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, 0 },
17786 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, 0 },
17787 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, 0 },
17788 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, 0 },
17789 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, 0 },
17791 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, 0 },
17792 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, 0 },
17793 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, 0 },
17795 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, 0 },
17796 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, 0 },
17797 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, 0 },
17798 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, 0 },
17800 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, 0 },
17801 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, 0 },
17803 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, 0 },
17804 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, 0 },
17805 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, 0 },
17806 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, 0 },
17807 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, 0 },
17808 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, 0 },
17810 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, 0 },
17811 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, 0 },
17812 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, 0 },
17813 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, 0 },
17815 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, 0 },
17816 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, 0 },
17817 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, 0 },
17818 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, 0 },
17819 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, 0 },
17820 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, 0 },
17822 /* Special. */
17823 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, UNKNOWN, 0 },
17824 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, UNKNOWN, 0 },
17825 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, UNKNOWN, 0 },
17827 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, UNKNOWN, 0 },
17828 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 },
17829 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 },
17831 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, UNKNOWN, 0 },
17832 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, UNKNOWN, 0 },
17833 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, UNKNOWN, 0 },
17834 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, UNKNOWN, 0 },
17835 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, UNKNOWN, 0 },
17836 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, UNKNOWN, 0 },
17838 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, UNKNOWN, 0 },
17839 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, UNKNOWN, 0 },
17840 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, UNKNOWN, 0 },
17841 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, UNKNOWN, 0 },
17842 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, UNKNOWN, 0 },
17843 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, UNKNOWN, 0 },
17845 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, UNKNOWN, 0 },
17846 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, UNKNOWN, 0 },
17847 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, UNKNOWN, 0 },
17848 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, UNKNOWN, 0 },
17850 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 },
17851 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 },
17853 /* SSE2 */
17854 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, 0 },
17855 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, 0 },
17856 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, 0 },
17857 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, 0 },
17858 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, 0 },
17859 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, 0 },
17860 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, 0 },
17861 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, 0 },
17863 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
17864 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
17865 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
17866 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, BUILTIN_DESC_SWAP_OPERANDS },
17867 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, BUILTIN_DESC_SWAP_OPERANDS },
17868 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
17869 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
17870 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
17871 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
17872 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17873 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17874 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
17875 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
17876 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
17877 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
17878 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
17879 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
17880 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
17881 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
17882 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
17884 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, 0 },
17885 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, 0 },
17886 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, 0 },
17887 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, 0 },
17889 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, 0 },
17890 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, 0 },
17891 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, 0 },
17892 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, 0 },
17894 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, 0 },
17895 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, 0 },
17896 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, 0 },
17898 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, 0 },
17900 /* SSE2 MMX */
17901 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, 0 },
17902 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, 0 },
17903 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, 0 },
17904 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, 0 },
17905 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, 0 },
17906 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, 0 },
17907 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, 0 },
17908 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, 0 },
17910 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, 0 },
17911 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, 0 },
17912 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, 0 },
17913 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, 0 },
17914 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, 0 },
17915 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, 0 },
17916 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, 0 },
17917 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, 0 },
17919 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, 0 },
17920 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, 0 },
17922 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, 0 },
17923 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, 0 },
17924 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, 0 },
17925 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, 0 },
17927 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, 0 },
17928 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, 0 },
17930 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, 0 },
17931 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, 0 },
17932 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, 0 },
17933 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, 0 },
17934 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, 0 },
17935 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, 0 },
17937 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, 0 },
17938 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, 0 },
17939 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, 0 },
17940 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, 0 },
17942 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, 0 },
17943 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, 0 },
17944 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, 0 },
17945 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, 0 },
17946 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, 0 },
17947 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, 0 },
17948 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, 0 },
17949 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, 0 },
17951 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, 0 },
17952 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, 0 },
17953 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, 0 },
17955 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, 0 },
17956 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, UNKNOWN, 0 },
17958 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
17959 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 },
17961 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, UNKNOWN, 0 },
17962 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, UNKNOWN, 0 },
17963 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, UNKNOWN, 0 },
17965 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, UNKNOWN, 0 },
17966 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, UNKNOWN, 0 },
17967 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, UNKNOWN, 0 },
17969 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, UNKNOWN, 0 },
17970 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, UNKNOWN, 0 },
17972 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 },
17974 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 },
17975 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, UNKNOWN, 0 },
17976 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, UNKNOWN, 0 },
17977 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, UNKNOWN, 0 },
17979 /* SSE3 MMX */
17980 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, 0 },
17981 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, 0 },
17982 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, 0 },
17983 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, 0 },
17984 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, 0 },
17985 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, 0 },
17987 /* SSSE3 */
17988 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, 0 },
17989 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, 0 },
17990 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, 0 },
17991 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, 0 },
17992 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, 0 },
17993 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, 0 },
17994 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, 0 },
17995 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, 0 },
17996 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, 0 },
17997 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, 0 },
17998 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, 0 },
17999 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, 0 },
18000 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, 0 },
18001 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, 0 },
18002 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, 0 },
18003 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, 0 },
18004 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, 0 },
18005 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, 0 },
18006 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, 0 },
18007 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, 0 },
18008 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, 0 },
18009 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, 0 },
18010 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, 0 },
18011 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, 0 },
18013 /* SSE4.1 */
18014 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, 0 },
18015 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, 0 },
18016 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, 0 },
18017 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, 0 },
18018 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, 0 },
18019 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, 0 },
18020 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, 0 },
18021 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, 0 },
18022 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, 0 },
18023 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, 0 },
18024 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, UNKNOWN, 0 },
18025 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, 0 },
18027 /* SSE4.2 */
18028 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },
18031 static const struct builtin_description bdesc_1arg[] =
18033 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 },
18034 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 },
18036 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, UNKNOWN, 0 },
18037 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, UNKNOWN, 0 },
18038 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, UNKNOWN, 0 },
18040 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, UNKNOWN, 0 },
18041 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, UNKNOWN, 0 },
18042 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, UNKNOWN, 0 },
18043 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, UNKNOWN, 0 },
18044 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 },
18045 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 },
18047 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 },
18048 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 },
18050 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, UNKNOWN, 0 },
18052 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, UNKNOWN, 0 },
18053 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, UNKNOWN, 0 },
18055 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, UNKNOWN, 0 },
18056 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, UNKNOWN, 0 },
18057 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, UNKNOWN, 0 },
18058 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, 0 },
18059 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, UNKNOWN, 0 },
18061 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, UNKNOWN, 0 },
18063 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, UNKNOWN, 0 },
18064 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, UNKNOWN, 0 },
18065 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, UNKNOWN, 0 },
18066 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, 0 },
18068 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, UNKNOWN, 0 },
18069 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, UNKNOWN, 0 },
18070 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, 0 },
18072 /* SSE3 */
18073 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, 0 },
18074 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, 0 },
18076 /* SSSE3 */
18077 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, 0 },
18078 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, 0 },
18079 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, 0 },
18080 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, 0 },
18081 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, 0 },
18082 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, 0 },
18084 /* SSE4.1 */
18085 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, UNKNOWN, 0 },
18086 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, UNKNOWN, 0 },
18087 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, 0 },
18088 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, UNKNOWN, 0 },
18089 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, 0 },
18090 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, 0 },
18091 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, UNKNOWN, 0 },
18092 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, UNKNOWN, 0 },
18093 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, 0 },
18094 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, UNKNOWN, 0 },
18095 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, 0 },
18096 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, 0 },
18097 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, 0 },
18099 /* Fake 1 arg builtins with a constant smaller than 8 bits as the 2nd arg. */
18100 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 },
18101 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 },
18104 /* SSE5 */
18105 enum multi_arg_type {
18106 MULTI_ARG_UNKNOWN,
18107 MULTI_ARG_3_SF,
18108 MULTI_ARG_3_DF,
18109 MULTI_ARG_3_DI,
18110 MULTI_ARG_3_SI,
18111 MULTI_ARG_3_SI_DI,
18112 MULTI_ARG_3_HI,
18113 MULTI_ARG_3_HI_SI,
18114 MULTI_ARG_3_QI,
18115 MULTI_ARG_3_PERMPS,
18116 MULTI_ARG_3_PERMPD,
18117 MULTI_ARG_2_SF,
18118 MULTI_ARG_2_DF,
18119 MULTI_ARG_2_DI,
18120 MULTI_ARG_2_SI,
18121 MULTI_ARG_2_HI,
18122 MULTI_ARG_2_QI,
18123 MULTI_ARG_2_DI_IMM,
18124 MULTI_ARG_2_SI_IMM,
18125 MULTI_ARG_2_HI_IMM,
18126 MULTI_ARG_2_QI_IMM,
18127 MULTI_ARG_2_SF_CMP,
18128 MULTI_ARG_2_DF_CMP,
18129 MULTI_ARG_2_DI_CMP,
18130 MULTI_ARG_2_SI_CMP,
18131 MULTI_ARG_2_HI_CMP,
18132 MULTI_ARG_2_QI_CMP,
18133 MULTI_ARG_2_DI_TF,
18134 MULTI_ARG_2_SI_TF,
18135 MULTI_ARG_2_HI_TF,
18136 MULTI_ARG_2_QI_TF,
18137 MULTI_ARG_2_SF_TF,
18138 MULTI_ARG_2_DF_TF,
18139 MULTI_ARG_1_SF,
18140 MULTI_ARG_1_DF,
18141 MULTI_ARG_1_DI,
18142 MULTI_ARG_1_SI,
18143 MULTI_ARG_1_HI,
18144 MULTI_ARG_1_QI,
18145 MULTI_ARG_1_SI_DI,
18146 MULTI_ARG_1_HI_DI,
18147 MULTI_ARG_1_HI_SI,
18148 MULTI_ARG_1_QI_DI,
18149 MULTI_ARG_1_QI_SI,
18150 MULTI_ARG_1_QI_HI,
18151 MULTI_ARG_1_PH2PS,
18152 MULTI_ARG_1_PS2PH
18155 static const struct builtin_description bdesc_multi_arg[] =
18157 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
18158 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
18159 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
18160 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
18161 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
18162 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
18163 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
18164 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
18165 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
18166 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
18167 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
18168 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
18169 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
18170 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
18171 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
18172 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
18173 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18174 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18175 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
18176 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
18177 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
18178 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
18179 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
18180 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
18181 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
18182 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
18183 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
18184 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
18185 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18186 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
18187 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
18188 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
18189 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18190 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18191 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18192 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18193 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18194 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
18195 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
18196 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
18197 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
18198 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
18199 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
18200 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
18201 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
18202 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
18203 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
18204 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
18205 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
18206 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
18207 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
18208 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
18209 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
18210 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
18211 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
18212 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
18213 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
18214 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
18215 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
18216 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
18217 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
18218 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
18219 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
18220 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
18221 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
18222 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
18223 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
18224 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
18225 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
18226 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
18227 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
18228 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
18229 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
18230 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
18231 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
18233 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
18234 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18235 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18236 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
18237 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
18238 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
18239 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
18240 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18241 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18242 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18243 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18244 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18245 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18246 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18247 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18248 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18250 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
18251 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18252 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18253 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
18254 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
18255 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
18256 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
18257 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18258 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18259 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18260 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18261 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18262 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18263 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18264 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18265 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18267 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
18268 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18269 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18270 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
18271 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
18272 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
18273 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
18274 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18275 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18276 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18277 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18278 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18279 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18280 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18281 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18282 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18284 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
18285 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18286 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18287 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
18288 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
18289 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
18290 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
18291 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18292 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18293 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18294 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18295 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18296 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18297 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18298 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18299 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18301 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
18302 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
18303 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
18304 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
18305 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
18306 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
18307 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
18309 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
18310 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
18311 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
18312 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
18313 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
18314 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
18315 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
18317 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
18318 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
18319 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
18320 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
18321 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
18322 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
18323 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
18325 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
18326 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
18327 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
18328 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
18329 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
18330 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
18331 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
18333 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
18334 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
18335 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
18336 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
18337 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
18338 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
18339 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
18341 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
18342 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
18343 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
18344 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
18345 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
18346 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
18347 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
18349 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
18350 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
18351 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
18352 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
18353 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
18354 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
18355 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
18357 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
18358 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
18359 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
18360 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
18361 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
18362 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
18363 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
18365 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
18366 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
18367 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
18368 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
18369 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
18370 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
18371 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
18372 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
18374 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
18375 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
18376 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
18377 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
18378 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
18379 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
18380 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
18381 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
18383 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
18384 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
18385 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
18386 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
18387 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
18388 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
18389 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
18390 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
18393 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
18394 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
18395 builtins. */
18396 static void
18397 ix86_init_mmx_sse_builtins (void)
18399 const struct builtin_description * d;
18400 size_t i;
18402 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
18403 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
18404 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
18405 tree V2DI_type_node
18406 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
18407 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
18408 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
18409 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
18410 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
18411 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
18412 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
18414 tree pchar_type_node = build_pointer_type (char_type_node);
18415 tree pcchar_type_node = build_pointer_type (
18416 build_type_variant (char_type_node, 1, 0));
18417 tree pfloat_type_node = build_pointer_type (float_type_node);
18418 tree pcfloat_type_node = build_pointer_type (
18419 build_type_variant (float_type_node, 1, 0));
18420 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
18421 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
18422 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
18424 /* Comparisons. */
18425 tree int_ftype_v4sf_v4sf
18426 = build_function_type_list (integer_type_node,
18427 V4SF_type_node, V4SF_type_node, NULL_TREE);
18428 tree v4si_ftype_v4sf_v4sf
18429 = build_function_type_list (V4SI_type_node,
18430 V4SF_type_node, V4SF_type_node, NULL_TREE);
18431 /* MMX/SSE/integer conversions. */
18432 tree int_ftype_v4sf
18433 = build_function_type_list (integer_type_node,
18434 V4SF_type_node, NULL_TREE);
18435 tree int64_ftype_v4sf
18436 = build_function_type_list (long_long_integer_type_node,
18437 V4SF_type_node, NULL_TREE);
18438 tree int_ftype_v8qi
18439 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
18440 tree v4sf_ftype_v4sf_int
18441 = build_function_type_list (V4SF_type_node,
18442 V4SF_type_node, integer_type_node, NULL_TREE);
18443 tree v4sf_ftype_v4sf_int64
18444 = build_function_type_list (V4SF_type_node,
18445 V4SF_type_node, long_long_integer_type_node,
18446 NULL_TREE);
18447 tree v4sf_ftype_v4sf_v2si
18448 = build_function_type_list (V4SF_type_node,
18449 V4SF_type_node, V2SI_type_node, NULL_TREE);
18451 /* Miscellaneous. */
18452 tree v8qi_ftype_v4hi_v4hi
18453 = build_function_type_list (V8QI_type_node,
18454 V4HI_type_node, V4HI_type_node, NULL_TREE);
18455 tree v4hi_ftype_v2si_v2si
18456 = build_function_type_list (V4HI_type_node,
18457 V2SI_type_node, V2SI_type_node, NULL_TREE);
18458 tree v4sf_ftype_v4sf_v4sf_int
18459 = build_function_type_list (V4SF_type_node,
18460 V4SF_type_node, V4SF_type_node,
18461 integer_type_node, NULL_TREE);
18462 tree v2si_ftype_v4hi_v4hi
18463 = build_function_type_list (V2SI_type_node,
18464 V4HI_type_node, V4HI_type_node, NULL_TREE);
18465 tree v4hi_ftype_v4hi_int
18466 = build_function_type_list (V4HI_type_node,
18467 V4HI_type_node, integer_type_node, NULL_TREE);
18468 tree v4hi_ftype_v4hi_di
18469 = build_function_type_list (V4HI_type_node,
18470 V4HI_type_node, long_long_unsigned_type_node,
18471 NULL_TREE);
18472 tree v2si_ftype_v2si_di
18473 = build_function_type_list (V2SI_type_node,
18474 V2SI_type_node, long_long_unsigned_type_node,
18475 NULL_TREE);
18476 tree void_ftype_void
18477 = build_function_type (void_type_node, void_list_node);
18478 tree void_ftype_unsigned
18479 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
18480 tree void_ftype_unsigned_unsigned
18481 = build_function_type_list (void_type_node, unsigned_type_node,
18482 unsigned_type_node, NULL_TREE);
18483 tree void_ftype_pcvoid_unsigned_unsigned
18484 = build_function_type_list (void_type_node, const_ptr_type_node,
18485 unsigned_type_node, unsigned_type_node,
18486 NULL_TREE);
18487 tree unsigned_ftype_void
18488 = build_function_type (unsigned_type_node, void_list_node);
18489 tree v2si_ftype_v4sf
18490 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
18491 /* Loads/stores. */
18492 tree void_ftype_v8qi_v8qi_pchar
18493 = build_function_type_list (void_type_node,
18494 V8QI_type_node, V8QI_type_node,
18495 pchar_type_node, NULL_TREE);
18496 tree v4sf_ftype_pcfloat
18497 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
18498 /* @@@ the type is bogus */
18499 tree v4sf_ftype_v4sf_pv2si
18500 = build_function_type_list (V4SF_type_node,
18501 V4SF_type_node, pv2si_type_node, NULL_TREE);
18502 tree void_ftype_pv2si_v4sf
18503 = build_function_type_list (void_type_node,
18504 pv2si_type_node, V4SF_type_node, NULL_TREE);
18505 tree void_ftype_pfloat_v4sf
18506 = build_function_type_list (void_type_node,
18507 pfloat_type_node, V4SF_type_node, NULL_TREE);
18508 tree void_ftype_pdi_di
18509 = build_function_type_list (void_type_node,
18510 pdi_type_node, long_long_unsigned_type_node,
18511 NULL_TREE);
18512 tree void_ftype_pv2di_v2di
18513 = build_function_type_list (void_type_node,
18514 pv2di_type_node, V2DI_type_node, NULL_TREE);
18515 /* Normal vector unops. */
18516 tree v4sf_ftype_v4sf
18517 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
18518 tree v16qi_ftype_v16qi
18519 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
18520 tree v8hi_ftype_v8hi
18521 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
18522 tree v4si_ftype_v4si
18523 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
18524 tree v8qi_ftype_v8qi
18525 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
18526 tree v4hi_ftype_v4hi
18527 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
18529 /* Normal vector binops. */
18530 tree v4sf_ftype_v4sf_v4sf
18531 = build_function_type_list (V4SF_type_node,
18532 V4SF_type_node, V4SF_type_node, NULL_TREE);
18533 tree v8qi_ftype_v8qi_v8qi
18534 = build_function_type_list (V8QI_type_node,
18535 V8QI_type_node, V8QI_type_node, NULL_TREE);
18536 tree v4hi_ftype_v4hi_v4hi
18537 = build_function_type_list (V4HI_type_node,
18538 V4HI_type_node, V4HI_type_node, NULL_TREE);
18539 tree v2si_ftype_v2si_v2si
18540 = build_function_type_list (V2SI_type_node,
18541 V2SI_type_node, V2SI_type_node, NULL_TREE);
18542 tree di_ftype_di_di
18543 = build_function_type_list (long_long_unsigned_type_node,
18544 long_long_unsigned_type_node,
18545 long_long_unsigned_type_node, NULL_TREE);
18547 tree di_ftype_di_di_int
18548 = build_function_type_list (long_long_unsigned_type_node,
18549 long_long_unsigned_type_node,
18550 long_long_unsigned_type_node,
18551 integer_type_node, NULL_TREE);
18553 tree v2si_ftype_v2sf
18554 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
18555 tree v2sf_ftype_v2si
18556 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
18557 tree v2si_ftype_v2si
18558 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
18559 tree v2sf_ftype_v2sf
18560 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
18561 tree v2sf_ftype_v2sf_v2sf
18562 = build_function_type_list (V2SF_type_node,
18563 V2SF_type_node, V2SF_type_node, NULL_TREE);
18564 tree v2si_ftype_v2sf_v2sf
18565 = build_function_type_list (V2SI_type_node,
18566 V2SF_type_node, V2SF_type_node, NULL_TREE);
18567 tree pint_type_node = build_pointer_type (integer_type_node);
18568 tree pdouble_type_node = build_pointer_type (double_type_node);
18569 tree pcdouble_type_node = build_pointer_type (
18570 build_type_variant (double_type_node, 1, 0));
18571 tree int_ftype_v2df_v2df
18572 = build_function_type_list (integer_type_node,
18573 V2DF_type_node, V2DF_type_node, NULL_TREE);
18575 tree void_ftype_pcvoid
18576 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
18577 tree v4sf_ftype_v4si
18578 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
18579 tree v4si_ftype_v4sf
18580 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
18581 tree v2df_ftype_v4si
18582 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
18583 tree v4si_ftype_v2df
18584 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
18585 tree v4si_ftype_v2df_v2df
18586 = build_function_type_list (V4SI_type_node,
18587 V2DF_type_node, V2DF_type_node, NULL_TREE);
18588 tree v2si_ftype_v2df
18589 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
18590 tree v4sf_ftype_v2df
18591 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
18592 tree v2df_ftype_v2si
18593 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
18594 tree v2df_ftype_v4sf
18595 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
18596 tree int_ftype_v2df
18597 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
18598 tree int64_ftype_v2df
18599 = build_function_type_list (long_long_integer_type_node,
18600 V2DF_type_node, NULL_TREE);
18601 tree v2df_ftype_v2df_int
18602 = build_function_type_list (V2DF_type_node,
18603 V2DF_type_node, integer_type_node, NULL_TREE);
18604 tree v2df_ftype_v2df_int64
18605 = build_function_type_list (V2DF_type_node,
18606 V2DF_type_node, long_long_integer_type_node,
18607 NULL_TREE);
18608 tree v4sf_ftype_v4sf_v2df
18609 = build_function_type_list (V4SF_type_node,
18610 V4SF_type_node, V2DF_type_node, NULL_TREE);
18611 tree v2df_ftype_v2df_v4sf
18612 = build_function_type_list (V2DF_type_node,
18613 V2DF_type_node, V4SF_type_node, NULL_TREE);
18614 tree v2df_ftype_v2df_v2df_int
18615 = build_function_type_list (V2DF_type_node,
18616 V2DF_type_node, V2DF_type_node,
18617 integer_type_node,
18618 NULL_TREE);
18619 tree v2df_ftype_v2df_pcdouble
18620 = build_function_type_list (V2DF_type_node,
18621 V2DF_type_node, pcdouble_type_node, NULL_TREE);
18622 tree void_ftype_pdouble_v2df
18623 = build_function_type_list (void_type_node,
18624 pdouble_type_node, V2DF_type_node, NULL_TREE);
18625 tree void_ftype_pint_int
18626 = build_function_type_list (void_type_node,
18627 pint_type_node, integer_type_node, NULL_TREE);
18628 tree void_ftype_v16qi_v16qi_pchar
18629 = build_function_type_list (void_type_node,
18630 V16QI_type_node, V16QI_type_node,
18631 pchar_type_node, NULL_TREE);
18632 tree v2df_ftype_pcdouble
18633 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
18634 tree v2df_ftype_v2df_v2df
18635 = build_function_type_list (V2DF_type_node,
18636 V2DF_type_node, V2DF_type_node, NULL_TREE);
18637 tree v16qi_ftype_v16qi_v16qi
18638 = build_function_type_list (V16QI_type_node,
18639 V16QI_type_node, V16QI_type_node, NULL_TREE);
18640 tree v8hi_ftype_v8hi_v8hi
18641 = build_function_type_list (V8HI_type_node,
18642 V8HI_type_node, V8HI_type_node, NULL_TREE);
18643 tree v4si_ftype_v4si_v4si
18644 = build_function_type_list (V4SI_type_node,
18645 V4SI_type_node, V4SI_type_node, NULL_TREE);
18646 tree v2di_ftype_v2di_v2di
18647 = build_function_type_list (V2DI_type_node,
18648 V2DI_type_node, V2DI_type_node, NULL_TREE);
18649 tree v2di_ftype_v2df_v2df
18650 = build_function_type_list (V2DI_type_node,
18651 V2DF_type_node, V2DF_type_node, NULL_TREE);
18652 tree v2df_ftype_v2df
18653 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
18654 tree v2di_ftype_v2di_int
18655 = build_function_type_list (V2DI_type_node,
18656 V2DI_type_node, integer_type_node, NULL_TREE);
18657 tree v2di_ftype_v2di_v2di_int
18658 = build_function_type_list (V2DI_type_node, V2DI_type_node,
18659 V2DI_type_node, integer_type_node, NULL_TREE);
18660 tree v4si_ftype_v4si_int
18661 = build_function_type_list (V4SI_type_node,
18662 V4SI_type_node, integer_type_node, NULL_TREE);
18663 tree v8hi_ftype_v8hi_int
18664 = build_function_type_list (V8HI_type_node,
18665 V8HI_type_node, integer_type_node, NULL_TREE);
18666 tree v4si_ftype_v8hi_v8hi
18667 = build_function_type_list (V4SI_type_node,
18668 V8HI_type_node, V8HI_type_node, NULL_TREE);
18669 tree di_ftype_v8qi_v8qi
18670 = build_function_type_list (long_long_unsigned_type_node,
18671 V8QI_type_node, V8QI_type_node, NULL_TREE);
18672 tree di_ftype_v2si_v2si
18673 = build_function_type_list (long_long_unsigned_type_node,
18674 V2SI_type_node, V2SI_type_node, NULL_TREE);
18675 tree v2di_ftype_v16qi_v16qi
18676 = build_function_type_list (V2DI_type_node,
18677 V16QI_type_node, V16QI_type_node, NULL_TREE);
18678 tree v2di_ftype_v4si_v4si
18679 = build_function_type_list (V2DI_type_node,
18680 V4SI_type_node, V4SI_type_node, NULL_TREE);
18681 tree int_ftype_v16qi
18682 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
18683 tree v16qi_ftype_pcchar
18684 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
18685 tree void_ftype_pchar_v16qi
18686 = build_function_type_list (void_type_node,
18687 pchar_type_node, V16QI_type_node, NULL_TREE);
18689 tree v2di_ftype_v2di_unsigned_unsigned
18690 = build_function_type_list (V2DI_type_node, V2DI_type_node,
18691 unsigned_type_node, unsigned_type_node,
18692 NULL_TREE);
18693 tree v2di_ftype_v2di_v2di_unsigned_unsigned
18694 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
18695 unsigned_type_node, unsigned_type_node,
18696 NULL_TREE);
18697 tree v2di_ftype_v2di_v16qi
18698 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
18699 NULL_TREE);
18700 tree v2df_ftype_v2df_v2df_v2df
18701 = build_function_type_list (V2DF_type_node,
18702 V2DF_type_node, V2DF_type_node,
18703 V2DF_type_node, NULL_TREE);
18704 tree v4sf_ftype_v4sf_v4sf_v4sf
18705 = build_function_type_list (V4SF_type_node,
18706 V4SF_type_node, V4SF_type_node,
18707 V4SF_type_node, NULL_TREE);
18708 tree v8hi_ftype_v16qi
18709 = build_function_type_list (V8HI_type_node, V16QI_type_node,
18710 NULL_TREE);
18711 tree v4si_ftype_v16qi
18712 = build_function_type_list (V4SI_type_node, V16QI_type_node,
18713 NULL_TREE);
18714 tree v2di_ftype_v16qi
18715 = build_function_type_list (V2DI_type_node, V16QI_type_node,
18716 NULL_TREE);
18717 tree v4si_ftype_v8hi
18718 = build_function_type_list (V4SI_type_node, V8HI_type_node,
18719 NULL_TREE);
18720 tree v2di_ftype_v8hi
18721 = build_function_type_list (V2DI_type_node, V8HI_type_node,
18722 NULL_TREE);
18723 tree v2di_ftype_v4si
18724 = build_function_type_list (V2DI_type_node, V4SI_type_node,
18725 NULL_TREE);
18726 tree v2di_ftype_pv2di
18727 = build_function_type_list (V2DI_type_node, pv2di_type_node,
18728 NULL_TREE);
18729 tree v16qi_ftype_v16qi_v16qi_int
18730 = build_function_type_list (V16QI_type_node, V16QI_type_node,
18731 V16QI_type_node, integer_type_node,
18732 NULL_TREE);
18733 tree v16qi_ftype_v16qi_v16qi_v16qi
18734 = build_function_type_list (V16QI_type_node, V16QI_type_node,
18735 V16QI_type_node, V16QI_type_node,
18736 NULL_TREE);
18737 tree v8hi_ftype_v8hi_v8hi_int
18738 = build_function_type_list (V8HI_type_node, V8HI_type_node,
18739 V8HI_type_node, integer_type_node,
18740 NULL_TREE);
18741 tree v4si_ftype_v4si_v4si_int
18742 = build_function_type_list (V4SI_type_node, V4SI_type_node,
18743 V4SI_type_node, integer_type_node,
18744 NULL_TREE);
18745 tree int_ftype_v2di_v2di
18746 = build_function_type_list (integer_type_node,
18747 V2DI_type_node, V2DI_type_node,
18748 NULL_TREE);
18749 tree int_ftype_v16qi_int_v16qi_int_int
18750 = build_function_type_list (integer_type_node,
18751 V16QI_type_node,
18752 integer_type_node,
18753 V16QI_type_node,
18754 integer_type_node,
18755 integer_type_node,
18756 NULL_TREE);
18757 tree v16qi_ftype_v16qi_int_v16qi_int_int
18758 = build_function_type_list (V16QI_type_node,
18759 V16QI_type_node,
18760 integer_type_node,
18761 V16QI_type_node,
18762 integer_type_node,
18763 integer_type_node,
18764 NULL_TREE);
18765 tree int_ftype_v16qi_v16qi_int
18766 = build_function_type_list (integer_type_node,
18767 V16QI_type_node,
18768 V16QI_type_node,
18769 integer_type_node,
18770 NULL_TREE);
18772 /* SSE5 instructions */
18773 tree v2di_ftype_v2di_v2di_v2di
18774 = build_function_type_list (V2DI_type_node,
18775 V2DI_type_node,
18776 V2DI_type_node,
18777 V2DI_type_node,
18778 NULL_TREE);
18780 tree v4si_ftype_v4si_v4si_v4si
18781 = build_function_type_list (V4SI_type_node,
18782 V4SI_type_node,
18783 V4SI_type_node,
18784 V4SI_type_node,
18785 NULL_TREE);
18787 tree v4si_ftype_v4si_v4si_v2di
18788 = build_function_type_list (V4SI_type_node,
18789 V4SI_type_node,
18790 V4SI_type_node,
18791 V2DI_type_node,
18792 NULL_TREE);
18794 tree v8hi_ftype_v8hi_v8hi_v8hi
18795 = build_function_type_list (V8HI_type_node,
18796 V8HI_type_node,
18797 V8HI_type_node,
18798 V8HI_type_node,
18799 NULL_TREE);
18801 tree v8hi_ftype_v8hi_v8hi_v4si
18802 = build_function_type_list (V8HI_type_node,
18803 V8HI_type_node,
18804 V8HI_type_node,
18805 V4SI_type_node,
18806 NULL_TREE);
18808 tree v2df_ftype_v2df_v2df_v16qi
18809 = build_function_type_list (V2DF_type_node,
18810 V2DF_type_node,
18811 V2DF_type_node,
18812 V16QI_type_node,
18813 NULL_TREE);
18815 tree v4sf_ftype_v4sf_v4sf_v16qi
18816 = build_function_type_list (V4SF_type_node,
18817 V4SF_type_node,
18818 V4SF_type_node,
18819 V16QI_type_node,
18820 NULL_TREE);
18822 tree v2di_ftype_v2di_si
18823 = build_function_type_list (V2DI_type_node,
18824 V2DI_type_node,
18825 integer_type_node,
18826 NULL_TREE);
18828 tree v4si_ftype_v4si_si
18829 = build_function_type_list (V4SI_type_node,
18830 V4SI_type_node,
18831 integer_type_node,
18832 NULL_TREE);
18834 tree v8hi_ftype_v8hi_si
18835 = build_function_type_list (V8HI_type_node,
18836 V8HI_type_node,
18837 integer_type_node,
18838 NULL_TREE);
18840 tree v16qi_ftype_v16qi_si
18841 = build_function_type_list (V16QI_type_node,
18842 V16QI_type_node,
18843 integer_type_node,
18844 NULL_TREE);
18845 tree v4sf_ftype_v4hi
18846 = build_function_type_list (V4SF_type_node,
18847 V4HI_type_node,
18848 NULL_TREE);
18850 tree v4hi_ftype_v4sf
18851 = build_function_type_list (V4HI_type_node,
18852 V4SF_type_node,
18853 NULL_TREE);
18855 tree v2di_ftype_v2di
18856 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
18858 tree ftype;
18860 /* The __float80 type. */
18861 if (TYPE_MODE (long_double_type_node) == XFmode)
18862 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
18863 "__float80");
18864 else
18866 /* The __float80 type. */
18867 tree float80_type_node = make_node (REAL_TYPE);
18869 TYPE_PRECISION (float80_type_node) = 80;
18870 layout_type (float80_type_node);
18871 (*lang_hooks.types.register_builtin_type) (float80_type_node,
18872 "__float80");
18875 if (TARGET_64BIT)
18877 tree float128_type_node = make_node (REAL_TYPE);
18879 TYPE_PRECISION (float128_type_node) = 128;
18880 layout_type (float128_type_node);
18881 (*lang_hooks.types.register_builtin_type) (float128_type_node,
18882 "__float128");
18884 /* TFmode support builtins. */
18885 ftype = build_function_type (float128_type_node,
18886 void_list_node);
18887 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);
18889 ftype = build_function_type_list (float128_type_node,
18890 float128_type_node,
18891 NULL_TREE);
18892 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
18894 ftype = build_function_type_list (float128_type_node,
18895 float128_type_node,
18896 float128_type_node,
18897 NULL_TREE);
18898 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
18901 /* Add all SSE builtins that are more or less simple operations on
18902 three operands. */
18903 for (i = 0, d = bdesc_sse_3arg;
18904 i < ARRAY_SIZE (bdesc_sse_3arg);
18905 i++, d++)
18907 /* Use one of the operands; the target can have a different mode for
18908 mask-generating compares. */
18909 enum machine_mode mode;
18910 tree type;
18912 if (d->name == 0)
18913 continue;
18914 mode = insn_data[d->icode].operand[1].mode;
18916 switch (mode)
18918 case V16QImode:
18919 type = v16qi_ftype_v16qi_v16qi_int;
18920 break;
18921 case V8HImode:
18922 type = v8hi_ftype_v8hi_v8hi_int;
18923 break;
18924 case V4SImode:
18925 type = v4si_ftype_v4si_v4si_int;
18926 break;
18927 case V2DImode:
18928 type = v2di_ftype_v2di_v2di_int;
18929 break;
18930 case V2DFmode:
18931 type = v2df_ftype_v2df_v2df_int;
18932 break;
18933 case V4SFmode:
18934 type = v4sf_ftype_v4sf_v4sf_int;
18935 break;
18936 default:
18937 gcc_unreachable ();
18940 /* Override for variable blends. */
18941 switch (d->icode)
18943 case CODE_FOR_sse4_1_blendvpd:
18944 type = v2df_ftype_v2df_v2df_v2df;
18945 break;
18946 case CODE_FOR_sse4_1_blendvps:
18947 type = v4sf_ftype_v4sf_v4sf_v4sf;
18948 break;
18949 case CODE_FOR_sse4_1_pblendvb:
18950 type = v16qi_ftype_v16qi_v16qi_v16qi;
18951 break;
18952 default:
18953 break;
18956 def_builtin_const (d->mask, d->name, type, d->code);
18959 /* Add all builtins that are more or less simple operations on two
18960 operands. */
18961 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18963 /* Use one of the operands; the target can have a different mode for
18964 mask-generating compares. */
18965 enum machine_mode mode;
18966 tree type;
18968 if (d->name == 0)
18969 continue;
18970 mode = insn_data[d->icode].operand[1].mode;
18972 switch (mode)
18974 case V16QImode:
18975 type = v16qi_ftype_v16qi_v16qi;
18976 break;
18977 case V8HImode:
18978 type = v8hi_ftype_v8hi_v8hi;
18979 break;
18980 case V4SImode:
18981 type = v4si_ftype_v4si_v4si;
18982 break;
18983 case V2DImode:
18984 type = v2di_ftype_v2di_v2di;
18985 break;
18986 case V2DFmode:
18987 type = v2df_ftype_v2df_v2df;
18988 break;
18989 case V4SFmode:
18990 type = v4sf_ftype_v4sf_v4sf;
18991 break;
18992 case V8QImode:
18993 type = v8qi_ftype_v8qi_v8qi;
18994 break;
18995 case V4HImode:
18996 type = v4hi_ftype_v4hi_v4hi;
18997 break;
18998 case V2SImode:
18999 type = v2si_ftype_v2si_v2si;
19000 break;
19001 case DImode:
19002 type = di_ftype_di_di;
19003 break;
19005 default:
19006 gcc_unreachable ();
19009 /* Override for comparisons. */
19010 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
19011 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
19012 type = v4si_ftype_v4sf_v4sf;
19014 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
19015 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
19016 type = v2di_ftype_v2df_v2df;
19018 if (d->icode == CODE_FOR_vec_pack_sfix_v2df)
19019 type = v4si_ftype_v2df_v2df;
19021 def_builtin_const (d->mask, d->name, type, d->code);
19024 /* Add all builtins that are more or less simple operations on 1 operand. */
19025 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19027 enum machine_mode mode;
19028 tree type;
19030 if (d->name == 0)
19031 continue;
19032 mode = insn_data[d->icode].operand[1].mode;
19034 switch (mode)
19036 case V16QImode:
19037 type = v16qi_ftype_v16qi;
19038 break;
19039 case V8HImode:
19040 type = v8hi_ftype_v8hi;
19041 break;
19042 case V4SImode:
19043 type = v4si_ftype_v4si;
19044 break;
19045 case V2DFmode:
19046 type = v2df_ftype_v2df;
19047 break;
19048 case V4SFmode:
19049 type = v4sf_ftype_v4sf;
19050 break;
19051 case V8QImode:
19052 type = v8qi_ftype_v8qi;
19053 break;
19054 case V4HImode:
19055 type = v4hi_ftype_v4hi;
19056 break;
19057 case V2SImode:
19058 type = v2si_ftype_v2si;
19059 break;
19061 default:
19062 abort ();
19065 def_builtin_const (d->mask, d->name, type, d->code);
19068 /* pcmpestr[im] insns. */
19069 for (i = 0, d = bdesc_pcmpestr;
19070 i < ARRAY_SIZE (bdesc_pcmpestr);
19071 i++, d++)
19073 if (d->code == IX86_BUILTIN_PCMPESTRM128)
19074 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
19075 else
19076 ftype = int_ftype_v16qi_int_v16qi_int_int;
19077 def_builtin_const (d->mask, d->name, ftype, d->code);
19080 /* pcmpistr[im] insns. */
19081 for (i = 0, d = bdesc_pcmpistr;
19082 i < ARRAY_SIZE (bdesc_pcmpistr);
19083 i++, d++)
19085 if (d->code == IX86_BUILTIN_PCMPISTRM128)
19086 ftype = v16qi_ftype_v16qi_v16qi_int;
19087 else
19088 ftype = int_ftype_v16qi_v16qi_int;
19089 def_builtin_const (d->mask, d->name, ftype, d->code);
19092 /* Add the remaining MMX insns with somewhat more complicated types. */
19093 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
19094 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
19095 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
19096 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
19098 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
19099 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
19100 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
19102 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
19103 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
19105 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
19106 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
19108 /* comi/ucomi insns. */
19109 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
19110 if (d->mask == OPTION_MASK_ISA_SSE2)
19111 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
19112 else
19113 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
19115 /* ptest insns. */
19116 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
19117 def_builtin_const (d->mask, d->name, int_ftype_v2di_v2di, d->code);
19119 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
19120 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
19121 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
19123 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
19124 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
19125 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
19126 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
19127 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
19128 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
19129 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
19130 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
19131 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
19132 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
19133 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
19135 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
19137 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
19138 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
19140 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
19141 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
19142 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
19143 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
19145 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
19146 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
19147 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
19148 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
19150 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
19152 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
19154 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
19155 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
19156 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
19157 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
19158 ftype = build_function_type_list (float_type_node,
19159 float_type_node,
19160 NULL_TREE);
19161 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtf", ftype, IX86_BUILTIN_RSQRTF);
19162 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
19163 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
19165 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
19167 /* Original 3DNow! */
19168 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
19169 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
19170 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
19171 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
19172 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
19173 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
19174 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
19175 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
19176 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
19177 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
19178 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
19179 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
19180 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
19181 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
19182 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
19183 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
19184 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
19185 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
19186 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
19187 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
19189 /* 3DNow! extension as used in the Athlon CPU. */
19190 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
19191 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
19192 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
19193 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
19194 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
19195 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
19197 /* SSE2 */
19198 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
19200 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
19201 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
19203 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
19204 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
19206 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
19207 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
19208 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
19209 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
19210 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
19212 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
19213 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
19214 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
19215 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
19217 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
19218 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
19220 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
19222 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
19223 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
19225 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
19226 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
19227 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
19228 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
19229 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
19231 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
19233 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
19234 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
19235 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
19236 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
19238 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
19239 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
19240 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
19242 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
19243 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
19244 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
19245 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
19247 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
19248 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
19249 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
19251 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
19252 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
19254 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
19255 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
19257 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
19258 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
19259 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
19260 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
19261 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
19262 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
19263 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
19265 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
19266 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
19267 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
19268 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
19269 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
19270 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
19271 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
19273 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
19274 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
19275 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
19276 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
19278 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
19280 /* Prescott New Instructions. */
19281 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
19282 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
19283 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
19285 /* SSSE3. */
19286 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
19287 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, IX86_BUILTIN_PALIGNR);
19289 /* SSE4.1. */
19290 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
19291 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
19292 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
19293 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
19294 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
19295 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
19296 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
19297 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
19298 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
19299 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
19300 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
19301 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
19302 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
19303 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
19305 /* SSE4.1 and SSE5 */
19306 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
19307 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
19308 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
19309 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
19311 /* SSE4.2. */
19312 ftype = build_function_type_list (unsigned_type_node,
19313 unsigned_type_node,
19314 unsigned_char_type_node,
19315 NULL_TREE);
19316 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
19317 ftype = build_function_type_list (unsigned_type_node,
19318 unsigned_type_node,
19319 short_unsigned_type_node,
19320 NULL_TREE);
19321 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
19322 ftype = build_function_type_list (unsigned_type_node,
19323 unsigned_type_node,
19324 unsigned_type_node,
19325 NULL_TREE);
19326 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
19327 ftype = build_function_type_list (long_long_unsigned_type_node,
19328 long_long_unsigned_type_node,
19329 long_long_unsigned_type_node,
19330 NULL_TREE);
19331 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
19333 /* AMDFAM10 SSE4A New built-ins */
19334 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
19335 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
19336 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
19337 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
19338 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
19339 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
19341 /* Access to the vec_init patterns. */
19342 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
19343 integer_type_node, NULL_TREE);
19344 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
19346 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
19347 short_integer_type_node,
19348 short_integer_type_node,
19349 short_integer_type_node, NULL_TREE);
19350 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
19352 ftype = build_function_type_list (V8QI_type_node, char_type_node,
19353 char_type_node, char_type_node,
19354 char_type_node, char_type_node,
19355 char_type_node, char_type_node,
19356 char_type_node, NULL_TREE);
19357 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
19359 /* Access to the vec_extract patterns. */
19360 ftype = build_function_type_list (double_type_node, V2DF_type_node,
19361 integer_type_node, NULL_TREE);
19362 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
19364 ftype = build_function_type_list (long_long_integer_type_node,
19365 V2DI_type_node, integer_type_node,
19366 NULL_TREE);
19367 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
19369 ftype = build_function_type_list (float_type_node, V4SF_type_node,
19370 integer_type_node, NULL_TREE);
19371 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
19373 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
19374 integer_type_node, NULL_TREE);
19375 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
19377 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
19378 integer_type_node, NULL_TREE);
19379 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
19381 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
19382 integer_type_node, NULL_TREE);
19383 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
19385 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
19386 integer_type_node, NULL_TREE);
19387 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
19389 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
19390 integer_type_node, NULL_TREE);
19391 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
19393 /* Access to the vec_set patterns. */
19394 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
19395 intDI_type_node,
19396 integer_type_node, NULL_TREE);
19397 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
19399 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
19400 float_type_node,
19401 integer_type_node, NULL_TREE);
19402 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
19404 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
19405 intSI_type_node,
19406 integer_type_node, NULL_TREE);
19407 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
19409 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
19410 intHI_type_node,
19411 integer_type_node, NULL_TREE);
19412 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
19414 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
19415 intHI_type_node,
19416 integer_type_node, NULL_TREE);
19417 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
19419 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
19420 intQI_type_node,
19421 integer_type_node, NULL_TREE);
19422 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
19424 /* Add SSE5 multi-arg argument instructions */
19425 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
19427 tree mtype = NULL_TREE;
19429 if (d->name == 0)
19430 continue;
19432 switch ((enum multi_arg_type)d->flag)
19434 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
19435 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
19436 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
19437 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
19438 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
19439 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
19440 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
19441 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
19442 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
19443 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
19444 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
19445 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
19446 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
19447 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
19448 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
19449 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
19450 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
19451 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
19452 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
19453 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
19454 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
19455 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
19456 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
19457 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
19458 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
19459 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
19460 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
19461 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
19462 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
19463 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
19464 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
19465 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
19466 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
19467 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
19468 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
19469 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
19470 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
19471 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
19472 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
19473 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
19474 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
19475 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
19476 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
19477 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
19478 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
19479 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
19480 case MULTI_ARG_UNKNOWN:
19481 default:
19482 gcc_unreachable ();
19485 if (mtype)
19486 def_builtin_const (d->mask, d->name, mtype, d->code);
19490 static void
19491 ix86_init_builtins (void)
19493 if (TARGET_MMX)
19494 ix86_init_mmx_sse_builtins ();
19497 /* Errors in the source file can cause expand_expr to return const0_rtx
19498 where we expect a vector. To avoid crashing, use one of the vector
19499 clear instructions. */
19500 static rtx
19501 safe_vector_operand (rtx x, enum machine_mode mode)
19503 if (x == const0_rtx)
19504 x = CONST0_RTX (mode);
19505 return x;
19508 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
19509 4 operands. The third argument must be a constant smaller than 8
19510 bits or xmm0. */
19512 static rtx
19513 ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
19514 rtx target)
19516 rtx pat;
19517 tree arg0 = CALL_EXPR_ARG (exp, 0);
19518 tree arg1 = CALL_EXPR_ARG (exp, 1);
19519 tree arg2 = CALL_EXPR_ARG (exp, 2);
19520 rtx op0 = expand_normal (arg0);
19521 rtx op1 = expand_normal (arg1);
19522 rtx op2 = expand_normal (arg2);
19523 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19524 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
19525 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
19526 enum machine_mode mode3 = insn_data[icode].operand[3].mode;
19528 if (VECTOR_MODE_P (mode1))
19529 op0 = safe_vector_operand (op0, mode1);
19530 if (VECTOR_MODE_P (mode2))
19531 op1 = safe_vector_operand (op1, mode2);
19532 if (VECTOR_MODE_P (mode3))
19533 op2 = safe_vector_operand (op2, mode3);
19535 if (optimize
19536 || target == 0
19537 || GET_MODE (target) != tmode
19538 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19539 target = gen_reg_rtx (tmode);
19541 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19542 op0 = copy_to_mode_reg (mode1, op0);
19543 if ((optimize && !register_operand (op1, mode2))
19544 || !(*insn_data[icode].operand[2].predicate) (op1, mode2))
19545 op1 = copy_to_mode_reg (mode2, op1);
19547 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19548 switch (icode)
19550 case CODE_FOR_sse4_1_blendvpd:
19551 case CODE_FOR_sse4_1_blendvps:
19552 case CODE_FOR_sse4_1_pblendvb:
19553 op2 = copy_to_mode_reg (mode3, op2);
19554 break;
19556 case CODE_FOR_sse4_1_roundsd:
19557 case CODE_FOR_sse4_1_roundss:
19558 error ("the third argument must be a 4-bit immediate");
19559 return const0_rtx;
19561 default:
19562 error ("the third argument must be an 8-bit immediate");
19563 return const0_rtx;
19566 pat = GEN_FCN (icode) (target, op0, op1, op2);
19567 if (! pat)
19568 return 0;
19569 emit_insn (pat);
19570 return target;
19573 /* Subroutine of ix86_expand_builtin to take care of crc32 insns. */
19575 static rtx
19576 ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
19578 rtx pat;
19579 tree arg0 = CALL_EXPR_ARG (exp, 0);
19580 tree arg1 = CALL_EXPR_ARG (exp, 1);
19581 rtx op0 = expand_normal (arg0);
19582 rtx op1 = expand_normal (arg1);
19583 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19584 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19585 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19587 if (optimize
19588 || !target
19589 || GET_MODE (target) != tmode
19590 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19591 target = gen_reg_rtx (tmode);
19593 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
19594 op0 = copy_to_mode_reg (mode0, op0);
19595 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
19597 op1 = copy_to_reg (op1);
19598 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
19601 pat = GEN_FCN (icode) (target, op0, op1);
19602 if (! pat)
19603 return 0;
19604 emit_insn (pat);
19605 return target;
19608 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
19610 static rtx
19611 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
19613 rtx pat, xops[3];
19614 tree arg0 = CALL_EXPR_ARG (exp, 0);
19615 tree arg1 = CALL_EXPR_ARG (exp, 1);
19616 rtx op0 = expand_normal (arg0);
19617 rtx op1 = expand_normal (arg1);
19618 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19619 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19620 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19622 if (VECTOR_MODE_P (mode0))
19623 op0 = safe_vector_operand (op0, mode0);
19624 if (VECTOR_MODE_P (mode1))
19625 op1 = safe_vector_operand (op1, mode1);
19627 if (optimize || !target
19628 || GET_MODE (target) != tmode
19629 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19630 target = gen_reg_rtx (tmode);
19632 if (GET_MODE (op1) == SImode && mode1 == TImode)
19634 rtx x = gen_reg_rtx (V4SImode);
19635 emit_insn (gen_sse2_loadd (x, op1));
19636 op1 = gen_lowpart (TImode, x);
19639 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
19640 op0 = copy_to_mode_reg (mode0, op0);
19641 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
19642 op1 = copy_to_mode_reg (mode1, op1);
19644 /* ??? Using ix86_fixup_binary_operands is problematic when
19645 we've got mismatched modes. Fake it. */
19647 xops[0] = target;
19648 xops[1] = op0;
19649 xops[2] = op1;
19651 if (tmode == mode0 && tmode == mode1)
19653 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
19654 op0 = xops[1];
19655 op1 = xops[2];
19657 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
19659 op0 = force_reg (mode0, op0);
19660 op1 = force_reg (mode1, op1);
19661 target = gen_reg_rtx (tmode);
19664 pat = GEN_FCN (icode) (target, op0, op1);
19665 if (! pat)
19666 return 0;
19667 emit_insn (pat);
19668 return target;
19671 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
19673 static rtx
19674 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
19675 enum multi_arg_type m_type,
19676 enum insn_code sub_code)
19678 rtx pat;
19679 int i;
19680 int nargs;
19681 bool comparison_p = false;
19682 bool tf_p = false;
19683 bool last_arg_constant = false;
19684 int num_memory = 0;
19685 struct {
19686 rtx op;
19687 enum machine_mode mode;
19688 } args[4];
19690 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19692 switch (m_type)
19694 case MULTI_ARG_3_SF:
19695 case MULTI_ARG_3_DF:
19696 case MULTI_ARG_3_DI:
19697 case MULTI_ARG_3_SI:
19698 case MULTI_ARG_3_SI_DI:
19699 case MULTI_ARG_3_HI:
19700 case MULTI_ARG_3_HI_SI:
19701 case MULTI_ARG_3_QI:
19702 case MULTI_ARG_3_PERMPS:
19703 case MULTI_ARG_3_PERMPD:
19704 nargs = 3;
19705 break;
19707 case MULTI_ARG_2_SF:
19708 case MULTI_ARG_2_DF:
19709 case MULTI_ARG_2_DI:
19710 case MULTI_ARG_2_SI:
19711 case MULTI_ARG_2_HI:
19712 case MULTI_ARG_2_QI:
19713 nargs = 2;
19714 break;
19716 case MULTI_ARG_2_DI_IMM:
19717 case MULTI_ARG_2_SI_IMM:
19718 case MULTI_ARG_2_HI_IMM:
19719 case MULTI_ARG_2_QI_IMM:
19720 nargs = 2;
19721 last_arg_constant = true;
19722 break;
19724 case MULTI_ARG_1_SF:
19725 case MULTI_ARG_1_DF:
19726 case MULTI_ARG_1_DI:
19727 case MULTI_ARG_1_SI:
19728 case MULTI_ARG_1_HI:
19729 case MULTI_ARG_1_QI:
19730 case MULTI_ARG_1_SI_DI:
19731 case MULTI_ARG_1_HI_DI:
19732 case MULTI_ARG_1_HI_SI:
19733 case MULTI_ARG_1_QI_DI:
19734 case MULTI_ARG_1_QI_SI:
19735 case MULTI_ARG_1_QI_HI:
19736 case MULTI_ARG_1_PH2PS:
19737 case MULTI_ARG_1_PS2PH:
19738 nargs = 1;
19739 break;
19741 case MULTI_ARG_2_SF_CMP:
19742 case MULTI_ARG_2_DF_CMP:
19743 case MULTI_ARG_2_DI_CMP:
19744 case MULTI_ARG_2_SI_CMP:
19745 case MULTI_ARG_2_HI_CMP:
19746 case MULTI_ARG_2_QI_CMP:
19747 nargs = 2;
19748 comparison_p = true;
19749 break;
19751 case MULTI_ARG_2_SF_TF:
19752 case MULTI_ARG_2_DF_TF:
19753 case MULTI_ARG_2_DI_TF:
19754 case MULTI_ARG_2_SI_TF:
19755 case MULTI_ARG_2_HI_TF:
19756 case MULTI_ARG_2_QI_TF:
19757 nargs = 2;
19758 tf_p = true;
19759 break;
19761 case MULTI_ARG_UNKNOWN:
19762 default:
19763 gcc_unreachable ();
19766 if (optimize || !target
19767 || GET_MODE (target) != tmode
19768 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19769 target = gen_reg_rtx (tmode);
19771 gcc_assert (nargs <= 4);
19773 for (i = 0; i < nargs; i++)
19775 tree arg = CALL_EXPR_ARG (exp, i);
19776 rtx op = expand_normal (arg);
19777 int adjust = (comparison_p) ? 1 : 0;
19778 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
19780 if (last_arg_constant && i == nargs-1)
19782 if (GET_CODE (op) != CONST_INT)
19784 error ("last argument must be an immediate");
19785 return gen_reg_rtx (tmode);
19788 else
19790 if (VECTOR_MODE_P (mode))
19791 op = safe_vector_operand (op, mode);
19793 /* If we aren't optimizing, only allow one memory operand to be
19794 generated. */
19795 if (memory_operand (op, mode))
19796 num_memory++;
19798 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
19800 if (optimize
19801 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
19802 || num_memory > 1)
19803 op = force_reg (mode, op);
19806 args[i].op = op;
19807 args[i].mode = mode;
19810 switch (nargs)
19812 case 1:
19813 pat = GEN_FCN (icode) (target, args[0].op);
19814 break;
19816 case 2:
19817 if (tf_p)
19818 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
19819 GEN_INT ((int)sub_code));
19820 else if (! comparison_p)
19821 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
19822 else
19824 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
19825 args[0].op,
19826 args[1].op);
19828 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
19830 break;
19832 case 3:
19833 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
19834 break;
19836 default:
19837 gcc_unreachable ();
19840 if (! pat)
19841 return 0;
19843 emit_insn (pat);
19844 return target;
19847 /* Subroutine of ix86_expand_builtin to take care of stores. */
19849 static rtx
19850 ix86_expand_store_builtin (enum insn_code icode, tree exp)
19852 rtx pat;
19853 tree arg0 = CALL_EXPR_ARG (exp, 0);
19854 tree arg1 = CALL_EXPR_ARG (exp, 1);
19855 rtx op0 = expand_normal (arg0);
19856 rtx op1 = expand_normal (arg1);
19857 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
19858 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
19860 if (VECTOR_MODE_P (mode1))
19861 op1 = safe_vector_operand (op1, mode1);
19863 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19864 op1 = copy_to_mode_reg (mode1, op1);
19866 pat = GEN_FCN (icode) (op0, op1);
19867 if (pat)
19868 emit_insn (pat);
19869 return 0;
19872 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
19874 static rtx
19875 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
19876 rtx target, int do_load)
19878 rtx pat;
19879 tree arg0 = CALL_EXPR_ARG (exp, 0);
19880 rtx op0 = expand_normal (arg0);
19881 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19882 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19884 if (optimize || !target
19885 || GET_MODE (target) != tmode
19886 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19887 target = gen_reg_rtx (tmode);
19888 if (do_load)
19889 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19890 else
19892 if (VECTOR_MODE_P (mode0))
19893 op0 = safe_vector_operand (op0, mode0);
19895 if ((optimize && !register_operand (op0, mode0))
19896 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19897 op0 = copy_to_mode_reg (mode0, op0);
19900 switch (icode)
19902 case CODE_FOR_sse4_1_roundpd:
19903 case CODE_FOR_sse4_1_roundps:
19905 tree arg1 = CALL_EXPR_ARG (exp, 1);
19906 rtx op1 = expand_normal (arg1);
19907 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19909 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19911 error ("the second argument must be a 4-bit immediate");
19912 return const0_rtx;
19914 pat = GEN_FCN (icode) (target, op0, op1);
19916 break;
19917 default:
19918 pat = GEN_FCN (icode) (target, op0);
19919 break;
19922 if (! pat)
19923 return 0;
19924 emit_insn (pat);
19925 return target;
19928 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
19929 sqrtss, rsqrtss, rcpss. */
19931 static rtx
19932 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
19934 rtx pat;
19935 tree arg0 = CALL_EXPR_ARG (exp, 0);
19936 rtx op1, op0 = expand_normal (arg0);
19937 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19938 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19940 if (optimize || !target
19941 || GET_MODE (target) != tmode
19942 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19943 target = gen_reg_rtx (tmode);
19945 if (VECTOR_MODE_P (mode0))
19946 op0 = safe_vector_operand (op0, mode0);
19948 if ((optimize && !register_operand (op0, mode0))
19949 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19950 op0 = copy_to_mode_reg (mode0, op0);
19952 op1 = op0;
19953 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
19954 op1 = copy_to_mode_reg (mode0, op1);
19956 pat = GEN_FCN (icode) (target, op0, op1);
19957 if (! pat)
19958 return 0;
19959 emit_insn (pat);
19960 return target;
19963 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
19965 static rtx
19966 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
19967 rtx target)
19969 rtx pat;
19970 tree arg0 = CALL_EXPR_ARG (exp, 0);
19971 tree arg1 = CALL_EXPR_ARG (exp, 1);
19972 rtx op0 = expand_normal (arg0);
19973 rtx op1 = expand_normal (arg1);
19974 rtx op2;
19975 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
19976 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
19977 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
19978 enum rtx_code comparison = d->comparison;
19980 if (VECTOR_MODE_P (mode0))
19981 op0 = safe_vector_operand (op0, mode0);
19982 if (VECTOR_MODE_P (mode1))
19983 op1 = safe_vector_operand (op1, mode1);
19985 /* Swap operands if we have a comparison that isn't available in
19986 hardware. */
19987 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
19989 rtx tmp = gen_reg_rtx (mode1);
19990 emit_move_insn (tmp, op1);
19991 op1 = op0;
19992 op0 = tmp;
19995 if (optimize || !target
19996 || GET_MODE (target) != tmode
19997 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
19998 target = gen_reg_rtx (tmode);
20000 if ((optimize && !register_operand (op0, mode0))
20001 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
20002 op0 = copy_to_mode_reg (mode0, op0);
20003 if ((optimize && !register_operand (op1, mode1))
20004 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
20005 op1 = copy_to_mode_reg (mode1, op1);
20007 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
20008 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
20009 if (! pat)
20010 return 0;
20011 emit_insn (pat);
20012 return target;
20015 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
20017 static rtx
20018 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
20019 rtx target)
20021 rtx pat;
20022 tree arg0 = CALL_EXPR_ARG (exp, 0);
20023 tree arg1 = CALL_EXPR_ARG (exp, 1);
20024 rtx op0 = expand_normal (arg0);
20025 rtx op1 = expand_normal (arg1);
20026 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20027 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20028 enum rtx_code comparison = d->comparison;
20030 if (VECTOR_MODE_P (mode0))
20031 op0 = safe_vector_operand (op0, mode0);
20032 if (VECTOR_MODE_P (mode1))
20033 op1 = safe_vector_operand (op1, mode1);
20035 /* Swap operands if we have a comparison that isn't available in
20036 hardware. */
20037 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
20039 rtx tmp = op1;
20040 op1 = op0;
20041 op0 = tmp;
20044 target = gen_reg_rtx (SImode);
20045 emit_move_insn (target, const0_rtx);
20046 target = gen_rtx_SUBREG (QImode, target, 0);
20048 if ((optimize && !register_operand (op0, mode0))
20049 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20050 op0 = copy_to_mode_reg (mode0, op0);
20051 if ((optimize && !register_operand (op1, mode1))
20052 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20053 op1 = copy_to_mode_reg (mode1, op1);
20055 pat = GEN_FCN (d->icode) (op0, op1);
20056 if (! pat)
20057 return 0;
20058 emit_insn (pat);
20059 emit_insn (gen_rtx_SET (VOIDmode,
20060 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20061 gen_rtx_fmt_ee (comparison, QImode,
20062 SET_DEST (pat),
20063 const0_rtx)));
20065 return SUBREG_REG (target);
20068 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
20070 static rtx
20071 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
20072 rtx target)
20074 rtx pat;
20075 tree arg0 = CALL_EXPR_ARG (exp, 0);
20076 tree arg1 = CALL_EXPR_ARG (exp, 1);
20077 rtx op0 = expand_normal (arg0);
20078 rtx op1 = expand_normal (arg1);
20079 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20080 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20081 enum rtx_code comparison = d->comparison;
20083 if (VECTOR_MODE_P (mode0))
20084 op0 = safe_vector_operand (op0, mode0);
20085 if (VECTOR_MODE_P (mode1))
20086 op1 = safe_vector_operand (op1, mode1);
20088 target = gen_reg_rtx (SImode);
20089 emit_move_insn (target, const0_rtx);
20090 target = gen_rtx_SUBREG (QImode, target, 0);
20092 if ((optimize && !register_operand (op0, mode0))
20093 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20094 op0 = copy_to_mode_reg (mode0, op0);
20095 if ((optimize && !register_operand (op1, mode1))
20096 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20097 op1 = copy_to_mode_reg (mode1, op1);
20099 pat = GEN_FCN (d->icode) (op0, op1);
20100 if (! pat)
20101 return 0;
20102 emit_insn (pat);
20103 emit_insn (gen_rtx_SET (VOIDmode,
20104 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20105 gen_rtx_fmt_ee (comparison, QImode,
20106 SET_DEST (pat),
20107 const0_rtx)));
20109 return SUBREG_REG (target);
20112 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
20114 static rtx
20115 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
20116 tree exp, rtx target)
20118 rtx pat;
20119 tree arg0 = CALL_EXPR_ARG (exp, 0);
20120 tree arg1 = CALL_EXPR_ARG (exp, 1);
20121 tree arg2 = CALL_EXPR_ARG (exp, 2);
20122 tree arg3 = CALL_EXPR_ARG (exp, 3);
20123 tree arg4 = CALL_EXPR_ARG (exp, 4);
20124 rtx scratch0, scratch1;
20125 rtx op0 = expand_normal (arg0);
20126 rtx op1 = expand_normal (arg1);
20127 rtx op2 = expand_normal (arg2);
20128 rtx op3 = expand_normal (arg3);
20129 rtx op4 = expand_normal (arg4);
20130 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
20132 tmode0 = insn_data[d->icode].operand[0].mode;
20133 tmode1 = insn_data[d->icode].operand[1].mode;
20134 modev2 = insn_data[d->icode].operand[2].mode;
20135 modei3 = insn_data[d->icode].operand[3].mode;
20136 modev4 = insn_data[d->icode].operand[4].mode;
20137 modei5 = insn_data[d->icode].operand[5].mode;
20138 modeimm = insn_data[d->icode].operand[6].mode;
20140 if (VECTOR_MODE_P (modev2))
20141 op0 = safe_vector_operand (op0, modev2);
20142 if (VECTOR_MODE_P (modev4))
20143 op2 = safe_vector_operand (op2, modev4);
20145 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20146 op0 = copy_to_mode_reg (modev2, op0);
20147 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
20148 op1 = copy_to_mode_reg (modei3, op1);
20149 if ((optimize && !register_operand (op2, modev4))
20150 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
20151 op2 = copy_to_mode_reg (modev4, op2);
20152 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
20153 op3 = copy_to_mode_reg (modei5, op3);
20155 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
20157 error ("the fifth argument must be a 8-bit immediate");
20158 return const0_rtx;
20161 if (d->code == IX86_BUILTIN_PCMPESTRI128)
20163 if (optimize || !target
20164 || GET_MODE (target) != tmode0
20165 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20166 target = gen_reg_rtx (tmode0);
20168 scratch1 = gen_reg_rtx (tmode1);
20170 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
20172 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
20174 if (optimize || !target
20175 || GET_MODE (target) != tmode1
20176 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20177 target = gen_reg_rtx (tmode1);
20179 scratch0 = gen_reg_rtx (tmode0);
20181 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
20183 else
20185 gcc_assert (d->flag);
20187 scratch0 = gen_reg_rtx (tmode0);
20188 scratch1 = gen_reg_rtx (tmode1);
20190 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
20193 if (! pat)
20194 return 0;
20196 emit_insn (pat);
20198 if (d->flag)
20200 target = gen_reg_rtx (SImode);
20201 emit_move_insn (target, const0_rtx);
20202 target = gen_rtx_SUBREG (QImode, target, 0);
20204 emit_insn
20205 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20206 gen_rtx_fmt_ee (EQ, QImode,
20207 gen_rtx_REG ((enum machine_mode) d->flag,
20208 FLAGS_REG),
20209 const0_rtx)));
20210 return SUBREG_REG (target);
20212 else
20213 return target;
20217 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
20219 static rtx
20220 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
20221 tree exp, rtx target)
20223 rtx pat;
20224 tree arg0 = CALL_EXPR_ARG (exp, 0);
20225 tree arg1 = CALL_EXPR_ARG (exp, 1);
20226 tree arg2 = CALL_EXPR_ARG (exp, 2);
20227 rtx scratch0, scratch1;
20228 rtx op0 = expand_normal (arg0);
20229 rtx op1 = expand_normal (arg1);
20230 rtx op2 = expand_normal (arg2);
20231 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
20233 tmode0 = insn_data[d->icode].operand[0].mode;
20234 tmode1 = insn_data[d->icode].operand[1].mode;
20235 modev2 = insn_data[d->icode].operand[2].mode;
20236 modev3 = insn_data[d->icode].operand[3].mode;
20237 modeimm = insn_data[d->icode].operand[4].mode;
20239 if (VECTOR_MODE_P (modev2))
20240 op0 = safe_vector_operand (op0, modev2);
20241 if (VECTOR_MODE_P (modev3))
20242 op1 = safe_vector_operand (op1, modev3);
20244 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20245 op0 = copy_to_mode_reg (modev2, op0);
20246 if ((optimize && !register_operand (op1, modev3))
20247 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
20248 op1 = copy_to_mode_reg (modev3, op1);
20250 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
20252 error ("the third argument must be a 8-bit immediate");
20253 return const0_rtx;
20256 if (d->code == IX86_BUILTIN_PCMPISTRI128)
20258 if (optimize || !target
20259 || GET_MODE (target) != tmode0
20260 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20261 target = gen_reg_rtx (tmode0);
20263 scratch1 = gen_reg_rtx (tmode1);
20265 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
20267 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
20269 if (optimize || !target
20270 || GET_MODE (target) != tmode1
20271 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20272 target = gen_reg_rtx (tmode1);
20274 scratch0 = gen_reg_rtx (tmode0);
20276 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
20278 else
20280 gcc_assert (d->flag);
20282 scratch0 = gen_reg_rtx (tmode0);
20283 scratch1 = gen_reg_rtx (tmode1);
20285 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
20288 if (! pat)
20289 return 0;
20291 emit_insn (pat);
20293 if (d->flag)
20295 target = gen_reg_rtx (SImode);
20296 emit_move_insn (target, const0_rtx);
20297 target = gen_rtx_SUBREG (QImode, target, 0);
20299 emit_insn
20300 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20301 gen_rtx_fmt_ee (EQ, QImode,
20302 gen_rtx_REG ((enum machine_mode) d->flag,
20303 FLAGS_REG),
20304 const0_rtx)));
20305 return SUBREG_REG (target);
20307 else
20308 return target;
20311 /* Return the integer constant in ARG. Constrain it to be in the range
20312 of the subparts of VEC_TYPE; issue an error if not. */
20314 static int
20315 get_element_number (tree vec_type, tree arg)
20317 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
20319 if (!host_integerp (arg, 1)
20320 || (elt = tree_low_cst (arg, 1), elt > max))
20322 error ("selector must be an integer constant in the range 0..%wi", max);
20323 return 0;
20326 return elt;
20329 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20330 ix86_expand_vector_init. We DO have language-level syntax for this, in
20331 the form of (type){ init-list }. Except that since we can't place emms
20332 instructions from inside the compiler, we can't allow the use of MMX
20333 registers unless the user explicitly asks for it. So we do *not* define
20334 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
20335 we have builtins invoked by mmintrin.h that gives us license to emit
20336 these sorts of instructions. */
20338 static rtx
20339 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
20341 enum machine_mode tmode = TYPE_MODE (type);
20342 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
20343 int i, n_elt = GET_MODE_NUNITS (tmode);
20344 rtvec v = rtvec_alloc (n_elt);
20346 gcc_assert (VECTOR_MODE_P (tmode));
20347 gcc_assert (call_expr_nargs (exp) == n_elt);
20349 for (i = 0; i < n_elt; ++i)
20351 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
20352 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
20355 if (!target || !register_operand (target, tmode))
20356 target = gen_reg_rtx (tmode);
20358 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
20359 return target;
20362 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20363 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
20364 had a language-level syntax for referencing vector elements. */
20366 static rtx
20367 ix86_expand_vec_ext_builtin (tree exp, rtx target)
20369 enum machine_mode tmode, mode0;
20370 tree arg0, arg1;
20371 int elt;
20372 rtx op0;
20374 arg0 = CALL_EXPR_ARG (exp, 0);
20375 arg1 = CALL_EXPR_ARG (exp, 1);
20377 op0 = expand_normal (arg0);
20378 elt = get_element_number (TREE_TYPE (arg0), arg1);
20380 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
20381 mode0 = TYPE_MODE (TREE_TYPE (arg0));
20382 gcc_assert (VECTOR_MODE_P (mode0));
20384 op0 = force_reg (mode0, op0);
20386 if (optimize || !target || !register_operand (target, tmode))
20387 target = gen_reg_rtx (tmode);
20389 ix86_expand_vector_extract (true, target, op0, elt);
20391 return target;
20394 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20395 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
20396 a language-level syntax for referencing vector elements. */
20398 static rtx
20399 ix86_expand_vec_set_builtin (tree exp)
20401 enum machine_mode tmode, mode1;
20402 tree arg0, arg1, arg2;
20403 int elt;
20404 rtx op0, op1, target;
20406 arg0 = CALL_EXPR_ARG (exp, 0);
20407 arg1 = CALL_EXPR_ARG (exp, 1);
20408 arg2 = CALL_EXPR_ARG (exp, 2);
20410 tmode = TYPE_MODE (TREE_TYPE (arg0));
20411 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
20412 gcc_assert (VECTOR_MODE_P (tmode));
20414 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
20415 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
20416 elt = get_element_number (TREE_TYPE (arg0), arg2);
20418 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
20419 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
20421 op0 = force_reg (tmode, op0);
20422 op1 = force_reg (mode1, op1);
20424 /* OP0 is the source of these builtin functions and shouldn't be
20425 modified. Create a copy, use it and return it as target. */
20426 target = gen_reg_rtx (tmode);
20427 emit_move_insn (target, op0);
20428 ix86_expand_vector_set (true, target, op1, elt);
20430 return target;
20433 /* Expand an expression EXP that calls a built-in function,
20434 with result going to TARGET if that's convenient
20435 (and in mode MODE if that's convenient).
20436 SUBTARGET may be used as the target for computing one of EXP's operands.
20437 IGNORE is nonzero if the value is to be ignored. */
20439 static rtx
20440 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
20441 enum machine_mode mode ATTRIBUTE_UNUSED,
20442 int ignore ATTRIBUTE_UNUSED)
20444 const struct builtin_description *d;
20445 size_t i;
20446 enum insn_code icode;
20447 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
20448 tree arg0, arg1, arg2, arg3;
20449 rtx op0, op1, op2, op3, pat;
20450 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
20451 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
20453 switch (fcode)
20455 case IX86_BUILTIN_EMMS:
20456 emit_insn (gen_mmx_emms ());
20457 return 0;
20459 case IX86_BUILTIN_SFENCE:
20460 emit_insn (gen_sse_sfence ());
20461 return 0;
20463 case IX86_BUILTIN_MASKMOVQ:
20464 case IX86_BUILTIN_MASKMOVDQU:
20465 icode = (fcode == IX86_BUILTIN_MASKMOVQ
20466 ? CODE_FOR_mmx_maskmovq
20467 : CODE_FOR_sse2_maskmovdqu);
20468 /* Note the arg order is different from the operand order. */
20469 arg1 = CALL_EXPR_ARG (exp, 0);
20470 arg2 = CALL_EXPR_ARG (exp, 1);
20471 arg0 = CALL_EXPR_ARG (exp, 2);
20472 op0 = expand_normal (arg0);
20473 op1 = expand_normal (arg1);
20474 op2 = expand_normal (arg2);
20475 mode0 = insn_data[icode].operand[0].mode;
20476 mode1 = insn_data[icode].operand[1].mode;
20477 mode2 = insn_data[icode].operand[2].mode;
20479 op0 = force_reg (Pmode, op0);
20480 op0 = gen_rtx_MEM (mode1, op0);
20482 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
20483 op0 = copy_to_mode_reg (mode0, op0);
20484 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
20485 op1 = copy_to_mode_reg (mode1, op1);
20486 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
20487 op2 = copy_to_mode_reg (mode2, op2);
20488 pat = GEN_FCN (icode) (op0, op1, op2);
20489 if (! pat)
20490 return 0;
20491 emit_insn (pat);
20492 return 0;
20494 case IX86_BUILTIN_RSQRTF:
20495 return ix86_expand_unop1_builtin (CODE_FOR_rsqrtsf2, exp, target);
20497 case IX86_BUILTIN_SQRTSS:
20498 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
20499 case IX86_BUILTIN_RSQRTSS:
20500 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
20501 case IX86_BUILTIN_RCPSS:
20502 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
20504 case IX86_BUILTIN_LOADUPS:
20505 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
20507 case IX86_BUILTIN_STOREUPS:
20508 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
20510 case IX86_BUILTIN_LOADHPS:
20511 case IX86_BUILTIN_LOADLPS:
20512 case IX86_BUILTIN_LOADHPD:
20513 case IX86_BUILTIN_LOADLPD:
20514 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
20515 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
20516 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
20517 : CODE_FOR_sse2_loadlpd);
20518 arg0 = CALL_EXPR_ARG (exp, 0);
20519 arg1 = CALL_EXPR_ARG (exp, 1);
20520 op0 = expand_normal (arg0);
20521 op1 = expand_normal (arg1);
20522 tmode = insn_data[icode].operand[0].mode;
20523 mode0 = insn_data[icode].operand[1].mode;
20524 mode1 = insn_data[icode].operand[2].mode;
20526 op0 = force_reg (mode0, op0);
20527 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
20528 if (optimize || target == 0
20529 || GET_MODE (target) != tmode
20530 || !register_operand (target, tmode))
20531 target = gen_reg_rtx (tmode);
20532 pat = GEN_FCN (icode) (target, op0, op1);
20533 if (! pat)
20534 return 0;
20535 emit_insn (pat);
20536 return target;
20538 case IX86_BUILTIN_STOREHPS:
20539 case IX86_BUILTIN_STORELPS:
20540 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
20541 : CODE_FOR_sse_storelps);
20542 arg0 = CALL_EXPR_ARG (exp, 0);
20543 arg1 = CALL_EXPR_ARG (exp, 1);
20544 op0 = expand_normal (arg0);
20545 op1 = expand_normal (arg1);
20546 mode0 = insn_data[icode].operand[0].mode;
20547 mode1 = insn_data[icode].operand[1].mode;
20549 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20550 op1 = force_reg (mode1, op1);
20552 pat = GEN_FCN (icode) (op0, op1);
20553 if (! pat)
20554 return 0;
20555 emit_insn (pat);
20556 return const0_rtx;
20558 case IX86_BUILTIN_MOVNTPS:
20559 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
20560 case IX86_BUILTIN_MOVNTQ:
20561 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
20563 case IX86_BUILTIN_LDMXCSR:
20564 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
20565 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
20566 emit_move_insn (target, op0);
20567 emit_insn (gen_sse_ldmxcsr (target));
20568 return 0;
20570 case IX86_BUILTIN_STMXCSR:
20571 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
20572 emit_insn (gen_sse_stmxcsr (target));
20573 return copy_to_mode_reg (SImode, target);
20575 case IX86_BUILTIN_SHUFPS:
20576 case IX86_BUILTIN_SHUFPD:
20577 icode = (fcode == IX86_BUILTIN_SHUFPS
20578 ? CODE_FOR_sse_shufps
20579 : CODE_FOR_sse2_shufpd);
20580 arg0 = CALL_EXPR_ARG (exp, 0);
20581 arg1 = CALL_EXPR_ARG (exp, 1);
20582 arg2 = CALL_EXPR_ARG (exp, 2);
20583 op0 = expand_normal (arg0);
20584 op1 = expand_normal (arg1);
20585 op2 = expand_normal (arg2);
20586 tmode = insn_data[icode].operand[0].mode;
20587 mode0 = insn_data[icode].operand[1].mode;
20588 mode1 = insn_data[icode].operand[2].mode;
20589 mode2 = insn_data[icode].operand[3].mode;
20591 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20592 op0 = copy_to_mode_reg (mode0, op0);
20593 if ((optimize && !register_operand (op1, mode1))
20594 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
20595 op1 = copy_to_mode_reg (mode1, op1);
20596 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
20598 /* @@@ better error message */
20599 error ("mask must be an immediate");
20600 return gen_reg_rtx (tmode);
20602 if (optimize || target == 0
20603 || GET_MODE (target) != tmode
20604 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20605 target = gen_reg_rtx (tmode);
20606 pat = GEN_FCN (icode) (target, op0, op1, op2);
20607 if (! pat)
20608 return 0;
20609 emit_insn (pat);
20610 return target;
20612 case IX86_BUILTIN_PSHUFW:
20613 case IX86_BUILTIN_PSHUFD:
20614 case IX86_BUILTIN_PSHUFHW:
20615 case IX86_BUILTIN_PSHUFLW:
20616 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
20617 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
20618 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
20619 : CODE_FOR_mmx_pshufw);
20620 arg0 = CALL_EXPR_ARG (exp, 0);
20621 arg1 = CALL_EXPR_ARG (exp, 1);
20622 op0 = expand_normal (arg0);
20623 op1 = expand_normal (arg1);
20624 tmode = insn_data[icode].operand[0].mode;
20625 mode1 = insn_data[icode].operand[1].mode;
20626 mode2 = insn_data[icode].operand[2].mode;
20628 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20629 op0 = copy_to_mode_reg (mode1, op0);
20630 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20632 /* @@@ better error message */
20633 error ("mask must be an immediate");
20634 return const0_rtx;
20636 if (target == 0
20637 || GET_MODE (target) != tmode
20638 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20639 target = gen_reg_rtx (tmode);
20640 pat = GEN_FCN (icode) (target, op0, op1);
20641 if (! pat)
20642 return 0;
20643 emit_insn (pat);
20644 return target;
20646 case IX86_BUILTIN_PSLLWI128:
20647 icode = CODE_FOR_ashlv8hi3;
20648 goto do_pshifti;
20649 case IX86_BUILTIN_PSLLDI128:
20650 icode = CODE_FOR_ashlv4si3;
20651 goto do_pshifti;
20652 case IX86_BUILTIN_PSLLQI128:
20653 icode = CODE_FOR_ashlv2di3;
20654 goto do_pshifti;
20655 case IX86_BUILTIN_PSRAWI128:
20656 icode = CODE_FOR_ashrv8hi3;
20657 goto do_pshifti;
20658 case IX86_BUILTIN_PSRADI128:
20659 icode = CODE_FOR_ashrv4si3;
20660 goto do_pshifti;
20661 case IX86_BUILTIN_PSRLWI128:
20662 icode = CODE_FOR_lshrv8hi3;
20663 goto do_pshifti;
20664 case IX86_BUILTIN_PSRLDI128:
20665 icode = CODE_FOR_lshrv4si3;
20666 goto do_pshifti;
20667 case IX86_BUILTIN_PSRLQI128:
20668 icode = CODE_FOR_lshrv2di3;
20669 goto do_pshifti;
20670 do_pshifti:
20671 arg0 = CALL_EXPR_ARG (exp, 0);
20672 arg1 = CALL_EXPR_ARG (exp, 1);
20673 op0 = expand_normal (arg0);
20674 op1 = expand_normal (arg1);
20676 if (!CONST_INT_P (op1))
20678 error ("shift must be an immediate");
20679 return const0_rtx;
20681 if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
20682 op1 = GEN_INT (255);
20684 tmode = insn_data[icode].operand[0].mode;
20685 mode1 = insn_data[icode].operand[1].mode;
20686 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20687 op0 = copy_to_reg (op0);
20689 target = gen_reg_rtx (tmode);
20690 pat = GEN_FCN (icode) (target, op0, op1);
20691 if (!pat)
20692 return 0;
20693 emit_insn (pat);
20694 return target;
20696 case IX86_BUILTIN_PSLLW128:
20697 icode = CODE_FOR_ashlv8hi3;
20698 goto do_pshift;
20699 case IX86_BUILTIN_PSLLD128:
20700 icode = CODE_FOR_ashlv4si3;
20701 goto do_pshift;
20702 case IX86_BUILTIN_PSLLQ128:
20703 icode = CODE_FOR_ashlv2di3;
20704 goto do_pshift;
20705 case IX86_BUILTIN_PSRAW128:
20706 icode = CODE_FOR_ashrv8hi3;
20707 goto do_pshift;
20708 case IX86_BUILTIN_PSRAD128:
20709 icode = CODE_FOR_ashrv4si3;
20710 goto do_pshift;
20711 case IX86_BUILTIN_PSRLW128:
20712 icode = CODE_FOR_lshrv8hi3;
20713 goto do_pshift;
20714 case IX86_BUILTIN_PSRLD128:
20715 icode = CODE_FOR_lshrv4si3;
20716 goto do_pshift;
20717 case IX86_BUILTIN_PSRLQ128:
20718 icode = CODE_FOR_lshrv2di3;
20719 goto do_pshift;
20720 do_pshift:
20721 arg0 = CALL_EXPR_ARG (exp, 0);
20722 arg1 = CALL_EXPR_ARG (exp, 1);
20723 op0 = expand_normal (arg0);
20724 op1 = expand_normal (arg1);
20726 tmode = insn_data[icode].operand[0].mode;
20727 mode1 = insn_data[icode].operand[1].mode;
20729 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20730 op0 = copy_to_reg (op0);
20732 op1 = simplify_gen_subreg (SImode, op1, GET_MODE (op1), 0);
20733 if (! (*insn_data[icode].operand[2].predicate) (op1, SImode))
20734 op1 = copy_to_reg (op1);
20736 target = gen_reg_rtx (tmode);
20737 pat = GEN_FCN (icode) (target, op0, op1);
20738 if (!pat)
20739 return 0;
20740 emit_insn (pat);
20741 return target;
20743 case IX86_BUILTIN_PSLLDQI128:
20744 case IX86_BUILTIN_PSRLDQI128:
20745 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
20746 : CODE_FOR_sse2_lshrti3);
20747 arg0 = CALL_EXPR_ARG (exp, 0);
20748 arg1 = CALL_EXPR_ARG (exp, 1);
20749 op0 = expand_normal (arg0);
20750 op1 = expand_normal (arg1);
20751 tmode = insn_data[icode].operand[0].mode;
20752 mode1 = insn_data[icode].operand[1].mode;
20753 mode2 = insn_data[icode].operand[2].mode;
20755 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20757 op0 = copy_to_reg (op0);
20758 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
20760 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20762 error ("shift must be an immediate");
20763 return const0_rtx;
20765 target = gen_reg_rtx (V2DImode);
20766 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
20767 op0, op1);
20768 if (! pat)
20769 return 0;
20770 emit_insn (pat);
20771 return target;
20773 case IX86_BUILTIN_FEMMS:
20774 emit_insn (gen_mmx_femms ());
20775 return NULL_RTX;
20777 case IX86_BUILTIN_PAVGUSB:
20778 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
20780 case IX86_BUILTIN_PF2ID:
20781 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
20783 case IX86_BUILTIN_PFACC:
20784 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
20786 case IX86_BUILTIN_PFADD:
20787 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
20789 case IX86_BUILTIN_PFCMPEQ:
20790 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
20792 case IX86_BUILTIN_PFCMPGE:
20793 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
20795 case IX86_BUILTIN_PFCMPGT:
20796 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
20798 case IX86_BUILTIN_PFMAX:
20799 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
20801 case IX86_BUILTIN_PFMIN:
20802 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
20804 case IX86_BUILTIN_PFMUL:
20805 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
20807 case IX86_BUILTIN_PFRCP:
20808 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
20810 case IX86_BUILTIN_PFRCPIT1:
20811 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
20813 case IX86_BUILTIN_PFRCPIT2:
20814 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
20816 case IX86_BUILTIN_PFRSQIT1:
20817 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
20819 case IX86_BUILTIN_PFRSQRT:
20820 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
20822 case IX86_BUILTIN_PFSUB:
20823 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
20825 case IX86_BUILTIN_PFSUBR:
20826 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
20828 case IX86_BUILTIN_PI2FD:
20829 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
20831 case IX86_BUILTIN_PMULHRW:
20832 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
20834 case IX86_BUILTIN_PF2IW:
20835 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
20837 case IX86_BUILTIN_PFNACC:
20838 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
20840 case IX86_BUILTIN_PFPNACC:
20841 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
20843 case IX86_BUILTIN_PI2FW:
20844 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
20846 case IX86_BUILTIN_PSWAPDSI:
20847 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
20849 case IX86_BUILTIN_PSWAPDSF:
20850 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
20852 case IX86_BUILTIN_SQRTSD:
20853 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
20854 case IX86_BUILTIN_LOADUPD:
20855 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
20856 case IX86_BUILTIN_STOREUPD:
20857 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
20859 case IX86_BUILTIN_MFENCE:
20860 emit_insn (gen_sse2_mfence ());
20861 return 0;
20862 case IX86_BUILTIN_LFENCE:
20863 emit_insn (gen_sse2_lfence ());
20864 return 0;
20866 case IX86_BUILTIN_CLFLUSH:
20867 arg0 = CALL_EXPR_ARG (exp, 0);
20868 op0 = expand_normal (arg0);
20869 icode = CODE_FOR_sse2_clflush;
20870 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
20871 op0 = copy_to_mode_reg (Pmode, op0);
20873 emit_insn (gen_sse2_clflush (op0));
20874 return 0;
20876 case IX86_BUILTIN_MOVNTPD:
20877 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
20878 case IX86_BUILTIN_MOVNTDQ:
20879 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
20880 case IX86_BUILTIN_MOVNTI:
20881 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
20883 case IX86_BUILTIN_LOADDQU:
20884 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
20885 case IX86_BUILTIN_STOREDQU:
20886 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
20888 case IX86_BUILTIN_MONITOR:
20889 arg0 = CALL_EXPR_ARG (exp, 0);
20890 arg1 = CALL_EXPR_ARG (exp, 1);
20891 arg2 = CALL_EXPR_ARG (exp, 2);
20892 op0 = expand_normal (arg0);
20893 op1 = expand_normal (arg1);
20894 op2 = expand_normal (arg2);
20895 if (!REG_P (op0))
20896 op0 = copy_to_mode_reg (Pmode, op0);
20897 if (!REG_P (op1))
20898 op1 = copy_to_mode_reg (SImode, op1);
20899 if (!REG_P (op2))
20900 op2 = copy_to_mode_reg (SImode, op2);
20901 if (!TARGET_64BIT)
20902 emit_insn (gen_sse3_monitor (op0, op1, op2));
20903 else
20904 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
20905 return 0;
20907 case IX86_BUILTIN_MWAIT:
20908 arg0 = CALL_EXPR_ARG (exp, 0);
20909 arg1 = CALL_EXPR_ARG (exp, 1);
20910 op0 = expand_normal (arg0);
20911 op1 = expand_normal (arg1);
20912 if (!REG_P (op0))
20913 op0 = copy_to_mode_reg (SImode, op0);
20914 if (!REG_P (op1))
20915 op1 = copy_to_mode_reg (SImode, op1);
20916 emit_insn (gen_sse3_mwait (op0, op1));
20917 return 0;
20919 case IX86_BUILTIN_LDDQU:
20920 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
20921 target, 1);
20923 case IX86_BUILTIN_PALIGNR:
20924 case IX86_BUILTIN_PALIGNR128:
20925 if (fcode == IX86_BUILTIN_PALIGNR)
20927 icode = CODE_FOR_ssse3_palignrdi;
20928 mode = DImode;
20930 else
20932 icode = CODE_FOR_ssse3_palignrti;
20933 mode = V2DImode;
20935 arg0 = CALL_EXPR_ARG (exp, 0);
20936 arg1 = CALL_EXPR_ARG (exp, 1);
20937 arg2 = CALL_EXPR_ARG (exp, 2);
20938 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
20939 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
20940 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
20941 tmode = insn_data[icode].operand[0].mode;
20942 mode1 = insn_data[icode].operand[1].mode;
20943 mode2 = insn_data[icode].operand[2].mode;
20944 mode3 = insn_data[icode].operand[3].mode;
20946 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20948 op0 = copy_to_reg (op0);
20949 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
20951 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20953 op1 = copy_to_reg (op1);
20954 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
20956 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
20958 error ("shift must be an immediate");
20959 return const0_rtx;
20961 target = gen_reg_rtx (mode);
20962 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
20963 op0, op1, op2);
20964 if (! pat)
20965 return 0;
20966 emit_insn (pat);
20967 return target;
20969 case IX86_BUILTIN_MOVNTDQA:
20970 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
20971 target, 1);
20973 case IX86_BUILTIN_MOVNTSD:
20974 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
20976 case IX86_BUILTIN_MOVNTSS:
20977 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
20979 case IX86_BUILTIN_INSERTQ:
20980 case IX86_BUILTIN_EXTRQ:
20981 icode = (fcode == IX86_BUILTIN_EXTRQ
20982 ? CODE_FOR_sse4a_extrq
20983 : CODE_FOR_sse4a_insertq);
20984 arg0 = CALL_EXPR_ARG (exp, 0);
20985 arg1 = CALL_EXPR_ARG (exp, 1);
20986 op0 = expand_normal (arg0);
20987 op1 = expand_normal (arg1);
20988 tmode = insn_data[icode].operand[0].mode;
20989 mode1 = insn_data[icode].operand[1].mode;
20990 mode2 = insn_data[icode].operand[2].mode;
20991 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20992 op0 = copy_to_mode_reg (mode1, op0);
20993 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20994 op1 = copy_to_mode_reg (mode2, op1);
20995 if (optimize || target == 0
20996 || GET_MODE (target) != tmode
20997 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20998 target = gen_reg_rtx (tmode);
20999 pat = GEN_FCN (icode) (target, op0, op1);
21000 if (! pat)
21001 return NULL_RTX;
21002 emit_insn (pat);
21003 return target;
21005 case IX86_BUILTIN_EXTRQI:
21006 icode = CODE_FOR_sse4a_extrqi;
21007 arg0 = CALL_EXPR_ARG (exp, 0);
21008 arg1 = CALL_EXPR_ARG (exp, 1);
21009 arg2 = CALL_EXPR_ARG (exp, 2);
21010 op0 = expand_normal (arg0);
21011 op1 = expand_normal (arg1);
21012 op2 = expand_normal (arg2);
21013 tmode = insn_data[icode].operand[0].mode;
21014 mode1 = insn_data[icode].operand[1].mode;
21015 mode2 = insn_data[icode].operand[2].mode;
21016 mode3 = insn_data[icode].operand[3].mode;
21017 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21018 op0 = copy_to_mode_reg (mode1, op0);
21019 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21021 error ("index mask must be an immediate");
21022 return gen_reg_rtx (tmode);
21024 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21026 error ("length mask must be an immediate");
21027 return gen_reg_rtx (tmode);
21029 if (optimize || target == 0
21030 || GET_MODE (target) != tmode
21031 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21032 target = gen_reg_rtx (tmode);
21033 pat = GEN_FCN (icode) (target, op0, op1, op2);
21034 if (! pat)
21035 return NULL_RTX;
21036 emit_insn (pat);
21037 return target;
21039 case IX86_BUILTIN_INSERTQI:
21040 icode = CODE_FOR_sse4a_insertqi;
21041 arg0 = CALL_EXPR_ARG (exp, 0);
21042 arg1 = CALL_EXPR_ARG (exp, 1);
21043 arg2 = CALL_EXPR_ARG (exp, 2);
21044 arg3 = CALL_EXPR_ARG (exp, 3);
21045 op0 = expand_normal (arg0);
21046 op1 = expand_normal (arg1);
21047 op2 = expand_normal (arg2);
21048 op3 = expand_normal (arg3);
21049 tmode = insn_data[icode].operand[0].mode;
21050 mode1 = insn_data[icode].operand[1].mode;
21051 mode2 = insn_data[icode].operand[2].mode;
21052 mode3 = insn_data[icode].operand[3].mode;
21053 mode4 = insn_data[icode].operand[4].mode;
21055 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21056 op0 = copy_to_mode_reg (mode1, op0);
21058 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21059 op1 = copy_to_mode_reg (mode2, op1);
21061 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21063 error ("index mask must be an immediate");
21064 return gen_reg_rtx (tmode);
21066 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
21068 error ("length mask must be an immediate");
21069 return gen_reg_rtx (tmode);
21071 if (optimize || target == 0
21072 || GET_MODE (target) != tmode
21073 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21074 target = gen_reg_rtx (tmode);
21075 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
21076 if (! pat)
21077 return NULL_RTX;
21078 emit_insn (pat);
21079 return target;
21081 case IX86_BUILTIN_VEC_INIT_V2SI:
21082 case IX86_BUILTIN_VEC_INIT_V4HI:
21083 case IX86_BUILTIN_VEC_INIT_V8QI:
21084 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
21086 case IX86_BUILTIN_VEC_EXT_V2DF:
21087 case IX86_BUILTIN_VEC_EXT_V2DI:
21088 case IX86_BUILTIN_VEC_EXT_V4SF:
21089 case IX86_BUILTIN_VEC_EXT_V4SI:
21090 case IX86_BUILTIN_VEC_EXT_V8HI:
21091 case IX86_BUILTIN_VEC_EXT_V2SI:
21092 case IX86_BUILTIN_VEC_EXT_V4HI:
21093 case IX86_BUILTIN_VEC_EXT_V16QI:
21094 return ix86_expand_vec_ext_builtin (exp, target);
21096 case IX86_BUILTIN_VEC_SET_V2DI:
21097 case IX86_BUILTIN_VEC_SET_V4SF:
21098 case IX86_BUILTIN_VEC_SET_V4SI:
21099 case IX86_BUILTIN_VEC_SET_V8HI:
21100 case IX86_BUILTIN_VEC_SET_V4HI:
21101 case IX86_BUILTIN_VEC_SET_V16QI:
21102 return ix86_expand_vec_set_builtin (exp);
21104 case IX86_BUILTIN_INFQ:
21106 REAL_VALUE_TYPE inf;
21107 rtx tmp;
21109 real_inf (&inf);
21110 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
21112 tmp = validize_mem (force_const_mem (mode, tmp));
21114 if (target == 0)
21115 target = gen_reg_rtx (mode);
21117 emit_move_insn (target, tmp);
21118 return target;
21121 case IX86_BUILTIN_FABSQ:
21122 return ix86_expand_unop_builtin (CODE_FOR_abstf2, exp, target, 0);
21124 case IX86_BUILTIN_COPYSIGNQ:
21125 return ix86_expand_binop_builtin (CODE_FOR_copysigntf3, exp, target);
21127 default:
21128 break;
21131 for (i = 0, d = bdesc_sse_3arg;
21132 i < ARRAY_SIZE (bdesc_sse_3arg);
21133 i++, d++)
21134 if (d->code == fcode)
21135 return ix86_expand_sse_4_operands_builtin (d->icode, exp,
21136 target);
21138 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
21139 if (d->code == fcode)
21141 /* Compares are treated specially. */
21142 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
21143 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
21144 || d->icode == CODE_FOR_sse2_maskcmpv2df3
21145 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
21146 return ix86_expand_sse_compare (d, exp, target);
21148 return ix86_expand_binop_builtin (d->icode, exp, target);
21151 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
21152 if (d->code == fcode)
21153 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
21155 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
21156 if (d->code == fcode)
21157 return ix86_expand_sse_comi (d, exp, target);
21159 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
21160 if (d->code == fcode)
21161 return ix86_expand_sse_ptest (d, exp, target);
21163 for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
21164 if (d->code == fcode)
21165 return ix86_expand_crc32 (d->icode, exp, target);
21167 for (i = 0, d = bdesc_pcmpestr;
21168 i < ARRAY_SIZE (bdesc_pcmpestr);
21169 i++, d++)
21170 if (d->code == fcode)
21171 return ix86_expand_sse_pcmpestr (d, exp, target);
21173 for (i = 0, d = bdesc_pcmpistr;
21174 i < ARRAY_SIZE (bdesc_pcmpistr);
21175 i++, d++)
21176 if (d->code == fcode)
21177 return ix86_expand_sse_pcmpistr (d, exp, target);
21179 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
21180 if (d->code == fcode)
21181 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
21182 (enum multi_arg_type)d->flag,
21183 d->comparison);
21185 gcc_unreachable ();
21188 /* Returns a function decl for a vectorized version of the builtin function
21189 with builtin function code FN and the result vector type TYPE, or NULL_TREE
21190 if it is not available. */
21192 static tree
21193 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
21194 tree type_in)
21196 enum machine_mode in_mode, out_mode;
21197 int in_n, out_n;
21199 if (TREE_CODE (type_out) != VECTOR_TYPE
21200 || TREE_CODE (type_in) != VECTOR_TYPE)
21201 return NULL_TREE;
21203 out_mode = TYPE_MODE (TREE_TYPE (type_out));
21204 out_n = TYPE_VECTOR_SUBPARTS (type_out);
21205 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21206 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21208 switch (fn)
21210 case BUILT_IN_SQRT:
21211 if (out_mode == DFmode && out_n == 2
21212 && in_mode == DFmode && in_n == 2)
21213 return ix86_builtins[IX86_BUILTIN_SQRTPD];
21214 break;
21216 case BUILT_IN_SQRTF:
21217 if (out_mode == SFmode && out_n == 4
21218 && in_mode == SFmode && in_n == 4)
21219 return ix86_builtins[IX86_BUILTIN_SQRTPS];
21220 break;
21222 case BUILT_IN_LRINT:
21223 if (out_mode == SImode && out_n == 4
21224 && in_mode == DFmode && in_n == 2)
21225 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
21226 break;
21228 case BUILT_IN_LRINTF:
21229 if (out_mode == SImode && out_n == 4
21230 && in_mode == SFmode && in_n == 4)
21231 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
21232 break;
21234 default:
21238 /* Dispatch to a handler for a vectorization library. */
21239 if (ix86_veclib_handler)
21240 return (*ix86_veclib_handler)(fn, type_out, type_in);
21242 return NULL_TREE;
21245 /* Handler for an ACML-style interface to a library with vectorized
21246 intrinsics. */
21248 static tree
21249 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
21251 char name[20] = "__vr.._";
21252 tree fntype, new_fndecl, args;
21253 unsigned arity;
21254 const char *bname;
21255 enum machine_mode el_mode, in_mode;
21256 int n, in_n;
21258 /* The ACML is 64bits only and suitable for unsafe math only as
21259 it does not correctly support parts of IEEE with the required
21260 precision such as denormals. */
21261 if (!TARGET_64BIT
21262 || !flag_unsafe_math_optimizations)
21263 return NULL_TREE;
21265 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21266 n = TYPE_VECTOR_SUBPARTS (type_out);
21267 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21268 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21269 if (el_mode != in_mode
21270 || n != in_n)
21271 return NULL_TREE;
21273 switch (fn)
21275 case BUILT_IN_SIN:
21276 case BUILT_IN_COS:
21277 case BUILT_IN_EXP:
21278 case BUILT_IN_LOG:
21279 case BUILT_IN_LOG2:
21280 case BUILT_IN_LOG10:
21281 name[4] = 'd';
21282 name[5] = '2';
21283 if (el_mode != DFmode
21284 || n != 2)
21285 return NULL_TREE;
21286 break;
21288 case BUILT_IN_SINF:
21289 case BUILT_IN_COSF:
21290 case BUILT_IN_EXPF:
21291 case BUILT_IN_POWF:
21292 case BUILT_IN_LOGF:
21293 case BUILT_IN_LOG2F:
21294 case BUILT_IN_LOG10F:
21295 name[4] = 's';
21296 name[5] = '4';
21297 if (el_mode != SFmode
21298 || n != 4)
21299 return NULL_TREE;
21300 break;
21302 default:
21303 return NULL_TREE;
21306 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21307 sprintf (name + 7, "%s", bname+10);
21309 arity = 0;
21310 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21311 args = TREE_CHAIN (args))
21312 arity++;
21314 if (arity == 1)
21315 fntype = build_function_type_list (type_out, type_in, NULL);
21316 else
21317 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21319 /* Build a function declaration for the vectorized function. */
21320 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21321 TREE_PUBLIC (new_fndecl) = 1;
21322 DECL_EXTERNAL (new_fndecl) = 1;
21323 DECL_IS_NOVOPS (new_fndecl) = 1;
21324 TREE_READONLY (new_fndecl) = 1;
21326 return new_fndecl;
21330 /* Returns a decl of a function that implements conversion of the
21331 input vector of type TYPE, or NULL_TREE if it is not available. */
21333 static tree
21334 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
21336 if (TREE_CODE (type) != VECTOR_TYPE)
21337 return NULL_TREE;
21339 switch (code)
21341 case FLOAT_EXPR:
21342 switch (TYPE_MODE (type))
21344 case V4SImode:
21345 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
21346 default:
21347 return NULL_TREE;
21350 case FIX_TRUNC_EXPR:
21351 switch (TYPE_MODE (type))
21353 case V4SFmode:
21354 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
21355 default:
21356 return NULL_TREE;
21358 default:
21359 return NULL_TREE;
21364 /* Returns a code for a target-specific builtin that implements
21365 reciprocal of the function, or NULL_TREE if not available. */
21367 static tree
21368 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
21369 bool sqrt ATTRIBUTE_UNUSED)
21371 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
21372 && flag_finite_math_only && !flag_trapping_math
21373 && flag_unsafe_math_optimizations))
21374 return NULL_TREE;
21376 if (md_fn)
21377 /* Machine dependent builtins. */
21378 switch (fn)
21380 /* Vectorized version of sqrt to rsqrt conversion. */
21381 case IX86_BUILTIN_SQRTPS:
21382 return ix86_builtins[IX86_BUILTIN_RSQRTPS];
21384 default:
21385 return NULL_TREE;
21387 else
21388 /* Normal builtins. */
21389 switch (fn)
21391 /* Sqrt to rsqrt conversion. */
21392 case BUILT_IN_SQRTF:
21393 return ix86_builtins[IX86_BUILTIN_RSQRTF];
21395 default:
21396 return NULL_TREE;
21400 /* Store OPERAND to the memory after reload is completed. This means
21401 that we can't easily use assign_stack_local. */
21403 ix86_force_to_memory (enum machine_mode mode, rtx operand)
21405 rtx result;
21407 gcc_assert (reload_completed);
21408 if (TARGET_RED_ZONE)
21410 result = gen_rtx_MEM (mode,
21411 gen_rtx_PLUS (Pmode,
21412 stack_pointer_rtx,
21413 GEN_INT (-RED_ZONE_SIZE)));
21414 emit_move_insn (result, operand);
21416 else if (!TARGET_RED_ZONE && TARGET_64BIT)
21418 switch (mode)
21420 case HImode:
21421 case SImode:
21422 operand = gen_lowpart (DImode, operand);
21423 /* FALLTHRU */
21424 case DImode:
21425 emit_insn (
21426 gen_rtx_SET (VOIDmode,
21427 gen_rtx_MEM (DImode,
21428 gen_rtx_PRE_DEC (DImode,
21429 stack_pointer_rtx)),
21430 operand));
21431 break;
21432 default:
21433 gcc_unreachable ();
21435 result = gen_rtx_MEM (mode, stack_pointer_rtx);
21437 else
21439 switch (mode)
21441 case DImode:
21443 rtx operands[2];
21444 split_di (&operand, 1, operands, operands + 1);
21445 emit_insn (
21446 gen_rtx_SET (VOIDmode,
21447 gen_rtx_MEM (SImode,
21448 gen_rtx_PRE_DEC (Pmode,
21449 stack_pointer_rtx)),
21450 operands[1]));
21451 emit_insn (
21452 gen_rtx_SET (VOIDmode,
21453 gen_rtx_MEM (SImode,
21454 gen_rtx_PRE_DEC (Pmode,
21455 stack_pointer_rtx)),
21456 operands[0]));
21458 break;
21459 case HImode:
21460 /* Store HImodes as SImodes. */
21461 operand = gen_lowpart (SImode, operand);
21462 /* FALLTHRU */
21463 case SImode:
21464 emit_insn (
21465 gen_rtx_SET (VOIDmode,
21466 gen_rtx_MEM (GET_MODE (operand),
21467 gen_rtx_PRE_DEC (SImode,
21468 stack_pointer_rtx)),
21469 operand));
21470 break;
21471 default:
21472 gcc_unreachable ();
21474 result = gen_rtx_MEM (mode, stack_pointer_rtx);
21476 return result;
21479 /* Free operand from the memory. */
21480 void
21481 ix86_free_from_memory (enum machine_mode mode)
21483 if (!TARGET_RED_ZONE)
21485 int size;
21487 if (mode == DImode || TARGET_64BIT)
21488 size = 8;
21489 else
21490 size = 4;
21491 /* Use LEA to deallocate stack space. In peephole2 it will be converted
21492 to pop or add instruction if registers are available. */
21493 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21494 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
21495 GEN_INT (size))));
21499 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
21500 QImode must go into class Q_REGS.
21501 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
21502 movdf to do mem-to-mem moves through integer regs. */
21503 enum reg_class
21504 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
21506 enum machine_mode mode = GET_MODE (x);
21508 /* We're only allowed to return a subclass of CLASS. Many of the
21509 following checks fail for NO_REGS, so eliminate that early. */
21510 if (regclass == NO_REGS)
21511 return NO_REGS;
21513 /* All classes can load zeros. */
21514 if (x == CONST0_RTX (mode))
21515 return regclass;
21517 /* Force constants into memory if we are loading a (nonzero) constant into
21518 an MMX or SSE register. This is because there are no MMX/SSE instructions
21519 to load from a constant. */
21520 if (CONSTANT_P (x)
21521 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
21522 return NO_REGS;
21524 /* Prefer SSE regs only, if we can use them for math. */
21525 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
21526 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
21528 /* Floating-point constants need more complex checks. */
21529 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
21531 /* General regs can load everything. */
21532 if (reg_class_subset_p (regclass, GENERAL_REGS))
21533 return regclass;
21535 /* Floats can load 0 and 1 plus some others. Note that we eliminated
21536 zero above. We only want to wind up preferring 80387 registers if
21537 we plan on doing computation with them. */
21538 if (TARGET_80387
21539 && standard_80387_constant_p (x))
21541 /* Limit class to non-sse. */
21542 if (regclass == FLOAT_SSE_REGS)
21543 return FLOAT_REGS;
21544 if (regclass == FP_TOP_SSE_REGS)
21545 return FP_TOP_REG;
21546 if (regclass == FP_SECOND_SSE_REGS)
21547 return FP_SECOND_REG;
21548 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
21549 return regclass;
21552 return NO_REGS;
21555 /* Generally when we see PLUS here, it's the function invariant
21556 (plus soft-fp const_int). Which can only be computed into general
21557 regs. */
21558 if (GET_CODE (x) == PLUS)
21559 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
21561 /* QImode constants are easy to load, but non-constant QImode data
21562 must go into Q_REGS. */
21563 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
21565 if (reg_class_subset_p (regclass, Q_REGS))
21566 return regclass;
21567 if (reg_class_subset_p (Q_REGS, regclass))
21568 return Q_REGS;
21569 return NO_REGS;
21572 return regclass;
21575 /* Discourage putting floating-point values in SSE registers unless
21576 SSE math is being used, and likewise for the 387 registers. */
21577 enum reg_class
21578 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
21580 enum machine_mode mode = GET_MODE (x);
21582 /* Restrict the output reload class to the register bank that we are doing
21583 math on. If we would like not to return a subset of CLASS, reject this
21584 alternative: if reload cannot do this, it will still use its choice. */
21585 mode = GET_MODE (x);
21586 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21587 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
21589 if (X87_FLOAT_MODE_P (mode))
21591 if (regclass == FP_TOP_SSE_REGS)
21592 return FP_TOP_REG;
21593 else if (regclass == FP_SECOND_SSE_REGS)
21594 return FP_SECOND_REG;
21595 else
21596 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
21599 return regclass;
21602 /* If we are copying between general and FP registers, we need a memory
21603 location. The same is true for SSE and MMX registers.
21605 To optimize register_move_cost performance, allow inline variant.
21607 The macro can't work reliably when one of the CLASSES is class containing
21608 registers from multiple units (SSE, MMX, integer). We avoid this by never
21609 combining those units in single alternative in the machine description.
21610 Ensure that this constraint holds to avoid unexpected surprises.
21612 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
21613 enforce these sanity checks. */
21615 static inline int
21616 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
21617 enum machine_mode mode, int strict)
21619 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
21620 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
21621 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
21622 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
21623 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
21624 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
21626 gcc_assert (!strict);
21627 return true;
21630 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
21631 return true;
21633 /* ??? This is a lie. We do have moves between mmx/general, and for
21634 mmx/sse2. But by saying we need secondary memory we discourage the
21635 register allocator from using the mmx registers unless needed. */
21636 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
21637 return true;
21639 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
21641 /* SSE1 doesn't have any direct moves from other classes. */
21642 if (!TARGET_SSE2)
21643 return true;
21645 /* If the target says that inter-unit moves are more expensive
21646 than moving through memory, then don't generate them. */
21647 if (!TARGET_INTER_UNIT_MOVES)
21648 return true;
21650 /* Between SSE and general, we have moves no larger than word size. */
21651 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21652 return true;
21655 return false;
21659 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
21660 enum machine_mode mode, int strict)
21662 return inline_secondary_memory_needed (class1, class2, mode, strict);
21665 /* Return true if the registers in CLASS cannot represent the change from
21666 modes FROM to TO. */
21668 bool
21669 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
21670 enum reg_class regclass)
21672 if (from == to)
21673 return false;
21675 /* x87 registers can't do subreg at all, as all values are reformatted
21676 to extended precision. */
21677 if (MAYBE_FLOAT_CLASS_P (regclass))
21678 return true;
21680 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
21682 /* Vector registers do not support QI or HImode loads. If we don't
21683 disallow a change to these modes, reload will assume it's ok to
21684 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
21685 the vec_dupv4hi pattern. */
21686 if (GET_MODE_SIZE (from) < 4)
21687 return true;
21689 /* Vector registers do not support subreg with nonzero offsets, which
21690 are otherwise valid for integer registers. Since we can't see
21691 whether we have a nonzero offset from here, prohibit all
21692 nonparadoxical subregs changing size. */
21693 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
21694 return true;
21697 return false;
21700 /* Return the cost of moving data of mode M between a
21701 register and memory. A value of 2 is the default; this cost is
21702 relative to those in `REGISTER_MOVE_COST'.
21704 This function is used extensively by register_move_cost that is used to
21705 build tables at startup. Make it inline in this case.
21706 When IN is 2, return maximum of in and out move cost.
21708 If moving between registers and memory is more expensive than
21709 between two registers, you should define this macro to express the
21710 relative cost.
21712 Model also increased moving costs of QImode registers in non
21713 Q_REGS classes.
21715 static inline int
21716 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
21717 int in)
21719 int cost;
21720 if (FLOAT_CLASS_P (regclass))
21722 int index;
21723 switch (mode)
21725 case SFmode:
21726 index = 0;
21727 break;
21728 case DFmode:
21729 index = 1;
21730 break;
21731 case XFmode:
21732 index = 2;
21733 break;
21734 default:
21735 return 100;
21737 if (in == 2)
21738 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
21739 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
21741 if (SSE_CLASS_P (regclass))
21743 int index;
21744 switch (GET_MODE_SIZE (mode))
21746 case 4:
21747 index = 0;
21748 break;
21749 case 8:
21750 index = 1;
21751 break;
21752 case 16:
21753 index = 2;
21754 break;
21755 default:
21756 return 100;
21758 if (in == 2)
21759 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
21760 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
21762 if (MMX_CLASS_P (regclass))
21764 int index;
21765 switch (GET_MODE_SIZE (mode))
21767 case 4:
21768 index = 0;
21769 break;
21770 case 8:
21771 index = 1;
21772 break;
21773 default:
21774 return 100;
21776 if (in)
21777 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
21778 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
21780 switch (GET_MODE_SIZE (mode))
21782 case 1:
21783 if (Q_CLASS_P (regclass) || TARGET_64BIT)
21785 if (!in)
21786 return ix86_cost->int_store[0];
21787 if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
21788 cost = ix86_cost->movzbl_load;
21789 else
21790 cost = ix86_cost->int_load[0];
21791 if (in == 2)
21792 return MAX (cost, ix86_cost->int_store[0]);
21793 return cost;
21795 else
21797 if (in == 2)
21798 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
21799 if (in)
21800 return ix86_cost->movzbl_load;
21801 else
21802 return ix86_cost->int_store[0] + 4;
21804 break;
21805 case 2:
21806 if (in == 2)
21807 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
21808 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
21809 default:
21810 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
21811 if (mode == TFmode)
21812 mode = XFmode;
21813 if (in == 2)
21814 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
21815 else if (in)
21816 cost = ix86_cost->int_load[2];
21817 else
21818 cost = ix86_cost->int_store[2];
21819 return (cost * (((int) GET_MODE_SIZE (mode)
21820 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
21825 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
21827 return inline_memory_move_cost (mode, regclass, in);
21831 /* Return the cost of moving data from a register in class CLASS1 to
21832 one in class CLASS2.
21834 It is not required that the cost always equal 2 when FROM is the same as TO;
21835 on some machines it is expensive to move between registers if they are not
21836 general registers. */
21839 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
21840 enum reg_class class2)
21842 /* In case we require secondary memory, compute cost of the store followed
21843 by load. In order to avoid bad register allocation choices, we need
21844 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
21846 if (inline_secondary_memory_needed (class1, class2, mode, 0))
21848 int cost = 1;
21850 cost += inline_memory_move_cost (mode, class1, 2);
21851 cost += inline_memory_move_cost (mode, class2, 2);
21853 /* In case of copying from general_purpose_register we may emit multiple
21854 stores followed by single load causing memory size mismatch stall.
21855 Count this as arbitrarily high cost of 20. */
21856 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
21857 cost += 20;
21859 /* In the case of FP/MMX moves, the registers actually overlap, and we
21860 have to switch modes in order to treat them differently. */
21861 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
21862 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
21863 cost += 20;
21865 return cost;
21868 /* Moves between SSE/MMX and integer unit are expensive. */
21869 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
21870 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
21872 /* ??? By keeping returned value relatively high, we limit the number
21873 of moves between integer and MMX/SSE registers for all targets.
21874 Additionally, high value prevents problem with x86_modes_tieable_p(),
21875 where integer modes in MMX/SSE registers are not tieable
21876 because of missing QImode and HImode moves to, from or between
21877 MMX/SSE registers. */
21878 return MAX (ix86_cost->mmxsse_to_integer, 8);
21880 if (MAYBE_FLOAT_CLASS_P (class1))
21881 return ix86_cost->fp_move;
21882 if (MAYBE_SSE_CLASS_P (class1))
21883 return ix86_cost->sse_move;
21884 if (MAYBE_MMX_CLASS_P (class1))
21885 return ix86_cost->mmx_move;
21886 return 2;
21889 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
21891 bool
21892 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
21894 /* Flags and only flags can only hold CCmode values. */
21895 if (CC_REGNO_P (regno))
21896 return GET_MODE_CLASS (mode) == MODE_CC;
21897 if (GET_MODE_CLASS (mode) == MODE_CC
21898 || GET_MODE_CLASS (mode) == MODE_RANDOM
21899 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
21900 return 0;
21901 if (FP_REGNO_P (regno))
21902 return VALID_FP_MODE_P (mode);
21903 if (SSE_REGNO_P (regno))
21905 /* We implement the move patterns for all vector modes into and
21906 out of SSE registers, even when no operation instructions
21907 are available. */
21908 return (VALID_SSE_REG_MODE (mode)
21909 || VALID_SSE2_REG_MODE (mode)
21910 || VALID_MMX_REG_MODE (mode)
21911 || VALID_MMX_REG_MODE_3DNOW (mode));
21913 if (MMX_REGNO_P (regno))
21915 /* We implement the move patterns for 3DNOW modes even in MMX mode,
21916 so if the register is available at all, then we can move data of
21917 the given mode into or out of it. */
21918 return (VALID_MMX_REG_MODE (mode)
21919 || VALID_MMX_REG_MODE_3DNOW (mode));
21922 if (mode == QImode)
21924 /* Take care for QImode values - they can be in non-QI regs,
21925 but then they do cause partial register stalls. */
21926 if (regno < 4 || TARGET_64BIT)
21927 return 1;
21928 if (!TARGET_PARTIAL_REG_STALL)
21929 return 1;
21930 return reload_in_progress || reload_completed;
21932 /* We handle both integer and floats in the general purpose registers. */
21933 else if (VALID_INT_MODE_P (mode))
21934 return 1;
21935 else if (VALID_FP_MODE_P (mode))
21936 return 1;
21937 else if (VALID_DFP_MODE_P (mode))
21938 return 1;
21939 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
21940 on to use that value in smaller contexts, this can easily force a
21941 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
21942 supporting DImode, allow it. */
21943 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
21944 return 1;
21946 return 0;
21949 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
21950 tieable integer mode. */
21952 static bool
21953 ix86_tieable_integer_mode_p (enum machine_mode mode)
21955 switch (mode)
21957 case HImode:
21958 case SImode:
21959 return true;
21961 case QImode:
21962 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
21964 case DImode:
21965 return TARGET_64BIT;
21967 default:
21968 return false;
21972 /* Return true if MODE1 is accessible in a register that can hold MODE2
21973 without copying. That is, all register classes that can hold MODE2
21974 can also hold MODE1. */
21976 bool
21977 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
21979 if (mode1 == mode2)
21980 return true;
21982 if (ix86_tieable_integer_mode_p (mode1)
21983 && ix86_tieable_integer_mode_p (mode2))
21984 return true;
21986 /* MODE2 being XFmode implies fp stack or general regs, which means we
21987 can tie any smaller floating point modes to it. Note that we do not
21988 tie this with TFmode. */
21989 if (mode2 == XFmode)
21990 return mode1 == SFmode || mode1 == DFmode;
21992 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
21993 that we can tie it with SFmode. */
21994 if (mode2 == DFmode)
21995 return mode1 == SFmode;
21997 /* If MODE2 is only appropriate for an SSE register, then tie with
21998 any other mode acceptable to SSE registers. */
21999 if (GET_MODE_SIZE (mode2) == 16
22000 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
22001 return (GET_MODE_SIZE (mode1) == 16
22002 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
22004 /* If MODE2 is appropriate for an MMX register, then tie
22005 with any other mode acceptable to MMX registers. */
22006 if (GET_MODE_SIZE (mode2) == 8
22007 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
22008 return (GET_MODE_SIZE (mode1) == 8
22009 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
22011 return false;
22014 /* Compute a (partial) cost for rtx X. Return true if the complete
22015 cost has been computed, and false if subexpressions should be
22016 scanned. In either case, *TOTAL contains the cost result. */
22018 static bool
22019 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
22021 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
22022 enum machine_mode mode = GET_MODE (x);
22024 switch (code)
22026 case CONST_INT:
22027 case CONST:
22028 case LABEL_REF:
22029 case SYMBOL_REF:
22030 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
22031 *total = 3;
22032 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
22033 *total = 2;
22034 else if (flag_pic && SYMBOLIC_CONST (x)
22035 && (!TARGET_64BIT
22036 || (!GET_CODE (x) != LABEL_REF
22037 && (GET_CODE (x) != SYMBOL_REF
22038 || !SYMBOL_REF_LOCAL_P (x)))))
22039 *total = 1;
22040 else
22041 *total = 0;
22042 return true;
22044 case CONST_DOUBLE:
22045 if (mode == VOIDmode)
22046 *total = 0;
22047 else
22048 switch (standard_80387_constant_p (x))
22050 case 1: /* 0.0 */
22051 *total = 1;
22052 break;
22053 default: /* Other constants */
22054 *total = 2;
22055 break;
22056 case 0:
22057 case -1:
22058 /* Start with (MEM (SYMBOL_REF)), since that's where
22059 it'll probably end up. Add a penalty for size. */
22060 *total = (COSTS_N_INSNS (1)
22061 + (flag_pic != 0 && !TARGET_64BIT)
22062 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
22063 break;
22065 return true;
22067 case ZERO_EXTEND:
22068 /* The zero extensions is often completely free on x86_64, so make
22069 it as cheap as possible. */
22070 if (TARGET_64BIT && mode == DImode
22071 && GET_MODE (XEXP (x, 0)) == SImode)
22072 *total = 1;
22073 else if (TARGET_ZERO_EXTEND_WITH_AND)
22074 *total = ix86_cost->add;
22075 else
22076 *total = ix86_cost->movzx;
22077 return false;
22079 case SIGN_EXTEND:
22080 *total = ix86_cost->movsx;
22081 return false;
22083 case ASHIFT:
22084 if (CONST_INT_P (XEXP (x, 1))
22085 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
22087 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22088 if (value == 1)
22090 *total = ix86_cost->add;
22091 return false;
22093 if ((value == 2 || value == 3)
22094 && ix86_cost->lea <= ix86_cost->shift_const)
22096 *total = ix86_cost->lea;
22097 return false;
22100 /* FALLTHRU */
22102 case ROTATE:
22103 case ASHIFTRT:
22104 case LSHIFTRT:
22105 case ROTATERT:
22106 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
22108 if (CONST_INT_P (XEXP (x, 1)))
22110 if (INTVAL (XEXP (x, 1)) > 32)
22111 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
22112 else
22113 *total = ix86_cost->shift_const * 2;
22115 else
22117 if (GET_CODE (XEXP (x, 1)) == AND)
22118 *total = ix86_cost->shift_var * 2;
22119 else
22120 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
22123 else
22125 if (CONST_INT_P (XEXP (x, 1)))
22126 *total = ix86_cost->shift_const;
22127 else
22128 *total = ix86_cost->shift_var;
22130 return false;
22132 case MULT:
22133 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22135 /* ??? SSE scalar cost should be used here. */
22136 *total = ix86_cost->fmul;
22137 return false;
22139 else if (X87_FLOAT_MODE_P (mode))
22141 *total = ix86_cost->fmul;
22142 return false;
22144 else if (FLOAT_MODE_P (mode))
22146 /* ??? SSE vector cost should be used here. */
22147 *total = ix86_cost->fmul;
22148 return false;
22150 else
22152 rtx op0 = XEXP (x, 0);
22153 rtx op1 = XEXP (x, 1);
22154 int nbits;
22155 if (CONST_INT_P (XEXP (x, 1)))
22157 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22158 for (nbits = 0; value != 0; value &= value - 1)
22159 nbits++;
22161 else
22162 /* This is arbitrary. */
22163 nbits = 7;
22165 /* Compute costs correctly for widening multiplication. */
22166 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
22167 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
22168 == GET_MODE_SIZE (mode))
22170 int is_mulwiden = 0;
22171 enum machine_mode inner_mode = GET_MODE (op0);
22173 if (GET_CODE (op0) == GET_CODE (op1))
22174 is_mulwiden = 1, op1 = XEXP (op1, 0);
22175 else if (CONST_INT_P (op1))
22177 if (GET_CODE (op0) == SIGN_EXTEND)
22178 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
22179 == INTVAL (op1);
22180 else
22181 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
22184 if (is_mulwiden)
22185 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
22188 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
22189 + nbits * ix86_cost->mult_bit
22190 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
22192 return true;
22195 case DIV:
22196 case UDIV:
22197 case MOD:
22198 case UMOD:
22199 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22200 /* ??? SSE cost should be used here. */
22201 *total = ix86_cost->fdiv;
22202 else if (X87_FLOAT_MODE_P (mode))
22203 *total = ix86_cost->fdiv;
22204 else if (FLOAT_MODE_P (mode))
22205 /* ??? SSE vector cost should be used here. */
22206 *total = ix86_cost->fdiv;
22207 else
22208 *total = ix86_cost->divide[MODE_INDEX (mode)];
22209 return false;
22211 case PLUS:
22212 if (GET_MODE_CLASS (mode) == MODE_INT
22213 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
22215 if (GET_CODE (XEXP (x, 0)) == PLUS
22216 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
22217 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
22218 && CONSTANT_P (XEXP (x, 1)))
22220 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
22221 if (val == 2 || val == 4 || val == 8)
22223 *total = ix86_cost->lea;
22224 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22225 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
22226 outer_code);
22227 *total += rtx_cost (XEXP (x, 1), outer_code);
22228 return true;
22231 else if (GET_CODE (XEXP (x, 0)) == MULT
22232 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
22234 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
22235 if (val == 2 || val == 4 || val == 8)
22237 *total = ix86_cost->lea;
22238 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22239 *total += rtx_cost (XEXP (x, 1), outer_code);
22240 return true;
22243 else if (GET_CODE (XEXP (x, 0)) == PLUS)
22245 *total = ix86_cost->lea;
22246 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22247 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22248 *total += rtx_cost (XEXP (x, 1), outer_code);
22249 return true;
22252 /* FALLTHRU */
22254 case MINUS:
22255 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22257 /* ??? SSE cost should be used here. */
22258 *total = ix86_cost->fadd;
22259 return false;
22261 else if (X87_FLOAT_MODE_P (mode))
22263 *total = ix86_cost->fadd;
22264 return false;
22266 else if (FLOAT_MODE_P (mode))
22268 /* ??? SSE vector cost should be used here. */
22269 *total = ix86_cost->fadd;
22270 return false;
22272 /* FALLTHRU */
22274 case AND:
22275 case IOR:
22276 case XOR:
22277 if (!TARGET_64BIT && mode == DImode)
22279 *total = (ix86_cost->add * 2
22280 + (rtx_cost (XEXP (x, 0), outer_code)
22281 << (GET_MODE (XEXP (x, 0)) != DImode))
22282 + (rtx_cost (XEXP (x, 1), outer_code)
22283 << (GET_MODE (XEXP (x, 1)) != DImode)));
22284 return true;
22286 /* FALLTHRU */
22288 case NEG:
22289 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22291 /* ??? SSE cost should be used here. */
22292 *total = ix86_cost->fchs;
22293 return false;
22295 else if (X87_FLOAT_MODE_P (mode))
22297 *total = ix86_cost->fchs;
22298 return false;
22300 else if (FLOAT_MODE_P (mode))
22302 /* ??? SSE vector cost should be used here. */
22303 *total = ix86_cost->fchs;
22304 return false;
22306 /* FALLTHRU */
22308 case NOT:
22309 if (!TARGET_64BIT && mode == DImode)
22310 *total = ix86_cost->add * 2;
22311 else
22312 *total = ix86_cost->add;
22313 return false;
22315 case COMPARE:
22316 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
22317 && XEXP (XEXP (x, 0), 1) == const1_rtx
22318 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
22319 && XEXP (x, 1) == const0_rtx)
22321 /* This kind of construct is implemented using test[bwl].
22322 Treat it as if we had an AND. */
22323 *total = (ix86_cost->add
22324 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
22325 + rtx_cost (const1_rtx, outer_code));
22326 return true;
22328 return false;
22330 case FLOAT_EXTEND:
22331 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
22332 *total = 0;
22333 return false;
22335 case ABS:
22336 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22337 /* ??? SSE cost should be used here. */
22338 *total = ix86_cost->fabs;
22339 else if (X87_FLOAT_MODE_P (mode))
22340 *total = ix86_cost->fabs;
22341 else if (FLOAT_MODE_P (mode))
22342 /* ??? SSE vector cost should be used here. */
22343 *total = ix86_cost->fabs;
22344 return false;
22346 case SQRT:
22347 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22348 /* ??? SSE cost should be used here. */
22349 *total = ix86_cost->fsqrt;
22350 else if (X87_FLOAT_MODE_P (mode))
22351 *total = ix86_cost->fsqrt;
22352 else if (FLOAT_MODE_P (mode))
22353 /* ??? SSE vector cost should be used here. */
22354 *total = ix86_cost->fsqrt;
22355 return false;
22357 case UNSPEC:
22358 if (XINT (x, 1) == UNSPEC_TP)
22359 *total = 0;
22360 return false;
22362 default:
22363 return false;
22367 #if TARGET_MACHO
22369 static int current_machopic_label_num;
22371 /* Given a symbol name and its associated stub, write out the
22372 definition of the stub. */
22374 void
22375 machopic_output_stub (FILE *file, const char *symb, const char *stub)
22377 unsigned int length;
22378 char *binder_name, *symbol_name, lazy_ptr_name[32];
22379 int label = ++current_machopic_label_num;
22381 /* For 64-bit we shouldn't get here. */
22382 gcc_assert (!TARGET_64BIT);
22384 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
22385 symb = (*targetm.strip_name_encoding) (symb);
22387 length = strlen (stub);
22388 binder_name = alloca (length + 32);
22389 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
22391 length = strlen (symb);
22392 symbol_name = alloca (length + 32);
22393 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
22395 sprintf (lazy_ptr_name, "L%d$lz", label);
22397 if (MACHOPIC_PURE)
22398 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
22399 else
22400 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
22402 fprintf (file, "%s:\n", stub);
22403 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22405 if (MACHOPIC_PURE)
22407 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
22408 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
22409 fprintf (file, "\tjmp\t*%%edx\n");
22411 else
22412 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
22414 fprintf (file, "%s:\n", binder_name);
22416 if (MACHOPIC_PURE)
22418 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
22419 fprintf (file, "\tpushl\t%%eax\n");
22421 else
22422 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
22424 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
22426 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
22427 fprintf (file, "%s:\n", lazy_ptr_name);
22428 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22429 fprintf (file, "\t.long %s\n", binder_name);
22432 void
22433 darwin_x86_file_end (void)
22435 darwin_file_end ();
22436 ix86_file_end ();
22438 #endif /* TARGET_MACHO */
22440 /* Order the registers for register allocator. */
22442 void
22443 x86_order_regs_for_local_alloc (void)
22445 int pos = 0;
22446 int i;
22448 /* First allocate the local general purpose registers. */
22449 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22450 if (GENERAL_REGNO_P (i) && call_used_regs[i])
22451 reg_alloc_order [pos++] = i;
22453 /* Global general purpose registers. */
22454 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22455 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
22456 reg_alloc_order [pos++] = i;
22458 /* x87 registers come first in case we are doing FP math
22459 using them. */
22460 if (!TARGET_SSE_MATH)
22461 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22462 reg_alloc_order [pos++] = i;
22464 /* SSE registers. */
22465 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
22466 reg_alloc_order [pos++] = i;
22467 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
22468 reg_alloc_order [pos++] = i;
22470 /* x87 registers. */
22471 if (TARGET_SSE_MATH)
22472 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22473 reg_alloc_order [pos++] = i;
22475 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
22476 reg_alloc_order [pos++] = i;
22478 /* Initialize the rest of array as we do not allocate some registers
22479 at all. */
22480 while (pos < FIRST_PSEUDO_REGISTER)
22481 reg_alloc_order [pos++] = 0;
22484 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
22485 struct attribute_spec.handler. */
22486 static tree
22487 ix86_handle_struct_attribute (tree *node, tree name,
22488 tree args ATTRIBUTE_UNUSED,
22489 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
22491 tree *type = NULL;
22492 if (DECL_P (*node))
22494 if (TREE_CODE (*node) == TYPE_DECL)
22495 type = &TREE_TYPE (*node);
22497 else
22498 type = node;
22500 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
22501 || TREE_CODE (*type) == UNION_TYPE)))
22503 warning (OPT_Wattributes, "%qs attribute ignored",
22504 IDENTIFIER_POINTER (name));
22505 *no_add_attrs = true;
22508 else if ((is_attribute_p ("ms_struct", name)
22509 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
22510 || ((is_attribute_p ("gcc_struct", name)
22511 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
22513 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
22514 IDENTIFIER_POINTER (name));
22515 *no_add_attrs = true;
22518 return NULL_TREE;
22521 static bool
22522 ix86_ms_bitfield_layout_p (const_tree record_type)
22524 return (TARGET_MS_BITFIELD_LAYOUT &&
22525 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
22526 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
22529 /* Returns an expression indicating where the this parameter is
22530 located on entry to the FUNCTION. */
22532 static rtx
22533 x86_this_parameter (tree function)
22535 tree type = TREE_TYPE (function);
22536 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
22538 if (TARGET_64BIT)
22540 const int *parm_regs;
22542 if (TARGET_64BIT_MS_ABI)
22543 parm_regs = x86_64_ms_abi_int_parameter_registers;
22544 else
22545 parm_regs = x86_64_int_parameter_registers;
22546 return gen_rtx_REG (DImode, parm_regs[aggr]);
22549 if (ix86_function_regparm (type, function) > 0 && !stdarg_p (type))
22551 int regno = 0;
22552 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
22553 regno = 2;
22554 return gen_rtx_REG (SImode, regno);
22557 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
22560 /* Determine whether x86_output_mi_thunk can succeed. */
22562 static bool
22563 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
22564 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
22565 HOST_WIDE_INT vcall_offset, const_tree function)
22567 /* 64-bit can handle anything. */
22568 if (TARGET_64BIT)
22569 return true;
22571 /* For 32-bit, everything's fine if we have one free register. */
22572 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
22573 return true;
22575 /* Need a free register for vcall_offset. */
22576 if (vcall_offset)
22577 return false;
22579 /* Need a free register for GOT references. */
22580 if (flag_pic && !(*targetm.binds_local_p) (function))
22581 return false;
22583 /* Otherwise ok. */
22584 return true;
22587 /* Output the assembler code for a thunk function. THUNK_DECL is the
22588 declaration for the thunk function itself, FUNCTION is the decl for
22589 the target function. DELTA is an immediate constant offset to be
22590 added to THIS. If VCALL_OFFSET is nonzero, the word at
22591 *(*this + vcall_offset) should be added to THIS. */
22593 static void
22594 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
22595 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
22596 HOST_WIDE_INT vcall_offset, tree function)
22598 rtx xops[3];
22599 rtx this_param = x86_this_parameter (function);
22600 rtx this_reg, tmp;
22602 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
22603 pull it in now and let DELTA benefit. */
22604 if (REG_P (this_param))
22605 this_reg = this_param;
22606 else if (vcall_offset)
22608 /* Put the this parameter into %eax. */
22609 xops[0] = this_param;
22610 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
22611 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
22613 else
22614 this_reg = NULL_RTX;
22616 /* Adjust the this parameter by a fixed constant. */
22617 if (delta)
22619 xops[0] = GEN_INT (delta);
22620 xops[1] = this_reg ? this_reg : this_param;
22621 if (TARGET_64BIT)
22623 if (!x86_64_general_operand (xops[0], DImode))
22625 tmp = gen_rtx_REG (DImode, R10_REG);
22626 xops[1] = tmp;
22627 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
22628 xops[0] = tmp;
22629 xops[1] = this_param;
22631 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
22633 else
22634 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
22637 /* Adjust the this parameter by a value stored in the vtable. */
22638 if (vcall_offset)
22640 if (TARGET_64BIT)
22641 tmp = gen_rtx_REG (DImode, R10_REG);
22642 else
22644 int tmp_regno = 2 /* ECX */;
22645 if (lookup_attribute ("fastcall",
22646 TYPE_ATTRIBUTES (TREE_TYPE (function))))
22647 tmp_regno = 0 /* EAX */;
22648 tmp = gen_rtx_REG (SImode, tmp_regno);
22651 xops[0] = gen_rtx_MEM (Pmode, this_reg);
22652 xops[1] = tmp;
22653 if (TARGET_64BIT)
22654 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
22655 else
22656 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
22658 /* Adjust the this parameter. */
22659 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
22660 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
22662 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
22663 xops[0] = GEN_INT (vcall_offset);
22664 xops[1] = tmp2;
22665 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
22666 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
22668 xops[1] = this_reg;
22669 if (TARGET_64BIT)
22670 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
22671 else
22672 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
22675 /* If necessary, drop THIS back to its stack slot. */
22676 if (this_reg && this_reg != this_param)
22678 xops[0] = this_reg;
22679 xops[1] = this_param;
22680 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
22683 xops[0] = XEXP (DECL_RTL (function), 0);
22684 if (TARGET_64BIT)
22686 if (!flag_pic || (*targetm.binds_local_p) (function))
22687 output_asm_insn ("jmp\t%P0", xops);
22688 /* All thunks should be in the same object as their target,
22689 and thus binds_local_p should be true. */
22690 else if (TARGET_64BIT_MS_ABI)
22691 gcc_unreachable ();
22692 else
22694 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
22695 tmp = gen_rtx_CONST (Pmode, tmp);
22696 tmp = gen_rtx_MEM (QImode, tmp);
22697 xops[0] = tmp;
22698 output_asm_insn ("jmp\t%A0", xops);
22701 else
22703 if (!flag_pic || (*targetm.binds_local_p) (function))
22704 output_asm_insn ("jmp\t%P0", xops);
22705 else
22706 #if TARGET_MACHO
22707 if (TARGET_MACHO)
22709 rtx sym_ref = XEXP (DECL_RTL (function), 0);
22710 tmp = (gen_rtx_SYMBOL_REF
22711 (Pmode,
22712 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
22713 tmp = gen_rtx_MEM (QImode, tmp);
22714 xops[0] = tmp;
22715 output_asm_insn ("jmp\t%0", xops);
22717 else
22718 #endif /* TARGET_MACHO */
22720 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
22721 output_set_got (tmp, NULL_RTX);
22723 xops[1] = tmp;
22724 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
22725 output_asm_insn ("jmp\t{*}%1", xops);
22730 static void
22731 x86_file_start (void)
22733 default_file_start ();
22734 #if TARGET_MACHO
22735 darwin_file_start ();
22736 #endif
22737 if (X86_FILE_START_VERSION_DIRECTIVE)
22738 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
22739 if (X86_FILE_START_FLTUSED)
22740 fputs ("\t.global\t__fltused\n", asm_out_file);
22741 if (ix86_asm_dialect == ASM_INTEL)
22742 fputs ("\t.intel_syntax\n", asm_out_file);
22746 x86_field_alignment (tree field, int computed)
22748 enum machine_mode mode;
22749 tree type = TREE_TYPE (field);
22751 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
22752 return computed;
22753 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
22754 ? get_inner_array_type (type) : type);
22755 if (mode == DFmode || mode == DCmode
22756 || GET_MODE_CLASS (mode) == MODE_INT
22757 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
22758 return MIN (32, computed);
22759 return computed;
22762 /* Output assembler code to FILE to increment profiler label # LABELNO
22763 for profiling a function entry. */
22764 void
22765 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
22767 if (TARGET_64BIT)
22769 #ifndef NO_PROFILE_COUNTERS
22770 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
22771 #endif
22773 if (!TARGET_64BIT_MS_ABI && flag_pic)
22774 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
22775 else
22776 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
22778 else if (flag_pic)
22780 #ifndef NO_PROFILE_COUNTERS
22781 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
22782 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
22783 #endif
22784 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
22786 else
22788 #ifndef NO_PROFILE_COUNTERS
22789 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
22790 PROFILE_COUNT_REGISTER);
22791 #endif
22792 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
22796 /* We don't have exact information about the insn sizes, but we may assume
22797 quite safely that we are informed about all 1 byte insns and memory
22798 address sizes. This is enough to eliminate unnecessary padding in
22799 99% of cases. */
22801 static int
22802 min_insn_size (rtx insn)
22804 int l = 0;
22806 if (!INSN_P (insn) || !active_insn_p (insn))
22807 return 0;
22809 /* Discard alignments we've emit and jump instructions. */
22810 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
22811 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
22812 return 0;
22813 if (JUMP_P (insn)
22814 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
22815 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
22816 return 0;
22818 /* Important case - calls are always 5 bytes.
22819 It is common to have many calls in the row. */
22820 if (CALL_P (insn)
22821 && symbolic_reference_mentioned_p (PATTERN (insn))
22822 && !SIBLING_CALL_P (insn))
22823 return 5;
22824 if (get_attr_length (insn) <= 1)
22825 return 1;
22827 /* For normal instructions we may rely on the sizes of addresses
22828 and the presence of symbol to require 4 bytes of encoding.
22829 This is not the case for jumps where references are PC relative. */
22830 if (!JUMP_P (insn))
22832 l = get_attr_length_address (insn);
22833 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
22834 l = 4;
22836 if (l)
22837 return 1+l;
22838 else
22839 return 2;
22842 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
22843 window. */
22845 static void
22846 ix86_avoid_jump_misspredicts (void)
22848 rtx insn, start = get_insns ();
22849 int nbytes = 0, njumps = 0;
22850 int isjump = 0;
22852 /* Look for all minimal intervals of instructions containing 4 jumps.
22853 The intervals are bounded by START and INSN. NBYTES is the total
22854 size of instructions in the interval including INSN and not including
22855 START. When the NBYTES is smaller than 16 bytes, it is possible
22856 that the end of START and INSN ends up in the same 16byte page.
22858 The smallest offset in the page INSN can start is the case where START
22859 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
22860 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
22862 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22865 nbytes += min_insn_size (insn);
22866 if (dump_file)
22867 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
22868 INSN_UID (insn), min_insn_size (insn));
22869 if ((JUMP_P (insn)
22870 && GET_CODE (PATTERN (insn)) != ADDR_VEC
22871 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
22872 || CALL_P (insn))
22873 njumps++;
22874 else
22875 continue;
22877 while (njumps > 3)
22879 start = NEXT_INSN (start);
22880 if ((JUMP_P (start)
22881 && GET_CODE (PATTERN (start)) != ADDR_VEC
22882 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
22883 || CALL_P (start))
22884 njumps--, isjump = 1;
22885 else
22886 isjump = 0;
22887 nbytes -= min_insn_size (start);
22889 gcc_assert (njumps >= 0);
22890 if (dump_file)
22891 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
22892 INSN_UID (start), INSN_UID (insn), nbytes);
22894 if (njumps == 3 && isjump && nbytes < 16)
22896 int padsize = 15 - nbytes + min_insn_size (insn);
22898 if (dump_file)
22899 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
22900 INSN_UID (insn), padsize);
22901 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
22906 /* AMD Athlon works faster
22907 when RET is not destination of conditional jump or directly preceded
22908 by other jump instruction. We avoid the penalty by inserting NOP just
22909 before the RET instructions in such cases. */
22910 static void
22911 ix86_pad_returns (void)
22913 edge e;
22914 edge_iterator ei;
22916 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
22918 basic_block bb = e->src;
22919 rtx ret = BB_END (bb);
22920 rtx prev;
22921 bool replace = false;
22923 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
22924 || !maybe_hot_bb_p (bb))
22925 continue;
22926 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
22927 if (active_insn_p (prev) || LABEL_P (prev))
22928 break;
22929 if (prev && LABEL_P (prev))
22931 edge e;
22932 edge_iterator ei;
22934 FOR_EACH_EDGE (e, ei, bb->preds)
22935 if (EDGE_FREQUENCY (e) && e->src->index >= 0
22936 && !(e->flags & EDGE_FALLTHRU))
22937 replace = true;
22939 if (!replace)
22941 prev = prev_active_insn (ret);
22942 if (prev
22943 && ((JUMP_P (prev) && any_condjump_p (prev))
22944 || CALL_P (prev)))
22945 replace = true;
22946 /* Empty functions get branch mispredict even when the jump destination
22947 is not visible to us. */
22948 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
22949 replace = true;
22951 if (replace)
22953 emit_insn_before (gen_return_internal_long (), ret);
22954 delete_insn (ret);
22959 /* Implement machine specific optimizations. We implement padding of returns
22960 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
22961 static void
22962 ix86_reorg (void)
22964 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
22965 ix86_pad_returns ();
22966 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
22967 ix86_avoid_jump_misspredicts ();
22970 /* Return nonzero when QImode register that must be represented via REX prefix
22971 is used. */
22972 bool
22973 x86_extended_QIreg_mentioned_p (rtx insn)
22975 int i;
22976 extract_insn_cached (insn);
22977 for (i = 0; i < recog_data.n_operands; i++)
22978 if (REG_P (recog_data.operand[i])
22979 && REGNO (recog_data.operand[i]) >= 4)
22980 return true;
22981 return false;
22984 /* Return nonzero when P points to register encoded via REX prefix.
22985 Called via for_each_rtx. */
22986 static int
22987 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
22989 unsigned int regno;
22990 if (!REG_P (*p))
22991 return 0;
22992 regno = REGNO (*p);
22993 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
22996 /* Return true when INSN mentions register that must be encoded using REX
22997 prefix. */
22998 bool
22999 x86_extended_reg_mentioned_p (rtx insn)
23001 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
23004 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
23005 optabs would emit if we didn't have TFmode patterns. */
23007 void
23008 x86_emit_floatuns (rtx operands[2])
23010 rtx neglab, donelab, i0, i1, f0, in, out;
23011 enum machine_mode mode, inmode;
23013 inmode = GET_MODE (operands[1]);
23014 gcc_assert (inmode == SImode || inmode == DImode);
23016 out = operands[0];
23017 in = force_reg (inmode, operands[1]);
23018 mode = GET_MODE (out);
23019 neglab = gen_label_rtx ();
23020 donelab = gen_label_rtx ();
23021 f0 = gen_reg_rtx (mode);
23023 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
23025 expand_float (out, in, 0);
23027 emit_jump_insn (gen_jump (donelab));
23028 emit_barrier ();
23030 emit_label (neglab);
23032 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
23033 1, OPTAB_DIRECT);
23034 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
23035 1, OPTAB_DIRECT);
23036 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
23038 expand_float (f0, i0, 0);
23040 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
23042 emit_label (donelab);
23045 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23046 with all elements equal to VAR. Return true if successful. */
23048 static bool
23049 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
23050 rtx target, rtx val)
23052 enum machine_mode smode, wsmode, wvmode;
23053 rtx x;
23055 switch (mode)
23057 case V2SImode:
23058 case V2SFmode:
23059 if (!mmx_ok)
23060 return false;
23061 /* FALLTHRU */
23063 case V2DFmode:
23064 case V2DImode:
23065 case V4SFmode:
23066 case V4SImode:
23067 val = force_reg (GET_MODE_INNER (mode), val);
23068 x = gen_rtx_VEC_DUPLICATE (mode, val);
23069 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23070 return true;
23072 case V4HImode:
23073 if (!mmx_ok)
23074 return false;
23075 if (TARGET_SSE || TARGET_3DNOW_A)
23077 val = gen_lowpart (SImode, val);
23078 x = gen_rtx_TRUNCATE (HImode, val);
23079 x = gen_rtx_VEC_DUPLICATE (mode, x);
23080 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23081 return true;
23083 else
23085 smode = HImode;
23086 wsmode = SImode;
23087 wvmode = V2SImode;
23088 goto widen;
23091 case V8QImode:
23092 if (!mmx_ok)
23093 return false;
23094 smode = QImode;
23095 wsmode = HImode;
23096 wvmode = V4HImode;
23097 goto widen;
23098 case V8HImode:
23099 if (TARGET_SSE2)
23101 rtx tmp1, tmp2;
23102 /* Extend HImode to SImode using a paradoxical SUBREG. */
23103 tmp1 = gen_reg_rtx (SImode);
23104 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23105 /* Insert the SImode value as low element of V4SImode vector. */
23106 tmp2 = gen_reg_rtx (V4SImode);
23107 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23108 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23109 CONST0_RTX (V4SImode),
23110 const1_rtx);
23111 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23112 /* Cast the V4SImode vector back to a V8HImode vector. */
23113 tmp1 = gen_reg_rtx (V8HImode);
23114 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
23115 /* Duplicate the low short through the whole low SImode word. */
23116 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
23117 /* Cast the V8HImode vector back to a V4SImode vector. */
23118 tmp2 = gen_reg_rtx (V4SImode);
23119 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23120 /* Replicate the low element of the V4SImode vector. */
23121 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23122 /* Cast the V2SImode back to V8HImode, and store in target. */
23123 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
23124 return true;
23126 smode = HImode;
23127 wsmode = SImode;
23128 wvmode = V4SImode;
23129 goto widen;
23130 case V16QImode:
23131 if (TARGET_SSE2)
23133 rtx tmp1, tmp2;
23134 /* Extend QImode to SImode using a paradoxical SUBREG. */
23135 tmp1 = gen_reg_rtx (SImode);
23136 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23137 /* Insert the SImode value as low element of V4SImode vector. */
23138 tmp2 = gen_reg_rtx (V4SImode);
23139 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23140 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23141 CONST0_RTX (V4SImode),
23142 const1_rtx);
23143 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23144 /* Cast the V4SImode vector back to a V16QImode vector. */
23145 tmp1 = gen_reg_rtx (V16QImode);
23146 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
23147 /* Duplicate the low byte through the whole low SImode word. */
23148 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23149 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23150 /* Cast the V16QImode vector back to a V4SImode vector. */
23151 tmp2 = gen_reg_rtx (V4SImode);
23152 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23153 /* Replicate the low element of the V4SImode vector. */
23154 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23155 /* Cast the V2SImode back to V16QImode, and store in target. */
23156 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
23157 return true;
23159 smode = QImode;
23160 wsmode = HImode;
23161 wvmode = V8HImode;
23162 goto widen;
23163 widen:
23164 /* Replicate the value once into the next wider mode and recurse. */
23165 val = convert_modes (wsmode, smode, val, true);
23166 x = expand_simple_binop (wsmode, ASHIFT, val,
23167 GEN_INT (GET_MODE_BITSIZE (smode)),
23168 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23169 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
23171 x = gen_reg_rtx (wvmode);
23172 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
23173 gcc_unreachable ();
23174 emit_move_insn (target, gen_lowpart (mode, x));
23175 return true;
23177 default:
23178 return false;
23182 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23183 whose ONE_VAR element is VAR, and other elements are zero. Return true
23184 if successful. */
23186 static bool
23187 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
23188 rtx target, rtx var, int one_var)
23190 enum machine_mode vsimode;
23191 rtx new_target;
23192 rtx x, tmp;
23194 switch (mode)
23196 case V2SFmode:
23197 case V2SImode:
23198 if (!mmx_ok)
23199 return false;
23200 /* FALLTHRU */
23202 case V2DFmode:
23203 case V2DImode:
23204 if (one_var != 0)
23205 return false;
23206 var = force_reg (GET_MODE_INNER (mode), var);
23207 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
23208 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23209 return true;
23211 case V4SFmode:
23212 case V4SImode:
23213 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
23214 new_target = gen_reg_rtx (mode);
23215 else
23216 new_target = target;
23217 var = force_reg (GET_MODE_INNER (mode), var);
23218 x = gen_rtx_VEC_DUPLICATE (mode, var);
23219 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
23220 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
23221 if (one_var != 0)
23223 /* We need to shuffle the value to the correct position, so
23224 create a new pseudo to store the intermediate result. */
23226 /* With SSE2, we can use the integer shuffle insns. */
23227 if (mode != V4SFmode && TARGET_SSE2)
23229 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
23230 GEN_INT (1),
23231 GEN_INT (one_var == 1 ? 0 : 1),
23232 GEN_INT (one_var == 2 ? 0 : 1),
23233 GEN_INT (one_var == 3 ? 0 : 1)));
23234 if (target != new_target)
23235 emit_move_insn (target, new_target);
23236 return true;
23239 /* Otherwise convert the intermediate result to V4SFmode and
23240 use the SSE1 shuffle instructions. */
23241 if (mode != V4SFmode)
23243 tmp = gen_reg_rtx (V4SFmode);
23244 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
23246 else
23247 tmp = new_target;
23249 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
23250 GEN_INT (1),
23251 GEN_INT (one_var == 1 ? 0 : 1),
23252 GEN_INT (one_var == 2 ? 0+4 : 1+4),
23253 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
23255 if (mode != V4SFmode)
23256 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
23257 else if (tmp != target)
23258 emit_move_insn (target, tmp);
23260 else if (target != new_target)
23261 emit_move_insn (target, new_target);
23262 return true;
23264 case V8HImode:
23265 case V16QImode:
23266 vsimode = V4SImode;
23267 goto widen;
23268 case V4HImode:
23269 case V8QImode:
23270 if (!mmx_ok)
23271 return false;
23272 vsimode = V2SImode;
23273 goto widen;
23274 widen:
23275 if (one_var != 0)
23276 return false;
23278 /* Zero extend the variable element to SImode and recurse. */
23279 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
23281 x = gen_reg_rtx (vsimode);
23282 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
23283 var, one_var))
23284 gcc_unreachable ();
23286 emit_move_insn (target, gen_lowpart (mode, x));
23287 return true;
23289 default:
23290 return false;
23294 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23295 consisting of the values in VALS. It is known that all elements
23296 except ONE_VAR are constants. Return true if successful. */
23298 static bool
23299 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
23300 rtx target, rtx vals, int one_var)
23302 rtx var = XVECEXP (vals, 0, one_var);
23303 enum machine_mode wmode;
23304 rtx const_vec, x;
23306 const_vec = copy_rtx (vals);
23307 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
23308 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
23310 switch (mode)
23312 case V2DFmode:
23313 case V2DImode:
23314 case V2SFmode:
23315 case V2SImode:
23316 /* For the two element vectors, it's just as easy to use
23317 the general case. */
23318 return false;
23320 case V4SFmode:
23321 case V4SImode:
23322 case V8HImode:
23323 case V4HImode:
23324 break;
23326 case V16QImode:
23327 wmode = V8HImode;
23328 goto widen;
23329 case V8QImode:
23330 wmode = V4HImode;
23331 goto widen;
23332 widen:
23333 /* There's no way to set one QImode entry easily. Combine
23334 the variable value with its adjacent constant value, and
23335 promote to an HImode set. */
23336 x = XVECEXP (vals, 0, one_var ^ 1);
23337 if (one_var & 1)
23339 var = convert_modes (HImode, QImode, var, true);
23340 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
23341 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23342 x = GEN_INT (INTVAL (x) & 0xff);
23344 else
23346 var = convert_modes (HImode, QImode, var, true);
23347 x = gen_int_mode (INTVAL (x) << 8, HImode);
23349 if (x != const0_rtx)
23350 var = expand_simple_binop (HImode, IOR, var, x, var,
23351 1, OPTAB_LIB_WIDEN);
23353 x = gen_reg_rtx (wmode);
23354 emit_move_insn (x, gen_lowpart (wmode, const_vec));
23355 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
23357 emit_move_insn (target, gen_lowpart (mode, x));
23358 return true;
23360 default:
23361 return false;
23364 emit_move_insn (target, const_vec);
23365 ix86_expand_vector_set (mmx_ok, target, var, one_var);
23366 return true;
23369 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
23370 all values variable, and none identical. */
23372 static void
23373 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
23374 rtx target, rtx vals)
23376 enum machine_mode half_mode = GET_MODE_INNER (mode);
23377 rtx op0 = NULL, op1 = NULL;
23378 bool use_vec_concat = false;
23380 switch (mode)
23382 case V2SFmode:
23383 case V2SImode:
23384 if (!mmx_ok && !TARGET_SSE)
23385 break;
23386 /* FALLTHRU */
23388 case V2DFmode:
23389 case V2DImode:
23390 /* For the two element vectors, we always implement VEC_CONCAT. */
23391 op0 = XVECEXP (vals, 0, 0);
23392 op1 = XVECEXP (vals, 0, 1);
23393 use_vec_concat = true;
23394 break;
23396 case V4SFmode:
23397 half_mode = V2SFmode;
23398 goto half;
23399 case V4SImode:
23400 half_mode = V2SImode;
23401 goto half;
23402 half:
23404 rtvec v;
23406 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
23407 Recurse to load the two halves. */
23409 op0 = gen_reg_rtx (half_mode);
23410 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
23411 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
23413 op1 = gen_reg_rtx (half_mode);
23414 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
23415 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
23417 use_vec_concat = true;
23419 break;
23421 case V8HImode:
23422 case V16QImode:
23423 case V4HImode:
23424 case V8QImode:
23425 break;
23427 default:
23428 gcc_unreachable ();
23431 if (use_vec_concat)
23433 if (!register_operand (op0, half_mode))
23434 op0 = force_reg (half_mode, op0);
23435 if (!register_operand (op1, half_mode))
23436 op1 = force_reg (half_mode, op1);
23438 emit_insn (gen_rtx_SET (VOIDmode, target,
23439 gen_rtx_VEC_CONCAT (mode, op0, op1)));
23441 else
23443 int i, j, n_elts, n_words, n_elt_per_word;
23444 enum machine_mode inner_mode;
23445 rtx words[4], shift;
23447 inner_mode = GET_MODE_INNER (mode);
23448 n_elts = GET_MODE_NUNITS (mode);
23449 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
23450 n_elt_per_word = n_elts / n_words;
23451 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
23453 for (i = 0; i < n_words; ++i)
23455 rtx word = NULL_RTX;
23457 for (j = 0; j < n_elt_per_word; ++j)
23459 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
23460 elt = convert_modes (word_mode, inner_mode, elt, true);
23462 if (j == 0)
23463 word = elt;
23464 else
23466 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
23467 word, 1, OPTAB_LIB_WIDEN);
23468 word = expand_simple_binop (word_mode, IOR, word, elt,
23469 word, 1, OPTAB_LIB_WIDEN);
23473 words[i] = word;
23476 if (n_words == 1)
23477 emit_move_insn (target, gen_lowpart (mode, words[0]));
23478 else if (n_words == 2)
23480 rtx tmp = gen_reg_rtx (mode);
23481 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
23482 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
23483 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
23484 emit_move_insn (target, tmp);
23486 else if (n_words == 4)
23488 rtx tmp = gen_reg_rtx (V4SImode);
23489 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
23490 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
23491 emit_move_insn (target, gen_lowpart (mode, tmp));
23493 else
23494 gcc_unreachable ();
23498 /* Initialize vector TARGET via VALS. Suppress the use of MMX
23499 instructions unless MMX_OK is true. */
23501 void
23502 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
23504 enum machine_mode mode = GET_MODE (target);
23505 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23506 int n_elts = GET_MODE_NUNITS (mode);
23507 int n_var = 0, one_var = -1;
23508 bool all_same = true, all_const_zero = true;
23509 int i;
23510 rtx x;
23512 for (i = 0; i < n_elts; ++i)
23514 x = XVECEXP (vals, 0, i);
23515 if (!CONSTANT_P (x))
23516 n_var++, one_var = i;
23517 else if (x != CONST0_RTX (inner_mode))
23518 all_const_zero = false;
23519 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
23520 all_same = false;
23523 /* Constants are best loaded from the constant pool. */
23524 if (n_var == 0)
23526 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
23527 return;
23530 /* If all values are identical, broadcast the value. */
23531 if (all_same
23532 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
23533 XVECEXP (vals, 0, 0)))
23534 return;
23536 /* Values where only one field is non-constant are best loaded from
23537 the pool and overwritten via move later. */
23538 if (n_var == 1)
23540 if (all_const_zero
23541 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
23542 XVECEXP (vals, 0, one_var),
23543 one_var))
23544 return;
23546 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
23547 return;
23550 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
23553 void
23554 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
23556 enum machine_mode mode = GET_MODE (target);
23557 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23558 bool use_vec_merge = false;
23559 rtx tmp;
23561 switch (mode)
23563 case V2SFmode:
23564 case V2SImode:
23565 if (mmx_ok)
23567 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
23568 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
23569 if (elt == 0)
23570 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
23571 else
23572 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
23573 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23574 return;
23576 break;
23578 case V2DImode:
23579 use_vec_merge = TARGET_SSE4_1;
23580 if (use_vec_merge)
23581 break;
23583 case V2DFmode:
23585 rtx op0, op1;
23587 /* For the two element vectors, we implement a VEC_CONCAT with
23588 the extraction of the other element. */
23590 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
23591 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
23593 if (elt == 0)
23594 op0 = val, op1 = tmp;
23595 else
23596 op0 = tmp, op1 = val;
23598 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
23599 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23601 return;
23603 case V4SFmode:
23604 use_vec_merge = TARGET_SSE4_1;
23605 if (use_vec_merge)
23606 break;
23608 switch (elt)
23610 case 0:
23611 use_vec_merge = true;
23612 break;
23614 case 1:
23615 /* tmp = target = A B C D */
23616 tmp = copy_to_reg (target);
23617 /* target = A A B B */
23618 emit_insn (gen_sse_unpcklps (target, target, target));
23619 /* target = X A B B */
23620 ix86_expand_vector_set (false, target, val, 0);
23621 /* target = A X C D */
23622 emit_insn (gen_sse_shufps_1 (target, target, tmp,
23623 GEN_INT (1), GEN_INT (0),
23624 GEN_INT (2+4), GEN_INT (3+4)));
23625 return;
23627 case 2:
23628 /* tmp = target = A B C D */
23629 tmp = copy_to_reg (target);
23630 /* tmp = X B C D */
23631 ix86_expand_vector_set (false, tmp, val, 0);
23632 /* target = A B X D */
23633 emit_insn (gen_sse_shufps_1 (target, target, tmp,
23634 GEN_INT (0), GEN_INT (1),
23635 GEN_INT (0+4), GEN_INT (3+4)));
23636 return;
23638 case 3:
23639 /* tmp = target = A B C D */
23640 tmp = copy_to_reg (target);
23641 /* tmp = X B C D */
23642 ix86_expand_vector_set (false, tmp, val, 0);
23643 /* target = A B X D */
23644 emit_insn (gen_sse_shufps_1 (target, target, tmp,
23645 GEN_INT (0), GEN_INT (1),
23646 GEN_INT (2+4), GEN_INT (0+4)));
23647 return;
23649 default:
23650 gcc_unreachable ();
23652 break;
23654 case V4SImode:
23655 use_vec_merge = TARGET_SSE4_1;
23656 if (use_vec_merge)
23657 break;
23659 /* Element 0 handled by vec_merge below. */
23660 if (elt == 0)
23662 use_vec_merge = true;
23663 break;
23666 if (TARGET_SSE2)
23668 /* With SSE2, use integer shuffles to swap element 0 and ELT,
23669 store into element 0, then shuffle them back. */
23671 rtx order[4];
23673 order[0] = GEN_INT (elt);
23674 order[1] = const1_rtx;
23675 order[2] = const2_rtx;
23676 order[3] = GEN_INT (3);
23677 order[elt] = const0_rtx;
23679 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
23680 order[1], order[2], order[3]));
23682 ix86_expand_vector_set (false, target, val, 0);
23684 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
23685 order[1], order[2], order[3]));
23687 else
23689 /* For SSE1, we have to reuse the V4SF code. */
23690 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
23691 gen_lowpart (SFmode, val), elt);
23693 return;
23695 case V8HImode:
23696 use_vec_merge = TARGET_SSE2;
23697 break;
23698 case V4HImode:
23699 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
23700 break;
23702 case V16QImode:
23703 use_vec_merge = TARGET_SSE4_1;
23704 break;
23706 case V8QImode:
23707 default:
23708 break;
23711 if (use_vec_merge)
23713 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
23714 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
23715 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23717 else
23719 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
23721 emit_move_insn (mem, target);
23723 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
23724 emit_move_insn (tmp, val);
23726 emit_move_insn (target, mem);
23730 void
23731 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
23733 enum machine_mode mode = GET_MODE (vec);
23734 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23735 bool use_vec_extr = false;
23736 rtx tmp;
23738 switch (mode)
23740 case V2SImode:
23741 case V2SFmode:
23742 if (!mmx_ok)
23743 break;
23744 /* FALLTHRU */
23746 case V2DFmode:
23747 case V2DImode:
23748 use_vec_extr = true;
23749 break;
23751 case V4SFmode:
23752 use_vec_extr = TARGET_SSE4_1;
23753 if (use_vec_extr)
23754 break;
23756 switch (elt)
23758 case 0:
23759 tmp = vec;
23760 break;
23762 case 1:
23763 case 3:
23764 tmp = gen_reg_rtx (mode);
23765 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
23766 GEN_INT (elt), GEN_INT (elt),
23767 GEN_INT (elt+4), GEN_INT (elt+4)));
23768 break;
23770 case 2:
23771 tmp = gen_reg_rtx (mode);
23772 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
23773 break;
23775 default:
23776 gcc_unreachable ();
23778 vec = tmp;
23779 use_vec_extr = true;
23780 elt = 0;
23781 break;
23783 case V4SImode:
23784 use_vec_extr = TARGET_SSE4_1;
23785 if (use_vec_extr)
23786 break;
23788 if (TARGET_SSE2)
23790 switch (elt)
23792 case 0:
23793 tmp = vec;
23794 break;
23796 case 1:
23797 case 3:
23798 tmp = gen_reg_rtx (mode);
23799 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
23800 GEN_INT (elt), GEN_INT (elt),
23801 GEN_INT (elt), GEN_INT (elt)));
23802 break;
23804 case 2:
23805 tmp = gen_reg_rtx (mode);
23806 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
23807 break;
23809 default:
23810 gcc_unreachable ();
23812 vec = tmp;
23813 use_vec_extr = true;
23814 elt = 0;
23816 else
23818 /* For SSE1, we have to reuse the V4SF code. */
23819 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
23820 gen_lowpart (V4SFmode, vec), elt);
23821 return;
23823 break;
23825 case V8HImode:
23826 use_vec_extr = TARGET_SSE2;
23827 break;
23828 case V4HImode:
23829 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
23830 break;
23832 case V16QImode:
23833 use_vec_extr = TARGET_SSE4_1;
23834 break;
23836 case V8QImode:
23837 /* ??? Could extract the appropriate HImode element and shift. */
23838 default:
23839 break;
23842 if (use_vec_extr)
23844 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
23845 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
23847 /* Let the rtl optimizers know about the zero extension performed. */
23848 if (inner_mode == QImode || inner_mode == HImode)
23850 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
23851 target = gen_lowpart (SImode, target);
23854 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23856 else
23858 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
23860 emit_move_insn (mem, vec);
23862 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
23863 emit_move_insn (target, tmp);
23867 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
23868 pattern to reduce; DEST is the destination; IN is the input vector. */
23870 void
23871 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
23873 rtx tmp1, tmp2, tmp3;
23875 tmp1 = gen_reg_rtx (V4SFmode);
23876 tmp2 = gen_reg_rtx (V4SFmode);
23877 tmp3 = gen_reg_rtx (V4SFmode);
23879 emit_insn (gen_sse_movhlps (tmp1, in, in));
23880 emit_insn (fn (tmp2, tmp1, in));
23882 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
23883 GEN_INT (1), GEN_INT (1),
23884 GEN_INT (1+4), GEN_INT (1+4)));
23885 emit_insn (fn (dest, tmp2, tmp3));
23888 /* Target hook for scalar_mode_supported_p. */
23889 static bool
23890 ix86_scalar_mode_supported_p (enum machine_mode mode)
23892 if (DECIMAL_FLOAT_MODE_P (mode))
23893 return true;
23894 else if (mode == TFmode)
23895 return TARGET_64BIT;
23896 else
23897 return default_scalar_mode_supported_p (mode);
23900 /* Implements target hook vector_mode_supported_p. */
23901 static bool
23902 ix86_vector_mode_supported_p (enum machine_mode mode)
23904 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
23905 return true;
23906 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
23907 return true;
23908 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
23909 return true;
23910 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
23911 return true;
23912 return false;
23915 /* Target hook for c_mode_for_suffix. */
23916 static enum machine_mode
23917 ix86_c_mode_for_suffix (char suffix)
23919 if (TARGET_64BIT && suffix == 'q')
23920 return TFmode;
23921 if (TARGET_MMX && suffix == 'w')
23922 return XFmode;
23924 return VOIDmode;
23927 /* Worker function for TARGET_MD_ASM_CLOBBERS.
23929 We do this in the new i386 backend to maintain source compatibility
23930 with the old cc0-based compiler. */
23932 static tree
23933 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
23934 tree inputs ATTRIBUTE_UNUSED,
23935 tree clobbers)
23937 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
23938 clobbers);
23939 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
23940 clobbers);
23941 return clobbers;
23944 /* Implements target vector targetm.asm.encode_section_info. This
23945 is not used by netware. */
23947 static void ATTRIBUTE_UNUSED
23948 ix86_encode_section_info (tree decl, rtx rtl, int first)
23950 default_encode_section_info (decl, rtl, first);
23952 if (TREE_CODE (decl) == VAR_DECL
23953 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
23954 && ix86_in_large_data_p (decl))
23955 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
23958 /* Worker function for REVERSE_CONDITION. */
23960 enum rtx_code
23961 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
23963 return (mode != CCFPmode && mode != CCFPUmode
23964 ? reverse_condition (code)
23965 : reverse_condition_maybe_unordered (code));
23968 /* Output code to perform an x87 FP register move, from OPERANDS[1]
23969 to OPERANDS[0]. */
23971 const char *
23972 output_387_reg_move (rtx insn, rtx *operands)
23974 if (REG_P (operands[0]))
23976 if (REG_P (operands[1])
23977 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
23979 if (REGNO (operands[0]) == FIRST_STACK_REG)
23980 return output_387_ffreep (operands, 0);
23981 return "fstp\t%y0";
23983 if (STACK_TOP_P (operands[0]))
23984 return "fld%z1\t%y1";
23985 return "fst\t%y0";
23987 else if (MEM_P (operands[0]))
23989 gcc_assert (REG_P (operands[1]));
23990 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
23991 return "fstp%z0\t%y0";
23992 else
23994 /* There is no non-popping store to memory for XFmode.
23995 So if we need one, follow the store with a load. */
23996 if (GET_MODE (operands[0]) == XFmode)
23997 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
23998 else
23999 return "fst%z0\t%y0";
24002 else
24003 gcc_unreachable();
24006 /* Output code to perform a conditional jump to LABEL, if C2 flag in
24007 FP status register is set. */
24009 void
24010 ix86_emit_fp_unordered_jump (rtx label)
24012 rtx reg = gen_reg_rtx (HImode);
24013 rtx temp;
24015 emit_insn (gen_x86_fnstsw_1 (reg));
24017 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
24019 emit_insn (gen_x86_sahf_1 (reg));
24021 temp = gen_rtx_REG (CCmode, FLAGS_REG);
24022 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
24024 else
24026 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
24028 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24029 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
24032 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
24033 gen_rtx_LABEL_REF (VOIDmode, label),
24034 pc_rtx);
24035 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
24037 emit_jump_insn (temp);
24038 predict_jump (REG_BR_PROB_BASE * 10 / 100);
24041 /* Output code to perform a log1p XFmode calculation. */
24043 void ix86_emit_i387_log1p (rtx op0, rtx op1)
24045 rtx label1 = gen_label_rtx ();
24046 rtx label2 = gen_label_rtx ();
24048 rtx tmp = gen_reg_rtx (XFmode);
24049 rtx tmp2 = gen_reg_rtx (XFmode);
24051 emit_insn (gen_absxf2 (tmp, op1));
24052 emit_insn (gen_cmpxf (tmp,
24053 CONST_DOUBLE_FROM_REAL_VALUE (
24054 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
24055 XFmode)));
24056 emit_jump_insn (gen_bge (label1));
24058 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24059 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
24060 emit_jump (label2);
24062 emit_label (label1);
24063 emit_move_insn (tmp, CONST1_RTX (XFmode));
24064 emit_insn (gen_addxf3 (tmp, op1, tmp));
24065 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24066 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
24068 emit_label (label2);
24071 /* Output code to perform a Newton-Rhapson approximation of a single precision
24072 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
24074 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
24076 rtx x0, x1, e0, e1, two;
24078 x0 = gen_reg_rtx (mode);
24079 e0 = gen_reg_rtx (mode);
24080 e1 = gen_reg_rtx (mode);
24081 x1 = gen_reg_rtx (mode);
24083 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
24085 if (VECTOR_MODE_P (mode))
24086 two = ix86_build_const_vector (SFmode, true, two);
24088 two = force_reg (mode, two);
24090 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
24092 /* x0 = 1./b estimate */
24093 emit_insn (gen_rtx_SET (VOIDmode, x0,
24094 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
24095 UNSPEC_RCP)));
24096 /* e0 = x0 * b */
24097 emit_insn (gen_rtx_SET (VOIDmode, e0,
24098 gen_rtx_MULT (mode, x0, b)));
24099 /* e1 = 2. - e0 */
24100 emit_insn (gen_rtx_SET (VOIDmode, e1,
24101 gen_rtx_MINUS (mode, two, e0)));
24102 /* x1 = x0 * e1 */
24103 emit_insn (gen_rtx_SET (VOIDmode, x1,
24104 gen_rtx_MULT (mode, x0, e1)));
24105 /* res = a * x1 */
24106 emit_insn (gen_rtx_SET (VOIDmode, res,
24107 gen_rtx_MULT (mode, a, x1)));
24110 /* Output code to perform a Newton-Rhapson approximation of a
24111 single precision floating point [reciprocal] square root. */
24113 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
24114 bool recip)
24116 rtx x0, e0, e1, e2, e3, three, half, zero, mask;
24118 x0 = gen_reg_rtx (mode);
24119 e0 = gen_reg_rtx (mode);
24120 e1 = gen_reg_rtx (mode);
24121 e2 = gen_reg_rtx (mode);
24122 e3 = gen_reg_rtx (mode);
24124 three = CONST_DOUBLE_FROM_REAL_VALUE (dconst3, SFmode);
24125 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, SFmode);
24127 mask = gen_reg_rtx (mode);
24129 if (VECTOR_MODE_P (mode))
24131 three = ix86_build_const_vector (SFmode, true, three);
24132 half = ix86_build_const_vector (SFmode, true, half);
24135 three = force_reg (mode, three);
24136 half = force_reg (mode, half);
24138 zero = force_reg (mode, CONST0_RTX(mode));
24140 /* sqrt(a) = 0.5 * a * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a))
24141 1.0 / sqrt(a) = 0.5 * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a)) */
24143 /* Compare a to zero. */
24144 emit_insn (gen_rtx_SET (VOIDmode, mask,
24145 gen_rtx_NE (mode, a, zero)));
24147 /* x0 = 1./sqrt(a) estimate */
24148 emit_insn (gen_rtx_SET (VOIDmode, x0,
24149 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
24150 UNSPEC_RSQRT)));
24151 /* Filter out infinity. */
24152 if (VECTOR_MODE_P (mode))
24153 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (V4SFmode, x0),
24154 gen_rtx_AND (mode,
24155 gen_lowpart (V4SFmode, x0),
24156 gen_lowpart (V4SFmode, mask))));
24157 else
24158 emit_insn (gen_rtx_SET (VOIDmode, x0,
24159 gen_rtx_AND (mode, x0, mask)));
24161 /* e0 = x0 * a */
24162 emit_insn (gen_rtx_SET (VOIDmode, e0,
24163 gen_rtx_MULT (mode, x0, a)));
24164 /* e1 = e0 * x0 */
24165 emit_insn (gen_rtx_SET (VOIDmode, e1,
24166 gen_rtx_MULT (mode, e0, x0)));
24167 /* e2 = 3. - e1 */
24168 emit_insn (gen_rtx_SET (VOIDmode, e2,
24169 gen_rtx_MINUS (mode, three, e1)));
24170 if (recip)
24171 /* e3 = .5 * x0 */
24172 emit_insn (gen_rtx_SET (VOIDmode, e3,
24173 gen_rtx_MULT (mode, half, x0)));
24174 else
24175 /* e3 = .5 * e0 */
24176 emit_insn (gen_rtx_SET (VOIDmode, e3,
24177 gen_rtx_MULT (mode, half, e0)));
24178 /* ret = e2 * e3 */
24179 emit_insn (gen_rtx_SET (VOIDmode, res,
24180 gen_rtx_MULT (mode, e2, e3)));
24183 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
24185 static void ATTRIBUTE_UNUSED
24186 i386_solaris_elf_named_section (const char *name, unsigned int flags,
24187 tree decl)
24189 /* With Binutils 2.15, the "@unwind" marker must be specified on
24190 every occurrence of the ".eh_frame" section, not just the first
24191 one. */
24192 if (TARGET_64BIT
24193 && strcmp (name, ".eh_frame") == 0)
24195 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
24196 flags & SECTION_WRITE ? "aw" : "a");
24197 return;
24199 default_elf_asm_named_section (name, flags, decl);
24202 /* Return the mangling of TYPE if it is an extended fundamental type. */
24204 static const char *
24205 ix86_mangle_type (const_tree type)
24207 type = TYPE_MAIN_VARIANT (type);
24209 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
24210 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
24211 return NULL;
24213 switch (TYPE_MODE (type))
24215 case TFmode:
24216 /* __float128 is "g". */
24217 return "g";
24218 case XFmode:
24219 /* "long double" or __float80 is "e". */
24220 return "e";
24221 default:
24222 return NULL;
24226 /* For 32-bit code we can save PIC register setup by using
24227 __stack_chk_fail_local hidden function instead of calling
24228 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
24229 register, so it is better to call __stack_chk_fail directly. */
24231 static tree
24232 ix86_stack_protect_fail (void)
24234 return TARGET_64BIT
24235 ? default_external_stack_protect_fail ()
24236 : default_hidden_stack_protect_fail ();
24239 /* Select a format to encode pointers in exception handling data. CODE
24240 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
24241 true if the symbol may be affected by dynamic relocations.
24243 ??? All x86 object file formats are capable of representing this.
24244 After all, the relocation needed is the same as for the call insn.
24245 Whether or not a particular assembler allows us to enter such, I
24246 guess we'll have to see. */
24248 asm_preferred_eh_data_format (int code, int global)
24250 if (flag_pic)
24252 int type = DW_EH_PE_sdata8;
24253 if (!TARGET_64BIT
24254 || ix86_cmodel == CM_SMALL_PIC
24255 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
24256 type = DW_EH_PE_sdata4;
24257 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
24259 if (ix86_cmodel == CM_SMALL
24260 || (ix86_cmodel == CM_MEDIUM && code))
24261 return DW_EH_PE_udata4;
24262 return DW_EH_PE_absptr;
24265 /* Expand copysign from SIGN to the positive value ABS_VALUE
24266 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
24267 the sign-bit. */
24268 static void
24269 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
24271 enum machine_mode mode = GET_MODE (sign);
24272 rtx sgn = gen_reg_rtx (mode);
24273 if (mask == NULL_RTX)
24275 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
24276 if (!VECTOR_MODE_P (mode))
24278 /* We need to generate a scalar mode mask in this case. */
24279 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
24280 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
24281 mask = gen_reg_rtx (mode);
24282 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
24285 else
24286 mask = gen_rtx_NOT (mode, mask);
24287 emit_insn (gen_rtx_SET (VOIDmode, sgn,
24288 gen_rtx_AND (mode, mask, sign)));
24289 emit_insn (gen_rtx_SET (VOIDmode, result,
24290 gen_rtx_IOR (mode, abs_value, sgn)));
24293 /* Expand fabs (OP0) and return a new rtx that holds the result. The
24294 mask for masking out the sign-bit is stored in *SMASK, if that is
24295 non-null. */
24296 static rtx
24297 ix86_expand_sse_fabs (rtx op0, rtx *smask)
24299 enum machine_mode mode = GET_MODE (op0);
24300 rtx xa, mask;
24302 xa = gen_reg_rtx (mode);
24303 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
24304 if (!VECTOR_MODE_P (mode))
24306 /* We need to generate a scalar mode mask in this case. */
24307 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
24308 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
24309 mask = gen_reg_rtx (mode);
24310 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
24312 emit_insn (gen_rtx_SET (VOIDmode, xa,
24313 gen_rtx_AND (mode, op0, mask)));
24315 if (smask)
24316 *smask = mask;
24318 return xa;
24321 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
24322 swapping the operands if SWAP_OPERANDS is true. The expanded
24323 code is a forward jump to a newly created label in case the
24324 comparison is true. The generated label rtx is returned. */
24325 static rtx
24326 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
24327 bool swap_operands)
24329 rtx label, tmp;
24331 if (swap_operands)
24333 tmp = op0;
24334 op0 = op1;
24335 op1 = tmp;
24338 label = gen_label_rtx ();
24339 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
24340 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24341 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
24342 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
24343 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24344 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
24345 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24346 JUMP_LABEL (tmp) = label;
24348 return label;
24351 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
24352 using comparison code CODE. Operands are swapped for the comparison if
24353 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
24354 static rtx
24355 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
24356 bool swap_operands)
24358 enum machine_mode mode = GET_MODE (op0);
24359 rtx mask = gen_reg_rtx (mode);
24361 if (swap_operands)
24363 rtx tmp = op0;
24364 op0 = op1;
24365 op1 = tmp;
24368 if (mode == DFmode)
24369 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
24370 gen_rtx_fmt_ee (code, mode, op0, op1)));
24371 else
24372 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
24373 gen_rtx_fmt_ee (code, mode, op0, op1)));
24375 return mask;
24378 /* Generate and return a rtx of mode MODE for 2**n where n is the number
24379 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
24380 static rtx
24381 ix86_gen_TWO52 (enum machine_mode mode)
24383 REAL_VALUE_TYPE TWO52r;
24384 rtx TWO52;
24386 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
24387 TWO52 = const_double_from_real_value (TWO52r, mode);
24388 TWO52 = force_reg (mode, TWO52);
24390 return TWO52;
24393 /* Expand SSE sequence for computing lround from OP1 storing
24394 into OP0. */
24395 void
24396 ix86_expand_lround (rtx op0, rtx op1)
24398 /* C code for the stuff we're doing below:
24399 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
24400 return (long)tmp;
24402 enum machine_mode mode = GET_MODE (op1);
24403 const struct real_format *fmt;
24404 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
24405 rtx adj;
24407 /* load nextafter (0.5, 0.0) */
24408 fmt = REAL_MODE_FORMAT (mode);
24409 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
24410 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
24412 /* adj = copysign (0.5, op1) */
24413 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
24414 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
24416 /* adj = op1 + adj */
24417 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
24419 /* op0 = (imode)adj */
24420 expand_fix (op0, adj, 0);
24423 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
24424 into OPERAND0. */
24425 void
24426 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
24428 /* C code for the stuff we're doing below (for do_floor):
24429 xi = (long)op1;
24430 xi -= (double)xi > op1 ? 1 : 0;
24431 return xi;
24433 enum machine_mode fmode = GET_MODE (op1);
24434 enum machine_mode imode = GET_MODE (op0);
24435 rtx ireg, freg, label, tmp;
24437 /* reg = (long)op1 */
24438 ireg = gen_reg_rtx (imode);
24439 expand_fix (ireg, op1, 0);
24441 /* freg = (double)reg */
24442 freg = gen_reg_rtx (fmode);
24443 expand_float (freg, ireg, 0);
24445 /* ireg = (freg > op1) ? ireg - 1 : ireg */
24446 label = ix86_expand_sse_compare_and_jump (UNLE,
24447 freg, op1, !do_floor);
24448 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
24449 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
24450 emit_move_insn (ireg, tmp);
24452 emit_label (label);
24453 LABEL_NUSES (label) = 1;
24455 emit_move_insn (op0, ireg);
24458 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
24459 result in OPERAND0. */
24460 void
24461 ix86_expand_rint (rtx operand0, rtx operand1)
24463 /* C code for the stuff we're doing below:
24464 xa = fabs (operand1);
24465 if (!isless (xa, 2**52))
24466 return operand1;
24467 xa = xa + 2**52 - 2**52;
24468 return copysign (xa, operand1);
24470 enum machine_mode mode = GET_MODE (operand0);
24471 rtx res, xa, label, TWO52, mask;
24473 res = gen_reg_rtx (mode);
24474 emit_move_insn (res, operand1);
24476 /* xa = abs (operand1) */
24477 xa = ix86_expand_sse_fabs (res, &mask);
24479 /* if (!isless (xa, TWO52)) goto label; */
24480 TWO52 = ix86_gen_TWO52 (mode);
24481 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24483 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24484 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
24486 ix86_sse_copysign_to_positive (res, xa, res, mask);
24488 emit_label (label);
24489 LABEL_NUSES (label) = 1;
24491 emit_move_insn (operand0, res);
24494 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
24495 into OPERAND0. */
24496 void
24497 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
24499 /* C code for the stuff we expand below.
24500 double xa = fabs (x), x2;
24501 if (!isless (xa, TWO52))
24502 return x;
24503 xa = xa + TWO52 - TWO52;
24504 x2 = copysign (xa, x);
24505 Compensate. Floor:
24506 if (x2 > x)
24507 x2 -= 1;
24508 Compensate. Ceil:
24509 if (x2 < x)
24510 x2 -= -1;
24511 return x2;
24513 enum machine_mode mode = GET_MODE (operand0);
24514 rtx xa, TWO52, tmp, label, one, res, mask;
24516 TWO52 = ix86_gen_TWO52 (mode);
24518 /* Temporary for holding the result, initialized to the input
24519 operand to ease control flow. */
24520 res = gen_reg_rtx (mode);
24521 emit_move_insn (res, operand1);
24523 /* xa = abs (operand1) */
24524 xa = ix86_expand_sse_fabs (res, &mask);
24526 /* if (!isless (xa, TWO52)) goto label; */
24527 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24529 /* xa = xa + TWO52 - TWO52; */
24530 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24531 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
24533 /* xa = copysign (xa, operand1) */
24534 ix86_sse_copysign_to_positive (xa, xa, res, mask);
24536 /* generate 1.0 or -1.0 */
24537 one = force_reg (mode,
24538 const_double_from_real_value (do_floor
24539 ? dconst1 : dconstm1, mode));
24541 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
24542 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
24543 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24544 gen_rtx_AND (mode, one, tmp)));
24545 /* We always need to subtract here to preserve signed zero. */
24546 tmp = expand_simple_binop (mode, MINUS,
24547 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24548 emit_move_insn (res, tmp);
24550 emit_label (label);
24551 LABEL_NUSES (label) = 1;
24553 emit_move_insn (operand0, res);
24556 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
24557 into OPERAND0. */
24558 void
24559 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
24561 /* C code for the stuff we expand below.
24562 double xa = fabs (x), x2;
24563 if (!isless (xa, TWO52))
24564 return x;
24565 x2 = (double)(long)x;
24566 Compensate. Floor:
24567 if (x2 > x)
24568 x2 -= 1;
24569 Compensate. Ceil:
24570 if (x2 < x)
24571 x2 += 1;
24572 if (HONOR_SIGNED_ZEROS (mode))
24573 return copysign (x2, x);
24574 return x2;
24576 enum machine_mode mode = GET_MODE (operand0);
24577 rtx xa, xi, TWO52, tmp, label, one, res, mask;
24579 TWO52 = ix86_gen_TWO52 (mode);
24581 /* Temporary for holding the result, initialized to the input
24582 operand to ease control flow. */
24583 res = gen_reg_rtx (mode);
24584 emit_move_insn (res, operand1);
24586 /* xa = abs (operand1) */
24587 xa = ix86_expand_sse_fabs (res, &mask);
24589 /* if (!isless (xa, TWO52)) goto label; */
24590 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24592 /* xa = (double)(long)x */
24593 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
24594 expand_fix (xi, res, 0);
24595 expand_float (xa, xi, 0);
24597 /* generate 1.0 */
24598 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
24600 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
24601 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
24602 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24603 gen_rtx_AND (mode, one, tmp)));
24604 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
24605 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24606 emit_move_insn (res, tmp);
24608 if (HONOR_SIGNED_ZEROS (mode))
24609 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
24611 emit_label (label);
24612 LABEL_NUSES (label) = 1;
24614 emit_move_insn (operand0, res);
24617 /* Expand SSE sequence for computing round from OPERAND1 storing
24618 into OPERAND0. Sequence that works without relying on DImode truncation
24619 via cvttsd2siq that is only available on 64bit targets. */
24620 void
24621 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
24623 /* C code for the stuff we expand below.
24624 double xa = fabs (x), xa2, x2;
24625 if (!isless (xa, TWO52))
24626 return x;
24627 Using the absolute value and copying back sign makes
24628 -0.0 -> -0.0 correct.
24629 xa2 = xa + TWO52 - TWO52;
24630 Compensate.
24631 dxa = xa2 - xa;
24632 if (dxa <= -0.5)
24633 xa2 += 1;
24634 else if (dxa > 0.5)
24635 xa2 -= 1;
24636 x2 = copysign (xa2, x);
24637 return x2;
24639 enum machine_mode mode = GET_MODE (operand0);
24640 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
24642 TWO52 = ix86_gen_TWO52 (mode);
24644 /* Temporary for holding the result, initialized to the input
24645 operand to ease control flow. */
24646 res = gen_reg_rtx (mode);
24647 emit_move_insn (res, operand1);
24649 /* xa = abs (operand1) */
24650 xa = ix86_expand_sse_fabs (res, &mask);
24652 /* if (!isless (xa, TWO52)) goto label; */
24653 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24655 /* xa2 = xa + TWO52 - TWO52; */
24656 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24657 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
24659 /* dxa = xa2 - xa; */
24660 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
24662 /* generate 0.5, 1.0 and -0.5 */
24663 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
24664 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
24665 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
24666 0, OPTAB_DIRECT);
24668 /* Compensate. */
24669 tmp = gen_reg_rtx (mode);
24670 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
24671 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
24672 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24673 gen_rtx_AND (mode, one, tmp)));
24674 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24675 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
24676 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
24677 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24678 gen_rtx_AND (mode, one, tmp)));
24679 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24681 /* res = copysign (xa2, operand1) */
24682 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
24684 emit_label (label);
24685 LABEL_NUSES (label) = 1;
24687 emit_move_insn (operand0, res);
24690 /* Expand SSE sequence for computing trunc from OPERAND1 storing
24691 into OPERAND0. */
24692 void
24693 ix86_expand_trunc (rtx operand0, rtx operand1)
24695 /* C code for SSE variant we expand below.
24696 double xa = fabs (x), x2;
24697 if (!isless (xa, TWO52))
24698 return x;
24699 x2 = (double)(long)x;
24700 if (HONOR_SIGNED_ZEROS (mode))
24701 return copysign (x2, x);
24702 return x2;
24704 enum machine_mode mode = GET_MODE (operand0);
24705 rtx xa, xi, TWO52, label, res, mask;
24707 TWO52 = ix86_gen_TWO52 (mode);
24709 /* Temporary for holding the result, initialized to the input
24710 operand to ease control flow. */
24711 res = gen_reg_rtx (mode);
24712 emit_move_insn (res, operand1);
24714 /* xa = abs (operand1) */
24715 xa = ix86_expand_sse_fabs (res, &mask);
24717 /* if (!isless (xa, TWO52)) goto label; */
24718 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24720 /* x = (double)(long)x */
24721 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
24722 expand_fix (xi, res, 0);
24723 expand_float (res, xi, 0);
24725 if (HONOR_SIGNED_ZEROS (mode))
24726 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
24728 emit_label (label);
24729 LABEL_NUSES (label) = 1;
24731 emit_move_insn (operand0, res);
24734 /* Expand SSE sequence for computing trunc from OPERAND1 storing
24735 into OPERAND0. */
24736 void
24737 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
24739 enum machine_mode mode = GET_MODE (operand0);
24740 rtx xa, mask, TWO52, label, one, res, smask, tmp;
24742 /* C code for SSE variant we expand below.
24743 double xa = fabs (x), x2;
24744 if (!isless (xa, TWO52))
24745 return x;
24746 xa2 = xa + TWO52 - TWO52;
24747 Compensate:
24748 if (xa2 > xa)
24749 xa2 -= 1.0;
24750 x2 = copysign (xa2, x);
24751 return x2;
24754 TWO52 = ix86_gen_TWO52 (mode);
24756 /* Temporary for holding the result, initialized to the input
24757 operand to ease control flow. */
24758 res = gen_reg_rtx (mode);
24759 emit_move_insn (res, operand1);
24761 /* xa = abs (operand1) */
24762 xa = ix86_expand_sse_fabs (res, &smask);
24764 /* if (!isless (xa, TWO52)) goto label; */
24765 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24767 /* res = xa + TWO52 - TWO52; */
24768 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24769 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
24770 emit_move_insn (res, tmp);
24772 /* generate 1.0 */
24773 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
24775 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
24776 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
24777 emit_insn (gen_rtx_SET (VOIDmode, mask,
24778 gen_rtx_AND (mode, mask, one)));
24779 tmp = expand_simple_binop (mode, MINUS,
24780 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
24781 emit_move_insn (res, tmp);
24783 /* res = copysign (res, operand1) */
24784 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
24786 emit_label (label);
24787 LABEL_NUSES (label) = 1;
24789 emit_move_insn (operand0, res);
24792 /* Expand SSE sequence for computing round from OPERAND1 storing
24793 into OPERAND0. */
24794 void
24795 ix86_expand_round (rtx operand0, rtx operand1)
24797 /* C code for the stuff we're doing below:
24798 double xa = fabs (x);
24799 if (!isless (xa, TWO52))
24800 return x;
24801 xa = (double)(long)(xa + nextafter (0.5, 0.0));
24802 return copysign (xa, x);
24804 enum machine_mode mode = GET_MODE (operand0);
24805 rtx res, TWO52, xa, label, xi, half, mask;
24806 const struct real_format *fmt;
24807 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
24809 /* Temporary for holding the result, initialized to the input
24810 operand to ease control flow. */
24811 res = gen_reg_rtx (mode);
24812 emit_move_insn (res, operand1);
24814 TWO52 = ix86_gen_TWO52 (mode);
24815 xa = ix86_expand_sse_fabs (res, &mask);
24816 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24818 /* load nextafter (0.5, 0.0) */
24819 fmt = REAL_MODE_FORMAT (mode);
24820 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
24821 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
24823 /* xa = xa + 0.5 */
24824 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
24825 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
24827 /* xa = (double)(int64_t)xa */
24828 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
24829 expand_fix (xi, xa, 0);
24830 expand_float (xa, xi, 0);
24832 /* res = copysign (xa, operand1) */
24833 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
24835 emit_label (label);
24836 LABEL_NUSES (label) = 1;
24838 emit_move_insn (operand0, res);
24842 /* Validate whether a SSE5 instruction is valid or not.
24843 OPERANDS is the array of operands.
24844 NUM is the number of operands.
24845 USES_OC0 is true if the instruction uses OC0 and provides 4 varients.
24846 NUM_MEMORY is the maximum number of memory operands to accept. */
24847 bool ix86_sse5_valid_op_p (rtx operands[], rtx insn, int num, bool uses_oc0, int num_memory)
24849 int mem_mask;
24850 int mem_count;
24851 int i;
24853 /* Count the number of memory arguments */
24854 mem_mask = 0;
24855 mem_count = 0;
24856 for (i = 0; i < num; i++)
24858 enum machine_mode mode = GET_MODE (operands[i]);
24859 if (register_operand (operands[i], mode))
24862 else if (memory_operand (operands[i], mode))
24864 mem_mask |= (1 << i);
24865 mem_count++;
24868 else
24870 rtx pattern = PATTERN (insn);
24872 /* allow 0 for pcmov */
24873 if (GET_CODE (pattern) != SET
24874 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
24875 || i < 2
24876 || operands[i] != CONST0_RTX (mode))
24877 return false;
24881 /* If there were no memory operations, allow the insn */
24882 if (mem_mask == 0)
24883 return true;
24885 /* Do not allow the destination register to be a memory operand. */
24886 else if (mem_mask & (1 << 0))
24887 return false;
24889 /* If there are too many memory operations, disallow the instruction. While
24890 the hardware only allows 1 memory reference, before register allocation
24891 for some insns, we allow two memory operations sometimes in order to allow
24892 code like the following to be optimized:
24894 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
24896 or similar cases that are vectorized into using the fmaddss
24897 instruction. */
24898 else if (mem_count > num_memory)
24899 return false;
24901 /* Don't allow more than one memory operation if not optimizing. */
24902 else if (mem_count > 1 && !optimize)
24903 return false;
24905 else if (num == 4 && mem_count == 1)
24907 /* formats (destination is the first argument), example fmaddss:
24908 xmm1, xmm1, xmm2, xmm3/mem
24909 xmm1, xmm1, xmm2/mem, xmm3
24910 xmm1, xmm2, xmm3/mem, xmm1
24911 xmm1, xmm2/mem, xmm3, xmm1 */
24912 if (uses_oc0)
24913 return ((mem_mask == (1 << 1))
24914 || (mem_mask == (1 << 2))
24915 || (mem_mask == (1 << 3)));
24917 /* format, example pmacsdd:
24918 xmm1, xmm2, xmm3/mem, xmm1 */
24919 else
24920 return (mem_mask == (1 << 2));
24923 else if (num == 4 && num_memory == 2)
24925 /* If there are two memory operations, we can load one of the memory ops
24926 into the destination register. This is for optimizating the
24927 multiply/add ops, which the combiner has optimized both the multiply
24928 and the add insns to have a memory operation. We have to be careful
24929 that the destination doesn't overlap with the inputs. */
24930 rtx op0 = operands[0];
24932 if (reg_mentioned_p (op0, operands[1])
24933 || reg_mentioned_p (op0, operands[2])
24934 || reg_mentioned_p (op0, operands[3]))
24935 return false;
24937 /* formats (destination is the first argument), example fmaddss:
24938 xmm1, xmm1, xmm2, xmm3/mem
24939 xmm1, xmm1, xmm2/mem, xmm3
24940 xmm1, xmm2, xmm3/mem, xmm1
24941 xmm1, xmm2/mem, xmm3, xmm1
24943 For the oc0 case, we will load either operands[1] or operands[3] into
24944 operands[0], so any combination of 2 memory operands is ok. */
24945 if (uses_oc0)
24946 return true;
24948 /* format, example pmacsdd:
24949 xmm1, xmm2, xmm3/mem, xmm1
24951 For the integer multiply/add instructions be more restrictive and
24952 require operands[2] and operands[3] to be the memory operands. */
24953 else
24954 return (mem_mask == ((1 << 2) | (1 << 3)));
24957 else if (num == 3 && num_memory == 1)
24959 /* formats, example protb:
24960 xmm1, xmm2, xmm3/mem
24961 xmm1, xmm2/mem, xmm3 */
24962 if (uses_oc0)
24963 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
24965 /* format, example comeq:
24966 xmm1, xmm2, xmm3/mem */
24967 else
24968 return (mem_mask == (1 << 2));
24971 else
24972 gcc_unreachable ();
24974 return false;
24978 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
24979 hardware will allow by using the destination register to load one of the
24980 memory operations. Presently this is used by the multiply/add routines to
24981 allow 2 memory references. */
24983 void
24984 ix86_expand_sse5_multiple_memory (rtx operands[],
24985 int num,
24986 enum machine_mode mode)
24988 rtx op0 = operands[0];
24989 if (num != 4
24990 || memory_operand (op0, mode)
24991 || reg_mentioned_p (op0, operands[1])
24992 || reg_mentioned_p (op0, operands[2])
24993 || reg_mentioned_p (op0, operands[3]))
24994 gcc_unreachable ();
24996 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
24997 the destination register. */
24998 if (memory_operand (operands[1], mode))
25000 emit_move_insn (op0, operands[1]);
25001 operands[1] = op0;
25003 else if (memory_operand (operands[3], mode))
25005 emit_move_insn (op0, operands[3]);
25006 operands[3] = op0;
25008 else
25009 gcc_unreachable ();
25011 return;
25015 /* Table of valid machine attributes. */
25016 static const struct attribute_spec ix86_attribute_table[] =
25018 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
25019 /* Stdcall attribute says callee is responsible for popping arguments
25020 if they are not variable. */
25021 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25022 /* Fastcall attribute says callee is responsible for popping arguments
25023 if they are not variable. */
25024 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25025 /* Cdecl attribute says the callee is a normal C declaration */
25026 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25027 /* Regparm attribute specifies how many integer arguments are to be
25028 passed in registers. */
25029 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
25030 /* Sseregparm attribute says we are using x86_64 calling conventions
25031 for FP arguments. */
25032 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25033 /* force_align_arg_pointer says this function realigns the stack at entry. */
25034 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
25035 false, true, true, ix86_handle_cconv_attribute },
25036 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25037 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
25038 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
25039 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
25040 #endif
25041 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25042 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25043 #ifdef SUBTARGET_ATTRIBUTE_TABLE
25044 SUBTARGET_ATTRIBUTE_TABLE,
25045 #endif
25046 { NULL, 0, 0, false, false, false, NULL }
25049 /* Implement targetm.vectorize.builtin_vectorization_cost. */
25050 static int
25051 x86_builtin_vectorization_cost (bool runtime_test)
25053 /* If the branch of the runtime test is taken - i.e. - the vectorized
25054 version is skipped - this incurs a misprediction cost (because the
25055 vectorized version is expected to be the fall-through). So we subtract
25056 the latency of a mispredicted branch from the costs that are incured
25057 when the vectorized version is executed.
25059 TODO: The values in individual target tables have to be tuned or new
25060 fields may be needed. For eg. on K8, the default branch path is the
25061 not-taken path. If the taken path is predicted correctly, the minimum
25062 penalty of going down the taken-path is 1 cycle. If the taken-path is
25063 not predicted correctly, then the minimum penalty is 10 cycles. */
25065 if (runtime_test)
25067 return (-(ix86_cost->cond_taken_branch_cost));
25069 else
25070 return 0;
25073 /* Initialize the GCC target structure. */
25074 #undef TARGET_ATTRIBUTE_TABLE
25075 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
25076 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25077 # undef TARGET_MERGE_DECL_ATTRIBUTES
25078 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
25079 #endif
25081 #undef TARGET_COMP_TYPE_ATTRIBUTES
25082 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
25084 #undef TARGET_INIT_BUILTINS
25085 #define TARGET_INIT_BUILTINS ix86_init_builtins
25086 #undef TARGET_EXPAND_BUILTIN
25087 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
25089 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
25090 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
25091 ix86_builtin_vectorized_function
25093 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
25094 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
25096 #undef TARGET_BUILTIN_RECIPROCAL
25097 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
25099 #undef TARGET_ASM_FUNCTION_EPILOGUE
25100 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
25102 #undef TARGET_ENCODE_SECTION_INFO
25103 #ifndef SUBTARGET_ENCODE_SECTION_INFO
25104 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
25105 #else
25106 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
25107 #endif
25109 #undef TARGET_ASM_OPEN_PAREN
25110 #define TARGET_ASM_OPEN_PAREN ""
25111 #undef TARGET_ASM_CLOSE_PAREN
25112 #define TARGET_ASM_CLOSE_PAREN ""
25114 #undef TARGET_ASM_ALIGNED_HI_OP
25115 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
25116 #undef TARGET_ASM_ALIGNED_SI_OP
25117 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
25118 #ifdef ASM_QUAD
25119 #undef TARGET_ASM_ALIGNED_DI_OP
25120 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
25121 #endif
25123 #undef TARGET_ASM_UNALIGNED_HI_OP
25124 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
25125 #undef TARGET_ASM_UNALIGNED_SI_OP
25126 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
25127 #undef TARGET_ASM_UNALIGNED_DI_OP
25128 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
25130 #undef TARGET_SCHED_ADJUST_COST
25131 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
25132 #undef TARGET_SCHED_ISSUE_RATE
25133 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
25134 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
25135 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
25136 ia32_multipass_dfa_lookahead
25138 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
25139 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
25141 #ifdef HAVE_AS_TLS
25142 #undef TARGET_HAVE_TLS
25143 #define TARGET_HAVE_TLS true
25144 #endif
25145 #undef TARGET_CANNOT_FORCE_CONST_MEM
25146 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
25147 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
25148 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
25150 #undef TARGET_DELEGITIMIZE_ADDRESS
25151 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
25153 #undef TARGET_MS_BITFIELD_LAYOUT_P
25154 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
25156 #if TARGET_MACHO
25157 #undef TARGET_BINDS_LOCAL_P
25158 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
25159 #endif
25160 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25161 #undef TARGET_BINDS_LOCAL_P
25162 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
25163 #endif
25165 #undef TARGET_ASM_OUTPUT_MI_THUNK
25166 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
25167 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
25168 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
25170 #undef TARGET_ASM_FILE_START
25171 #define TARGET_ASM_FILE_START x86_file_start
25173 #undef TARGET_DEFAULT_TARGET_FLAGS
25174 #define TARGET_DEFAULT_TARGET_FLAGS \
25175 (TARGET_DEFAULT \
25176 | TARGET_SUBTARGET_DEFAULT \
25177 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
25179 #undef TARGET_HANDLE_OPTION
25180 #define TARGET_HANDLE_OPTION ix86_handle_option
25182 #undef TARGET_RTX_COSTS
25183 #define TARGET_RTX_COSTS ix86_rtx_costs
25184 #undef TARGET_ADDRESS_COST
25185 #define TARGET_ADDRESS_COST ix86_address_cost
25187 #undef TARGET_FIXED_CONDITION_CODE_REGS
25188 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
25189 #undef TARGET_CC_MODES_COMPATIBLE
25190 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
25192 #undef TARGET_MACHINE_DEPENDENT_REORG
25193 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
25195 #undef TARGET_BUILD_BUILTIN_VA_LIST
25196 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
25198 #undef TARGET_MD_ASM_CLOBBERS
25199 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
25201 #undef TARGET_PROMOTE_PROTOTYPES
25202 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
25203 #undef TARGET_STRUCT_VALUE_RTX
25204 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
25205 #undef TARGET_SETUP_INCOMING_VARARGS
25206 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
25207 #undef TARGET_MUST_PASS_IN_STACK
25208 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
25209 #undef TARGET_PASS_BY_REFERENCE
25210 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
25211 #undef TARGET_INTERNAL_ARG_POINTER
25212 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
25213 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
25214 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
25215 #undef TARGET_STRICT_ARGUMENT_NAMING
25216 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
25218 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
25219 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
25221 #undef TARGET_SCALAR_MODE_SUPPORTED_P
25222 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
25224 #undef TARGET_VECTOR_MODE_SUPPORTED_P
25225 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
25227 #undef TARGET_C_MODE_FOR_SUFFIX
25228 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
25230 #ifdef HAVE_AS_TLS
25231 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
25232 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
25233 #endif
25235 #ifdef SUBTARGET_INSERT_ATTRIBUTES
25236 #undef TARGET_INSERT_ATTRIBUTES
25237 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
25238 #endif
25240 #undef TARGET_MANGLE_TYPE
25241 #define TARGET_MANGLE_TYPE ix86_mangle_type
25243 #undef TARGET_STACK_PROTECT_FAIL
25244 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
25246 #undef TARGET_FUNCTION_VALUE
25247 #define TARGET_FUNCTION_VALUE ix86_function_value
25249 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
25250 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
25252 struct gcc_target targetm = TARGET_INITIALIZER;
25254 #include "gt-i386.h"