/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "c-common.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"

static int x86_builtin_vectorization_cost (bool);
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
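
/* Example: MODE_INDEX (SImode) is 2, so ix86_cost->mult_init[MODE_INDEX (mode)]
   and ix86_cost->divide[MODE_INDEX (mode)] select the SImode ("SI") column of
   the five-entry multiply and divide arrays in the cost tables below.  (A
   sketch of the intended use; the field names are those of struct
   processor_costs in i386.h.)  */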

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
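
/* Under that assumption COSTS_N_BYTES (2) == COSTS_N_INSNS (1): when tuning
   for size a 2-byte add costs exactly one instruction unit, keeping the
   byte-based entries below comparable with the cycle-based tables.  */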

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
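
/* A stringop_algs initializer (struct defined in i386.h) names the algorithm
   to use when the block size is unknown, followed by {max_size, algorithm}
   pairs: each pair selects that algorithm for blocks up to max_size bytes,
   with a max_size of -1 standing for "any larger size".  Each cost table
   below carries one such descriptor pair for memcpy and one for memset
   (32-bit and 64-bit variants); DUMMY_STRINGOP_ALGS fills the variant a
   table does not tune for.  */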

const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),    /* cost of an add instruction */
  COSTS_N_BYTES (3),    /* cost of a lea instruction */
  COSTS_N_BYTES (2),    /* variable shift costs */
  COSTS_N_BYTES (3),    /* constant shift costs */
  {COSTS_N_BYTES (3),   /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),   /* HI */
   COSTS_N_BYTES (3),   /* SI */
   COSTS_N_BYTES (3),   /* DI */
   COSTS_N_BYTES (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),   /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),   /* HI */
   COSTS_N_BYTES (3),   /* SI */
   COSTS_N_BYTES (3),   /* DI */
   COSTS_N_BYTES (5)},  /* other */
  COSTS_N_BYTES (3),    /* cost of movsx */
  COSTS_N_BYTES (3),    /* cost of movzx */
  0,                    /* "large" insn */
  2,                    /* MOVE_RATIO */
  2,                    /* cost for loading QImode using movzbl */
  {2, 2, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 2, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 2},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {2, 2, 2},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  3,                    /* cost of moving MMX register */
  {3, 3},               /* cost of loading MMX registers
                           in SImode and DImode */
  {3, 3},               /* cost of storing MMX registers
                           in SImode and DImode */
  3,                    /* cost of moving SSE register */
  {3, 3, 3},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {3, 3, 3},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  0,                    /* size of l1 cache */
  0,                    /* size of l2 cache */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
  2,                    /* Branch cost */
  COSTS_N_BYTES (2),    /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),    /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),    /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),    /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),    /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),    /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  1,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  1,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {    /* 386 specific costs */
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (3),    /* variable shift costs */
  COSTS_N_INSNS (2),    /* constant shift costs */
  {COSTS_N_INSNS (6),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),   /* HI */
   COSTS_N_INSNS (6),   /* SI */
   COSTS_N_INSNS (6),   /* DI */
   COSTS_N_INSNS (6)},  /* other */
  COSTS_N_INSNS (1),    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),  /* HI */
   COSTS_N_INSNS (23),  /* SI */
   COSTS_N_INSNS (23),  /* DI */
   COSTS_N_INSNS (23)}, /* other */
  COSTS_N_INSNS (3),    /* cost of movsx */
  COSTS_N_INSNS (2),    /* cost of movzx */
  15,                   /* "large" insn */
  3,                    /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {2, 4, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 4, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {8, 8, 8},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {8, 8, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 8, 16},           /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 8, 16},           /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  0,                    /* size of l1 cache */
  0,                    /* size of l2 cache */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
  1,                    /* Branch cost */
  COSTS_N_INSNS (23),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),  /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs i486_cost = {    /* 486 specific costs */
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (3),    /* variable shift costs */
  COSTS_N_INSNS (2),    /* constant shift costs */
  {COSTS_N_INSNS (12),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),  /* HI */
   COSTS_N_INSNS (12),  /* SI */
   COSTS_N_INSNS (12),  /* DI */
   COSTS_N_INSNS (12)}, /* other */
  1,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),  /* HI */
   COSTS_N_INSNS (40),  /* SI */
   COSTS_N_INSNS (40),  /* DI */
   COSTS_N_INSNS (40)}, /* other */
  COSTS_N_INSNS (3),    /* cost of movsx */
  COSTS_N_INSNS (2),    /* cost of movzx */
  15,                   /* "large" insn */
  3,                    /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {2, 4, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 4, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {8, 8, 8},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {8, 8, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 8, 16},           /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 8, 16},           /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  4,                    /* size of l1 cache.  486 has 8kB cache
                           shared for code and data, so 4kB is
                           not really precise.  */
  4,                    /* size of l2 cache */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
  1,                    /* Branch cost */
  COSTS_N_INSNS (8),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),   /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (4),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (11),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),  /* HI */
   COSTS_N_INSNS (11),  /* SI */
   COSTS_N_INSNS (11),  /* DI */
   COSTS_N_INSNS (11)}, /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),  /* HI */
   COSTS_N_INSNS (25),  /* SI */
   COSTS_N_INSNS (25),  /* DI */
   COSTS_N_INSNS (25)}, /* other */
  COSTS_N_INSNS (3),    /* cost of movsx */
  COSTS_N_INSNS (2),    /* cost of movzx */
  8,                    /* "large" insn */
  6,                    /* MOVE_RATIO */
  6,                    /* cost for loading QImode using movzbl */
  {2, 4, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 4, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 6},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  8,                    /* cost of moving MMX register */
  {8, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {8, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 8, 16},           /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 8, 16},           /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  8,                    /* size of l1 cache.  */
  8,                    /* size of l2 cache */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
  2,                    /* Branch cost */
  COSTS_N_INSNS (3),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (4),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (4),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (4)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),  /* HI */
   COSTS_N_INSNS (17),  /* SI */
   COSTS_N_INSNS (17),  /* DI */
   COSTS_N_INSNS (17)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  6,                    /* MOVE_RATIO */
  2,                    /* cost for loading QImode using movzbl */
  {4, 4, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 2, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 6},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {2, 2},               /* cost of loading MMX registers
                           in SImode and DImode */
  {2, 2},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {2, 2, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {2, 2, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  8,                    /* size of l1 cache.  */
  256,                  /* size of l2 cache */
  32,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  2,                    /* Branch cost */
  COSTS_N_INSNS (3),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),   /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (2),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (7),   /* SI */
   COSTS_N_INSNS (7),   /* DI */
   COSTS_N_INSNS (7)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),  /* HI */
   COSTS_N_INSNS (39),  /* SI */
   COSTS_N_INSNS (39),  /* DI */
   COSTS_N_INSNS (39)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  4,                    /* MOVE_RATIO */
  1,                    /* cost for loading QImode using movzbl */
  {1, 1, 1},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {1, 1, 1},            /* cost of storing integer registers */
  1,                    /* cost of reg,reg fld/fst */
  {1, 1, 1},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 6, 6},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */

  1,                    /* cost of moving MMX register */
  {1, 1},               /* cost of loading MMX registers
                           in SImode and DImode */
  {1, 1},               /* cost of storing MMX registers
                           in SImode and DImode */
  1,                    /* cost of moving SSE register */
  {1, 1, 1},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {1, 1, 1},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  1,                    /* MMX or SSE register to integer */
  64,                   /* size of l1 cache.  */
  128,                  /* size of l2 cache.  */
  32,                   /* size of prefetch block */
  1,                    /* number of parallel prefetches */
  1,                    /* Branch cost */
  COSTS_N_INSNS (6),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (3),   /* DI */
   COSTS_N_INSNS (3)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),  /* HI */
   COSTS_N_INSNS (18),  /* SI */
   COSTS_N_INSNS (18),  /* DI */
   COSTS_N_INSNS (18)}, /* other */
  COSTS_N_INSNS (2),    /* cost of movsx */
  COSTS_N_INSNS (2),    /* cost of movzx */
  8,                    /* "large" insn */
  4,                    /* MOVE_RATIO */
  3,                    /* cost for loading QImode using movzbl */
  {4, 5, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 3, 2},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {6, 6, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 4},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {2, 2},               /* cost of loading MMX registers
                           in SImode and DImode */
  {2, 2},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {2, 2, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {2, 2, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  6,                    /* MMX or SSE register to integer */
  32,                   /* size of l1 cache.  */
  32,                   /* size of l2 cache.  Some models
                           have integrated l2 cache, but
                           optimizing for k6 is not important
                           enough to worry about that.  */
  32,                   /* size of prefetch block */
  1,                    /* number of parallel prefetches */
  1,                    /* Branch cost */
  COSTS_N_INSNS (2),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (5),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),   /* HI */
   COSTS_N_INSNS (5),   /* SI */
   COSTS_N_INSNS (5),   /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  9,                    /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {3, 4, 3},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {3, 4, 3},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {4, 4, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 4},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 4, 6},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 5},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  5,                    /* MMX or SSE register to integer */
  64,                   /* size of l1 cache.  */
  256,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  5,                    /* Branch cost */
  COSTS_N_INSNS (4),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),   /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) compared to K8.  Alignment becomes important after 8 bytes for
     memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  9,                    /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {3, 4, 3},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {3, 4, 3},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {4, 4, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {3, 3},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 3, 6},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 5},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  5,                    /* MMX or SSE register to integer */
  64,                   /* size of l1 cache.  */
  512,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                  /* number of parallel prefetches */
  3,                    /* Branch cost */
  COSTS_N_INSNS (4),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),   /* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use a loop.  For large blocks, a libcall
     can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                    /* scalar_stmt_cost.  */
  2,                    /* scalar load_cost.  */
  2,                    /* scalar_store_cost.  */
  5,                    /* vec_stmt_cost.  */
  0,                    /* vec_to_scalar_cost.  */
  2,                    /* scalar_to_vec_cost.  */
  2,                    /* vec_align_load_cost.  */
  3,                    /* vec_unalign_load_cost.  */
  3,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  2,                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  9,                    /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {3, 4, 3},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {3, 4, 3},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {4, 4, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {3, 3},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 4, 3},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 5},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  /* On K8:
       MOVD reg64, xmmreg   Double   FSTORE 4
       MOVD reg32, xmmreg   Double   FSTORE 4
     On AMDFAM10:
       MOVD reg64, xmmreg   Double   FADD 3
                                     1/1  1/1
       MOVD reg32, xmmreg   Double   FADD 3
                                     1/1  1/1 */
  64,                   /* size of l1 cache.  */
  512,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                  /* number of parallel prefetches */
  2,                    /* Branch cost */
  COSTS_N_INSNS (4),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),   /* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                    /* scalar_stmt_cost.  */
  2,                    /* scalar load_cost.  */
  2,                    /* scalar_store_cost.  */
  6,                    /* vec_stmt_cost.  */
  0,                    /* vec_to_scalar_cost.  */
  2,                    /* scalar_to_vec_cost.  */
  2,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  2,                    /* vec_store_cost.  */
  2,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (3),    /* cost of a lea instruction */
  COSTS_N_INSNS (4),    /* variable shift costs */
  COSTS_N_INSNS (4),    /* constant shift costs */
  {COSTS_N_INSNS (15),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),  /* HI */
   COSTS_N_INSNS (15),  /* SI */
   COSTS_N_INSNS (15),  /* DI */
   COSTS_N_INSNS (15)}, /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),  /* HI */
   COSTS_N_INSNS (56),  /* SI */
   COSTS_N_INSNS (56),  /* DI */
   COSTS_N_INSNS (56)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  16,                   /* "large" insn */
  6,                    /* MOVE_RATIO */
  2,                    /* cost for loading QImode using movzbl */
  {4, 5, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 3, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 6},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {2, 2},               /* cost of loading MMX registers
                           in SImode and DImode */
  {2, 2},               /* cost of storing MMX registers
                           in SImode and DImode */
  12,                   /* cost of moving SSE register */
  {12, 12, 12},         /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {2, 2, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  10,                   /* MMX or SSE register to integer */
  8,                    /* size of l1 cache.  */
  256,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  2,                    /* Branch cost */
  COSTS_N_INSNS (5),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),   /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (10),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),  /* HI */
   COSTS_N_INSNS (10),  /* SI */
   COSTS_N_INSNS (10),  /* DI */
   COSTS_N_INSNS (10)}, /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),  /* HI */
   COSTS_N_INSNS (66),  /* SI */
   COSTS_N_INSNS (66),  /* DI */
   COSTS_N_INSNS (66)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  16,                   /* "large" insn */
  17,                   /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {4, 4, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  3,                    /* cost of reg,reg fld/fst */
  {12, 12, 12},         /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 4},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  6,                    /* cost of moving MMX register */
  {12, 12},             /* cost of loading MMX registers
                           in SImode and DImode */
  {12, 12},             /* cost of storing MMX registers
                           in SImode and DImode */
  6,                    /* cost of moving SSE register */
  {12, 12, 12},         /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {12, 12, 12},         /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  8,                    /* MMX or SSE register to integer */
  8,                    /* size of l1 cache.  */
  1024,                 /* size of l2 cache.  */
  128,                  /* size of prefetch block */
  8,                    /* number of parallel prefetches */
  1,                    /* Branch cost */
  COSTS_N_INSNS (6),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),   /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (3),   /* DI */
   COSTS_N_INSNS (3)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),  /* HI */
   COSTS_N_INSNS (22),  /* SI */
   COSTS_N_INSNS (22),  /* DI */
   COSTS_N_INSNS (22)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  16,                   /* MOVE_RATIO */
  2,                    /* cost for loading QImode using movzbl */
  {6, 6, 6},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {6, 6, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 4},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {6, 6},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {6, 6, 6},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 4},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  2,                    /* MMX or SSE register to integer */
  32,                   /* size of l1 cache.  */
  2048,                 /* size of l2 cache.  */
  128,                  /* size of prefetch block */
  8,                    /* number of parallel prefetches */
  3,                    /* Branch cost */
  COSTS_N_INSNS (3),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),   /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (2)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  17,                   /* MOVE_RATIO */
  2,                    /* cost for loading QImode using movzbl */
  {4, 4, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {12, 12, 12},         /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {8, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {8, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {8, 8, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {8, 8, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  5,                    /* MMX or SSE register to integer */
  32,                   /* size of l1 cache.  */
  256,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  3,                    /* Branch cost */
  COSTS_N_INSNS (8),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),   /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results in
     the use of unnecessary temporary registers, causing regressions on
     several SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (2)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  17,                   /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {4, 4, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {12, 12, 12},         /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {8, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {8, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {8, 8, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {8, 8, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  5,                    /* MMX or SSE register to integer */
  32,                   /* size of l1 cache.  */
  512,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
  3,                    /* Branch cost */
  COSTS_N_INSNS (8),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),   /* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona
   and K8.  */
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (2)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  17,                   /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {4, 4, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {12, 12, 12},         /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {8, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {8, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {8, 8, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {8, 8, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  5,                    /* MMX or SSE register to integer */
  32,                   /* size of l1 cache.  */
  256,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  3,                    /* Branch cost */
  COSTS_N_INSNS (8),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),   /* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;
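
/* NOTE: &pentium_cost is only the build-time default.  Option handling
   (override_options) is expected to repoint ix86_cost at the table matching
   the -mtune selection, and at &ix86_size_cost when optimizing for size.  */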

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)

/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];
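
/* Roughly, option handling later computes, for each feature I,
     ix86_tune_features[I] = !!(initial_ix86_tune_features[I]
                                & (1 << ix86_tune));
   i.e. it tests each mask below against the bit of the processor selected
   for tuning (a sketch of the initialization done elsewhere in this file).  */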

/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling for Generic64 seems like a good code size
     tradeoff.  We can't enable it for 32bit generic because it does not
     work well with PPro base chips.  */
  m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,

  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */
  m_486 | m_PENT,

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
  | m_CORE2 | m_GENERIC,

  /* X86_TUNE_DEEP_BRANCH_PREDICTION */
  m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation result.  But after P4 was made, no performance benefit
     was observed with branch hints.  It also increases the code size.
     As a result, icc never generates branch hints.  */
  0,

  /* X86_TUNE_DOUBLE_WITH_ADD */
  ~m_386,

  /* X86_TUNE_USE_SAHF */
  m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
  | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,

  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on the Generic32 compilation setting as well.  However
     in the current implementation the partial register stalls are not
     eliminated very well - they can be introduced via subregs synthesized
     by combine and can happen in caller/callee saving sequences.  Because
     this option pays back little on PPro based chips and is in conflict
     with partial reg dependencies used by Athlon/P4 based chips, it is
     better to leave it off for generic32 for now.  */
  m_PPRO,

  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
  m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_HIMODE_FIOP */
  m_386 | m_486 | m_K6_GEODE,

  /* X86_TUNE_USE_SIMODE_FIOP */
  ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_MOV0 */
  m_K6,

  /* X86_TUNE_USE_CLTD */
  ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */
  m_PENT4,

  /* X86_TUNE_SPLIT_LONG_MOVES */
  m_PPRO,

  /* X86_TUNE_READ_MODIFY_WRITE */
  ~m_PENT,

  /* X86_TUNE_READ_MODIFY */
  ~(m_PENT | m_PPRO),

  /* X86_TUNE_PROMOTE_QIMODE */
  m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
  | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,

  /* X86_TUNE_FAST_PREFIX */
  ~(m_PENT | m_486 | m_386),

  /* X86_TUNE_SINGLE_STRINGOP */
  m_386 | m_PENT4 | m_NOCONA,

  /* X86_TUNE_QIMODE_MATH */
  ~0,

  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls was more effective.  */
  ~m_PPRO,

  /* X86_TUNE_PROMOTE_QI_REGS */
  0,
1391 /* X86_TUNE_PROMOTE_HI_REGS */
1392 m_PPRO,
1394 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1395 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1396 | m_CORE2 | m_GENERIC,
1398 /* X86_TUNE_ADD_ESP_8 */
1399 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1400 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1402 /* X86_TUNE_SUB_ESP_4 */
1403 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1404 | m_GENERIC,
1406 /* X86_TUNE_SUB_ESP_8 */
1407 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1408 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1410 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1411 for DFmode copies */
1412 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1413 | m_GENERIC | m_GEODE),
1415 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1416 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1418 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1419 conflict here in between PPro/Pentium4 based chips that thread 128bit
1420 SSE registers as single units versus K8 based chips that divide SSE
1421 registers to two 64bit halves. This knob promotes all store destinations
1422 to be 128bit to allow register renaming on 128bit SSE units, but usually
1423 results in one extra microop on 64bit SSE units. Experimental results
1424 shows that disabling this option on P4 brings over 20% SPECfp regression,
1425 while enabling it on K8 brings roughly 2.4% regression that can be partly
1426 masked by careful scheduling of moves. */
1427 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1428 | m_AMDFAM10,
1430 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1431 m_AMDFAM10,
1433 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1434 are resolved on SSE register parts instead of whole registers, so we may
1435 maintain just the lower part of scalar values in the proper format, leaving
1436 the upper part undefined. */
1437 m_ATHLON_K8,
1439 /* X86_TUNE_SSE_TYPELESS_STORES */
1440 m_AMD_MULTIPLE,
1442 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1443 m_PPRO | m_PENT4 | m_NOCONA,
1445 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1446 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1448 /* X86_TUNE_PROLOGUE_USING_MOVE */
1449 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1451 /* X86_TUNE_EPILOGUE_USING_MOVE */
1452 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1454 /* X86_TUNE_SHIFT1 */
1455 ~m_486,
1457 /* X86_TUNE_USE_FFREEP */
1458 m_AMD_MULTIPLE,
1460 /* X86_TUNE_INTER_UNIT_MOVES */
1461 ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
1463 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1464 ~(m_AMDFAM10),
1466 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1467 than 4 branch instructions in the 16 byte window. */
1468 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1469 | m_GENERIC,
1471 /* X86_TUNE_SCHEDULE */
1472 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1473 | m_GENERIC,
1475 /* X86_TUNE_USE_BT */
1476 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1478 /* X86_TUNE_USE_INCDEC */
1479 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1481 /* X86_TUNE_PAD_RETURNS */
1482 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1484 /* X86_TUNE_EXT_80387_CONSTANTS */
1485 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1486 | m_CORE2 | m_GENERIC,
1488 /* X86_TUNE_SHORTEN_X87_SSE */
1489 ~m_K8,
1491 /* X86_TUNE_AVOID_VECTOR_DECODE */
1492 m_K8 | m_GENERIC64,
1494 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1495 and SImode multiplies, but the 386 and 486 do HImode multiplies faster. */
1496 ~(m_386 | m_486),
1498 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1499 vector path on AMD machines. */
1500 m_K8 | m_GENERIC64 | m_AMDFAM10,
1502 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1503 machines. */
1504 m_K8 | m_GENERIC64 | m_AMDFAM10,
1506 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1507 than a MOV. */
1508 m_PENT,
1510 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1511 but one byte longer. */
1512 m_PENT,
1514 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
1515 operand that cannot be represented using a modRM byte. The XOR
1516 replacement is long decoded, so this split helps here as well. */
1517 m_K6,
1519 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1520 from FP to FP. */
1521 m_AMDFAM10 | m_GENERIC,
1523 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1524 from integer to FP. */
1525 m_AMDFAM10,
1527 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1528 with a subsequent conditional jump instruction into a single
1529 compare-and-branch uop. */
1530 m_CORE2,
1532 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1533 will impact LEA instruction selection. */
1534 m_ATOM,
1537 /* Feature tests against the various architecture variations. */
1538 unsigned char ix86_arch_features[X86_ARCH_LAST];
1540 /* Feature tests against the various architecture variations, used to create
1541 ix86_arch_features based on the processor mask. */
1542 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1543 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1544 ~(m_386 | m_486 | m_PENT | m_K6),
1546 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1547 ~m_386,
1549 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1550 ~(m_386 | m_486),
1552 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1553 ~m_386,
1555 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1556 ~m_386,
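/* Illustrative sketch (not part of GCC): override_options later reduces each
   mask above to a per-feature boolean by testing the bit of the selected
   -march CPU, roughly: */
#if 0
  unsigned int arch_mask = 1u << ix86_arch;   /* processor bit, e.g. m_486 */
  /* BSWAP is available everywhere except -march=i386, per ~m_386 above.  */
  int have_bswap = (initial_ix86_arch_features[X86_ARCH_BSWAP] & arch_mask) != 0;
#endif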
1559 static const unsigned int x86_accumulate_outgoing_args
1560 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1561 | m_GENERIC;
1563 static const unsigned int x86_arch_always_fancy_math_387
1564 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1565 | m_NOCONA | m_CORE2 | m_GENERIC;
1567 static enum stringop_alg stringop_alg = no_stringop;
1569 /* In case the average insn count for a single function invocation is
1570 lower than this constant, emit fast (but longer) prologue and
1571 epilogue code. */
1572 #define FAST_PROLOGUE_INSN_COUNT 20
1574 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1575 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1576 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1577 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1579 /* Array of the smallest class containing reg number REGNO, indexed by
1580 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1582 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1584 /* ax, dx, cx, bx */
1585 AREG, DREG, CREG, BREG,
1586 /* si, di, bp, sp */
1587 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1588 /* FP registers */
1589 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1590 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1591 /* arg pointer */
1592 NON_Q_REGS,
1593 /* flags, fpsr, fpcr, frame */
1594 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1595 /* SSE registers */
1596 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1597 SSE_REGS, SSE_REGS,
1598 /* MMX registers */
1599 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1600 MMX_REGS, MMX_REGS,
1601 /* REX registers */
1602 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1603 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1604 /* SSE REX registers */
1605 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1606 SSE_REGS, SSE_REGS,
1609 /* The "default" register map used in 32bit mode. */
1611 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1613 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1614 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1615 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1616 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1617 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1618 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1619 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1622 /* The "default" register map used in 64bit mode. */
1624 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1626 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1627 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1628 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1629 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1630 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1631 8,9,10,11,12,13,14,15, /* extended integer registers */
1632 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1635 /* Define the register numbers to be used in Dwarf debugging information.
1636 The SVR4 reference port C compiler uses the following register numbers
1637 in its Dwarf output code:
1638 0 for %eax (gcc regno = 0)
1639 1 for %ecx (gcc regno = 2)
1640 2 for %edx (gcc regno = 1)
1641 3 for %ebx (gcc regno = 3)
1642 4 for %esp (gcc regno = 7)
1643 5 for %ebp (gcc regno = 6)
1644 6 for %esi (gcc regno = 4)
1645 7 for %edi (gcc regno = 5)
1646 The following three DWARF register numbers are never generated by
1647 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1648 believes these numbers have these meanings.
1649 8 for %eip (no gcc equivalent)
1650 9 for %eflags (gcc regno = 17)
1651 10 for %trapno (no gcc equivalent)
1652 It is not at all clear how we should number the FP stack registers
1653 for the x86 architecture. If the version of SDB on x86/svr4 were
1654 a bit less brain dead with respect to floating-point then we would
1655 have a precedent to follow with respect to DWARF register numbers
1656 for x86 FP registers, but the SDB on x86/svr4 is so completely
1657 broken with respect to FP registers that it is hardly worth thinking
1658 of it as something to strive for compatibility with.
1659 The version of x86/svr4 SDB I have at the moment does (partially)
1660 seem to believe that DWARF register number 11 is associated with
1661 the x86 register %st(0), but that's about all. Higher DWARF
1662 register numbers don't seem to be associated with anything in
1663 particular, and even for DWARF regno 11, SDB only seems to under-
1664 stand that it should say that a variable lives in %st(0) (when
1665 asked via an `=' command) if we said it was in DWARF regno 11,
1666 but SDB still prints garbage when asked for the value of the
1667 variable in question (via a `/' command).
1668 (Also note that the labels SDB prints for various FP stack regs
1669 when doing an `x' command are all wrong.)
1670 Note that these problems generally don't affect the native SVR4
1671 C compiler because it doesn't allow the use of -O with -g and
1672 because when it is *not* optimizing, it allocates a memory
1673 location for each floating-point variable, and the memory
1674 location is what gets described in the DWARF AT_location
1675 attribute for the variable in question.
1676 Regardless of the severe mental illness of the x86/svr4 SDB, we
1677 do something sensible here and we use the following DWARF
1678 register numbers. Note that these are all stack-top-relative
1679 numbers.
1680 11 for %st(0) (gcc regno = 8)
1681 12 for %st(1) (gcc regno = 9)
1682 13 for %st(2) (gcc regno = 10)
1683 14 for %st(3) (gcc regno = 11)
1684 15 for %st(4) (gcc regno = 12)
1685 16 for %st(5) (gcc regno = 13)
1686 17 for %st(6) (gcc regno = 14)
1687 18 for %st(7) (gcc regno = 15)
1689 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1691 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1692 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1693 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1694 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1695 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1696 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1697 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
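/* A minimal lookup sketch (not part of GCC): DWARF numbers come straight
   out of the table above, indexed by gcc register number.  */
#if 0
  int gcc_regno = 4;                                    /* %esi */
  int dwarf_regno = svr4_dbx_register_map[gcc_regno];   /* yields 6 */
#endif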
1700 /* Test and compare insns in i386.md store the information needed to
1701 generate branch and scc insns here. */
1703 rtx ix86_compare_op0 = NULL_RTX;
1704 rtx ix86_compare_op1 = NULL_RTX;
1706 /* Define parameter passing and return registers. */
1708 static int const x86_64_int_parameter_registers[6] =
1710 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1713 static int const x86_64_ms_abi_int_parameter_registers[4] =
1715 CX_REG, DX_REG, R8_REG, R9_REG
1718 static int const x86_64_int_return_registers[4] =
1720 AX_REG, DX_REG, DI_REG, SI_REG
1723 /* Define the structure for the machine field in struct function. */
1725 struct stack_local_entry GTY(())
1727 unsigned short mode;
1728 unsigned short n;
1729 rtx rtl;
1730 struct stack_local_entry *next;
1733 /* Structure describing stack frame layout.
1734 Stack grows downward:
1736 [arguments]
1737 <- ARG_POINTER
1738 saved pc
1740 saved frame pointer if frame_pointer_needed
1741 <- HARD_FRAME_POINTER
1742 [saved regs]
1744 [padding0]
1746 [saved SSE regs]
1748 [padding1] \
1750 [va_arg registers] (
1751 > to_allocate <- FRAME_POINTER
1752 [frame] (
1754 [padding2] /
1756 struct ix86_frame
1758 int padding0;
1759 int nsseregs;
1760 int nregs;
1761 int padding1;
1762 int va_arg_size;
1763 HOST_WIDE_INT frame;
1764 int padding2;
1765 int outgoing_arguments_size;
1766 int red_zone_size;
1768 HOST_WIDE_INT to_allocate;
1769 /* The offsets relative to ARG_POINTER. */
1770 HOST_WIDE_INT frame_pointer_offset;
1771 HOST_WIDE_INT hard_frame_pointer_offset;
1772 HOST_WIDE_INT stack_pointer_offset;
1774 /* When save_regs_using_mov is set, emit prologue using
1775 move instead of push instructions. */
1776 bool save_regs_using_mov;
1779 /* Code model option. */
1780 enum cmodel ix86_cmodel;
1781 /* Asm dialect. */
1782 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1783 /* TLS dialects. */
1784 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1786 /* Which unit we are generating floating point math for. */
1787 enum fpmath_unit ix86_fpmath;
1789 /* Which cpu are we scheduling for. */
1790 enum attr_cpu ix86_schedule;
1792 /* Which cpu are we optimizing for. */
1793 enum processor_type ix86_tune;
1795 /* Which instruction set architecture to use. */
1796 enum processor_type ix86_arch;
1798 /* True if the SSE prefetch instruction is not a NOP. */
1799 int x86_prefetch_sse;
1801 /* ix86_regparm_string as a number */
1802 static int ix86_regparm;
1804 /* -mstackrealign option */
1805 extern int ix86_force_align_arg_pointer;
1806 static const char ix86_force_align_arg_pointer_string[]
1807 = "force_align_arg_pointer";
1809 static rtx (*ix86_gen_leave) (void);
1810 static rtx (*ix86_gen_pop1) (rtx);
1811 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1812 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1813 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1814 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1815 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1816 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1818 /* Preferred alignment for stack boundary in bits. */
1819 unsigned int ix86_preferred_stack_boundary;
1821 /* Alignment for incoming stack boundary in bits specified at
1822 command line. */
1823 static unsigned int ix86_user_incoming_stack_boundary;
1825 /* Default alignment for incoming stack boundary in bits. */
1826 static unsigned int ix86_default_incoming_stack_boundary;
1828 /* Alignment for incoming stack boundary in bits. */
1829 unsigned int ix86_incoming_stack_boundary;
1831 /* The ABI used by the target. */
1832 enum calling_abi ix86_abi;
1834 /* Values 1-5: see jump.c */
1835 int ix86_branch_cost;
1837 /* Calling abi specific va_list type nodes. */
1838 static GTY(()) tree sysv_va_list_type_node;
1839 static GTY(()) tree ms_va_list_type_node;
1841 /* Variables which are this size or smaller are put in the data/bss
1842 or ldata/lbss sections. */
1844 int ix86_section_threshold = 65536;
1846 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1847 char internal_label_prefix[16];
1848 int internal_label_prefix_len;
1850 /* Fence to use after a loop using movnt. */
1851 tree x86_mfence;
1853 /* Register class used for passing a given 64bit part of the argument.
1854 These represent classes as documented by the PS ABI, with the exception
1855 of the SSESF and SSEDF classes, which are basically SSE class; gcc will
1856 just use SF or DFmode moves instead of DImode to avoid reformatting penalties.
1858 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1859 whenever possible (the upper half does contain padding). */
1860 enum x86_64_reg_class
1862 X86_64_NO_CLASS,
1863 X86_64_INTEGER_CLASS,
1864 X86_64_INTEGERSI_CLASS,
1865 X86_64_SSE_CLASS,
1866 X86_64_SSESF_CLASS,
1867 X86_64_SSEDF_CLASS,
1868 X86_64_SSEUP_CLASS,
1869 X86_64_X87_CLASS,
1870 X86_64_X87UP_CLASS,
1871 X86_64_COMPLEX_X87_CLASS,
1872 X86_64_MEMORY_CLASS
1875 #define MAX_CLASSES 4
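/* For illustration (assumed examples, not GCC code): classify_argument
   assigns eightbyte classes along these lines under the 64-bit ABI.  */
#if 0
  double d;                    /* one eightbyte: X86_64_SSEDF_CLASS */
  struct two_longs { long a; long b; } s;
  /* two eightbytes, both X86_64_INTEGER_CLASS, so S is passed in two
     integer registers.  */
#endif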
1877 /* Table of constants used by fldpi, fldln2, etc.... */
1878 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1879 static bool ext_80387_constants_init = 0;
1882 static struct machine_function * ix86_init_machine_status (void);
1883 static rtx ix86_function_value (const_tree, const_tree, bool);
1884 static int ix86_function_regparm (const_tree, const_tree);
1885 static void ix86_compute_frame_layout (struct ix86_frame *);
1886 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1887 rtx, rtx, int);
1888 static void ix86_add_new_builtins (int);
1890 enum ix86_function_specific_strings
1892 IX86_FUNCTION_SPECIFIC_ARCH,
1893 IX86_FUNCTION_SPECIFIC_TUNE,
1894 IX86_FUNCTION_SPECIFIC_FPMATH,
1895 IX86_FUNCTION_SPECIFIC_MAX
1898 static char *ix86_target_string (int, int, const char *, const char *,
1899 const char *, bool);
1900 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1901 static void ix86_function_specific_save (struct cl_target_option *);
1902 static void ix86_function_specific_restore (struct cl_target_option *);
1903 static void ix86_function_specific_print (FILE *, int,
1904 struct cl_target_option *);
1905 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1906 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1907 static bool ix86_can_inline_p (tree, tree);
1908 static void ix86_set_current_function (tree);
1910 static enum calling_abi ix86_function_abi (const_tree);
1913 /* The svr4 ABI for the i386 says that records and unions are returned
1914 in memory. */
1915 #ifndef DEFAULT_PCC_STRUCT_RETURN
1916 #define DEFAULT_PCC_STRUCT_RETURN 1
1917 #endif
1919 /* Whether -mtune= or -march= were specified */
1920 static int ix86_tune_defaulted;
1921 static int ix86_arch_specified;
1923 /* Bit flags that specify the ISA we are compiling for. */
1924 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1926 /* A mask of ix86_isa_flags that includes bit X if X
1927 was set or cleared on the command line. */
1928 static int ix86_isa_flags_explicit;
1930 /* Define a set of ISAs which are available when a given ISA is
1931 enabled. MMX and SSE ISAs are handled separately. */
1933 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1934 #define OPTION_MASK_ISA_3DNOW_SET \
1935 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1937 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1938 #define OPTION_MASK_ISA_SSE2_SET \
1939 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1940 #define OPTION_MASK_ISA_SSE3_SET \
1941 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1942 #define OPTION_MASK_ISA_SSSE3_SET \
1943 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1944 #define OPTION_MASK_ISA_SSE4_1_SET \
1945 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1946 #define OPTION_MASK_ISA_SSE4_2_SET \
1947 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1948 #define OPTION_MASK_ISA_AVX_SET \
1949 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1950 #define OPTION_MASK_ISA_FMA_SET \
1951 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1953 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1954 as -msse4.2. */
1955 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1957 #define OPTION_MASK_ISA_SSE4A_SET \
1958 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1959 #define OPTION_MASK_ISA_SSE5_SET \
1960 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1962 /* AES and PCLMUL need SSE2 because they use xmm registers. */
1963 #define OPTION_MASK_ISA_AES_SET \
1964 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1965 #define OPTION_MASK_ISA_PCLMUL_SET \
1966 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1968 #define OPTION_MASK_ISA_ABM_SET \
1969 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1970 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1971 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1972 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
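/* A worked example (illustrative): because each _SET macro ORs in the _SET
   of the ISA it depends on, a single option pulls in its whole chain: */
#if 0
  /* OPTION_MASK_ISA_SSE4_1_SET expands to
       SSE4_1 | SSSE3 | SSE3 | SSE2 | SSE
     so -msse4.1 enables all of its prerequisites with one OR.  */
  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
#endif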
1974 /* Define a set of ISAs which aren't available when a given ISA is
1975 disabled. MMX and SSE ISAs are handled separately. */
1977 #define OPTION_MASK_ISA_MMX_UNSET \
1978 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1979 #define OPTION_MASK_ISA_3DNOW_UNSET \
1980 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1981 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1983 #define OPTION_MASK_ISA_SSE_UNSET \
1984 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1985 #define OPTION_MASK_ISA_SSE2_UNSET \
1986 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1987 #define OPTION_MASK_ISA_SSE3_UNSET \
1988 (OPTION_MASK_ISA_SSE3 \
1989 | OPTION_MASK_ISA_SSSE3_UNSET \
1990 | OPTION_MASK_ISA_SSE4A_UNSET )
1991 #define OPTION_MASK_ISA_SSSE3_UNSET \
1992 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1993 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1994 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1995 #define OPTION_MASK_ISA_SSE4_2_UNSET \
1996 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
1997 #define OPTION_MASK_ISA_AVX_UNSET \
1998 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
1999 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2001 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
2002 as -mno-sse4.1. */
2003 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2005 #define OPTION_MASK_ISA_SSE4A_UNSET \
2006 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
2007 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
2008 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2009 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2010 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2011 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2012 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2013 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
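/* Conversely (illustrative): the _UNSET macros chain upward, so clearing an
   ISA also strips everything built on it: */
#if 0
  /* OPTION_MASK_ISA_SSE2_UNSET covers
       SSE2 | SSE3 | SSSE3 | SSE4_1 | SSE4_2 | AVX | FMA | SSE4A | SSE5
     so -mno-sse2 disables the entire dependent chain.  */
  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
#endif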
2015 /* Vectorization library interface and handlers. */
2016 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
2017 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2018 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2020 /* Processor target table, indexed by processor number */
2021 struct ptt
2023 const struct processor_costs *cost; /* Processor costs */
2024 const int align_loop; /* Default alignments. */
2025 const int align_loop_max_skip;
2026 const int align_jump;
2027 const int align_jump_max_skip;
2028 const int align_func;
2031 static const struct ptt processor_target_table[PROCESSOR_max] =
2033 {&i386_cost, 4, 3, 4, 3, 4},
2034 {&i486_cost, 16, 15, 16, 15, 16},
2035 {&pentium_cost, 16, 7, 16, 7, 16},
2036 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2037 {&geode_cost, 0, 0, 0, 0, 0},
2038 {&k6_cost, 32, 7, 32, 7, 32},
2039 {&athlon_cost, 16, 7, 16, 7, 16},
2040 {&pentium4_cost, 0, 0, 0, 0, 0},
2041 {&k8_cost, 16, 7, 16, 7, 16},
2042 {&nocona_cost, 0, 0, 0, 0, 0},
2043 {&core2_cost, 16, 10, 16, 10, 16},
2044 {&generic32_cost, 16, 7, 16, 7, 16},
2045 {&generic64_cost, 16, 10, 16, 10, 16},
2046 {&amdfam10_cost, 32, 24, 32, 7, 32},
2047 {&atom_cost, 16, 7, 16, 7, 16}
2050 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2052 "generic",
2053 "i386",
2054 "i486",
2055 "pentium",
2056 "pentium-mmx",
2057 "pentiumpro",
2058 "pentium2",
2059 "pentium3",
2060 "pentium4",
2061 "pentium-m",
2062 "prescott",
2063 "nocona",
2064 "core2",
2065 "atom",
2066 "geode",
2067 "k6",
2068 "k6-2",
2069 "k6-3",
2070 "athlon",
2071 "athlon-4",
2072 "k8",
2073 "amdfam10"
2076 /* Implement TARGET_HANDLE_OPTION. */
2078 static bool
2079 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2081 switch (code)
2083 case OPT_mmmx:
2084 if (value)
2086 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2087 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2089 else
2091 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2092 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2094 return true;
2096 case OPT_m3dnow:
2097 if (value)
2099 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2100 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2102 else
2104 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2105 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2107 return true;
2109 case OPT_m3dnowa:
2110 return false;
2112 case OPT_msse:
2113 if (value)
2115 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2116 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2118 else
2120 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2121 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2123 return true;
2125 case OPT_msse2:
2126 if (value)
2128 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2129 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2131 else
2133 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2134 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2136 return true;
2138 case OPT_msse3:
2139 if (value)
2141 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2142 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2144 else
2146 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2147 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2149 return true;
2151 case OPT_mssse3:
2152 if (value)
2154 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2155 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2157 else
2159 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2160 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2162 return true;
2164 case OPT_msse4_1:
2165 if (value)
2167 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2168 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2170 else
2172 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2173 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2175 return true;
2177 case OPT_msse4_2:
2178 if (value)
2180 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2181 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2183 else
2185 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2186 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2188 return true;
2190 case OPT_mavx:
2191 if (value)
2193 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2194 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2196 else
2198 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2199 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2201 return true;
2203 case OPT_mfma:
2204 if (value)
2206 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2207 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2209 else
2211 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2212 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2214 return true;
2216 case OPT_msse4:
2217 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2218 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2219 return true;
2221 case OPT_mno_sse4:
2222 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2223 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2224 return true;
2226 case OPT_msse4a:
2227 if (value)
2229 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2230 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2232 else
2234 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2235 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2237 return true;
2239 case OPT_msse5:
2240 if (value)
2242 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2243 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2245 else
2247 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2248 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
2250 return true;
2252 case OPT_mabm:
2253 if (value)
2255 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2256 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2258 else
2260 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2261 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2263 return true;
2265 case OPT_mpopcnt:
2266 if (value)
2268 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2269 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2271 else
2273 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2274 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2276 return true;
2278 case OPT_msahf:
2279 if (value)
2281 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2282 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2284 else
2286 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2287 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2289 return true;
2291 case OPT_mcx16:
2292 if (value)
2294 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2295 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2297 else
2299 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2300 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2302 return true;
2304 case OPT_maes:
2305 if (value)
2307 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2308 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2310 else
2312 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2313 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2315 return true;
2317 case OPT_mpclmul:
2318 if (value)
2320 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2321 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2323 else
2325 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2326 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2328 return true;
2330 default:
2331 return true;
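/* The switch above repeats one pattern per ISA option.  A hypothetical new
   entry (sketch only -- OPT_mfoo and the FOO masks do not exist) would be
   wired the same way: */
#if 0
    case OPT_mfoo:
      if (value)
	{
	  ix86_isa_flags |= OPTION_MASK_ISA_FOO_SET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_FOO_SET;
	}
      else
	{
	  ix86_isa_flags &= ~OPTION_MASK_ISA_FOO_UNSET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_FOO_UNSET;
	}
      return true;
#endif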
2335 /* Return a string that documents the current -m options. The caller is
2336 responsible for freeing the string. */
2338 static char *
2339 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2340 const char *fpmath, bool add_nl_p)
2342 struct ix86_target_opts
2344 const char *option; /* option string */
2345 int mask; /* isa mask options */
2348 /* This table is ordered so that options like -msse5 or -msse4.2, which
2349 imply preceding options, match first. */
2350 static struct ix86_target_opts isa_opts[] =
2352 { "-m64", OPTION_MASK_ISA_64BIT },
2353 { "-msse5", OPTION_MASK_ISA_SSE5 },
2354 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2355 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2356 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2357 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2358 { "-msse3", OPTION_MASK_ISA_SSE3 },
2359 { "-msse2", OPTION_MASK_ISA_SSE2 },
2360 { "-msse", OPTION_MASK_ISA_SSE },
2361 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2362 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2363 { "-mmmx", OPTION_MASK_ISA_MMX },
2364 { "-mabm", OPTION_MASK_ISA_ABM },
2365 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2366 { "-maes", OPTION_MASK_ISA_AES },
2367 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2370 /* Flag options. */
2371 static struct ix86_target_opts flag_opts[] =
2373 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2374 { "-m80387", MASK_80387 },
2375 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2376 { "-malign-double", MASK_ALIGN_DOUBLE },
2377 { "-mcld", MASK_CLD },
2378 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2379 { "-mieee-fp", MASK_IEEE_FP },
2380 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2381 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2382 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2383 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2384 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2385 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2386 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2387 { "-mno-red-zone", MASK_NO_RED_ZONE },
2388 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2389 { "-mrecip", MASK_RECIP },
2390 { "-mrtd", MASK_RTD },
2391 { "-msseregparm", MASK_SSEREGPARM },
2392 { "-mstack-arg-probe", MASK_STACK_PROBE },
2393 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2396 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2398 char isa_other[40];
2399 char target_other[40];
2400 unsigned num = 0;
2401 unsigned i, j;
2402 char *ret;
2403 char *ptr;
2404 size_t len;
2405 size_t line_len;
2406 size_t sep_len;
2408 memset (opts, '\0', sizeof (opts));
2410 /* Add -march= option. */
2411 if (arch)
2413 opts[num][0] = "-march=";
2414 opts[num++][1] = arch;
2417 /* Add -mtune= option. */
2418 if (tune)
2420 opts[num][0] = "-mtune=";
2421 opts[num++][1] = tune;
2424 /* Pick out the options in isa options. */
2425 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2427 if ((isa & isa_opts[i].mask) != 0)
2429 opts[num++][0] = isa_opts[i].option;
2430 isa &= ~ isa_opts[i].mask;
2434 if (isa && add_nl_p)
2436 opts[num++][0] = isa_other;
2437 sprintf (isa_other, "(other isa: 0x%x)", isa);
2440 /* Add flag options. */
2441 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2443 if ((flags & flag_opts[i].mask) != 0)
2445 opts[num++][0] = flag_opts[i].option;
2446 flags &= ~ flag_opts[i].mask;
2450 if (flags && add_nl_p)
2452 opts[num++][0] = target_other;
2453 sprintf (target_other, "(other flags: 0x%x)", flags);
2456 /* Add -fpmath= option. */
2457 if (fpmath)
2459 opts[num][0] = "-mfpmath=";
2460 opts[num++][1] = fpmath;
2463 /* Any options? */
2464 if (num == 0)
2465 return NULL;
2467 gcc_assert (num < ARRAY_SIZE (opts));
2469 /* Size the string. */
2470 len = 0;
2471 sep_len = (add_nl_p) ? 3 : 1;
2472 for (i = 0; i < num; i++)
2474 len += sep_len;
2475 for (j = 0; j < 2; j++)
2476 if (opts[i][j])
2477 len += strlen (opts[i][j]);
2480 /* Build the string. */
2481 ret = ptr = (char *) xmalloc (len);
2482 line_len = 0;
2484 for (i = 0; i < num; i++)
2486 size_t len2[2];
2488 for (j = 0; j < 2; j++)
2489 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2491 if (i != 0)
2493 *ptr++ = ' ';
2494 line_len++;
2496 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2498 *ptr++ = '\\';
2499 *ptr++ = '\n';
2500 line_len = 0;
2504 for (j = 0; j < 2; j++)
2505 if (opts[i][j])
2507 memcpy (ptr, opts[i][j], len2[j]);
2508 ptr += len2[j];
2509 line_len += len2[j];
2513 *ptr = '\0';
2514 gcc_assert (ret + len >= ptr);
2516 return ret;
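/* Usage sketch (illustrative values): the caller owns the returned string. */
#if 0
  char *s = ix86_target_string (ix86_isa_flags, target_flags,
				"k8", "generic", "sse", false);
  /* e.g. "-march=k8 -mtune=generic -m64 -msse3 -msse2 -msse -mfpmath=sse" */
  free (s);
#endif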
2519 /* Function that is callable from the debugger to print the current
2520 options. */
2521 void
2522 ix86_debug_options (void)
2524 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2525 ix86_arch_string, ix86_tune_string,
2526 ix86_fpmath_string, true);
2528 if (opts)
2530 fprintf (stderr, "%s\n\n", opts);
2531 free (opts);
2533 else
2534 fprintf (stderr, "<no options>\n\n");
2536 return;
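/* Usage note (assumed workflow): with a debugger attached to cc1, the
   in-effect options can be dumped via something like
     (gdb) call ix86_debug_options ()
   which prints to stderr using the function above.  */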
2539 /* Sometimes certain combinations of command options do not make
2540 sense on a particular target machine. You can define a macro
2541 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2542 defined, is executed once just after all the command options have
2543 been parsed.
2545 Don't use this macro to turn on various extra optimizations for
2546 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2548 void
2549 override_options (bool main_args_p)
2551 int i;
2552 unsigned int ix86_arch_mask, ix86_tune_mask;
2553 const char *prefix;
2554 const char *suffix;
2555 const char *sw;
2557 /* Comes from final.c -- no real reason to change it. */
2558 #define MAX_CODE_ALIGN 16
2560 enum pta_flags
2562 PTA_SSE = 1 << 0,
2563 PTA_SSE2 = 1 << 1,
2564 PTA_SSE3 = 1 << 2,
2565 PTA_MMX = 1 << 3,
2566 PTA_PREFETCH_SSE = 1 << 4,
2567 PTA_3DNOW = 1 << 5,
2568 PTA_3DNOW_A = 1 << 6,
2569 PTA_64BIT = 1 << 7,
2570 PTA_SSSE3 = 1 << 8,
2571 PTA_CX16 = 1 << 9,
2572 PTA_POPCNT = 1 << 10,
2573 PTA_ABM = 1 << 11,
2574 PTA_SSE4A = 1 << 12,
2575 PTA_NO_SAHF = 1 << 13,
2576 PTA_SSE4_1 = 1 << 14,
2577 PTA_SSE4_2 = 1 << 15,
2578 PTA_SSE5 = 1 << 16,
2579 PTA_AES = 1 << 17,
2580 PTA_PCLMUL = 1 << 18,
2581 PTA_AVX = 1 << 19,
2582 PTA_FMA = 1 << 20
2585 static struct pta
2587 const char *const name; /* processor name or nickname. */
2588 const enum processor_type processor;
2589 const enum attr_cpu schedule;
2590 const unsigned /*enum pta_flags*/ flags;
2592 const processor_alias_table[] =
2594 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2595 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2596 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2597 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2598 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2599 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2600 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2601 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2602 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2603 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2604 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2605 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2606 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2607 PTA_MMX | PTA_SSE},
2608 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2609 PTA_MMX | PTA_SSE},
2610 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2611 PTA_MMX | PTA_SSE | PTA_SSE2},
2612 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2613 PTA_MMX |PTA_SSE | PTA_SSE2},
2614 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2615 PTA_MMX | PTA_SSE | PTA_SSE2},
2616 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2617 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2618 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2619 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2620 | PTA_CX16 | PTA_NO_SAHF},
2621 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2622 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2623 | PTA_SSSE3 | PTA_CX16},
2624 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2625 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2626 | PTA_SSSE3 | PTA_CX16},
2627 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2628 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2629 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2630 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2631 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2632 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2633 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2634 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2635 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2636 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2637 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2638 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2639 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2640 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2641 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2642 {"x86-64", PROCESSOR_K8, CPU_K8,
2643 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2644 {"k8", PROCESSOR_K8, CPU_K8,
2645 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2646 | PTA_SSE2 | PTA_NO_SAHF},
2647 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2648 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2649 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2650 {"opteron", PROCESSOR_K8, CPU_K8,
2651 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2652 | PTA_SSE2 | PTA_NO_SAHF},
2653 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2654 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2655 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2656 {"athlon64", PROCESSOR_K8, CPU_K8,
2657 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2658 | PTA_SSE2 | PTA_NO_SAHF},
2659 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2660 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2661 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2662 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2663 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2664 | PTA_SSE2 | PTA_NO_SAHF},
2665 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2666 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2667 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2668 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2669 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2670 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2671 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2672 0 /* flags are only used for -march switch. */ },
2673 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2674 PTA_64BIT /* flags are only used for -march switch. */ },
2677 int const pta_size = ARRAY_SIZE (processor_alias_table);
2679 /* Set up prefix/suffix so the error messages refer to either the command
2680 line argument, or the attribute(target). */
2681 if (main_args_p)
2683 prefix = "-m";
2684 suffix = "";
2685 sw = "switch";
2687 else
2689 prefix = "option(\"";
2690 suffix = "\")";
2691 sw = "attribute";
2694 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2695 SUBTARGET_OVERRIDE_OPTIONS;
2696 #endif
2698 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2699 SUBSUBTARGET_OVERRIDE_OPTIONS;
2700 #endif
2702 /* -fPIC is the default for x86_64. */
2703 if (TARGET_MACHO && TARGET_64BIT)
2704 flag_pic = 2;
2706 /* Set the default values for switches whose default depends on TARGET_64BIT
2707 in case they weren't overwritten by command line options. */
2708 if (TARGET_64BIT)
2710 /* Mach-O doesn't support omitting the frame pointer for now. */
2711 if (flag_omit_frame_pointer == 2)
2712 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2713 if (flag_asynchronous_unwind_tables == 2)
2714 flag_asynchronous_unwind_tables = 1;
2715 if (flag_pcc_struct_return == 2)
2716 flag_pcc_struct_return = 0;
2718 else
2720 if (flag_omit_frame_pointer == 2)
2721 flag_omit_frame_pointer = 0;
2722 if (flag_asynchronous_unwind_tables == 2)
2723 flag_asynchronous_unwind_tables = 0;
2724 if (flag_pcc_struct_return == 2)
2725 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2728 /* Need to check -mtune=generic first. */
2729 if (ix86_tune_string)
2731 if (!strcmp (ix86_tune_string, "generic")
2732 || !strcmp (ix86_tune_string, "i686")
2733 /* As special support for cross compilers we read -mtune=native
2734 as -mtune=generic. With native compilers we won't see
2735 -mtune=native, as it was changed by the driver. */
2736 || !strcmp (ix86_tune_string, "native"))
2738 if (TARGET_64BIT)
2739 ix86_tune_string = "generic64";
2740 else
2741 ix86_tune_string = "generic32";
2743 /* If this call is for setting the option attribute, allow the
2744 generic32/generic64 that was previously set. */
2745 else if (!main_args_p
2746 && (!strcmp (ix86_tune_string, "generic32")
2747 || !strcmp (ix86_tune_string, "generic64")))
2749 else if (!strncmp (ix86_tune_string, "generic", 7))
2750 error ("bad value (%s) for %stune=%s %s",
2751 ix86_tune_string, prefix, suffix, sw);
2753 else
2755 if (ix86_arch_string)
2756 ix86_tune_string = ix86_arch_string;
2757 if (!ix86_tune_string)
2759 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2760 ix86_tune_defaulted = 1;
2763 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2764 need to use a sensible tune option. */
2765 if (!strcmp (ix86_tune_string, "generic")
2766 || !strcmp (ix86_tune_string, "x86-64")
2767 || !strcmp (ix86_tune_string, "i686"))
2769 if (TARGET_64BIT)
2770 ix86_tune_string = "generic64";
2771 else
2772 ix86_tune_string = "generic32";
2775 if (ix86_stringop_string)
2777 if (!strcmp (ix86_stringop_string, "rep_byte"))
2778 stringop_alg = rep_prefix_1_byte;
2779 else if (!strcmp (ix86_stringop_string, "libcall"))
2780 stringop_alg = libcall;
2781 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2782 stringop_alg = rep_prefix_4_byte;
2783 else if (!strcmp (ix86_stringop_string, "rep_8byte")
2784 && TARGET_64BIT)
2785 /* rep; movq isn't available in 32-bit code. */
2786 stringop_alg = rep_prefix_8_byte;
2787 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2788 stringop_alg = loop_1_byte;
2789 else if (!strcmp (ix86_stringop_string, "loop"))
2790 stringop_alg = loop;
2791 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2792 stringop_alg = unrolled_loop;
2793 else
2794 error ("bad value (%s) for %sstringop-strategy=%s %s",
2795 ix86_stringop_string, prefix, suffix, sw);
2797 if (!strcmp (ix86_tune_string, "x86-64"))
2798 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2799 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2800 prefix, suffix, prefix, suffix, prefix, suffix);
2802 if (!ix86_arch_string)
2803 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2804 else
2805 ix86_arch_specified = 1;
2807 if (!strcmp (ix86_arch_string, "generic"))
2808 error ("generic CPU can be used only for %stune=%s %s",
2809 prefix, suffix, sw);
2810 if (!strncmp (ix86_arch_string, "generic", 7))
2811 error ("bad value (%s) for %sarch=%s %s",
2812 ix86_arch_string, prefix, suffix, sw);
2814 /* Validate -mabi= value. */
2815 if (ix86_abi_string)
2817 if (strcmp (ix86_abi_string, "sysv") == 0)
2818 ix86_abi = SYSV_ABI;
2819 else if (strcmp (ix86_abi_string, "ms") == 0)
2820 ix86_abi = MS_ABI;
2821 else
2822 error ("unknown ABI (%s) for %sabi=%s %s",
2823 ix86_abi_string, prefix, suffix, sw);
2825 else
2826 ix86_abi = DEFAULT_ABI;
2828 if (ix86_cmodel_string != 0)
2830 if (!strcmp (ix86_cmodel_string, "small"))
2831 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2832 else if (!strcmp (ix86_cmodel_string, "medium"))
2833 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2834 else if (!strcmp (ix86_cmodel_string, "large"))
2835 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2836 else if (flag_pic)
2837 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2838 else if (!strcmp (ix86_cmodel_string, "32"))
2839 ix86_cmodel = CM_32;
2840 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2841 ix86_cmodel = CM_KERNEL;
2842 else
2843 error ("bad value (%s) for %scmodel=%s %s",
2844 ix86_cmodel_string, prefix, suffix, sw);
2846 else
2848 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2849 use of rip-relative addressing. This eliminates fixups that
2850 would otherwise be needed if this object is to be placed in a
2851 DLL, and is essentially just as efficient as direct addressing. */
2852 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2853 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2854 else if (TARGET_64BIT)
2855 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2856 else
2857 ix86_cmodel = CM_32;
2859 if (ix86_asm_string != 0)
2861 if (! TARGET_MACHO
2862 && !strcmp (ix86_asm_string, "intel"))
2863 ix86_asm_dialect = ASM_INTEL;
2864 else if (!strcmp (ix86_asm_string, "att"))
2865 ix86_asm_dialect = ASM_ATT;
2866 else
2867 error ("bad value (%s) for %sasm=%s %s",
2868 ix86_asm_string, prefix, suffix, sw);
2870 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2871 error ("code model %qs not supported in the %s bit mode",
2872 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2873 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2874 sorry ("%i-bit mode not compiled in",
2875 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2877 for (i = 0; i < pta_size; i++)
2878 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2880 ix86_schedule = processor_alias_table[i].schedule;
2881 ix86_arch = processor_alias_table[i].processor;
2882 /* Default cpu tuning to the architecture. */
2883 ix86_tune = ix86_arch;
2885 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2886 error ("CPU you selected does not support x86-64 "
2887 "instruction set");
2889 if (processor_alias_table[i].flags & PTA_MMX
2890 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2891 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2892 if (processor_alias_table[i].flags & PTA_3DNOW
2893 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2894 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2895 if (processor_alias_table[i].flags & PTA_3DNOW_A
2896 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2897 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2898 if (processor_alias_table[i].flags & PTA_SSE
2899 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2900 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2901 if (processor_alias_table[i].flags & PTA_SSE2
2902 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2903 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2904 if (processor_alias_table[i].flags & PTA_SSE3
2905 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2906 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2907 if (processor_alias_table[i].flags & PTA_SSSE3
2908 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2909 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2910 if (processor_alias_table[i].flags & PTA_SSE4_1
2911 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2912 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2913 if (processor_alias_table[i].flags & PTA_SSE4_2
2914 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2915 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2916 if (processor_alias_table[i].flags & PTA_AVX
2917 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2918 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2919 if (processor_alias_table[i].flags & PTA_FMA
2920 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2921 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2922 if (processor_alias_table[i].flags & PTA_SSE4A
2923 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2924 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2925 if (processor_alias_table[i].flags & PTA_SSE5
2926 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2927 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2928 if (processor_alias_table[i].flags & PTA_ABM
2929 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2930 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2931 if (processor_alias_table[i].flags & PTA_CX16
2932 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2933 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2934 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2935 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2936 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2937 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2938 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2939 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2940 if (processor_alias_table[i].flags & PTA_AES
2941 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2942 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2943 if (processor_alias_table[i].flags & PTA_PCLMUL
2944 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2945 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2946 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2947 x86_prefetch_sse = true;
2949 break;
2952 if (i == pta_size)
2953 error ("bad value (%s) for %sarch=%s %s",
2954 ix86_arch_string, prefix, suffix, sw);
2956 ix86_arch_mask = 1u << ix86_arch;
2957 for (i = 0; i < X86_ARCH_LAST; ++i)
2958 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2960 for (i = 0; i < pta_size; i++)
2961 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2963 ix86_schedule = processor_alias_table[i].schedule;
2964 ix86_tune = processor_alias_table[i].processor;
2965 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2967 if (ix86_tune_defaulted)
2969 ix86_tune_string = "x86-64";
2970 for (i = 0; i < pta_size; i++)
2971 if (! strcmp (ix86_tune_string,
2972 processor_alias_table[i].name))
2973 break;
2974 ix86_schedule = processor_alias_table[i].schedule;
2975 ix86_tune = processor_alias_table[i].processor;
2977 else
2978 error ("CPU you selected does not support x86-64 "
2979 "instruction set");
2981 /* Intel CPUs have always interpreted SSE prefetch instructions as
2982 NOPs; so, we can enable SSE prefetch instructions even when
2983 -mtune (rather than -march) points us to a processor that has them.
2984 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2985 higher processors. */
2986 if (TARGET_CMOVE
2987 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2988 x86_prefetch_sse = true;
2989 break;
2991 if (i == pta_size)
2992 error ("bad value (%s) for %stune=%s %s",
2993 ix86_tune_string, prefix, suffix, sw);
2995 ix86_tune_mask = 1u << ix86_tune;
2996 for (i = 0; i < X86_TUNE_LAST; ++i)
2997 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
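/* Illustrative sketch (not part of GCC): after this loop every tune knob is
   a plain boolean, typically read through TARGET_* wrappers in i386.h.  For
   example, X86_TUNE_USE_INCDEC above is ~(m_PENT4 | m_NOCONA | m_GENERIC
   | m_ATOM), so for -mtune=k8: */
#if 0
  int use_incdec = ix86_tune_features[X86_TUNE_USE_INCDEC];  /* 1 for k8 */
#endif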
2999 if (optimize_size)
3000 ix86_cost = &ix86_size_cost;
3001 else
3002 ix86_cost = processor_target_table[ix86_tune].cost;
3004 /* Arrange to set up i386_stack_locals for all functions. */
3005 init_machine_status = ix86_init_machine_status;
3007 /* Validate -mregparm= value. */
3008 if (ix86_regparm_string)
3010 if (TARGET_64BIT)
3011 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3012 i = atoi (ix86_regparm_string);
3013 if (i < 0 || i > REGPARM_MAX)
3014 error ("%sregparm=%d%s is not between 0 and %d",
3015 prefix, i, suffix, REGPARM_MAX);
3016 else
3017 ix86_regparm = i;
3019 if (TARGET_64BIT)
3020 ix86_regparm = REGPARM_MAX;
3022 /* If the user has provided any of the -malign-* options,
3023 warn and use that value only if -falign-* is not set.
3024 Remove this code in GCC 3.2 or later. */
3025 if (ix86_align_loops_string)
3027 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3028 prefix, suffix, suffix);
3029 if (align_loops == 0)
3031 i = atoi (ix86_align_loops_string);
3032 if (i < 0 || i > MAX_CODE_ALIGN)
3033 error ("%salign-loops=%d%s is not between 0 and %d",
3034 prefix, i, suffix, MAX_CODE_ALIGN);
3035 else
3036 align_loops = 1 << i;
3040 if (ix86_align_jumps_string)
3042 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3043 prefix, suffix, suffix);
3044 if (align_jumps == 0)
3046 i = atoi (ix86_align_jumps_string);
3047 if (i < 0 || i > MAX_CODE_ALIGN)
3048 error ("%salign-jumps=%d%s is not between 0 and %d",
3049 prefix, i, suffix, MAX_CODE_ALIGN);
3050 else
3051 align_jumps = 1 << i;
3055 if (ix86_align_funcs_string)
3057 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3058 prefix, suffix, suffix);
3059 if (align_functions == 0)
3061 i = atoi (ix86_align_funcs_string);
3062 if (i < 0 || i > MAX_CODE_ALIGN)
3063 error ("%salign-functions=%d%s is not between 0 and %d",
3064 prefix, i, suffix, MAX_CODE_ALIGN);
3065 else
3066 align_functions = 1 << i;
3070 /* Default align_* from the processor table. */
3071 if (align_loops == 0)
3073 align_loops = processor_target_table[ix86_tune].align_loop;
3074 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3076 if (align_jumps == 0)
3078 align_jumps = processor_target_table[ix86_tune].align_jump;
3079 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3081 if (align_functions == 0)
3083 align_functions = processor_target_table[ix86_tune].align_func;
3086 /* Validate -mbranch-cost= value, or provide default. */
3087 ix86_branch_cost = ix86_cost->branch_cost;
3088 if (ix86_branch_cost_string)
3090 i = atoi (ix86_branch_cost_string);
3091 if (i < 0 || i > 5)
3092 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3093 else
3094 ix86_branch_cost = i;
3096 if (ix86_section_threshold_string)
3098 i = atoi (ix86_section_threshold_string);
3099 if (i < 0)
3100 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3101 else
3102 ix86_section_threshold = i;
3105 if (ix86_tls_dialect_string)
3107 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3108 ix86_tls_dialect = TLS_DIALECT_GNU;
3109 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3110 ix86_tls_dialect = TLS_DIALECT_GNU2;
3111 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3112 ix86_tls_dialect = TLS_DIALECT_SUN;
3113 else
3114 error ("bad value (%s) for %stls-dialect=%s %s",
3115 ix86_tls_dialect_string, prefix, suffix, sw);
3118 if (ix87_precision_string)
3120 i = atoi (ix87_precision_string);
3121 if (i != 32 && i != 64 && i != 80)
3122 error ("pc%d is not a valid precision setting (32, 64 or 80)", i);
3125 if (TARGET_64BIT)
3127 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3129 /* Enable by default the SSE and MMX builtins. Do allow the user to
3130 explicitly disable any of these. In particular, disabling SSE and
3131 MMX for kernel code is extremely useful. */
3132 if (!ix86_arch_specified)
3133 ix86_isa_flags
3134 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3135 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3137 if (TARGET_RTD)
3138 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3140 else
3142 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3144 if (!ix86_arch_specified)
3145 ix86_isa_flags
3146 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3148 /* The i386 ABI does not specify a red zone. It still makes sense to use
3149 one when the programmer takes care to keep the stack from being destroyed. */
3150 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3151 target_flags |= MASK_NO_RED_ZONE;
3154 /* Keep nonleaf frame pointers. */
3155 if (flag_omit_frame_pointer)
3156 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3157 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3158 flag_omit_frame_pointer = 1;
3160 /* If we're doing fast math, we don't care about comparison order
3161 wrt NaNs. This lets us use a shorter comparison sequence. */
3162 if (flag_finite_math_only)
3163 target_flags &= ~MASK_IEEE_FP;
3165 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3166 since the insns won't need emulation. */
3167 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3168 target_flags &= ~MASK_NO_FANCY_MATH_387;
3170 /* Likewise, if the target doesn't have a 387, or we've specified
3171 software floating point, don't use 387 inline intrinsics. */
3172 if (!TARGET_80387)
3173 target_flags |= MASK_NO_FANCY_MATH_387;
3175 /* Turn on MMX builtins for -msse. */
3176 if (TARGET_SSE)
3178 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3179 x86_prefetch_sse = true;
3182 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3183 if (TARGET_SSE4_2 || TARGET_ABM)
3184 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3186 /* Validate -mpreferred-stack-boundary= value or default it to
3187 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3188 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3189 if (ix86_preferred_stack_boundary_string)
3191 i = atoi (ix86_preferred_stack_boundary_string);
3192 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3193 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3194 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3195 else
3196 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
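/* Worked example: -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the 16-byte alignment
   that the x86-64 psABI expects.  */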
3199 /* Set the default value for -mstackrealign. */
3200 if (ix86_force_align_arg_pointer == -1)
3201 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3203 /* Validate -mincoming-stack-boundary= value or default it to
3204 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3205 if (ix86_force_align_arg_pointer)
3206 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3207 else
3208 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3209 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3210 if (ix86_incoming_stack_boundary_string)
3212 i = atoi (ix86_incoming_stack_boundary_string);
3213 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3214 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3215 i, TARGET_64BIT ? 4 : 2);
3216 else
3218 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3219 ix86_incoming_stack_boundary
3220 = ix86_user_incoming_stack_boundary;
3224 /* Accept -msseregparm only if at least SSE support is enabled. */
3225 if (TARGET_SSEREGPARM
3226 && ! TARGET_SSE)
3227 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3229 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3230 if (ix86_fpmath_string != 0)
3232 if (! strcmp (ix86_fpmath_string, "387"))
3233 ix86_fpmath = FPMATH_387;
3234 else if (! strcmp (ix86_fpmath_string, "sse"))
3236 if (!TARGET_SSE)
3238 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3239 ix86_fpmath = FPMATH_387;
3241 else
3242 ix86_fpmath = FPMATH_SSE;
3244 else if (! strcmp (ix86_fpmath_string, "387,sse")
3245 || ! strcmp (ix86_fpmath_string, "387+sse")
3246 || ! strcmp (ix86_fpmath_string, "sse,387")
3247 || ! strcmp (ix86_fpmath_string, "sse+387")
3248 || ! strcmp (ix86_fpmath_string, "both"))
3250 if (!TARGET_SSE)
3252 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3253 ix86_fpmath = FPMATH_387;
3255 else if (!TARGET_80387)
3257 warning (0, "387 instruction set disabled, using SSE arithmetics");
3258 ix86_fpmath = FPMATH_SSE;
3260 else
3261 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3263 else
3264 error ("bad value (%s) for %sfpmath=%s %s",
3265 ix86_fpmath_string, prefix, suffix, sw);
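/* To summarize the spellings accepted above: -mfpmath=387, -mfpmath=sse,
   and any of "387,sse", "387+sse", "sse,387", "sse+387" or "both" for the
   combined mode; asking for a unit that is disabled falls back to the
   other one with a warning.  */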
3268 /* If the i387 is disabled, then do not return values in it. */
3269 if (!TARGET_80387)
3270 target_flags &= ~MASK_FLOAT_RETURNS;
3272 /* Use external vectorized library in vectorizing intrinsics. */
3273 if (ix86_veclibabi_string)
3275 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3276 ix86_veclib_handler = ix86_veclibabi_svml;
3277 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3278 ix86_veclib_handler = ix86_veclibabi_acml;
3279 else
3280 error ("unknown vectorization library ABI type (%s) for "
3281 "%sveclibabi=%s %s", ix86_veclibabi_string,
3282 prefix, suffix, sw);
3285 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3286 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3287 && !optimize_size)
3288 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3290 /* ??? Unwind info is not correct around the CFG unless either a frame
3291 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3292 unwind info generation to be aware of the CFG and propagating states
3293 around edges. */
3294 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3295 || flag_exceptions || flag_non_call_exceptions)
3296 && flag_omit_frame_pointer
3297 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3299 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3300 warning (0, "unwind tables currently require either a frame pointer "
3301 "or %saccumulate-outgoing-args%s for correctness",
3302 prefix, suffix);
3303 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3306 /* If stack probes are required, the space used for large function
3307 arguments on the stack must also be probed, so enable
3308 -maccumulate-outgoing-args so this happens in the prologue. */
3309 if (TARGET_STACK_PROBE
3310 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3312 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3313 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3314 "for correctness", prefix, suffix);
3315 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3318 /* For sane SSE instruction set generation we need the fcomi instruction.
3319 It is safe to enable all CMOVE instructions. */
3320 if (TARGET_SSE)
3321 TARGET_CMOVE = 1;
3323 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3325 char *p;
3326 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3327 p = strchr (internal_label_prefix, 'X');
3328 internal_label_prefix_len = p - internal_label_prefix;
3329 *p = '\0';
3332 /* When the scheduling description is not available, disable the scheduler pass
3333 so it won't slow down compilation and make x87 code slower.  */
3334 if (!TARGET_SCHEDULE)
3335 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3337 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3338 set_param_value ("simultaneous-prefetches",
3339 ix86_cost->simultaneous_prefetches);
3340 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3341 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3342 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3343 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3344 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3345 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3347 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3348 can be optimized to ap = __builtin_next_arg (0). */
3349 if (!TARGET_64BIT)
3350 targetm.expand_builtin_va_start = NULL;
3352 if (TARGET_64BIT)
3354 ix86_gen_leave = gen_leave_rex64;
3355 ix86_gen_pop1 = gen_popdi1;
3356 ix86_gen_add3 = gen_adddi3;
3357 ix86_gen_sub3 = gen_subdi3;
3358 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3359 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3360 ix86_gen_monitor = gen_sse3_monitor64;
3361 ix86_gen_andsp = gen_anddi3;
3363 else
3365 ix86_gen_leave = gen_leave;
3366 ix86_gen_pop1 = gen_popsi1;
3367 ix86_gen_add3 = gen_addsi3;
3368 ix86_gen_sub3 = gen_subsi3;
3369 ix86_gen_sub3_carry = gen_subsi3_carry;
3370 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3371 ix86_gen_monitor = gen_sse3_monitor;
3372 ix86_gen_andsp = gen_andsi3;
3375 #ifdef USE_IX86_CLD
3376 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3377 if (!TARGET_64BIT)
3378 target_flags |= MASK_CLD & ~target_flags_explicit;
3379 #endif
3381 /* Save the initial options in case the user uses function-specific options.  */
3382 if (main_args_p)
3383 target_option_default_node = target_option_current_node
3384 = build_target_option_node ();
3387 /* Save the current options */
3389 static void
3390 ix86_function_specific_save (struct cl_target_option *ptr)
3392 gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3393 gcc_assert (IN_RANGE (ix86_schedule, 0, 255));
3394 gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3395 gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3396 gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
3398 ptr->arch = ix86_arch;
3399 ptr->schedule = ix86_schedule;
3400 ptr->tune = ix86_tune;
3401 ptr->fpmath = ix86_fpmath;
3402 ptr->branch_cost = ix86_branch_cost;
3403 ptr->tune_defaulted = ix86_tune_defaulted;
3404 ptr->arch_specified = ix86_arch_specified;
3405 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3406 ptr->target_flags_explicit = target_flags_explicit;
3409 /* Restore the current options */
3411 static void
3412 ix86_function_specific_restore (struct cl_target_option *ptr)
3414 enum processor_type old_tune = ix86_tune;
3415 enum processor_type old_arch = ix86_arch;
3416 unsigned int ix86_arch_mask, ix86_tune_mask;
3417 int i;
3419 ix86_arch = ptr->arch;
3420 ix86_schedule = ptr->schedule;
3421 ix86_tune = ptr->tune;
3422 ix86_fpmath = ptr->fpmath;
3423 ix86_branch_cost = ptr->branch_cost;
3424 ix86_tune_defaulted = ptr->tune_defaulted;
3425 ix86_arch_specified = ptr->arch_specified;
3426 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3427 target_flags_explicit = ptr->target_flags_explicit;
3429 /* Recreate the arch feature tests if the arch changed */
3430 if (old_arch != ix86_arch)
3432 ix86_arch_mask = 1u << ix86_arch;
3433 for (i = 0; i < X86_ARCH_LAST; ++i)
3434 ix86_arch_features[i]
3435 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3438 /* Recreate the tune optimization tests */
3439 if (old_tune != ix86_tune)
3441 ix86_tune_mask = 1u << ix86_tune;
3442 for (i = 0; i < X86_TUNE_LAST; ++i)
3443 ix86_tune_features[i]
3444 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3448 /* Print the current options */
3450 static void
3451 ix86_function_specific_print (FILE *file, int indent,
3452 struct cl_target_option *ptr)
3454 char *target_string
3455 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3456 NULL, NULL, NULL, false);
3458 fprintf (file, "%*sarch = %d (%s)\n",
3459 indent, "",
3460 ptr->arch,
3461 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3462 ? cpu_names[ptr->arch]
3463 : "<unknown>"));
3465 fprintf (file, "%*stune = %d (%s)\n",
3466 indent, "",
3467 ptr->tune,
3468 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3469 ? cpu_names[ptr->tune]
3470 : "<unknown>"));
3472 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3473 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3474 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3475 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3477 if (target_string)
3479 fprintf (file, "%*s%s\n", indent, "", target_string);
3480 free (target_string);
3485 /* Inner function to process the attribute((target(...))); take an argument
3486 and set the current options from it.  If we have a list, recursively go
3487 over the list. */
3489 static bool
3490 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3492 char *next_optstr;
3493 bool ret = true;
3495 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3496 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3497 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3498 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3500 enum ix86_opt_type
3502 ix86_opt_unknown,
3503 ix86_opt_yes,
3504 ix86_opt_no,
3505 ix86_opt_str,
3506 ix86_opt_isa
3509 static const struct
3511 const char *string;
3512 size_t len;
3513 enum ix86_opt_type type;
3514 int opt;
3515 int mask;
3516 } attrs[] = {
3517 /* isa options */
3518 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3519 IX86_ATTR_ISA ("abm", OPT_mabm),
3520 IX86_ATTR_ISA ("aes", OPT_maes),
3521 IX86_ATTR_ISA ("avx", OPT_mavx),
3522 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3523 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3524 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3525 IX86_ATTR_ISA ("sse", OPT_msse),
3526 IX86_ATTR_ISA ("sse2", OPT_msse2),
3527 IX86_ATTR_ISA ("sse3", OPT_msse3),
3528 IX86_ATTR_ISA ("sse4", OPT_msse4),
3529 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3530 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3531 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3532 IX86_ATTR_ISA ("sse5", OPT_msse5),
3533 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3535 /* string options */
3536 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3537 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3538 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3540 /* flag options */
3541 IX86_ATTR_YES ("cld",
3542 OPT_mcld,
3543 MASK_CLD),
3545 IX86_ATTR_NO ("fancy-math-387",
3546 OPT_mfancy_math_387,
3547 MASK_NO_FANCY_MATH_387),
3549 IX86_ATTR_NO ("fused-madd",
3550 OPT_mfused_madd,
3551 MASK_NO_FUSED_MADD),
3553 IX86_ATTR_YES ("ieee-fp",
3554 OPT_mieee_fp,
3555 MASK_IEEE_FP),
3557 IX86_ATTR_YES ("inline-all-stringops",
3558 OPT_minline_all_stringops,
3559 MASK_INLINE_ALL_STRINGOPS),
3561 IX86_ATTR_YES ("inline-stringops-dynamically",
3562 OPT_minline_stringops_dynamically,
3563 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3565 IX86_ATTR_NO ("align-stringops",
3566 OPT_mno_align_stringops,
3567 MASK_NO_ALIGN_STRINGOPS),
3569 IX86_ATTR_YES ("recip",
3570 OPT_mrecip,
3571 MASK_RECIP),
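/* As an illustrative example of what this table accepts, a declaration
   such as
      int foo (void) __attribute__((target("sse4.2,no-fused-madd,arch=core2")));
   (foo being a hypothetical function) is split below into the ISA option
   sse4.2, the negated flag option fused-madd, and the string option
   arch=core2.  */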
3575 /* If this is a list, recurse to get the options. */
3576 if (TREE_CODE (args) == TREE_LIST)
3578 bool ret = true;
3580 for (; args; args = TREE_CHAIN (args))
3581 if (TREE_VALUE (args)
3582 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3583 ret = false;
3585 return ret;
3588 else if (TREE_CODE (args) != STRING_CST)
3589 gcc_unreachable ();
3591 /* Handle multiple arguments separated by commas. */
3592 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3594 while (next_optstr && *next_optstr != '\0')
3596 char *p = next_optstr;
3597 char *orig_p = p;
3598 char *comma = strchr (next_optstr, ',');
3599 const char *opt_string;
3600 size_t len, opt_len;
3601 int opt;
3602 bool opt_set_p;
3603 char ch;
3604 unsigned i;
3605 enum ix86_opt_type type = ix86_opt_unknown;
3606 int mask = 0;
3608 if (comma)
3610 *comma = '\0';
3611 len = comma - next_optstr;
3612 next_optstr = comma + 1;
3614 else
3616 len = strlen (p);
3617 next_optstr = NULL;
3620 /* Recognize no-xxx. */
3621 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3623 opt_set_p = false;
3624 p += 3;
3625 len -= 3;
3627 else
3628 opt_set_p = true;
3630 /* Find the option. */
3631 ch = *p;
3632 opt = N_OPTS;
3633 for (i = 0; i < ARRAY_SIZE (attrs); i++)
3635 type = attrs[i].type;
3636 opt_len = attrs[i].len;
3637 if (ch == attrs[i].string[0]
3638 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3639 && memcmp (p, attrs[i].string, opt_len) == 0)
3641 opt = attrs[i].opt;
3642 mask = attrs[i].mask;
3643 opt_string = attrs[i].string;
3644 break;
3648 /* Process the option. */
3649 if (opt == N_OPTS)
3651 error ("attribute(target(\"%s\")) is unknown", orig_p);
3652 ret = false;
3655 else if (type == ix86_opt_isa)
3656 ix86_handle_option (opt, p, opt_set_p);
3658 else if (type == ix86_opt_yes || type == ix86_opt_no)
3660 if (type == ix86_opt_no)
3661 opt_set_p = !opt_set_p;
3663 if (opt_set_p)
3664 target_flags |= mask;
3665 else
3666 target_flags &= ~mask;
3669 else if (type == ix86_opt_str)
3671 if (p_strings[opt])
3673 error ("option(\"%s\") was already specified", opt_string);
3674 ret = false;
3676 else
3677 p_strings[opt] = xstrdup (p + opt_len);
3680 else
3681 gcc_unreachable ();
3684 return ret;
3687 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
3689 tree
3690 ix86_valid_target_attribute_tree (tree args)
3692 const char *orig_arch_string = ix86_arch_string;
3693 const char *orig_tune_string = ix86_tune_string;
3694 const char *orig_fpmath_string = ix86_fpmath_string;
3695 int orig_tune_defaulted = ix86_tune_defaulted;
3696 int orig_arch_specified = ix86_arch_specified;
3697 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3698 tree t = NULL_TREE;
3699 int i;
3700 struct cl_target_option *def
3701 = TREE_TARGET_OPTION (target_option_default_node);
3703 /* Process each of the options on the chain. */
3704 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3705 return NULL_TREE;
3707 /* If the changed options are different from the default, rerun override_options,
3708 and then save the options away.  The string options are attribute options,
3709 and will be undone when we copy the save structure. */
3710 if (ix86_isa_flags != def->ix86_isa_flags
3711 || target_flags != def->target_flags
3712 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3713 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3714 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3716 /* If we are using the default tune= or arch=, undo the string assigned,
3717 and use the default. */
3718 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3719 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3720 else if (!orig_arch_specified)
3721 ix86_arch_string = NULL;
3723 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3724 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3725 else if (orig_tune_defaulted)
3726 ix86_tune_string = NULL;
3728 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
3729 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3730 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3731 else if (!TARGET_64BIT && TARGET_SSE)
3732 ix86_fpmath_string = "sse,387";
3734 /* Do any overrides, such as arch=xxx or tune=xxx. */
3735 override_options (false);
3737 /* Add any builtin functions with the new isa if any. */
3738 ix86_add_new_builtins (ix86_isa_flags);
3740 /* Save the current options unless we are validating options for
3741 #pragma. */
3742 t = build_target_option_node ();
3744 ix86_arch_string = orig_arch_string;
3745 ix86_tune_string = orig_tune_string;
3746 ix86_fpmath_string = orig_fpmath_string;
3748 /* Free up memory allocated to hold the strings */
3749 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3750 if (option_strings[i])
3751 free (option_strings[i]);
3754 return t;
3757 /* Hook to validate attribute((target("string"))). */
3759 static bool
3760 ix86_valid_target_attribute_p (tree fndecl,
3761 tree ARG_UNUSED (name),
3762 tree args,
3763 int ARG_UNUSED (flags))
3765 struct cl_target_option cur_target;
3766 bool ret = true;
3767 tree old_optimize = build_optimization_node ();
3768 tree new_target, new_optimize;
3769 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3771 /* If the function changed the optimization levels as well as setting target
3772 options, start with the optimizations specified. */
3773 if (func_optimize && func_optimize != old_optimize)
3774 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3776 /* The target attributes may also change some optimization flags, so update
3777 the optimization options if necessary. */
3778 cl_target_option_save (&cur_target);
3779 new_target = ix86_valid_target_attribute_tree (args);
3780 new_optimize = build_optimization_node ();
3782 if (!new_target)
3783 ret = false;
3785 else if (fndecl)
3787 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3789 if (old_optimize != new_optimize)
3790 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
3793 cl_target_option_restore (&cur_target);
3795 if (old_optimize != new_optimize)
3796 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3798 return ret;
3802 /* Hook to determine if one function can safely inline another. */
3804 static bool
3805 ix86_can_inline_p (tree caller, tree callee)
3807 bool ret = false;
3808 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3809 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3811 /* If callee has no option attributes, then it is ok to inline. */
3812 if (!callee_tree)
3813 ret = true;
3815 /* If the caller has no option attributes but the callee does, then it is
3816 not ok to inline. */
3817 else if (!caller_tree)
3818 ret = false;
3820 else
3822 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3823 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3825 /* The callee's isa options should be a subset of the caller's, i.e. an SSE5
3826 function can inline an SSE2 function but an SSE2 function can't inline an
3827 SSE5 function. */
3828 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3829 != callee_opts->ix86_isa_flags)
3830 ret = false;
3832 /* See if we have the same non-isa options. */
3833 else if (caller_opts->target_flags != callee_opts->target_flags)
3834 ret = false;
3836 /* See if arch, tune, etc. are the same. */
3837 else if (caller_opts->arch != callee_opts->arch)
3838 ret = false;
3840 else if (caller_opts->tune != callee_opts->tune)
3841 ret = false;
3843 else if (caller_opts->fpmath != callee_opts->fpmath)
3844 ret = false;
3846 else if (caller_opts->branch_cost != callee_opts->branch_cost)
3847 ret = false;
3849 else
3850 ret = true;
3853 return ret;
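/* For example, under the checks above a caller declared as
      __attribute__((target("sse4.2"))) int g (int);
   may inline a callee declared with target("sse2"), since the callee's ISA
   flags are a subset of the caller's, while the reverse is rejected
   (these declarations are illustrative, not from this file).  */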
3857 /* Remember the last target of ix86_set_current_function. */
3858 static GTY(()) tree ix86_previous_fndecl;
3860 /* Establish appropriate back-end context for processing the function
3861 FNDECL. The argument might be NULL to indicate processing at top
3862 level, outside of any function scope. */
3863 static void
3864 ix86_set_current_function (tree fndecl)
3866 /* Only change the context if the function changes. This hook is called
3867 several times in the course of compiling a function, and we don't want to
3868 slow things down too much or call target_reinit when it isn't safe. */
3869 if (fndecl && fndecl != ix86_previous_fndecl)
3871 tree old_tree = (ix86_previous_fndecl
3872 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3873 : NULL_TREE);
3875 tree new_tree = (fndecl
3876 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3877 : NULL_TREE);
3879 ix86_previous_fndecl = fndecl;
3880 if (old_tree == new_tree)
3883 else if (new_tree)
3885 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
3886 target_reinit ();
3889 else if (old_tree)
3891 struct cl_target_option *def
3892 = TREE_TARGET_OPTION (target_option_current_node);
3894 cl_target_option_restore (def);
3895 target_reinit ();
3901 /* Return true if this goes in large data/bss. */
3903 static bool
3904 ix86_in_large_data_p (tree exp)
3906 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3907 return false;
3909 /* Functions are never large data. */
3910 if (TREE_CODE (exp) == FUNCTION_DECL)
3911 return false;
3913 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3915 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3916 if (strcmp (section, ".ldata") == 0
3917 || strcmp (section, ".lbss") == 0)
3918 return true;
3919 return false;
3921 else
3923 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3925 /* If this is an incomplete type with size 0, then we can't put it
3926 in data because it might be too big when completed. */
3927 if (!size || size > ix86_section_threshold)
3928 return true;
3931 return false;
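/* E.g. with -mcmodel=medium, a static object larger than
   ix86_section_threshold (set via -mlarge-data-threshold=, validated
   earlier in this file) counts as large data, while smaller complete
   objects do not; incomplete types of size 0 are treated as large to be
   safe.  */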
3934 /* Switch to the appropriate section for output of DECL.
3935 DECL is either a `VAR_DECL' node or a constant of some sort.
3936 RELOC indicates whether forming the initial value of DECL requires
3937 link-time relocations. */
3939 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
3940 ATTRIBUTE_UNUSED;
3942 static section *
3943 x86_64_elf_select_section (tree decl, int reloc,
3944 unsigned HOST_WIDE_INT align)
3946 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3947 && ix86_in_large_data_p (decl))
3949 const char *sname = NULL;
3950 unsigned int flags = SECTION_WRITE;
3951 switch (categorize_decl_for_section (decl, reloc))
3953 case SECCAT_DATA:
3954 sname = ".ldata";
3955 break;
3956 case SECCAT_DATA_REL:
3957 sname = ".ldata.rel";
3958 break;
3959 case SECCAT_DATA_REL_LOCAL:
3960 sname = ".ldata.rel.local";
3961 break;
3962 case SECCAT_DATA_REL_RO:
3963 sname = ".ldata.rel.ro";
3964 break;
3965 case SECCAT_DATA_REL_RO_LOCAL:
3966 sname = ".ldata.rel.ro.local";
3967 break;
3968 case SECCAT_BSS:
3969 sname = ".lbss";
3970 flags |= SECTION_BSS;
3971 break;
3972 case SECCAT_RODATA:
3973 case SECCAT_RODATA_MERGE_STR:
3974 case SECCAT_RODATA_MERGE_STR_INIT:
3975 case SECCAT_RODATA_MERGE_CONST:
3976 sname = ".lrodata";
3977 flags = 0;
3978 break;
3979 case SECCAT_SRODATA:
3980 case SECCAT_SDATA:
3981 case SECCAT_SBSS:
3982 gcc_unreachable ();
3983 case SECCAT_TEXT:
3984 case SECCAT_TDATA:
3985 case SECCAT_TBSS:
3986 /* We don't split these for the medium model.  Place them into
3987 default sections and hope for the best. */
3988 break;
3989 case SECCAT_EMUTLS_VAR:
3990 case SECCAT_EMUTLS_TMPL:
3991 gcc_unreachable ();
3993 if (sname)
3995 /* We might get called with string constants, but get_named_section
3996 doesn't like them as they are not DECLs. Also, we need to set
3997 flags in that case. */
3998 if (!DECL_P (decl))
3999 return get_section (sname, flags, NULL);
4000 return get_named_section (decl, sname, reloc);
4003 return default_elf_select_section (decl, reloc, align);
4006 /* Build up a unique section name, expressed as a
4007 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4008 RELOC indicates whether the initial value of EXP requires
4009 link-time relocations. */
4011 static void ATTRIBUTE_UNUSED
4012 x86_64_elf_unique_section (tree decl, int reloc)
4014 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4015 && ix86_in_large_data_p (decl))
4017 const char *prefix = NULL;
4018 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4019 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4021 switch (categorize_decl_for_section (decl, reloc))
4023 case SECCAT_DATA:
4024 case SECCAT_DATA_REL:
4025 case SECCAT_DATA_REL_LOCAL:
4026 case SECCAT_DATA_REL_RO:
4027 case SECCAT_DATA_REL_RO_LOCAL:
4028 prefix = one_only ? ".ld" : ".ldata";
4029 break;
4030 case SECCAT_BSS:
4031 prefix = one_only ? ".lb" : ".lbss";
4032 break;
4033 case SECCAT_RODATA:
4034 case SECCAT_RODATA_MERGE_STR:
4035 case SECCAT_RODATA_MERGE_STR_INIT:
4036 case SECCAT_RODATA_MERGE_CONST:
4037 prefix = one_only ? ".lr" : ".lrodata";
4038 break;
4039 case SECCAT_SRODATA:
4040 case SECCAT_SDATA:
4041 case SECCAT_SBSS:
4042 gcc_unreachable ();
4043 case SECCAT_TEXT:
4044 case SECCAT_TDATA:
4045 case SECCAT_TBSS:
4046 /* We don't split these for the medium model.  Place them into
4047 default sections and hope for the best. */
4048 break;
4049 case SECCAT_EMUTLS_VAR:
4050 prefix = targetm.emutls.var_section;
4051 break;
4052 case SECCAT_EMUTLS_TMPL:
4053 prefix = targetm.emutls.tmpl_section;
4054 break;
4056 if (prefix)
4058 const char *name, *linkonce;
4059 char *string;
4061 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4062 name = targetm.strip_name_encoding (name);
4064 /* If we're using one_only, then there needs to be a .gnu.linkonce
4065 prefix to the section name. */
4066 linkonce = one_only ? ".gnu.linkonce" : "";
4068 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4070 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4071 return;
4074 default_unique_section (decl, reloc);
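/* So for a hypothetical large-model variable foo, the section built here
   is .ldata.foo, or .gnu.linkonce.ld.foo when the one-only path above is
   taken.  */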
4077 #ifdef COMMON_ASM_OP
4078 /* This says how to output assembler code to declare an
4079 uninitialized external linkage data object.
4081 For medium model x86-64 we need to use the .largecomm directive for
4082 large objects. */
4083 void
4084 x86_elf_aligned_common (FILE *file,
4085 const char *name, unsigned HOST_WIDE_INT size,
4086 int align)
4088 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4089 && size > (unsigned int)ix86_section_threshold)
4090 fprintf (file, ".largecomm\t");
4091 else
4092 fprintf (file, "%s", COMMON_ASM_OP);
4093 assemble_name (file, name);
4094 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
4095 size, align / BITS_PER_UNIT);
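/* The directive emitted for a large object therefore looks like
      .largecomm	big_buf,100000,32
   where big_buf is an illustrative name and the final operand is the
   alignment in bytes (ALIGN / BITS_PER_UNIT).  */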
4097 #endif
4099 /* Utility function for targets to use in implementing
4100 ASM_OUTPUT_ALIGNED_BSS. */
4102 void
4103 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4104 const char *name, unsigned HOST_WIDE_INT size,
4105 int align)
4107 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4108 && size > (unsigned int)ix86_section_threshold)
4109 switch_to_section (get_named_section (decl, ".lbss", 0));
4110 else
4111 switch_to_section (bss_section);
4112 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4113 #ifdef ASM_DECLARE_OBJECT_NAME
4114 last_assemble_variable_decl = decl;
4115 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4116 #else
4117 /* The standard thing is just to output a label for the object. */
4118 ASM_OUTPUT_LABEL (file, name);
4119 #endif /* ASM_DECLARE_OBJECT_NAME */
4120 ASM_OUTPUT_SKIP (file, size ? size : 1);
4123 void
4124 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4126 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4127 make the problem with not enough registers even worse. */
4128 #ifdef INSN_SCHEDULING
4129 if (level > 1)
4130 flag_schedule_insns = 0;
4131 #endif
4133 if (TARGET_MACHO)
4134 /* The Darwin libraries never set errno, so we might as well
4135 avoid calling them when that's the only reason we would. */
4136 flag_errno_math = 0;
4138 /* The default values of these switches depend on TARGET_64BIT,
4139 which is not known at this moment.  Mark these values with 2 and
4140 let the user override them.  In case there is no command line option
4141 specifying them, we will set the defaults in override_options. */
4142 if (optimize >= 1)
4143 flag_omit_frame_pointer = 2;
4144 flag_pcc_struct_return = 2;
4145 flag_asynchronous_unwind_tables = 2;
4146 flag_vect_cost_model = 1;
4147 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4148 SUBTARGET_OPTIMIZATION_OPTIONS;
4149 #endif
4152 /* Decide whether we can make a sibling call to a function. DECL is the
4153 declaration of the function being targeted by the call and EXP is the
4154 CALL_EXPR representing the call. */
4156 static bool
4157 ix86_function_ok_for_sibcall (tree decl, tree exp)
4159 tree func;
4160 rtx a, b;
4162 /* If we are generating position-independent code, we cannot sibcall
4163 optimize any indirect call, or a direct call to a global function,
4164 as the PLT requires %ebx be live. */
4165 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4166 return false;
4168 if (decl)
4169 func = decl;
4170 else
4172 func = TREE_TYPE (CALL_EXPR_FN (exp));
4173 if (POINTER_TYPE_P (func))
4174 func = TREE_TYPE (func);
4177 /* Check that the return value locations are the same.  For example,
4178 if we are returning floats on the 80387 register stack, we cannot
4179 make a sibcall from a function that doesn't return a float to a
4180 function that does or, conversely, from a function that does return
4181 a float to a function that doesn't; the necessary stack adjustment
4182 would not be executed. This is also the place we notice
4183 differences in the return value ABI. Note that it is ok for one
4184 of the functions to have void return type as long as the return
4185 value of the other is passed in a register. */
4186 a = ix86_function_value (TREE_TYPE (exp), func, false);
4187 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4188 cfun->decl, false);
4189 if (STACK_REG_P (a) || STACK_REG_P (b))
4191 if (!rtx_equal_p (a, b))
4192 return false;
4194 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4196 else if (!rtx_equal_p (a, b))
4197 return false;
4199 /* If this call is indirect, we'll need to be able to use a call-clobbered
4200 register for the address of the target function. Make sure that all
4201 such registers are not used for passing parameters. */
4202 if (!decl && !TARGET_64BIT)
4204 tree type;
4206 /* We're looking at the CALL_EXPR, we need the type of the function. */
4207 type = CALL_EXPR_FN (exp); /* pointer expression */
4208 type = TREE_TYPE (type); /* pointer type */
4209 type = TREE_TYPE (type); /* function type */
4211 if (ix86_function_regparm (type, NULL) >= 3)
4213 /* ??? Need to count the actual number of registers to be used,
4214 not the possible number of registers. Fix later. */
4215 return false;
4219 /* Dllimport'd functions are also called indirectly. */
4220 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
4221 && !TARGET_64BIT
4222 && decl && DECL_DLLIMPORT_P (decl)
4223 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
4224 return false;
4226 /* If we need to align the outgoing stack, then sibcalling would
4227 unalign the stack, which may break the called function. */
4228 if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
4229 return false;
4231 /* Otherwise okay. That also includes certain types of indirect calls. */
4232 return true;
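/* For example, in 32-bit PIC code a call through a function pointer is
   never sibcall-optimized, and neither is a direct call to a global
   function, since the PLT requires %ebx to be live across the call, as
   the first check above notes.  */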
4235 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4236 calling convention attributes;
4237 arguments as in struct attribute_spec.handler. */
4239 static tree
4240 ix86_handle_cconv_attribute (tree *node, tree name,
4241 tree args,
4242 int flags ATTRIBUTE_UNUSED,
4243 bool *no_add_attrs)
4245 if (TREE_CODE (*node) != FUNCTION_TYPE
4246 && TREE_CODE (*node) != METHOD_TYPE
4247 && TREE_CODE (*node) != FIELD_DECL
4248 && TREE_CODE (*node) != TYPE_DECL)
4250 warning (OPT_Wattributes, "%qs attribute only applies to functions",
4251 IDENTIFIER_POINTER (name));
4252 *no_add_attrs = true;
4253 return NULL_TREE;
4256 /* Can combine regparm with all attributes but fastcall. */
4257 if (is_attribute_p ("regparm", name))
4259 tree cst;
4261 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4263 error ("fastcall and regparm attributes are not compatible");
4266 cst = TREE_VALUE (args);
4267 if (TREE_CODE (cst) != INTEGER_CST)
4269 warning (OPT_Wattributes,
4270 "%qs attribute requires an integer constant argument",
4271 IDENTIFIER_POINTER (name));
4272 *no_add_attrs = true;
4274 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4276 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
4277 IDENTIFIER_POINTER (name), REGPARM_MAX);
4278 *no_add_attrs = true;
4281 return NULL_TREE;
4284 if (TARGET_64BIT)
4286 /* Do not warn when emulating the MS ABI. */
4287 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4288 warning (OPT_Wattributes, "%qs attribute ignored",
4289 IDENTIFIER_POINTER (name));
4290 *no_add_attrs = true;
4291 return NULL_TREE;
4294 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4295 if (is_attribute_p ("fastcall", name))
4297 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4299 error ("fastcall and cdecl attributes are not compatible");
4301 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4303 error ("fastcall and stdcall attributes are not compatible");
4305 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4307 error ("fastcall and regparm attributes are not compatible");
4311 /* Can combine stdcall with fastcall (redundant), regparm and
4312 sseregparm. */
4313 else if (is_attribute_p ("stdcall", name))
4315 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4317 error ("stdcall and cdecl attributes are not compatible");
4319 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4321 error ("stdcall and fastcall attributes are not compatible");
4325 /* Can combine cdecl with regparm and sseregparm. */
4326 else if (is_attribute_p ("cdecl", name))
4328 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4330 error ("stdcall and cdecl attributes are not compatible");
4332 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4334 error ("fastcall and cdecl attributes are not compatible");
4338 /* Can combine sseregparm with all attributes. */
4340 return NULL_TREE;
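/* Illustrative declarations exercising the checks above:
      void f (int, int) __attribute__((stdcall, regparm (2)));   <- accepted
      void g (int, int) __attribute__((fastcall, regparm (2)));  <- rejected
   stdcall combines with regparm, while fastcall and regparm conflict
   (f and g are hypothetical).  */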
4343 /* Return 0 if the attributes for two types are incompatible, 1 if they
4344 are compatible, and 2 if they are nearly compatible (which causes a
4345 warning to be generated). */
4347 static int
4348 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4350 /* Check for mismatch of non-default calling convention. */
4351 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4353 if (TREE_CODE (type1) != FUNCTION_TYPE
4354 && TREE_CODE (type1) != METHOD_TYPE)
4355 return 1;
4357 /* Check for mismatched fastcall/regparm types. */
4358 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4359 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4360 || (ix86_function_regparm (type1, NULL)
4361 != ix86_function_regparm (type2, NULL)))
4362 return 0;
4364 /* Check for mismatched sseregparm types. */
4365 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4366 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4367 return 0;
4369 /* Check for mismatched return types (cdecl vs stdcall). */
4370 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4371 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4372 return 0;
4374 return 1;
4377 /* Return the regparm value for a function with the indicated TYPE and DECL.
4378 DECL may be NULL when calling function indirectly
4379 or considering a libcall. */
4381 static int
4382 ix86_function_regparm (const_tree type, const_tree decl)
4384 tree attr;
4385 int regparm;
4387 static bool error_issued;
4389 if (TARGET_64BIT)
4390 return (ix86_function_type_abi (type) == SYSV_ABI
4391 ? X86_64_REGPARM_MAX : X64_REGPARM_MAX);
4393 regparm = ix86_regparm;
4394 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4395 if (attr)
4397 regparm
4398 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4400 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4402 /* We can't use regparm(3) for nested functions because
4403 these pass the static chain pointer in the %ecx register. */
4404 if (!error_issued && regparm == 3
4405 && decl_function_context (decl)
4406 && !DECL_NO_STATIC_CHAIN (decl))
4408 error ("nested functions are limited to 2 register parameters");
4409 error_issued = true;
4410 return 0;
4414 return regparm;
4417 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4418 return 2;
4420 /* Use register calling convention for local functions when possible. */
4421 if (decl
4422 && TREE_CODE (decl) == FUNCTION_DECL
4423 && optimize
4424 && !profile_flag)
4426 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4427 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4428 if (i && i->local)
4430 int local_regparm, globals = 0, regno;
4431 struct function *f;
4433 /* Make sure no regparm register is taken by a
4434 fixed register variable. */
4435 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4436 if (fixed_regs[local_regparm])
4437 break;
4439 /* We can't use regparm(3) for nested functions as these pass the
4440 static chain pointer in the third argument. */
4441 if (local_regparm == 3
4442 && decl_function_context (decl)
4443 && !DECL_NO_STATIC_CHAIN (decl))
4444 local_regparm = 2;
4446 /* If the function realigns its stack pointer, the prologue will
4447 clobber %ecx. If we've already generated code for the callee,
4448 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4449 scanning the attributes for the self-realigning property. */
4450 f = DECL_STRUCT_FUNCTION (decl);
4451 /* The current internal arg pointer won't conflict with
4452 parameter passing regs, so there is no need to change stack
4453 realignment or adjust the regparm number.
4455 Each fixed register usage increases register pressure,
4456 so fewer registers should be used for argument passing.
4457 This functionality can be overridden by an explicit
4458 regparm value. */
4459 for (regno = 0; regno <= DI_REG; regno++)
4460 if (fixed_regs[regno])
4461 globals++;
4463 local_regparm
4464 = globals < local_regparm ? local_regparm - globals : 0;
4466 if (local_regparm > regparm)
4467 regparm = local_regparm;
4471 return regparm;
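/* E.g. compiling with -ffixed-ebx makes %ebx count against GLOBALS in the
   loop above, so a local function that would otherwise get three register
   parameters is reduced to two.  */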
4474 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4475 DFmode (2) arguments in SSE registers for a function with the
4476 indicated TYPE and DECL. DECL may be NULL when calling function
4477 indirectly or considering a libcall. Otherwise return 0. */
4479 static int
4480 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4482 gcc_assert (!TARGET_64BIT);
4484 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4485 by the sseregparm attribute. */
4486 if (TARGET_SSEREGPARM
4487 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4489 if (!TARGET_SSE)
4491 if (warn)
4493 if (decl)
4494 error ("Calling %qD with attribute sseregparm without "
4495 "SSE/SSE2 enabled", decl);
4496 else
4497 error ("Calling %qT with attribute sseregparm without "
4498 "SSE/SSE2 enabled", type);
4500 return 0;
4503 return 2;
4506 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4507 (and DFmode for SSE2) arguments in SSE registers. */
4508 if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
4510 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4511 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4512 if (i && i->local)
4513 return TARGET_SSE2 ? 2 : 1;
4516 return 0;
4519 /* Return true if EAX is live at the start of the function. Used by
4520 ix86_expand_prologue to determine if we need special help before
4521 calling allocate_stack_worker. */
4523 static bool
4524 ix86_eax_live_at_start_p (void)
4526 /* Cheat. Don't bother working forward from ix86_function_regparm
4527 to the function type to whether an actual argument is located in
4528 eax. Instead just look at cfg info, which is still close enough
4529 to correct at this point. This gives false positives for broken
4530 functions that might use uninitialized data that happens to be
4531 allocated in eax, but who cares? */
4532 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4535 /* Value is the number of bytes of arguments automatically
4536 popped when returning from a subroutine call.
4537 FUNDECL is the declaration node of the function (as a tree),
4538 FUNTYPE is the data type of the function (as a tree),
4539 or for a library call it is an identifier node for the subroutine name.
4540 SIZE is the number of bytes of arguments passed on the stack.
4542 On the 80386, the RTD insn may be used to pop them if the number
4543 of args is fixed, but if the number is variable then the caller
4544 must pop them all. RTD can't be used for library calls now
4545 because the library is compiled with the Unix compiler.
4546 Use of RTD is a selectable option, since it is incompatible with
4547 standard Unix calling sequences. If the option is not selected,
4548 the caller must always pop the args.
4550 The attribute stdcall is equivalent to RTD on a per module basis. */
4552 int
4553 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4555 int rtd;
4557 /* None of the 64-bit ABIs pop arguments. */
4558 if (TARGET_64BIT)
4559 return 0;
4561 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4563 /* Cdecl functions override -mrtd, and never pop the stack. */
4564 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4566 /* Stdcall and fastcall functions will pop the stack unless they
4567 take variable arguments. */
4568 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4569 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
4570 rtd = 1;
4572 if (rtd && ! stdarg_p (funtype))
4573 return size;
4576 /* Lose any fake structure return argument if it is passed on the stack. */
4577 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4578 && !KEEP_AGGREGATE_RETURN_POINTER)
4580 int nregs = ix86_function_regparm (funtype, fundecl);
4581 if (nregs == 0)
4582 return GET_MODE_SIZE (Pmode);
4585 return 0;
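/* E.g. a stdcall function taking two int arguments returns 8 here, so the
   callee pops its arguments with "ret $8"; a cdecl or variadic function
   returns 0 and leaves the popping to the caller.  */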
4588 /* Argument support functions. */
4590 /* Return true when a register may be used to pass function parameters. */
4591 bool
4592 ix86_function_arg_regno_p (int regno)
4594 int i;
4595 const int *parm_regs;
4597 if (!TARGET_64BIT)
4599 if (TARGET_MACHO)
4600 return (regno < REGPARM_MAX
4601 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4602 else
4603 return (regno < REGPARM_MAX
4604 || (TARGET_MMX && MMX_REGNO_P (regno)
4605 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4606 || (TARGET_SSE && SSE_REGNO_P (regno)
4607 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4610 if (TARGET_MACHO)
4612 if (SSE_REGNO_P (regno) && TARGET_SSE)
4613 return true;
4615 else
4617 if (TARGET_SSE && SSE_REGNO_P (regno)
4618 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4619 return true;
4622 /* TODO: The function should depend on the current function's ABI, but
4623 builtins.c would then need updating.  Therefore we use the
4624 default ABI. */
4626 /* RAX is used as hidden argument to va_arg functions. */
4627 if (ix86_abi == SYSV_ABI && regno == AX_REG)
4628 return true;
4630 if (ix86_abi == MS_ABI)
4631 parm_regs = x86_64_ms_abi_int_parameter_registers;
4632 else
4633 parm_regs = x86_64_int_parameter_registers;
4634 for (i = 0; i < (ix86_abi == MS_ABI ? X64_REGPARM_MAX
4635 : X86_64_REGPARM_MAX); i++)
4636 if (regno == parm_regs[i])
4637 return true;
4638 return false;
4641 /* Return true if we do not know how to pass TYPE solely in registers. */
4643 static bool
4644 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4646 if (must_pass_in_stack_var_size_or_pad (mode, type))
4647 return true;
4649 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4650 The layout_type routine is crafty and tries to trick us into passing
4651 currently unsupported vector types on the stack by using TImode. */
4652 return (!TARGET_64BIT && mode == TImode
4653 && type && TREE_CODE (type) != VECTOR_TYPE);
4656 /* Return the size, in bytes, of the area reserved for arguments passed
4657 in registers for the function represented by FNDECL, depending on the
4658 ABI format used. */
4659 int
4660 ix86_reg_parm_stack_space (const_tree fndecl)
4662 enum calling_abi call_abi = SYSV_ABI;
4663 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4664 call_abi = ix86_function_abi (fndecl);
4665 else
4666 call_abi = ix86_function_type_abi (fndecl);
4667 if (call_abi == MS_ABI)
4668 return 32;
4669 return 0;
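/* The 32 bytes for MS_ABI correspond to the four 8-byte "home" slots that
   the Microsoft x64 calling convention reserves for the register arguments
   passed in RCX, RDX, R8 and R9.  */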
4672 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
4673 call ABI used. */
4674 enum calling_abi
4675 ix86_function_type_abi (const_tree fntype)
4677 if (TARGET_64BIT && fntype != NULL)
4679 enum calling_abi abi = ix86_abi;
4680 if (abi == SYSV_ABI)
4682 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
4683 abi = MS_ABI;
4685 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
4686 abi = SYSV_ABI;
4687 return abi;
4689 return ix86_abi;
4692 static enum calling_abi
4693 ix86_function_abi (const_tree fndecl)
4695 if (! fndecl)
4696 return ix86_abi;
4697 return ix86_function_type_abi (TREE_TYPE (fndecl));
4700 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
4701 call ABI used. */
4702 enum calling_abi
4703 ix86_cfun_abi (void)
4705 if (! cfun || ! TARGET_64BIT)
4706 return ix86_abi;
4707 return cfun->machine->call_abi;
4710 /* regclass.c */
4711 extern void init_regs (void);
4713 /* Implementation of the call ABI switching target hook.  The call
4714 register sets specific to FNDECL are set up; see also
4715 CONDITIONAL_REGISTER_USAGE for more details. */
4716 void
4717 ix86_call_abi_override (const_tree fndecl)
4719 if (fndecl == NULL_TREE)
4720 cfun->machine->call_abi = ix86_abi;
4721 else
4722 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4725 /* The MS and SYSV ABIs have different sets of call-used registers.  Avoid
4726 expensive re-initialization of init_regs each time we switch function
4727 context, since this is needed only during RTL expansion. */
4728 static void
4729 ix86_maybe_switch_abi (void)
4731 if (TARGET_64BIT &&
4732 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
4733 reinit_regs ();
4736 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4737 for a call to a function whose data type is FNTYPE.
4738 For a library call, FNTYPE is 0. */
4740 void
4741 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4742 tree fntype, /* tree ptr for function decl */
4743 rtx libname, /* SYMBOL_REF of library name or 0 */
4744 tree fndecl)
4746 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4747 memset (cum, 0, sizeof (*cum));
4749 if (fndecl)
4750 cum->call_abi = ix86_function_abi (fndecl);
4751 else
4752 cum->call_abi = ix86_function_type_abi (fntype);
4753 /* Set up the number of registers to use for passing arguments. */
4755 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4756 sorry ("ms_abi attribute require -maccumulate-outgoing-args or subtarget optimization implying it");
4757 cum->nregs = ix86_regparm;
4758 if (TARGET_64BIT)
4760 if (cum->call_abi != ix86_abi)
4761 cum->nregs = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX
4762 : X64_REGPARM_MAX;
4764 if (TARGET_SSE)
4766 cum->sse_nregs = SSE_REGPARM_MAX;
4767 if (TARGET_64BIT)
4769 if (cum->call_abi != ix86_abi)
4770 cum->sse_nregs = ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4771 : X64_SSE_REGPARM_MAX;
4774 if (TARGET_MMX)
4775 cum->mmx_nregs = MMX_REGPARM_MAX;
4776 cum->warn_avx = true;
4777 cum->warn_sse = true;
4778 cum->warn_mmx = true;
4780 /* Because types might mismatch between caller and callee, we need to
4781 use the actual type of the function for local calls.
4782 FIXME: cgraph_analyze can be told to actually record if a function uses
4783 va_start, so for local functions maybe_vaarg can be made more aggressive,
4784 helping K&R code.
4785 FIXME: once the type system is fixed, we won't need this code anymore. */
4786 if (i && i->local)
4787 fntype = TREE_TYPE (fndecl);
4788 cum->maybe_vaarg = (fntype
4789 ? (!prototype_p (fntype) || stdarg_p (fntype))
4790 : !libname);
4792 if (!TARGET_64BIT)
4794 /* If there are variable arguments, then we won't pass anything
4795 in registers in 32-bit mode. */
4796 if (stdarg_p (fntype))
4798 cum->nregs = 0;
4799 cum->sse_nregs = 0;
4800 cum->mmx_nregs = 0;
4801 cum->warn_avx = 0;
4802 cum->warn_sse = 0;
4803 cum->warn_mmx = 0;
4804 return;
4807 /* Use ecx and edx registers if function has fastcall attribute,
4808 else look for regparm information. */
4809 if (fntype)
4811 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4813 cum->nregs = 2;
4814 cum->fastcall = 1;
4816 else
4817 cum->nregs = ix86_function_regparm (fntype, fndecl);
4820 /* Set up the number of SSE registers used for passing SFmode
4821 and DFmode arguments. Warn for mismatching ABI. */
4822 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4826 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4827 But in the case of vector types, it is some vector mode.
4829 When we have only some of our vector isa extensions enabled, then there
4830 are some modes for which vector_mode_supported_p is false. For these
4831 modes, the generic vector support in gcc will choose some non-vector mode
4832 in order to implement the type. By computing the natural mode, we'll
4833 select the proper ABI location for the operand and not depend on whatever
4834 the middle-end decides to do with these vector types.
4836 The middle-end can't deal with vector types > 16 bytes.  In this
4837 case, we return the original mode and warn about the ABI change if
4838 CUM isn't NULL. */
4840 static enum machine_mode
4841 type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
4843 enum machine_mode mode = TYPE_MODE (type);
4845 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4847 HOST_WIDE_INT size = int_size_in_bytes (type);
4848 if ((size == 8 || size == 16 || size == 32)
4849 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4850 && TYPE_VECTOR_SUBPARTS (type) > 1)
4852 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
4854 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4855 mode = MIN_MODE_VECTOR_FLOAT;
4856 else
4857 mode = MIN_MODE_VECTOR_INT;
4859 /* Get the mode which has this inner mode and number of units. */
4860 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4861 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4862 && GET_MODE_INNER (mode) == innermode)
4864 if (size == 32 && !TARGET_AVX)
4866 static bool warnedavx;
4868 if (cum
4869 && !warnedavx
4870 && cum->warn_avx)
4872 warnedavx = true;
4873 warning (0, "AVX vector argument without AVX "
4874 "enabled changes the ABI");
4876 return TYPE_MODE (type);
4878 else
4879 return mode;
4882 gcc_unreachable ();
4886 return mode;
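/* For example, a generic vector type declared as
      int v __attribute__((vector_size (16)));
   may be given a non-vector mode such as TImode by the middle-end when SSE
   is disabled; the search above still recovers V4SImode so the proper ABI
   slot is chosen (v is an illustrative declaration).  */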
4889 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4890 this may not agree with the mode that the type system has chosen for the
4891 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4892 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
4894 static rtx
4895 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
4896 unsigned int regno)
4898 rtx tmp;
4900 if (orig_mode != BLKmode)
4901 tmp = gen_rtx_REG (orig_mode, regno);
4902 else
4904 tmp = gen_rtx_REG (mode, regno);
4905 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4906 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4909 return tmp;
4912 /* x86-64 register passing implementation.  See the x86-64 ABI for details.  The
4913 goal of this code is to classify each eightbyte of an incoming argument by
4914 register class and assign registers accordingly. */
4916 /* Return the union class of CLASS1 and CLASS2.
4917 See the x86-64 PS ABI for details. */
4919 static enum x86_64_reg_class
4920 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4922 /* Rule #1: If both classes are equal, this is the resulting class. */
4923 if (class1 == class2)
4924 return class1;
4926 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4927 the other class. */
4928 if (class1 == X86_64_NO_CLASS)
4929 return class2;
4930 if (class2 == X86_64_NO_CLASS)
4931 return class1;
4933 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4934 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4935 return X86_64_MEMORY_CLASS;
4937 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
4938 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4939 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4940 return X86_64_INTEGERSI_CLASS;
4941 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4942 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4943 return X86_64_INTEGER_CLASS;
4945 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4946 MEMORY is used. */
4947 if (class1 == X86_64_X87_CLASS
4948 || class1 == X86_64_X87UP_CLASS
4949 || class1 == X86_64_COMPLEX_X87_CLASS
4950 || class2 == X86_64_X87_CLASS
4951 || class2 == X86_64_X87UP_CLASS
4952 || class2 == X86_64_COMPLEX_X87_CLASS)
4953 return X86_64_MEMORY_CLASS;
4955 /* Rule #6: Otherwise class SSE is used. */
4956 return X86_64_SSE_CLASS;
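/* A worked example: for union { float f; int i; } the single eightbyte
   classifies as SSESF for the float field and INTEGERSI for the int field;
   rule #4 merges these to INTEGERSI, so the union is passed in a
   general-purpose register.  */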
4959 /* Classify the argument of type TYPE and mode MODE.
4960 CLASSES will be filled by the register class used to pass each word
4961 of the operand. The number of words is returned. In case the parameter
4962 should be passed in memory, 0 is returned. As a special case for zero
4963 sized containers, classes[0] will be NO_CLASS and 1 is returned.
4965 BIT_OFFSET is used internally for handling records; it specifies the
4966 offset in bits modulo 256, to avoid overflow cases.
4968 See the x86-64 PS ABI for details.
4971 static int
4972 classify_argument (enum machine_mode mode, const_tree type,
4973 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
4975 HOST_WIDE_INT bytes =
4976 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4977 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4979 /* Variable sized entities are always passed/returned in memory. */
4980 if (bytes < 0)
4981 return 0;
4983 if (mode != VOIDmode
4984 && targetm.calls.must_pass_in_stack (mode, type))
4985 return 0;
4987 if (type && AGGREGATE_TYPE_P (type))
4989 int i;
4990 tree field;
4991 enum x86_64_reg_class subclasses[MAX_CLASSES];
4993 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
4994 if (bytes > 32)
4995 return 0;
4997 for (i = 0; i < words; i++)
4998 classes[i] = X86_64_NO_CLASS;
5000 /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
5001 signal the memory class, so handle this as a special case. */
5002 if (!words)
5004 classes[0] = X86_64_NO_CLASS;
5005 return 1;
5008 /* Classify each field of record and merge classes. */
5009 switch (TREE_CODE (type))
5011 case RECORD_TYPE:
5012 /* And now merge the fields of structure. */
5013 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5015 if (TREE_CODE (field) == FIELD_DECL)
5017 int num;
5019 if (TREE_TYPE (field) == error_mark_node)
5020 continue;
5022 /* Bitfields are always classified as integer. Handle them
5023 early, since later code would consider them to be
5024 misaligned integers. */
5025 if (DECL_BIT_FIELD (field))
5027 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5028 i < ((int_bit_position (field) + (bit_offset % 64))
5029 + tree_low_cst (DECL_SIZE (field), 0)
5030 + 63) / 8 / 8; i++)
5031 classes[i] =
5032 merge_classes (X86_64_INTEGER_CLASS,
5033 classes[i]);
5035 else
5037 int pos;
5039 type = TREE_TYPE (field);
5041 /* Flexible array member is ignored. */
5042 if (TYPE_MODE (type) == BLKmode
5043 && TREE_CODE (type) == ARRAY_TYPE
5044 && TYPE_SIZE (type) == NULL_TREE
5045 && TYPE_DOMAIN (type) != NULL_TREE
5046 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5047 == NULL_TREE))
5049 static bool warned;
5051 if (!warned && warn_psabi)
5053 warned = true;
5054 inform (input_location,
5055 "The ABI of passing struct with"
5056 " a flexible array member has"
5057 " changed in GCC 4.4");
5059 continue;
5061 num = classify_argument (TYPE_MODE (type), type,
5062 subclasses,
5063 (int_bit_position (field)
5064 + bit_offset) % 256);
5065 if (!num)
5066 return 0;
5067 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5068 for (i = 0; i < num && (i + pos) < words; i++)
5069 classes[i + pos] =
5070 merge_classes (subclasses[i], classes[i + pos]);
5074 break;
5076 case ARRAY_TYPE:
5077 /* Arrays are handled as small records. */
5079 int num;
5080 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5081 TREE_TYPE (type), subclasses, bit_offset);
5082 if (!num)
5083 return 0;
5085 /* The partial classes are now full classes. */
5086 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5087 subclasses[0] = X86_64_SSE_CLASS;
5088 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5089 && !((bit_offset % 64) == 0 && bytes == 4))
5090 subclasses[0] = X86_64_INTEGER_CLASS;
5092 for (i = 0; i < words; i++)
5093 classes[i] = subclasses[i % num];
5095 break;
5097 case UNION_TYPE:
5098 case QUAL_UNION_TYPE:
5099 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
5101 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5103 if (TREE_CODE (field) == FIELD_DECL)
5105 int num;
5107 if (TREE_TYPE (field) == error_mark_node)
5108 continue;
5110 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5111 TREE_TYPE (field), subclasses,
5112 bit_offset);
5113 if (!num)
5114 return 0;
5115 for (i = 0; i < num; i++)
5116 classes[i] = merge_classes (subclasses[i], classes[i]);
5119 break;
5121 default:
5122 gcc_unreachable ();
5125 if (words > 2)
5127 /* When size > 16 bytes, if the first class isn't
5128 X86_64_SSE_CLASS or any of the remaining classes isn't
5129 X86_64_SSEUP_CLASS, everything should be passed in
5130 memory. */
5131 if (classes[0] != X86_64_SSE_CLASS)
5132 return 0;
5134 for (i = 1; i < words; i++)
5135 if (classes[i] != X86_64_SSEUP_CLASS)
5136 return 0;
5139 /* Final merger cleanup. */
5140 for (i = 0; i < words; i++)
5142 /* If one class is MEMORY, everything should be passed in
5143 memory. */
5144 if (classes[i] == X86_64_MEMORY_CLASS)
5145 return 0;
5147 /* X86_64_SSEUP_CLASS should always be preceded by
5148 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5149 if (classes[i] == X86_64_SSEUP_CLASS
5150 && classes[i - 1] != X86_64_SSE_CLASS
5151 && classes[i - 1] != X86_64_SSEUP_CLASS)
5153 /* The first one should never be X86_64_SSEUP_CLASS. */
5154 gcc_assert (i != 0);
5155 classes[i] = X86_64_SSE_CLASS;
5158 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5159 everything should be passed in memory. */
5160 if (classes[i] == X86_64_X87UP_CLASS
5161 && (classes[i - 1] != X86_64_X87_CLASS))
5163 static bool warned;
5165 /* The first one should never be X86_64_X87UP_CLASS. */
5166 gcc_assert (i != 0);
5167 if (!warned && warn_psabi)
5169 warned = true;
5170 inform (input_location,
5171 "The ABI of passing union with long double"
5172 " has changed in GCC 4.4");
5174 return 0;
5177 return words;
5180 /* Compute the alignment needed. We align all types to natural boundaries,
5181 with the exception of XFmode, which is aligned to 64 bits. */
5182 if (mode != VOIDmode && mode != BLKmode)
5184 int mode_alignment = GET_MODE_BITSIZE (mode);
5186 if (mode == XFmode)
5187 mode_alignment = 128;
5188 else if (mode == XCmode)
5189 mode_alignment = 256;
5190 if (COMPLEX_MODE_P (mode))
5191 mode_alignment /= 2;
5192 /* Misaligned fields are always returned in memory. */
5193 if (bit_offset % mode_alignment)
5194 return 0;
5197 /* For V1xx modes, just use the base mode. */
5198 if (VECTOR_MODE_P (mode) && mode != V1DImode
5199 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5200 mode = GET_MODE_INNER (mode);
5202 /* Classification of atomic types. */
5203 switch (mode)
5205 case SDmode:
5206 case DDmode:
5207 classes[0] = X86_64_SSE_CLASS;
5208 return 1;
5209 case TDmode:
5210 classes[0] = X86_64_SSE_CLASS;
5211 classes[1] = X86_64_SSEUP_CLASS;
5212 return 2;
5213 case DImode:
5214 case SImode:
5215 case HImode:
5216 case QImode:
5217 case CSImode:
5218 case CHImode:
5219 case CQImode:
5221 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5223 if (size <= 32)
5225 classes[0] = X86_64_INTEGERSI_CLASS;
5226 return 1;
5228 else if (size <= 64)
5230 classes[0] = X86_64_INTEGER_CLASS;
5231 return 1;
5233 else if (size <= 64+32)
5235 classes[0] = X86_64_INTEGER_CLASS;
5236 classes[1] = X86_64_INTEGERSI_CLASS;
5237 return 2;
5239 else if (size <= 64+64)
5241 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5242 return 2;
5244 else
5245 gcc_unreachable ();
5247 case CDImode:
5248 case TImode:
5249 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5250 return 2;
5251 case COImode:
5252 case OImode:
5253 /* OImode shouldn't be used directly. */
5254 gcc_unreachable ();
5255 case CTImode:
5256 return 0;
5257 case SFmode:
5258 if (!(bit_offset % 64))
5259 classes[0] = X86_64_SSESF_CLASS;
5260 else
5261 classes[0] = X86_64_SSE_CLASS;
5262 return 1;
5263 case DFmode:
5264 classes[0] = X86_64_SSEDF_CLASS;
5265 return 1;
5266 case XFmode:
5267 classes[0] = X86_64_X87_CLASS;
5268 classes[1] = X86_64_X87UP_CLASS;
5269 return 2;
5270 case TFmode:
5271 classes[0] = X86_64_SSE_CLASS;
5272 classes[1] = X86_64_SSEUP_CLASS;
5273 return 2;
5274 case SCmode:
5275 classes[0] = X86_64_SSE_CLASS;
5276 if (!(bit_offset % 64))
5277 return 1;
5278 else
5280 static bool warned;
5282 if (!warned && warn_psabi)
5284 warned = true;
5285 inform (input_location,
5286 "The ABI of passing structure with complex float"
5287 " member has changed in GCC 4.4");
5289 classes[1] = X86_64_SSESF_CLASS;
5290 return 2;
5292 case DCmode:
5293 classes[0] = X86_64_SSEDF_CLASS;
5294 classes[1] = X86_64_SSEDF_CLASS;
5295 return 2;
5296 case XCmode:
5297 classes[0] = X86_64_COMPLEX_X87_CLASS;
5298 return 1;
5299 case TCmode:
5300 /* This mode is larger than 16 bytes. */
5301 return 0;
5302 case V8SFmode:
5303 case V8SImode:
5304 case V32QImode:
5305 case V16HImode:
5306 case V4DFmode:
5307 case V4DImode:
5308 classes[0] = X86_64_SSE_CLASS;
5309 classes[1] = X86_64_SSEUP_CLASS;
5310 classes[2] = X86_64_SSEUP_CLASS;
5311 classes[3] = X86_64_SSEUP_CLASS;
5312 return 4;
5313 case V4SFmode:
5314 case V4SImode:
5315 case V16QImode:
5316 case V8HImode:
5317 case V2DFmode:
5318 case V2DImode:
5319 classes[0] = X86_64_SSE_CLASS;
5320 classes[1] = X86_64_SSEUP_CLASS;
5321 return 2;
5322 case V1DImode:
5323 case V2SFmode:
5324 case V2SImode:
5325 case V4HImode:
5326 case V8QImode:
5327 classes[0] = X86_64_SSE_CLASS;
5328 return 1;
5329 case BLKmode:
5330 case VOIDmode:
5331 return 0;
5332 default:
5333 gcc_assert (VECTOR_MODE_P (mode));
5335 if (bytes > 16)
5336 return 0;
5338 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5340 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5341 classes[0] = X86_64_INTEGERSI_CLASS;
5342 else
5343 classes[0] = X86_64_INTEGER_CLASS;
5344 classes[1] = X86_64_INTEGER_CLASS;
5345 return 1 + (bytes > 8);
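/* Illustrative sketch (an editor's example, not part of the original
   source): typical classify_argument results under the x86-64 psABI,
   assuming a zero BIT_OFFSET:

       struct { double d; long l; }  -> { SSEDF, INTEGER }, returns 2
       struct { char c[24]; }        -> returns 0 (memory)
       long double (XFmode)          -> { X87, X87UP }, returns 2
       __m128 (V4SFmode)             -> { SSE, SSEUP }, returns 2

   A return value of 0 always means "pass or return in memory".  */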
5349 /* Examine the argument and return the number of registers required in each
5350 class. Return 0 iff the parameter should be passed in memory. */
5351 static int
5352 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5353 int *int_nregs, int *sse_nregs)
5355 enum x86_64_reg_class regclass[MAX_CLASSES];
5356 int n = classify_argument (mode, type, regclass, 0);
5358 *int_nregs = 0;
5359 *sse_nregs = 0;
5360 if (!n)
5361 return 0;
5362 for (n--; n >= 0; n--)
5363 switch (regclass[n])
5365 case X86_64_INTEGER_CLASS:
5366 case X86_64_INTEGERSI_CLASS:
5367 (*int_nregs)++;
5368 break;
5369 case X86_64_SSE_CLASS:
5370 case X86_64_SSESF_CLASS:
5371 case X86_64_SSEDF_CLASS:
5372 (*sse_nregs)++;
5373 break;
5374 case X86_64_NO_CLASS:
5375 case X86_64_SSEUP_CLASS:
5376 break;
5377 case X86_64_X87_CLASS:
5378 case X86_64_X87UP_CLASS:
5379 if (!in_return)
5380 return 0;
5381 break;
5382 case X86_64_COMPLEX_X87_CLASS:
5383 return in_return ? 2 : 0;
5384 case X86_64_MEMORY_CLASS:
5385 gcc_unreachable ();
5387 return 1;
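/* Illustrative sketch (an editor's example, not part of the original
   source): for struct { double d; long l; }, examine_argument sets
   *sse_nregs = 1 (for the SSEDF word) and *int_nregs = 1 (for the
   INTEGER word) and returns 1; callers then compare both counts
   against the registers still available before committing.  */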
5390 /* Construct container for the argument used by GCC interface. See
5391 FUNCTION_ARG for the detailed description. */
5393 static rtx
5394 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5395 const_tree type, int in_return, int nintregs, int nsseregs,
5396 const int *intreg, int sse_regno)
5398 /* The following variables hold the static issued_error state. */
5399 static bool issued_sse_arg_error;
5400 static bool issued_sse_ret_error;
5401 static bool issued_x87_ret_error;
5403 enum machine_mode tmpmode;
5404 int bytes =
5405 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5406 enum x86_64_reg_class regclass[MAX_CLASSES];
5407 int n;
5408 int i;
5409 int nexps = 0;
5410 int needed_sseregs, needed_intregs;
5411 rtx exp[MAX_CLASSES];
5412 rtx ret;
5414 n = classify_argument (mode, type, regclass, 0);
5415 if (!n)
5416 return NULL;
5417 if (!examine_argument (mode, type, in_return, &needed_intregs,
5418 &needed_sseregs))
5419 return NULL;
5420 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5421 return NULL;
5423 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5424 some less clueful developer tries to use floating-point anyway. */
5425 if (needed_sseregs && !TARGET_SSE)
5427 if (in_return)
5429 if (!issued_sse_ret_error)
5431 error ("SSE register return with SSE disabled");
5432 issued_sse_ret_error = true;
5435 else if (!issued_sse_arg_error)
5437 error ("SSE register argument with SSE disabled");
5438 issued_sse_arg_error = true;
5440 return NULL;
5443 /* Likewise, error if the ABI requires us to return values in the
5444 x87 registers and the user specified -mno-80387. */
5445 if (!TARGET_80387 && in_return)
5446 for (i = 0; i < n; i++)
5447 if (regclass[i] == X86_64_X87_CLASS
5448 || regclass[i] == X86_64_X87UP_CLASS
5449 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5451 if (!issued_x87_ret_error)
5453 error ("x87 register return with x87 disabled");
5454 issued_x87_ret_error = true;
5456 return NULL;
5459 /* First construct simple cases. Avoid SCmode, since we want to use a
5460 single register to pass this type. */
5461 if (n == 1 && mode != SCmode)
5462 switch (regclass[0])
5464 case X86_64_INTEGER_CLASS:
5465 case X86_64_INTEGERSI_CLASS:
5466 return gen_rtx_REG (mode, intreg[0]);
5467 case X86_64_SSE_CLASS:
5468 case X86_64_SSESF_CLASS:
5469 case X86_64_SSEDF_CLASS:
5470 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
5471 case X86_64_X87_CLASS:
5472 case X86_64_COMPLEX_X87_CLASS:
5473 return gen_rtx_REG (mode, FIRST_STACK_REG);
5474 case X86_64_NO_CLASS:
5475 /* Zero sized array, struct or class. */
5476 return NULL;
5477 default:
5478 gcc_unreachable ();
5480 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5481 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5482 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5483 if (n == 4
5484 && regclass[0] == X86_64_SSE_CLASS
5485 && regclass[1] == X86_64_SSEUP_CLASS
5486 && regclass[2] == X86_64_SSEUP_CLASS
5487 && regclass[3] == X86_64_SSEUP_CLASS
5488 && mode != BLKmode)
5489 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5491 if (n == 2
5492 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5493 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5494 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5495 && regclass[1] == X86_64_INTEGER_CLASS
5496 && (mode == CDImode || mode == TImode || mode == TFmode)
5497 && intreg[0] + 1 == intreg[1])
5498 return gen_rtx_REG (mode, intreg[0]);
5500 /* Otherwise figure out the entries of the PARALLEL. */
5501 for (i = 0; i < n; i++)
5503 int pos;
5505 switch (regclass[i])
5507 case X86_64_NO_CLASS:
5508 break;
5509 case X86_64_INTEGER_CLASS:
5510 case X86_64_INTEGERSI_CLASS:
5511 /* Merge TImodes on aligned occasions here too. */
5512 if (i * 8 + 8 > bytes)
5513 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5514 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5515 tmpmode = SImode;
5516 else
5517 tmpmode = DImode;
5518 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
5519 if (tmpmode == BLKmode)
5520 tmpmode = DImode;
5521 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5522 gen_rtx_REG (tmpmode, *intreg),
5523 GEN_INT (i*8));
5524 intreg++;
5525 break;
5526 case X86_64_SSESF_CLASS:
5527 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5528 gen_rtx_REG (SFmode,
5529 SSE_REGNO (sse_regno)),
5530 GEN_INT (i*8));
5531 sse_regno++;
5532 break;
5533 case X86_64_SSEDF_CLASS:
5534 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5535 gen_rtx_REG (DFmode,
5536 SSE_REGNO (sse_regno)),
5537 GEN_INT (i*8));
5538 sse_regno++;
5539 break;
5540 case X86_64_SSE_CLASS:
5541 pos = i;
5542 switch (n)
5544 case 1:
5545 tmpmode = DImode;
5546 break;
5547 case 2:
5548 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
5550 tmpmode = TImode;
5551 i++;
5553 else
5554 tmpmode = DImode;
5555 break;
5556 case 4:
5557 gcc_assert (i == 0
5558 && regclass[1] == X86_64_SSEUP_CLASS
5559 && regclass[2] == X86_64_SSEUP_CLASS
5560 && regclass[3] == X86_64_SSEUP_CLASS);
5561 tmpmode = OImode;
5562 i += 3;
5563 break;
5564 default:
5565 gcc_unreachable ();
5567 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5568 gen_rtx_REG (tmpmode,
5569 SSE_REGNO (sse_regno)),
5570 GEN_INT (pos*8));
5571 sse_regno++;
5572 break;
5573 default:
5574 gcc_unreachable ();
5578 /* Empty aligned struct, union or class. */
5579 if (nexps == 0)
5580 return NULL;
5582 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5583 for (i = 0; i < nexps; i++)
5584 XVECEXP (ret, 0, i) = exp [i];
5585 return ret;
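/* Illustrative sketch (an editor's example, not part of the original
   source): for struct { double d; long l; } passed as the first
   argument, construct_container builds roughly

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI di) (const_int 8))])

   i.e. byte 0 of the value lives in %xmm0 and byte 8 in %rdi.  */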
5588 /* Update the data in CUM to advance over an argument of mode MODE
5589 and data type TYPE. (TYPE is null for libcalls where that information
5590 may not be available.) */
5592 static void
5593 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5594 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5596 switch (mode)
5598 default:
5599 break;
5601 case BLKmode:
5602 if (bytes < 0)
5603 break;
5604 /* FALLTHRU */
5606 case DImode:
5607 case SImode:
5608 case HImode:
5609 case QImode:
5610 cum->words += words;
5611 cum->nregs -= words;
5612 cum->regno += words;
5614 if (cum->nregs <= 0)
5616 cum->nregs = 0;
5617 cum->regno = 0;
5619 break;
5621 case OImode:
5622 /* OImode shouldn't be used directly. */
5623 gcc_unreachable ();
5625 case DFmode:
5626 if (cum->float_in_sse < 2)
5627 break;
5628 case SFmode:
5629 if (cum->float_in_sse < 1)
5630 break;
5631 /* FALLTHRU */
5633 case V8SFmode:
5634 case V8SImode:
5635 case V32QImode:
5636 case V16HImode:
5637 case V4DFmode:
5638 case V4DImode:
5639 case TImode:
5640 case V16QImode:
5641 case V8HImode:
5642 case V4SImode:
5643 case V2DImode:
5644 case V4SFmode:
5645 case V2DFmode:
5646 if (!type || !AGGREGATE_TYPE_P (type))
5648 cum->sse_words += words;
5649 cum->sse_nregs -= 1;
5650 cum->sse_regno += 1;
5651 if (cum->sse_nregs <= 0)
5653 cum->sse_nregs = 0;
5654 cum->sse_regno = 0;
5657 break;
5659 case V8QImode:
5660 case V4HImode:
5661 case V2SImode:
5662 case V2SFmode:
5663 case V1DImode:
5664 if (!type || !AGGREGATE_TYPE_P (type))
5666 cum->mmx_words += words;
5667 cum->mmx_nregs -= 1;
5668 cum->mmx_regno += 1;
5669 if (cum->mmx_nregs <= 0)
5671 cum->mmx_nregs = 0;
5672 cum->mmx_regno = 0;
5675 break;
5679 static void
5680 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5681 tree type, HOST_WIDE_INT words, int named)
5683 int int_nregs, sse_nregs;
5685 /* Unnamed 256bit vector mode parameters are passed on the stack. */
5686 if (!named && VALID_AVX256_REG_MODE (mode))
5687 return;
5689 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5690 cum->words += words;
5691 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5693 cum->nregs -= int_nregs;
5694 cum->sse_nregs -= sse_nregs;
5695 cum->regno += int_nregs;
5696 cum->sse_regno += sse_nregs;
5698 else
5699 cum->words += words;
5702 static void
5703 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5704 HOST_WIDE_INT words)
5706 /* Otherwise, this should be passed indirectly. */
5707 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5709 cum->words += words;
5710 if (cum->nregs > 0)
5712 cum->nregs -= 1;
5713 cum->regno += 1;
5717 void
5718 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5719 tree type, int named)
5721 HOST_WIDE_INT bytes, words;
5723 if (mode == BLKmode)
5724 bytes = int_size_in_bytes (type);
5725 else
5726 bytes = GET_MODE_SIZE (mode);
5727 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5729 if (type)
5730 mode = type_natural_mode (type, NULL);
5732 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5733 function_arg_advance_ms_64 (cum, bytes, words);
5734 else if (TARGET_64BIT)
5735 function_arg_advance_64 (cum, mode, type, words, named);
5736 else
5737 function_arg_advance_32 (cum, mode, type, bytes, words);
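/* Illustrative sketch (an editor's example, not part of the original
   source): on 64-bit SYSV, advancing past struct { double d; long l; }
   consumes one SSE and one integer register, so cum->sse_nregs and
   cum->nregs each drop by one while cum->sse_regno and cum->regno each
   move forward; an argument that must live in memory only bumps
   cum->words instead.  */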
5740 /* Define where to put the arguments to a function.
5741 Value is zero to push the argument on the stack,
5742 or a hard register in which to store the argument.
5744 MODE is the argument's machine mode.
5745 TYPE is the data type of the argument (as a tree).
5746 This is null for libcalls where that information may
5747 not be available.
5748 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5749 the preceding args and about the function being called.
5750 NAMED is nonzero if this argument is a named parameter
5751 (otherwise it is an extra parameter matching an ellipsis). */
5753 static rtx
5754 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5755 enum machine_mode orig_mode, tree type,
5756 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5758 static bool warnedsse, warnedmmx;
5760 /* Avoid the AL settings for the Unix64 ABI. */
5761 if (mode == VOIDmode)
5762 return constm1_rtx;
5764 switch (mode)
5766 default:
5767 break;
5769 case BLKmode:
5770 if (bytes < 0)
5771 break;
5772 /* FALLTHRU */
5773 case DImode:
5774 case SImode:
5775 case HImode:
5776 case QImode:
5777 if (words <= cum->nregs)
5779 int regno = cum->regno;
5781 /* Fastcall allocates the first two DWORD (SImode) or
5782 smaller arguments to ECX and EDX if it isn't an
5783 aggregate type. */
5784 if (cum->fastcall)
5786 if (mode == BLKmode
5787 || mode == DImode
5788 || (type && AGGREGATE_TYPE_P (type)))
5789 break;
5791 /* ECX, not EAX, is the first allocated register. */
5792 if (regno == AX_REG)
5793 regno = CX_REG;
5795 return gen_rtx_REG (mode, regno);
5797 break;
5799 case DFmode:
5800 if (cum->float_in_sse < 2)
5801 break;
5802 case SFmode:
5803 if (cum->float_in_sse < 1)
5804 break;
5805 /* FALLTHRU */
5806 case TImode:
5807 /* In 32bit, we pass TImode in xmm registers. */
5808 case V16QImode:
5809 case V8HImode:
5810 case V4SImode:
5811 case V2DImode:
5812 case V4SFmode:
5813 case V2DFmode:
5814 if (!type || !AGGREGATE_TYPE_P (type))
5816 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5818 warnedsse = true;
5819 warning (0, "SSE vector argument without SSE enabled "
5820 "changes the ABI");
5822 if (cum->sse_nregs)
5823 return gen_reg_or_parallel (mode, orig_mode,
5824 cum->sse_regno + FIRST_SSE_REG);
5826 break;
5828 case OImode:
5829 /* OImode shouldn't be used directly. */
5830 gcc_unreachable ();
5832 case V8SFmode:
5833 case V8SImode:
5834 case V32QImode:
5835 case V16HImode:
5836 case V4DFmode:
5837 case V4DImode:
5838 if (!type || !AGGREGATE_TYPE_P (type))
5840 if (cum->sse_nregs)
5841 return gen_reg_or_parallel (mode, orig_mode,
5842 cum->sse_regno + FIRST_SSE_REG);
5844 break;
5846 case V8QImode:
5847 case V4HImode:
5848 case V2SImode:
5849 case V2SFmode:
5850 case V1DImode:
5851 if (!type || !AGGREGATE_TYPE_P (type))
5853 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5855 warnedmmx = true;
5856 warning (0, "MMX vector argument without MMX enabled "
5857 "changes the ABI");
5859 if (cum->mmx_nregs)
5860 return gen_reg_or_parallel (mode, orig_mode,
5861 cum->mmx_regno + FIRST_MMX_REG);
5863 break;
5866 return NULL_RTX;
5869 static rtx
5870 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5871 enum machine_mode orig_mode, tree type, int named)
5873 /* Handle a hidden AL argument containing the number of registers
5874 for varargs x86-64 functions. */
5875 if (mode == VOIDmode)
5876 return GEN_INT (cum->maybe_vaarg
5877 ? (cum->sse_nregs < 0
5878 ? (cum->call_abi == ix86_abi
5879 ? SSE_REGPARM_MAX
5880 : (ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
5881 : X64_SSE_REGPARM_MAX))
5882 : cum->sse_regno)
5883 : -1);
5885 switch (mode)
5887 default:
5888 break;
5890 case V8SFmode:
5891 case V8SImode:
5892 case V32QImode:
5893 case V16HImode:
5894 case V4DFmode:
5895 case V4DImode:
5896 /* Unnamed 256bit vector mode parameters are passed on the stack. */
5897 if (!named)
5898 return NULL;
5899 break;
5902 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5903 cum->sse_nregs,
5904 &x86_64_int_parameter_registers [cum->regno],
5905 cum->sse_regno);
5908 static rtx
5909 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5910 enum machine_mode orig_mode, int named,
5911 HOST_WIDE_INT bytes)
5913 unsigned int regno;
5915 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
5916 We use the value -2 to specify that the current function call is MS ABI. */
5917 if (mode == VOIDmode)
5918 return GEN_INT (-2);
5920 /* If we've run out of registers, it goes on the stack. */
5921 if (cum->nregs == 0)
5922 return NULL_RTX;
5924 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5926 /* Only floating point modes are passed in anything but integer regs. */
5927 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5929 if (named)
5930 regno = cum->regno + FIRST_SSE_REG;
5931 else
5933 rtx t1, t2;
5935 /* Unnamed floating parameters are passed in both the
5936 SSE and integer registers. */
5937 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5938 t2 = gen_rtx_REG (mode, regno);
5939 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5940 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5941 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5944 /* Handle aggregate types passed in registers. */
5945 if (orig_mode == BLKmode)
5947 if (bytes > 0 && bytes <= 8)
5948 mode = (bytes > 4 ? DImode : SImode);
5949 if (mode == BLKmode)
5950 mode = DImode;
5953 return gen_reg_or_parallel (mode, orig_mode, regno);
5956 rtx
5957 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5958 tree type, int named)
5960 enum machine_mode mode = omode;
5961 HOST_WIDE_INT bytes, words;
5963 if (mode == BLKmode)
5964 bytes = int_size_in_bytes (type);
5965 else
5966 bytes = GET_MODE_SIZE (mode);
5967 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5969 /* To simplify the code below, represent vector types with a vector mode
5970 even if MMX/SSE are not active. */
5971 if (type && TREE_CODE (type) == VECTOR_TYPE)
5972 mode = type_natural_mode (type, cum);
5974 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5975 return function_arg_ms_64 (cum, mode, omode, named, bytes);
5976 else if (TARGET_64BIT)
5977 return function_arg_64 (cum, mode, omode, type, named);
5978 else
5979 return function_arg_32 (cum, mode, omode, type, bytes, words);
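/* Illustrative sketch (an editor's example, not part of the original
   source): for a SYSV x86-64 prototype such as

       void f (int a, double b, __m128 c);

   successive function_arg calls yield (reg:SI di) for A, (reg:DF xmm0)
   for B and (reg:V4SF xmm1) for C, with function_arg_advance updating
   the cumulative counts in between; a NULL return means the argument
   goes on the stack.  */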
5982 /* A C expression that indicates when an argument must be passed by
5983 reference. If nonzero for an argument, a copy of that argument is
5984 made in memory and a pointer to the argument is passed instead of
5985 the argument itself. The pointer is passed in whatever way is
5986 appropriate for passing a pointer to that type. */
5988 static bool
5989 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5990 enum machine_mode mode ATTRIBUTE_UNUSED,
5991 const_tree type, bool named ATTRIBUTE_UNUSED)
5993 /* See Windows x64 Software Convention. */
5994 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5996 int msize = (int) GET_MODE_SIZE (mode);
5997 if (type)
5999 /* Arrays are passed by reference. */
6000 if (TREE_CODE (type) == ARRAY_TYPE)
6001 return true;
6003 if (AGGREGATE_TYPE_P (type))
6005 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6006 are passed by reference. */
6007 msize = int_size_in_bytes (type);
6011 /* __m128 is passed by reference. */
6012 switch (msize) {
6013 case 1: case 2: case 4: case 8:
6014 break;
6015 default:
6016 return true;
6019 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6020 return 1;
6022 return 0;
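/* Illustrative sketch (an editor's example, not part of the original
   source): under the Microsoft x64 convention, a 12-byte struct fails
   the 1/2/4/8 size test above and is passed by reference (the caller
   copies it and passes the copy's address), while an 8-byte struct is
   passed by value in a register.  */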
6025 /* Return true when TYPE should be 128bit aligned for the 32bit argument
6026 passing ABI. */
6027 static bool
6028 contains_aligned_value_p (tree type)
6030 enum machine_mode mode = TYPE_MODE (type);
6031 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6032 || mode == TDmode
6033 || mode == TFmode
6034 || mode == TCmode)
6035 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6036 return true;
6037 if (TYPE_ALIGN (type) < 128)
6038 return false;
6040 if (AGGREGATE_TYPE_P (type))
6042 /* Walk the aggregates recursively. */
6043 switch (TREE_CODE (type))
6045 case RECORD_TYPE:
6046 case UNION_TYPE:
6047 case QUAL_UNION_TYPE:
6049 tree field;
6051 /* Walk all the structure fields. */
6052 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6054 if (TREE_CODE (field) == FIELD_DECL
6055 && contains_aligned_value_p (TREE_TYPE (field)))
6056 return true;
6058 break;
6061 case ARRAY_TYPE:
6062 /* Just in case some language passes arrays by value. */
6063 if (contains_aligned_value_p (TREE_TYPE (type)))
6064 return true;
6065 break;
6067 default:
6068 gcc_unreachable ();
6071 return false;
6074 /* Gives the alignment boundary, in bits, of an argument with the
6075 specified mode and type. */
6077 int
6078 ix86_function_arg_boundary (enum machine_mode mode, tree type)
6080 int align;
6081 if (type)
6083 /* Since the canonical type is used for the call, we convert it to
6084 the canonical type if needed. */
6085 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
6086 type = TYPE_CANONICAL (type);
6087 align = TYPE_ALIGN (type);
6089 else
6090 align = GET_MODE_ALIGNMENT (mode);
6091 if (align < PARM_BOUNDARY)
6092 align = PARM_BOUNDARY;
6093 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6094 natural boundaries. */
6095 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6097 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6098 make an exception for SSE modes since these require 128bit
6099 alignment.
6101 The handling here differs from field_alignment. ICC aligns MMX
6102 arguments to 4 byte boundaries, while structure fields are aligned
6103 to 8 byte boundaries. */
6104 if (!type)
6106 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6107 align = PARM_BOUNDARY;
6109 else
6111 if (!contains_aligned_value_p (type))
6112 align = PARM_BOUNDARY;
6115 if (align > BIGGEST_ALIGNMENT)
6116 align = BIGGEST_ALIGNMENT;
6117 return align;
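/* Illustrative sketch (an editor's example, not part of the original
   source): on 32-bit, an int argument gets PARM_BOUNDARY (32 bits)
   while a __m128 argument keeps its natural 128-bit boundary when SSE
   is enabled, because SSE modes are exempted above; on 64-bit, natural
   alignment is used throughout, capped at BIGGEST_ALIGNMENT.  */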
6120 /* Return true if N is a possible register number of function value. */
6122 bool
6123 ix86_function_value_regno_p (int regno)
6125 switch (regno)
6127 case 0:
6128 return true;
6130 case FIRST_FLOAT_REG:
6131 /* TODO: The function should depend on current function ABI but
6132 builtins.c would need updating then. Therefore we use the
6133 default ABI. */
6134 if (TARGET_64BIT && ix86_abi == MS_ABI)
6135 return false;
6136 return TARGET_FLOAT_RETURNS_IN_80387;
6138 case FIRST_SSE_REG:
6139 return TARGET_SSE;
6141 case FIRST_MMX_REG:
6142 if (TARGET_MACHO || TARGET_64BIT)
6143 return false;
6144 return TARGET_MMX;
6147 return false;
6150 /* Define how to find the value returned by a function.
6151 VALTYPE is the data type of the value (as a tree).
6152 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6153 otherwise, FUNC is 0. */
6155 static rtx
6156 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6157 const_tree fntype, const_tree fn)
6159 unsigned int regno;
6161 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6162 we normally prevent this case when mmx is not available. However
6163 some ABIs may require the result to be returned like DImode. */
6164 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6165 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6167 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6168 we prevent this case when sse is not available. However some ABIs
6169 may require the result to be returned like integer TImode. */
6170 else if (mode == TImode
6171 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6172 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6174 /* 32-byte vector modes in %ymm0. */
6175 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6176 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6178 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6179 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6180 regno = FIRST_FLOAT_REG;
6181 else
6182 /* Most things go in %eax. */
6183 regno = AX_REG;
6185 /* Override FP return register with %xmm0 for local functions when
6186 SSE math is enabled or for functions with sseregparm attribute. */
6187 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6189 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6190 if ((sse_level >= 1 && mode == SFmode)
6191 || (sse_level == 2 && mode == DFmode))
6192 regno = FIRST_SSE_REG;
6195 /* OImode shouldn't be used directly. */
6196 gcc_assert (mode != OImode);
6198 return gen_rtx_REG (orig_mode, regno);
6201 static rtx
6202 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6203 const_tree valtype)
6205 rtx ret;
6207 /* Handle libcalls, which don't provide a type node. */
6208 if (valtype == NULL)
6210 switch (mode)
6212 case SFmode:
6213 case SCmode:
6214 case DFmode:
6215 case DCmode:
6216 case TFmode:
6217 case SDmode:
6218 case DDmode:
6219 case TDmode:
6220 return gen_rtx_REG (mode, FIRST_SSE_REG);
6221 case XFmode:
6222 case XCmode:
6223 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6224 case TCmode:
6225 return NULL;
6226 default:
6227 return gen_rtx_REG (mode, AX_REG);
6231 ret = construct_container (mode, orig_mode, valtype, 1,
6232 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6233 x86_64_int_return_registers, 0);
6235 /* For zero sized structures, construct_container returns NULL, but we
6236 need to keep the rest of the compiler happy by returning a meaningful value. */
6237 if (!ret)
6238 ret = gen_rtx_REG (orig_mode, AX_REG);
6240 return ret;
6243 static rtx
6244 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6246 unsigned int regno = AX_REG;
6248 if (TARGET_SSE)
6250 switch (GET_MODE_SIZE (mode))
6252 case 16:
6253 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6254 && !COMPLEX_MODE_P (mode))
6255 regno = FIRST_SSE_REG;
6256 break;
6257 case 8:
6258 case 4:
6259 if (mode == SFmode || mode == DFmode)
6260 regno = FIRST_SSE_REG;
6261 break;
6262 default:
6263 break;
6266 return gen_rtx_REG (orig_mode, regno);
6269 static rtx
6270 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6271 enum machine_mode orig_mode, enum machine_mode mode)
6273 const_tree fn, fntype;
6275 fn = NULL_TREE;
6276 if (fntype_or_decl && DECL_P (fntype_or_decl))
6277 fn = fntype_or_decl;
6278 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6280 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6281 return function_value_ms_64 (orig_mode, mode);
6282 else if (TARGET_64BIT)
6283 return function_value_64 (orig_mode, mode, valtype);
6284 else
6285 return function_value_32 (orig_mode, mode, fntype, fn);
6288 static rtx
6289 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6290 bool outgoing ATTRIBUTE_UNUSED)
6292 enum machine_mode mode, orig_mode;
6294 orig_mode = TYPE_MODE (valtype);
6295 mode = type_natural_mode (valtype, NULL);
6296 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6299 rtx
6300 ix86_libcall_value (enum machine_mode mode)
6302 return ix86_function_value_1 (NULL, NULL, mode, mode);
6305 /* Return true iff type is returned in memory. */
6307 static int ATTRIBUTE_UNUSED
6308 return_in_memory_32 (const_tree type, enum machine_mode mode)
6310 HOST_WIDE_INT size;
6312 if (mode == BLKmode)
6313 return 1;
6315 size = int_size_in_bytes (type);
6317 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6318 return 0;
6320 if (VECTOR_MODE_P (mode) || mode == TImode)
6322 /* User-created vectors small enough to fit in EAX. */
6323 if (size < 8)
6324 return 0;
6326 /* MMX/3dNow values are returned in MM0,
6327 except when it doesn't exist. */
6328 if (size == 8)
6329 return (TARGET_MMX ? 0 : 1);
6331 /* SSE values are returned in XMM0, except when it doesn't exist. */
6332 if (size == 16)
6333 return (TARGET_SSE ? 0 : 1);
6335 /* AVX values are returned in YMM0, except when it doesn't exist. */
6336 if (size == 32)
6337 return TARGET_AVX ? 0 : 1;
6340 if (mode == XFmode)
6341 return 0;
6343 if (size > 12)
6344 return 1;
6346 /* OImode shouldn't be used directly. */
6347 gcc_assert (mode != OImode);
6349 return 0;
6352 static int ATTRIBUTE_UNUSED
6353 return_in_memory_64 (const_tree type, enum machine_mode mode)
6355 int needed_intregs, needed_sseregs;
6356 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6359 static int ATTRIBUTE_UNUSED
6360 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6362 HOST_WIDE_INT size = int_size_in_bytes (type);
6364 /* __m128 is returned in xmm0. */
6365 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6366 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6367 return 0;
6369 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
6370 return (size != 1 && size != 2 && size != 4 && size != 8);
6373 static bool
6374 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6376 #ifdef SUBTARGET_RETURN_IN_MEMORY
6377 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6378 #else
6379 const enum machine_mode mode = type_natural_mode (type, NULL);
6381 if (TARGET_64BIT)
6383 if (ix86_function_type_abi (fntype) == MS_ABI)
6384 return return_in_memory_ms_64 (type, mode);
6385 else
6386 return return_in_memory_64 (type, mode);
6388 else
6389 return return_in_memory_32 (type, mode);
6390 #endif
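/* Illustrative sketch (an editor's example, not part of the original
   source): on MS x64, a 16-byte __m128 value comes back in %xmm0 while
   a 3-byte struct fails the size test and is returned via a hidden
   pointer; on 64-bit SYSV the classifier decides, so e.g. a 32-byte
   struct of four doubles is returned in memory because its classes are
   not an SSE/SSEUP run.  */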
6393 /* Return false iff TYPE is returned in memory. This version is used
6394 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6395 but differs notably in that when MMX is available, 8-byte vectors
6396 are returned in memory, rather than in MMX registers. */
6398 bool
6399 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6401 int size;
6402 enum machine_mode mode = type_natural_mode (type, NULL);
6404 if (TARGET_64BIT)
6405 return return_in_memory_64 (type, mode);
6407 if (mode == BLKmode)
6408 return 1;
6410 size = int_size_in_bytes (type);
6412 if (VECTOR_MODE_P (mode))
6414 /* Return in memory only if MMX registers *are* available. This
6415 seems backwards, but it is consistent with the existing
6416 Solaris x86 ABI. */
6417 if (size == 8)
6418 return TARGET_MMX;
6419 if (size == 16)
6420 return !TARGET_SSE;
6422 else if (mode == TImode)
6423 return !TARGET_SSE;
6424 else if (mode == XFmode)
6425 return 0;
6427 return size > 12;
6430 /* When returning SSE vector types, we have a choice of either
6431 (1) being abi incompatible with a -march switch, or
6432 (2) generating an error.
6433 Given no good solution, I think the safest thing is one warning.
6434 The user won't be able to use -Werror, but....
6436 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6437 called in response to actually generating a caller or callee that
6438 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6439 via aggregate_value_p for general type probing from tree-ssa. */
6441 static rtx
6442 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6444 static bool warnedsse, warnedmmx;
6446 if (!TARGET_64BIT && type)
6448 /* Look at the return type of the function, not the function type. */
6449 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6451 if (!TARGET_SSE && !warnedsse)
6453 if (mode == TImode
6454 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6456 warnedsse = true;
6457 warning (0, "SSE vector return without SSE enabled "
6458 "changes the ABI");
6462 if (!TARGET_MMX && !warnedmmx)
6464 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6466 warnedmmx = true;
6467 warning (0, "MMX vector return without MMX enabled "
6468 "changes the ABI");
6473 return NULL;
6477 /* Create the va_list data type. */
6479 /* Returns the calling convention specific va_list data type.
6480 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6482 static tree
6483 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6485 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6487 /* For i386 we use a plain pointer to the argument area. */
6488 if (!TARGET_64BIT || abi == MS_ABI)
6489 return build_pointer_type (char_type_node);
6491 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6492 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
6494 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6495 unsigned_type_node);
6496 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6497 unsigned_type_node);
6498 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6499 ptr_type_node);
6500 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
6501 ptr_type_node);
6503 va_list_gpr_counter_field = f_gpr;
6504 va_list_fpr_counter_field = f_fpr;
6506 DECL_FIELD_CONTEXT (f_gpr) = record;
6507 DECL_FIELD_CONTEXT (f_fpr) = record;
6508 DECL_FIELD_CONTEXT (f_ovf) = record;
6509 DECL_FIELD_CONTEXT (f_sav) = record;
6511 TREE_CHAIN (record) = type_decl;
6512 TYPE_NAME (record) = type_decl;
6513 TYPE_FIELDS (record) = f_gpr;
6514 TREE_CHAIN (f_gpr) = f_fpr;
6515 TREE_CHAIN (f_fpr) = f_ovf;
6516 TREE_CHAIN (f_ovf) = f_sav;
6518 layout_type (record);
6520 /* The correct type is an array type of one element. */
6521 return build_array_type (record, build_index_type (size_zero_node));
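/* Illustrative sketch (an editor's example, not part of the original
   source): the record built above corresponds to the psABI's

       typedef struct __va_list_tag {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } va_list[1];

   where gp_offset and fp_offset index into reg_save_area and
   overflow_arg_area walks the stack-passed arguments.  */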
6524 /* Set up the builtin va_list data type and, for 64-bit, the additional
6525 calling convention specific va_list data types. */
6527 static tree
6528 ix86_build_builtin_va_list (void)
6530 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
6532 /* Initialize abi specific va_list builtin types. */
6533 if (TARGET_64BIT)
6535 tree t;
6536 if (ix86_abi == MS_ABI)
6538 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6539 if (TREE_CODE (t) != RECORD_TYPE)
6540 t = build_variant_type_copy (t);
6541 sysv_va_list_type_node = t;
6543 else
6545 t = ret;
6546 if (TREE_CODE (t) != RECORD_TYPE)
6547 t = build_variant_type_copy (t);
6548 sysv_va_list_type_node = t;
6550 if (ix86_abi != MS_ABI)
6552 t = ix86_build_builtin_va_list_abi (MS_ABI);
6553 if (TREE_CODE (t) != RECORD_TYPE)
6554 t = build_variant_type_copy (t);
6555 ms_va_list_type_node = t;
6557 else
6559 t = ret;
6560 if (TREE_CODE (t) != RECORD_TYPE)
6561 t = build_variant_type_copy (t);
6562 ms_va_list_type_node = t;
6566 return ret;
6569 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
6571 static void
6572 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6574 rtx save_area, mem;
6575 rtx label;
6576 rtx label_ref;
6577 rtx tmp_reg;
6578 rtx nsse_reg;
6579 alias_set_type set;
6580 int i;
6581 int regparm = ix86_regparm;
6583 if (cum->call_abi != ix86_abi)
6584 regparm = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
6586 /* GPR size of varargs save area. */
6587 if (cfun->va_list_gpr_size)
6588 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6589 else
6590 ix86_varargs_gpr_size = 0;
6592 /* FPR size of varargs save area. We don't need it if we don't pass
6593 anything in SSE registers. */
6594 if (cum->sse_nregs && cfun->va_list_fpr_size)
6595 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6596 else
6597 ix86_varargs_fpr_size = 0;
6599 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6600 return;
6602 save_area = frame_pointer_rtx;
6603 set = get_varargs_alias_set ();
6605 for (i = cum->regno;
6606 i < regparm
6607 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6608 i++)
6610 mem = gen_rtx_MEM (Pmode,
6611 plus_constant (save_area, i * UNITS_PER_WORD));
6612 MEM_NOTRAP_P (mem) = 1;
6613 set_mem_alias_set (mem, set);
6614 emit_move_insn (mem, gen_rtx_REG (Pmode,
6615 x86_64_int_parameter_registers[i]));
6618 if (ix86_varargs_fpr_size)
6620 /* Now emit code to save SSE registers. The AX parameter contains the
6621 number of SSE parameter registers used to call this function. We use
6622 the sse_prologue_save insn template, which produces a computed jump
6623 across the SSE saves. We need some preparation work to get this working. */
6625 label = gen_label_rtx ();
6626 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6628 /* Compute the address to jump to:
6629 label - eax*4 + nnamed_sse_arguments*4, or
6630 label - eax*5 + nnamed_sse_arguments*5 for AVX. */
6631 tmp_reg = gen_reg_rtx (Pmode);
6632 nsse_reg = gen_reg_rtx (Pmode);
6633 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6634 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6635 gen_rtx_MULT (Pmode, nsse_reg,
6636 GEN_INT (4))));
6638 /* vmovaps is one byte longer than movaps. */
6639 if (TARGET_AVX)
6640 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6641 gen_rtx_PLUS (Pmode, tmp_reg,
6642 nsse_reg)));
6644 if (cum->sse_regno)
6645 emit_move_insn
6646 (nsse_reg,
6647 gen_rtx_CONST (DImode,
6648 gen_rtx_PLUS (DImode,
6649 label_ref,
6650 GEN_INT (cum->sse_regno
6651 * (TARGET_AVX ? 5 : 4)))));
6652 else
6653 emit_move_insn (nsse_reg, label_ref);
6654 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6656 /* Compute the address of the memory block we save into. We always use a
6657 pointer pointing 127 bytes after the first byte to store; this is needed
6658 to keep the instruction size limited to 4 bytes (5 bytes for AVX) with a
6659 one byte displacement. */
6660 tmp_reg = gen_reg_rtx (Pmode);
6661 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6662 plus_constant (save_area,
6663 ix86_varargs_gpr_size + 127)));
6664 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6665 MEM_NOTRAP_P (mem) = 1;
6666 set_mem_alias_set (mem, set);
6667 set_mem_align (mem, BITS_PER_WORD);
6669 /* And finally do the dirty job! */
6670 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6671 GEN_INT (cum->sse_regno), label));
6675 static void
6676 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6678 alias_set_type set = get_varargs_alias_set ();
6679 int i;
6681 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
6683 rtx reg, mem;
6685 mem = gen_rtx_MEM (Pmode,
6686 plus_constant (virtual_incoming_args_rtx,
6687 i * UNITS_PER_WORD));
6688 MEM_NOTRAP_P (mem) = 1;
6689 set_mem_alias_set (mem, set);
6691 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6692 emit_move_insn (mem, reg);
6696 static void
6697 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6698 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6699 int no_rtl)
6701 CUMULATIVE_ARGS next_cum;
6702 tree fntype;
6704 /* This argument doesn't appear to be used anymore, which is good,
6705 because the old code here didn't suppress rtl generation. */
6706 gcc_assert (!no_rtl);
6708 if (!TARGET_64BIT)
6709 return;
6711 fntype = TREE_TYPE (current_function_decl);
6713 /* For varargs, we do not want to skip the dummy va_dcl argument.
6714 For stdargs, we do want to skip the last named argument. */
6715 next_cum = *cum;
6716 if (stdarg_p (fntype))
6717 function_arg_advance (&next_cum, mode, type, 1);
6719 if (cum->call_abi == MS_ABI)
6720 setup_incoming_varargs_ms_64 (&next_cum);
6721 else
6722 setup_incoming_varargs_64 (&next_cum);
6725 /* Checks if TYPE is of kind va_list char *. */
6727 static bool
6728 is_va_list_char_pointer (tree type)
6730 tree canonic;
6732 /* For 32-bit it is always true. */
6733 if (!TARGET_64BIT)
6734 return true;
6735 canonic = ix86_canonical_va_list_type (type);
6736 return (canonic == ms_va_list_type_node
6737 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
6740 /* Implement va_start. */
6742 static void
6743 ix86_va_start (tree valist, rtx nextarg)
6745 HOST_WIDE_INT words, n_gpr, n_fpr;
6746 tree f_gpr, f_fpr, f_ovf, f_sav;
6747 tree gpr, fpr, ovf, sav, t;
6748 tree type;
6750 /* Only the 64bit target needs something special. */
6751 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6753 std_expand_builtin_va_start (valist, nextarg);
6754 return;
6757 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6758 f_fpr = TREE_CHAIN (f_gpr);
6759 f_ovf = TREE_CHAIN (f_fpr);
6760 f_sav = TREE_CHAIN (f_ovf);
6762 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6763 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6764 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6765 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6766 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6768 /* Count number of gp and fp argument registers used. */
6769 words = crtl->args.info.words;
6770 n_gpr = crtl->args.info.regno;
6771 n_fpr = crtl->args.info.sse_regno;
6773 if (cfun->va_list_gpr_size)
6775 type = TREE_TYPE (gpr);
6776 t = build2 (MODIFY_EXPR, type,
6777 gpr, build_int_cst (type, n_gpr * 8));
6778 TREE_SIDE_EFFECTS (t) = 1;
6779 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6782 if (TARGET_SSE && cfun->va_list_fpr_size)
6784 type = TREE_TYPE (fpr);
6785 t = build2 (MODIFY_EXPR, type, fpr,
6786 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6787 TREE_SIDE_EFFECTS (t) = 1;
6788 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6791 /* Find the overflow area. */
6792 type = TREE_TYPE (ovf);
6793 t = make_tree (type, crtl->args.internal_arg_pointer);
6794 if (words != 0)
6795 t = build2 (POINTER_PLUS_EXPR, type, t,
6796 size_int (words * UNITS_PER_WORD));
6797 t = build2 (MODIFY_EXPR, type, ovf, t);
6798 TREE_SIDE_EFFECTS (t) = 1;
6799 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6801 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6803 /* Find the register save area.
6804 The function prologue saves it right above the stack frame. */
6805 type = TREE_TYPE (sav);
6806 t = make_tree (type, frame_pointer_rtx);
6807 if (!ix86_varargs_gpr_size)
6808 t = build2 (POINTER_PLUS_EXPR, type, t,
6809 size_int (-8 * X86_64_REGPARM_MAX));
6810 t = build2 (MODIFY_EXPR, type, sav, t);
6811 TREE_SIDE_EFFECTS (t) = 1;
6812 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
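/* Illustrative sketch (an editor's example, not part of the original
   source): for

       void f (int a, double b, ...)

   va_start leaves gp_offset = 8 (one GPR consumed by A), fp_offset =
   8 * X86_64_REGPARM_MAX + 16 (one SSE slot consumed by B),
   overflow_arg_area just past any named stack words, and reg_save_area
   pointing at the block the prologue spilled.  */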
6816 /* Implement va_arg. */
6818 static tree
6819 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6820 gimple_seq *post_p)
6822 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6823 tree f_gpr, f_fpr, f_ovf, f_sav;
6824 tree gpr, fpr, ovf, sav, t;
6825 int size, rsize;
6826 tree lab_false, lab_over = NULL_TREE;
6827 tree addr, t2;
6828 rtx container;
6829 int indirect_p = 0;
6830 tree ptrtype;
6831 enum machine_mode nat_mode;
6832 int arg_boundary;
6834 /* Only the 64bit target needs something special. */
6835 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6836 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6838 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6839 f_fpr = TREE_CHAIN (f_gpr);
6840 f_ovf = TREE_CHAIN (f_fpr);
6841 f_sav = TREE_CHAIN (f_ovf);
6843 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
6844 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
6845 valist = build_va_arg_indirect_ref (valist);
6846 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6847 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6848 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6850 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6851 if (indirect_p)
6852 type = build_pointer_type (type);
6853 size = int_size_in_bytes (type);
6854 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6856 nat_mode = type_natural_mode (type, NULL);
6857 switch (nat_mode)
6859 case V8SFmode:
6860 case V8SImode:
6861 case V32QImode:
6862 case V16HImode:
6863 case V4DFmode:
6864 case V4DImode:
6865 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6866 if (ix86_cfun_abi () == SYSV_ABI)
6868 container = NULL;
6869 break;
6872 default:
6873 container = construct_container (nat_mode, TYPE_MODE (type),
6874 type, 0, X86_64_REGPARM_MAX,
6875 X86_64_SSE_REGPARM_MAX, intreg,
6877 break;
6880 /* Pull the value out of the saved registers. */
6882 addr = create_tmp_var (ptr_type_node, "addr");
6883 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6885 if (container)
6887 int needed_intregs, needed_sseregs;
6888 bool need_temp;
6889 tree int_addr, sse_addr;
6891 lab_false = create_artificial_label ();
6892 lab_over = create_artificial_label ();
6894 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
6896 need_temp = (!REG_P (container)
6897 && ((needed_intregs && TYPE_ALIGN (type) > 64)
6898 || TYPE_ALIGN (type) > 128));
6900 /* In case we are passing a structure, verify that it is a consecutive block
6901 in the register save area. If not, we need to do moves. */
6902 if (!need_temp && !REG_P (container))
6904 /* Verify that all registers are strictly consecutive. */
6905 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
6907 int i;
6909 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6911 rtx slot = XVECEXP (container, 0, i);
6912 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6913 || INTVAL (XEXP (slot, 1)) != i * 16)
6914 need_temp = 1;
6917 else
6919 int i;
6921 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6923 rtx slot = XVECEXP (container, 0, i);
6924 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6925 || INTVAL (XEXP (slot, 1)) != i * 8)
6926 need_temp = 1;
6930 if (!need_temp)
6932 int_addr = addr;
6933 sse_addr = addr;
6935 else
6937 int_addr = create_tmp_var (ptr_type_node, "int_addr");
6938 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6939 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6940 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6943 /* First ensure that we fit completely in registers. */
6944 if (needed_intregs)
6946 t = build_int_cst (TREE_TYPE (gpr),
6947 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6948 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6949 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6950 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6951 gimplify_and_add (t, pre_p);
6953 if (needed_sseregs)
6955 t = build_int_cst (TREE_TYPE (fpr),
6956 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6957 + X86_64_REGPARM_MAX * 8);
6958 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
6959 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6960 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6961 gimplify_and_add (t, pre_p);
6964 /* Compute index to start of area used for integer regs. */
6965 if (needed_intregs)
6967 /* int_addr = gpr + sav; */
6968 t = fold_convert (sizetype, gpr);
6969 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6970 gimplify_assign (int_addr, t, pre_p);
6972 if (needed_sseregs)
6974 /* sse_addr = fpr + sav; */
6975 t = fold_convert (sizetype, fpr);
6976 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6977 gimplify_assign (sse_addr, t, pre_p);
6979 if (need_temp)
6981 int i;
6982 tree temp = create_tmp_var (type, "va_arg_tmp");
6984 /* addr = &temp; */
6985 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
6986 gimplify_assign (addr, t, pre_p);
6988 for (i = 0; i < XVECLEN (container, 0); i++)
6990 rtx slot = XVECEXP (container, 0, i);
6991 rtx reg = XEXP (slot, 0);
6992 enum machine_mode mode = GET_MODE (reg);
6993 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
6994 tree addr_type = build_pointer_type (piece_type);
6995 tree daddr_type = build_pointer_type_for_mode (piece_type,
6996 ptr_mode, true);
6997 tree src_addr, src;
6998 int src_offset;
6999 tree dest_addr, dest;
7001 if (SSE_REGNO_P (REGNO (reg)))
7003 src_addr = sse_addr;
7004 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7006 else
7008 src_addr = int_addr;
7009 src_offset = REGNO (reg) * 8;
7011 src_addr = fold_convert (addr_type, src_addr);
7012 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7013 size_int (src_offset));
7014 src = build_va_arg_indirect_ref (src_addr);
7016 dest_addr = fold_convert (daddr_type, addr);
7017 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7018 size_int (INTVAL (XEXP (slot, 1))));
7019 dest = build_va_arg_indirect_ref (dest_addr);
7021 gimplify_assign (dest, src, pre_p);
7025 if (needed_intregs)
7027 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7028 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7029 gimplify_assign (gpr, t, pre_p);
7032 if (needed_sseregs)
7034 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7035 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7036 gimplify_assign (fpr, t, pre_p);
7039 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7041 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7044 /* ... otherwise out of the overflow area. */
7046 /* When we align a parameter on the stack for the caller, if its
7047 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7048 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We match the callee
7049 here with the caller. */
7050 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7051 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7052 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7054 /* Care for on-stack alignment if needed. */
7055 if (arg_boundary <= 64
7056 || integer_zerop (TYPE_SIZE (type)))
7057 t = ovf;
7058 else
7060 HOST_WIDE_INT align = arg_boundary / 8;
7061 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7062 size_int (align - 1));
7063 t = fold_convert (sizetype, t);
7064 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7065 size_int (-align));
7066 t = fold_convert (TREE_TYPE (ovf), t);
7068 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7069 gimplify_assign (addr, t, pre_p);
7071 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7072 size_int (rsize * UNITS_PER_WORD));
7073 gimplify_assign (unshare_expr (ovf), t, pre_p);
7075 if (container)
7076 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7078 ptrtype = build_pointer_type (type);
7079 addr = fold_convert (ptrtype, addr);
7081 if (indirect_p)
7082 addr = build_va_arg_indirect_ref (addr);
7083 return build_va_arg_indirect_ref (addr);
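/* Illustrative sketch (an editor's example, not part of the original
   source): for va_arg (ap, int) the gimplification above amounts to

       if (ap->gp_offset >= 48) goto stack;
       addr = ap->reg_save_area + ap->gp_offset;
       ap->gp_offset += 8;
       goto done;
     stack:
       addr = ap->overflow_arg_area;
       ap->overflow_arg_area += 8;
     done:
       result = *(int *) addr;

   with 48 being (X86_64_REGPARM_MAX - needed_intregs + 1) * 8 for a
   one-word integer argument.  */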
7086 /* Return nonzero if OPNUM's MEM should be matched
7087 in movabs* patterns. */
7089 int
7090 ix86_check_movabs (rtx insn, int opnum)
7092 rtx set, mem;
7094 set = PATTERN (insn);
7095 if (GET_CODE (set) == PARALLEL)
7096 set = XVECEXP (set, 0, 0);
7097 gcc_assert (GET_CODE (set) == SET);
7098 mem = XEXP (set, opnum);
7099 while (GET_CODE (mem) == SUBREG)
7100 mem = SUBREG_REG (mem);
7101 gcc_assert (MEM_P (mem));
7102 return (volatile_ok || !MEM_VOLATILE_P (mem));
7105 /* Initialize the table of extra 80387 mathematical constants. */
7107 static void
7108 init_ext_80387_constants (void)
7110 static const char * cst[5] =
7112 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7113 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7114 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7115 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7116 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7118 int i;
7120 for (i = 0; i < 5; i++)
7122 real_from_string (&ext_80387_constants_table[i], cst[i]);
7123 /* Ensure each constant is rounded to XFmode precision. */
7124 real_convert (&ext_80387_constants_table[i],
7125 XFmode, &ext_80387_constants_table[i]);
7128 ext_80387_constants_init = 1;
7131 /* Return true if the constant is something that can be loaded with
7132 a special instruction. */
7134 int
7135 standard_80387_constant_p (rtx x)
7137 enum machine_mode mode = GET_MODE (x);
7139 REAL_VALUE_TYPE r;
7141 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7142 return -1;
7144 if (x == CONST0_RTX (mode))
7145 return 1;
7146 if (x == CONST1_RTX (mode))
7147 return 2;
7149 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7151 /* For XFmode constants, try to find a special 80387 instruction when
7152 optimizing for size or on those CPUs that benefit from them. */
7153 if (mode == XFmode
7154 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7156 int i;
7158 if (! ext_80387_constants_init)
7159 init_ext_80387_constants ();
7161 for (i = 0; i < 5; i++)
7162 if (real_identical (&r, &ext_80387_constants_table[i]))
7163 return i + 3;
7166 /* A load of the constant -0.0 or -1.0 will be split into an
7167 fldz;fchs or fld1;fchs sequence. */
7168 if (real_isnegzero (&r))
7169 return 8;
7170 if (real_identical (&r, &dconstm1))
7171 return 9;
7173 return 0;
7176 /* Return the opcode of the special instruction to be used to load
7177 the constant X. */
7179 const char *
7180 standard_80387_constant_opcode (rtx x)
7182 switch (standard_80387_constant_p (x))
7184 case 1:
7185 return "fldz";
7186 case 2:
7187 return "fld1";
7188 case 3:
7189 return "fldlg2";
7190 case 4:
7191 return "fldln2";
7192 case 5:
7193 return "fldl2e";
7194 case 6:
7195 return "fldl2t";
7196 case 7:
7197 return "fldpi";
7198 case 8:
7199 case 9:
7200 return "#";
7201 default:
7202 gcc_unreachable ();
7206 /* Return the CONST_DOUBLE representing the 80387 constant that is
7207 loaded by the specified special instruction. The argument IDX
7208 matches the return value from standard_80387_constant_p. */
7210 rtx
7211 standard_80387_constant_rtx (int idx)
7213 int i;
7215 if (! ext_80387_constants_init)
7216 init_ext_80387_constants ();
7218 switch (idx)
7220 case 3:
7221 case 4:
7222 case 5:
7223 case 6:
7224 case 7:
7225 i = idx - 3;
7226 break;
7228 default:
7229 gcc_unreachable ();
7232 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7233 XFmode);
7236 /* Return 1 if MODE is a valid mode for SSE. */
7237 static int
7238 standard_sse_mode_p (enum machine_mode mode)
7240 switch (mode)
7242 case V16QImode:
7243 case V8HImode:
7244 case V4SImode:
7245 case V2DImode:
7246 case V4SFmode:
7247 case V2DFmode:
7248 return 1;
7250 default:
7251 return 0;
7255 /* Return 1 if X is all 0s. For all 1s, return 2 if X is in a 128-bit
7256 SSE mode and SSE2 is enabled, or 3 if X is in a 256-bit AVX mode and
7257 AVX is enabled; the negated value is returned when the needed
7258 instruction set extension is not enabled. */
7259 int
7260 standard_sse_constant_p (rtx x)
7262 enum machine_mode mode = GET_MODE (x);
7264 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7265 return 1;
7266 if (vector_all_ones_operand (x, mode))
7268 if (standard_sse_mode_p (mode))
7269 return TARGET_SSE2 ? 2 : -2;
7270 else if (VALID_AVX256_REG_MODE (mode))
7271 return TARGET_AVX ? 3 : -3;
7274 return 0;
7277 /* Return the opcode of the special instruction to be used to load
7278 the constant X. */
7280 const char *
7281 standard_sse_constant_opcode (rtx insn, rtx x)
7283 switch (standard_sse_constant_p (x))
7285 case 1:
7286 switch (get_attr_mode (insn))
7288 case MODE_V4SF:
7289 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7290 case MODE_V2DF:
7291 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7292 case MODE_TI:
7293 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7294 case MODE_V8SF:
7295 return "vxorps\t%x0, %x0, %x0";
7296 case MODE_V4DF:
7297 return "vxorpd\t%x0, %x0, %x0";
7298 case MODE_OI:
7299 return "vpxor\t%x0, %x0, %x0";
7300 default:
7301 gcc_unreachable ();
7303 case 2:
7304 if (TARGET_AVX)
7305 switch (get_attr_mode (insn))
7307 case MODE_V4SF:
7308 case MODE_V2DF:
7309 case MODE_TI:
7310 return "vpcmpeqd\t%0, %0, %0";
7311 break;
7312 default:
7313 gcc_unreachable ();
7315 else
7316 return "pcmpeqd\t%0, %0";
7318 gcc_unreachable ();
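/* (Editorial note: both idioms above avoid a constant-pool load. XORing
   a register with itself yields all-0s and is a dependency-breaking
   idiom on modern CPUs; pcmpeqd of a register with itself compares
   equal in every element, yielding all-1s.) */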
7321 /* Return 1 if OP contains a symbol reference. */
7323 int
7324 symbolic_reference_mentioned_p (rtx op)
7326 const char *fmt;
7327 int i;
7329 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7330 return 1;
7332 fmt = GET_RTX_FORMAT (GET_CODE (op));
7333 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7335 if (fmt[i] == 'E')
7337 int j;
7339 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7340 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7341 return 1;
7344 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7345 return 1;
7348 return 0;
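/* For example (illustrative), the walk above returns 1 for
     (const:SI (plus:SI (symbol_ref:SI ("x")) (const_int 4)))
   because the recursion reaches the inner SYMBOL_REF. */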
7351 /* Return 1 if it is appropriate to emit `ret' instructions in the
7352 body of a function. Do this only if the epilogue is simple, needing a
7353 couple of insns. Prior to reloading, we can't tell how many registers
7354 must be saved, so return 0 then. Return 0 if there is no frame
7355 marker to de-allocate. */
7357 int
7358 ix86_can_use_return_insn_p (void)
7360 struct ix86_frame frame;
7362 if (! reload_completed || frame_pointer_needed)
7363 return 0;
7365 /* Don't allow more than 32768 bytes of arguments to be popped, since
7366 that's all we can do with one instruction. */
7367 if (crtl->args.pops_args
7368 && crtl->args.size >= 32768)
7369 return 0;
7371 ix86_compute_frame_layout (&frame);
7372 return frame.to_allocate == 0 && (frame.nregs + frame.nsseregs) == 0;
7375 /* Value should be nonzero if functions must have frame pointers.
7376 Zero means the frame pointer need not be set up (and parms may
7377 be accessed via the stack pointer) in functions that seem suitable. */
7379 int
7380 ix86_frame_pointer_required (void)
7382 /* If we accessed previous frames, then the generated code expects
7383 to be able to access the saved ebp value in our frame. */
7384 if (cfun->machine->accesses_prev_frame)
7385 return 1;
7387 /* Several x86 OSes need a frame pointer for other reasons,
7388 usually pertaining to setjmp. */
7389 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7390 return 1;
7392 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7393 the frame pointer by default. Turn it back on now if we've not
7394 got a leaf function. */
7395 if (TARGET_OMIT_LEAF_FRAME_POINTER
7396 && (!current_function_is_leaf
7397 || ix86_current_function_calls_tls_descriptor))
7398 return 1;
7400 if (crtl->profile)
7401 return 1;
7403 return 0;
7406 /* Record that the current function accesses previous call frames. */
7408 void
7409 ix86_setup_frame_addresses (void)
7411 cfun->machine->accesses_prev_frame = 1;
7414 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7415 # define USE_HIDDEN_LINKONCE 1
7416 #else
7417 # define USE_HIDDEN_LINKONCE 0
7418 #endif
7420 static int pic_labels_used;
7422 /* Fills in the label name that should be used for a pc thunk for
7423 the given register. */
7425 static void
7426 get_pc_thunk_name (char name[32], unsigned int regno)
7428 gcc_assert (!TARGET_64BIT);
7430 if (USE_HIDDEN_LINKONCE)
7431 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7432 else
7433 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
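/* E.g. for %ebx this yields "__i686.get_pc_thunk.bx" in the
   hidden-linkonce case and an internal "LPR"-prefixed label otherwise
   (illustrative). */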
7437 /* Generate, at the end of the file, the pc thunks used for -fpic: each
7438 loads its register with the return address of the caller and then returns. */
7440 void
7441 ix86_file_end (void)
7443 rtx xops[2];
7444 int regno;
7446 for (regno = 0; regno < 8; ++regno)
7448 char name[32];
7450 if (! ((pic_labels_used >> regno) & 1))
7451 continue;
7453 get_pc_thunk_name (name, regno);
7455 #if TARGET_MACHO
7456 if (TARGET_MACHO)
7458 switch_to_section (darwin_sections[text_coal_section]);
7459 fputs ("\t.weak_definition\t", asm_out_file);
7460 assemble_name (asm_out_file, name);
7461 fputs ("\n\t.private_extern\t", asm_out_file);
7462 assemble_name (asm_out_file, name);
7463 fputs ("\n", asm_out_file);
7464 ASM_OUTPUT_LABEL (asm_out_file, name);
7466 else
7467 #endif
7468 if (USE_HIDDEN_LINKONCE)
7470 tree decl;
7472 decl = build_decl (FUNCTION_DECL, get_identifier (name),
7473 error_mark_node);
7474 TREE_PUBLIC (decl) = 1;
7475 TREE_STATIC (decl) = 1;
7476 DECL_ONE_ONLY (decl) = 1;
7478 (*targetm.asm_out.unique_section) (decl, 0);
7479 switch_to_section (get_named_section (decl, NULL, 0));
7481 (*targetm.asm_out.globalize_label) (asm_out_file, name);
7482 fputs ("\t.hidden\t", asm_out_file);
7483 assemble_name (asm_out_file, name);
7484 fputc ('\n', asm_out_file);
7485 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7487 else
7489 switch_to_section (text_section);
7490 ASM_OUTPUT_LABEL (asm_out_file, name);
7493 xops[0] = gen_rtx_REG (Pmode, regno);
7494 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7495 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7496 output_asm_insn ("ret", xops);
7499 if (NEED_INDICATE_EXEC_STACK)
7500 file_end_indicate_exec_stack ();
7503 /* Emit code for the SET_GOT patterns. */
7505 const char *
7506 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7508 rtx xops[3];
7510 xops[0] = dest;
7512 if (TARGET_VXWORKS_RTP && flag_pic)
7514 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
7515 xops[2] = gen_rtx_MEM (Pmode,
7516 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7517 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7519 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7520 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7521 an unadorned address. */
7522 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7523 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7524 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7525 return "";
7528 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
7530 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7532 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7534 if (!flag_pic)
7535 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7536 else
7537 output_asm_insn ("call\t%a2", xops);
7539 #if TARGET_MACHO
7540 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7541 is what will be referenced by the Mach-O PIC subsystem. */
7542 if (!label)
7543 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7544 #endif
7546 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7547 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7549 if (flag_pic)
7550 output_asm_insn ("pop%z0\t%0", xops);
7552 else
7554 char name[32];
7555 get_pc_thunk_name (name, REGNO (dest));
7556 pic_labels_used |= 1 << REGNO (dest);
7558 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7559 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7560 output_asm_insn ("call\t%X2", xops);
7561 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7562 is what will be referenced by the Mach-O PIC subsystem. */
7563 #if TARGET_MACHO
7564 if (!label)
7565 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7566 else
7567 targetm.asm_out.internal_label (asm_out_file, "L",
7568 CODE_LABEL_NUMBER (label));
7569 #endif
7572 if (TARGET_MACHO)
7573 return "";
7575 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7576 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7577 else
7578 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7580 return "";
7583 /* Generate an "push" pattern for input ARG. */
7585 static rtx
7586 gen_push (rtx arg)
7588 return gen_rtx_SET (VOIDmode,
7589 gen_rtx_MEM (Pmode,
7590 gen_rtx_PRE_DEC (Pmode,
7591 stack_pointer_rtx)),
7592 arg);
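/* Illustrative RTL for the 32-bit case, pushing %eax:
     (set (mem:SI (pre_dec:SI (reg:SI 7 sp))) (reg:SI 0 ax))
   which the output patterns print as a single "push" instruction. */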
7595 /* Return >= 0 if there is an unused call-clobbered register available
7596 for the entire function. */
7598 static unsigned int
7599 ix86_select_alt_pic_regnum (void)
7601 if (current_function_is_leaf && !crtl->profile
7602 && !ix86_current_function_calls_tls_descriptor)
7604 int i, drap;
7605 /* Can't use the same register for both PIC and DRAP. */
7606 if (crtl->drap_reg)
7607 drap = REGNO (crtl->drap_reg);
7608 else
7609 drap = -1;
7610 for (i = 2; i >= 0; --i)
7611 if (i != drap && !df_regs_ever_live_p (i))
7612 return i;
7615 return INVALID_REGNUM;
7618 /* Return 1 if we need to save REGNO. */
7619 static int
7620 ix86_save_reg (unsigned int regno, int maybe_eh_return)
7622 if (pic_offset_table_rtx
7623 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7624 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7625 || crtl->profile
7626 || crtl->calls_eh_return
7627 || crtl->uses_const_pool))
7629 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
7630 return 0;
7631 return 1;
7634 if (crtl->calls_eh_return && maybe_eh_return)
7636 unsigned i;
7637 for (i = 0; ; i++)
7639 unsigned test = EH_RETURN_DATA_REGNO (i);
7640 if (test == INVALID_REGNUM)
7641 break;
7642 if (test == regno)
7643 return 1;
7647 if (crtl->drap_reg
7648 && regno == REGNO (crtl->drap_reg))
7649 return 1;
7651 return (df_regs_ever_live_p (regno)
7652 && !call_used_regs[regno]
7653 && !fixed_regs[regno]
7654 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7657 /* Return the number of saved general purpose registers. */
7659 static int
7660 ix86_nsaved_regs (void)
7662 int nregs = 0;
7663 int regno;
7665 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7666 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7667 nregs ++;
7668 return nregs;
7671 /* Return the number of saved SSE registers. */
7673 static int
7674 ix86_nsaved_sseregs (void)
7676 int nregs = 0;
7677 int regno;
7679 if (ix86_cfun_abi () != MS_ABI)
7680 return 0;
7681 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7682 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7683 nregs ++;
7684 return nregs;
7687 /* Given FROM and TO register numbers, say whether this elimination is
7688 allowed. If stack alignment is needed, we can only replace argument
7689 pointer with hard frame pointer, or replace frame pointer with stack
7690 pointer. Otherwise, frame pointer elimination is automatically
7691 handled and all other eliminations are valid. */
7693 int
7694 ix86_can_eliminate (int from, int to)
7696 if (stack_realign_fp)
7697 return ((from == ARG_POINTER_REGNUM
7698 && to == HARD_FRAME_POINTER_REGNUM)
7699 || (from == FRAME_POINTER_REGNUM
7700 && to == STACK_POINTER_REGNUM));
7701 else
7702 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7705 /* Return the offset between two registers, one to be eliminated, and the other
7706 its replacement, at the start of a routine. */
7708 HOST_WIDE_INT
7709 ix86_initial_elimination_offset (int from, int to)
7711 struct ix86_frame frame;
7712 ix86_compute_frame_layout (&frame);
7714 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7715 return frame.hard_frame_pointer_offset;
7716 else if (from == FRAME_POINTER_REGNUM
7717 && to == HARD_FRAME_POINTER_REGNUM)
7718 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
7719 else
7721 gcc_assert (to == STACK_POINTER_REGNUM);
7723 if (from == ARG_POINTER_REGNUM)
7724 return frame.stack_pointer_offset;
7726 gcc_assert (from == FRAME_POINTER_REGNUM);
7727 return frame.stack_pointer_offset - frame.frame_pointer_offset;
7731 /* In a dynamically-aligned function, we can't know the offset from
7732 stack pointer to frame pointer, so we must ensure that setjmp
7733 eliminates fp against the hard fp (%ebp) rather than trying to
7734 index from %esp up to the top of the frame across a gap that is
7735 of unknown (at compile-time) size. */
7736 static rtx
7737 ix86_builtin_setjmp_frame_value (void)
7739 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
7742 /* Fill the ix86_frame structure describing the frame of the currently
7742 compiled function. */
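/* An editor's sketch of the layout computed below, from higher to lower
   addresses (illustrative; names as in struct ix86_frame):

	[return address]
	[saved %ebp, if frame_pointer_needed]	     <- hard_frame_pointer_offset
	[GP register save area: nregs words]
	[padding0: 16-byte-aligns the SSE area]
	[SSE register save area: nsseregs * 16]
	[va_arg register save area]
	[padding1: aligns to stack_alignment_needed] <- frame_pointer_offset
	[local variables: get_frame_size () bytes]
	[outgoing arguments]
	[padding2: aligns to preferred_alignment]    <- stack_pointer_offset  */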
7744 static void
7745 ix86_compute_frame_layout (struct ix86_frame *frame)
7747 HOST_WIDE_INT total_size;
7748 unsigned int stack_alignment_needed;
7749 HOST_WIDE_INT offset;
7750 unsigned int preferred_alignment;
7751 HOST_WIDE_INT size = get_frame_size ();
7753 frame->nregs = ix86_nsaved_regs ();
7754 frame->nsseregs = ix86_nsaved_sseregs ();
7755 total_size = size;
7757 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7758 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7760 /* The MS ABI seems to require stack alignment to always be 16, except
7761 in function prologues. */
7762 if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7764 preferred_alignment = 16;
7765 stack_alignment_needed = 16;
7766 crtl->preferred_stack_boundary = 128;
7767 crtl->stack_alignment_needed = 128;
7770 gcc_assert (!size || stack_alignment_needed);
7771 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7772 gcc_assert (preferred_alignment <= stack_alignment_needed);
7774 /* During reload the number of registers saved can change. Recompute
7775 the value as needed. Do not recompute when the number of registers
7776 didn't change, as reload does multiple calls to this function and does
7777 not expect the decision to change within a single iteration. */
7778 if (!optimize_function_for_size_p (cfun)
7779 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7781 int count = frame->nregs;
7783 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7784 /* The fast prologue uses moves instead of pushes to save registers. This
7785 is significantly longer, but also executes faster, as modern hardware
7786 can execute the moves in parallel but can't do that for push/pop.
7788 Be careful about choosing which prologue to emit: when a function takes
7789 many instructions to execute, we may use the slow version, as well as
7790 when the function is known to be outside a hot spot (this is known with
7791 feedback only). Weight the size of the function by the number of
7792 registers to save, as it is cheap to use one or two push instructions
7793 but very slow to use many of them. */
7794 if (count)
7795 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7796 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7797 || (flag_branch_probabilities
7798 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7799 cfun->machine->use_fast_prologue_epilogue = false;
7800 else
7801 cfun->machine->use_fast_prologue_epilogue
7802 = !expensive_function_p (count);
7804 if (TARGET_PROLOGUE_USING_MOVE
7805 && cfun->machine->use_fast_prologue_epilogue)
7806 frame->save_regs_using_mov = true;
7807 else
7808 frame->save_regs_using_mov = false;
7811 /* Skip return address and saved base pointer. */
7812 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7814 frame->hard_frame_pointer_offset = offset;
7816 /* Align the offset, because the realigned frame starts from
7817 here. */
7818 if (stack_realign_fp)
7819 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7821 /* Register save area */
7822 offset += frame->nregs * UNITS_PER_WORD;
7824 /* Align SSE reg save area. */
7825 if (frame->nsseregs)
7826 frame->padding0 = ((offset + 16 - 1) & -16) - offset;
7827 else
7828 frame->padding0 = 0;
7830 /* SSE register save area. */
7831 offset += frame->padding0 + frame->nsseregs * 16;
7833 /* Va-arg area */
7834 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7835 offset += frame->va_arg_size;
7837 /* Align start of frame for local function. */
7838 frame->padding1 = ((offset + stack_alignment_needed - 1)
7839 & -stack_alignment_needed) - offset;
7841 offset += frame->padding1;
7843 /* Frame pointer points here. */
7844 frame->frame_pointer_offset = offset;
7846 offset += size;
7848 /* Add the outgoing arguments area. This can be skipped if we eliminated
7849 all the function calls as dead code.
7850 Skipping is, however, impossible when the function calls alloca: the
7851 alloca expander assumes that the last crtl->outgoing_args_size bytes
7852 of the stack frame are unused. */
7853 if (ACCUMULATE_OUTGOING_ARGS
7854 && (!current_function_is_leaf || cfun->calls_alloca
7855 || ix86_current_function_calls_tls_descriptor))
7857 offset += crtl->outgoing_args_size;
7858 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7860 else
7861 frame->outgoing_arguments_size = 0;
7863 /* Align stack boundary. Only needed if we're calling another function
7864 or using alloca. */
7865 if (!current_function_is_leaf || cfun->calls_alloca
7866 || ix86_current_function_calls_tls_descriptor)
7867 frame->padding2 = ((offset + preferred_alignment - 1)
7868 & -preferred_alignment) - offset;
7869 else
7870 frame->padding2 = 0;
7872 offset += frame->padding2;
7874 /* We've reached the end of the stack frame. */
7875 frame->stack_pointer_offset = offset;
7877 /* Size the prologue needs to allocate. */
7878 frame->to_allocate =
7879 (size + frame->padding1 + frame->padding2
7880 + frame->outgoing_arguments_size + frame->va_arg_size);
7882 if ((!frame->to_allocate && frame->nregs <= 1)
7883 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
7884 frame->save_regs_using_mov = false;
7886 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
7887 && current_function_is_leaf
7888 && !ix86_current_function_calls_tls_descriptor)
7890 frame->red_zone_size = frame->to_allocate;
7891 if (frame->save_regs_using_mov)
7892 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7893 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7894 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7896 else
7897 frame->red_zone_size = 0;
7898 frame->to_allocate -= frame->red_zone_size;
7899 frame->stack_pointer_offset -= frame->red_zone_size;
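/* (Editorial note: the x86-64 red zone is the area below the stack
   pointer that leaf functions may use without adjusting %rsp;
   RED_ZONE_SIZE is its total size and RED_ZONE_RESERVE a small part
   kept back from it -- see i386.h.) */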
7900 #if 0
7901 fprintf (stderr, "\n");
7902 fprintf (stderr, "size: %ld\n", (long)size);
7903 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7904 fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
7905 fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
7906 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7907 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7908 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7909 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7910 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7911 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7912 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7913 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7914 (long)frame->hard_frame_pointer_offset);
7915 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7916 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7917 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7918 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7919 #endif
7922 /* Emit code to save registers in the prologue. */
7924 static void
7925 ix86_emit_save_regs (void)
7927 unsigned int regno;
7928 rtx insn;
7930 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
7931 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7933 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7934 RTX_FRAME_RELATED_P (insn) = 1;
7938 /* Emit code to save registers using MOV insns. The first register
7939 is saved at POINTER + OFFSET. */
7940 static void
7941 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7943 unsigned int regno;
7944 rtx insn;
7946 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7947 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7949 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7950 Pmode, offset),
7951 gen_rtx_REG (Pmode, regno));
7952 RTX_FRAME_RELATED_P (insn) = 1;
7953 offset += UNITS_PER_WORD;
7957 /* Emit code to save SSE registers using MOV insns. The first register
7958 is saved at POINTER + OFFSET. */
7959 static void
7960 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7962 unsigned int regno;
7963 rtx insn;
7964 rtx mem;
7966 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7967 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7969 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
7970 set_mem_align (mem, 128);
7971 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
7972 RTX_FRAME_RELATED_P (insn) = 1;
7973 offset += 16;
7977 /* Expand prologue or epilogue stack adjustment.
7978 The pattern exists to put a dependency on all ebp-based memory accesses.
7979 STYLE should be negative if instructions should be marked as frame
7980 related, zero if the %r11 register is live and cannot be freely used,
7981 and positive otherwise. */
7983 static void
7984 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
7986 rtx insn;
7988 if (! TARGET_64BIT)
7989 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
7990 else if (x86_64_immediate_operand (offset, DImode))
7991 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
7992 else
7994 rtx r11;
7995 /* r11 is used by indirect sibcall return as well, set before the
7996 epilogue and used after the epilogue. ATM indirect sibcall
7997 shouldn't be used together with huge frame sizes in one
7998 function because of the frame_size check in sibcall.c. */
7999 gcc_assert (style);
8000 r11 = gen_rtx_REG (DImode, R11_REG);
8001 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
8002 if (style < 0)
8003 RTX_FRAME_RELATED_P (insn) = 1;
8004 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
8005 offset));
8007 if (style < 0)
8008 RTX_FRAME_RELATED_P (insn) = 1;
8011 /* Find an available register to be used as a dynamic realign argument
8012 pointer register. Such a register will be written in the prologue and
8013 used at the beginning of the body, so it must not be
8014 1. a parameter passing register.
8015 2. the GOT pointer.
8016 We reuse the static-chain register if it is available. Otherwise, we
8017 use DI for i386 and R13 for x86-64. We chose R13 since it has a
8018 shorter encoding.
8020 Return: the regno of the chosen register. */
8022 static unsigned int
8023 find_drap_reg (void)
8025 tree decl = cfun->decl;
8027 if (TARGET_64BIT)
8029 /* Use R13 for a nested function or a function that needs a static
8030 chain. Since a function with a tail call may use any caller-saved
8031 register in the epilogue, DRAP must not use a caller-saved
8032 register in such a case. */
8033 if ((decl_function_context (decl)
8034 && !DECL_NO_STATIC_CHAIN (decl))
8035 || crtl->tail_call_emit)
8036 return R13_REG;
8038 return R10_REG;
8040 else
8042 /* Use DI for a nested function or a function that needs a static
8043 chain. Since a function with a tail call may use any caller-saved
8044 register in the epilogue, DRAP must not use a caller-saved
8045 register in such a case. */
8046 if ((decl_function_context (decl)
8047 && !DECL_NO_STATIC_CHAIN (decl))
8048 || crtl->tail_call_emit)
8049 return DI_REG;
8051 /* Reuse static chain register if it isn't used for parameter
8052 passing. */
8053 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8054 && !lookup_attribute ("fastcall",
8055 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8056 return CX_REG;
8057 else
8058 return DI_REG;
8062 /* Update incoming stack boundary and estimated stack alignment. */
8064 static void
8065 ix86_update_stack_boundary (void)
8067 /* Prefer the one specified at command line. */
8068 ix86_incoming_stack_boundary
8069 = (ix86_user_incoming_stack_boundary
8070 ? ix86_user_incoming_stack_boundary
8071 : ix86_default_incoming_stack_boundary);
8073 /* Incoming stack alignment can be changed on individual functions
8074 via force_align_arg_pointer attribute. We use the smallest
8075 incoming stack boundary. */
8076 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
8077 && lookup_attribute (ix86_force_align_arg_pointer_string,
8078 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8079 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
8081 /* The incoming stack frame has to be aligned at least at
8082 parm_stack_boundary. */
8083 if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
8084 ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
8086 /* The stack at the entry point of main is aligned by the runtime.
8087 We use the smallest incoming stack boundary. */
8088 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
8089 && DECL_NAME (current_function_decl)
8090 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8091 && DECL_FILE_SCOPE_P (current_function_decl))
8092 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8094 /* x86_64 varargs need 16-byte stack alignment for the register save
8095 area. */
8096 if (TARGET_64BIT
8097 && cfun->stdarg
8098 && crtl->stack_alignment_estimated < 128)
8099 crtl->stack_alignment_estimated = 128;
8102 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
8103 needed or an rtx for DRAP otherwise. */
8105 static rtx
8106 ix86_get_drap_rtx (void)
8108 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8109 crtl->need_drap = true;
8111 if (stack_realign_drap)
8113 /* Assign DRAP to vDRAP and return vDRAP. */
8114 unsigned int regno = find_drap_reg ();
8115 rtx drap_vreg;
8116 rtx arg_ptr;
8117 rtx seq, insn;
8119 arg_ptr = gen_rtx_REG (Pmode, regno);
8120 crtl->drap_reg = arg_ptr;
8122 start_sequence ();
8123 drap_vreg = copy_to_reg (arg_ptr);
8124 seq = get_insns ();
8125 end_sequence ();
8127 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8128 RTX_FRAME_RELATED_P (insn) = 1;
8129 return drap_vreg;
8131 else
8132 return NULL;
8135 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
8137 static rtx
8138 ix86_internal_arg_pointer (void)
8140 return virtual_incoming_args_rtx;
8143 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
8144 This is called from dwarf2out.c to emit call frame instructions
8145 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
8146 static void
8147 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
8149 rtx unspec = SET_SRC (pattern);
8150 gcc_assert (GET_CODE (unspec) == UNSPEC);
8152 switch (index)
8154 case UNSPEC_REG_SAVE:
8155 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
8156 SET_DEST (pattern));
8157 break;
8158 case UNSPEC_DEF_CFA:
8159 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
8160 INTVAL (XVECEXP (unspec, 0, 0)));
8161 break;
8162 default:
8163 gcc_unreachable ();
8167 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
8168 to be generated in correct form. */
8169 static void
8170 ix86_finalize_stack_realign_flags (void)
8172 /* Check if stack realignment is really needed after reload, and
8173 store the result in cfun. */
8174 unsigned int incoming_stack_boundary
8175 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8176 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
8177 unsigned int stack_realign = (incoming_stack_boundary
8178 < (current_function_is_leaf
8179 ? crtl->max_used_stack_slot_alignment
8180 : crtl->stack_alignment_needed));
8182 if (crtl->stack_realign_finalized)
8184 /* After stack_realign_needed is finalized, we can no longer
8185 change it. */
8186 gcc_assert (crtl->stack_realign_needed == stack_realign);
8188 else
8190 crtl->stack_realign_needed = stack_realign;
8191 crtl->stack_realign_finalized = true;
8195 /* Expand the prologue into a bunch of separate insns. */
8197 void
8198 ix86_expand_prologue (void)
8200 rtx insn;
8201 bool pic_reg_used;
8202 struct ix86_frame frame;
8203 HOST_WIDE_INT allocate;
8205 ix86_finalize_stack_realign_flags ();
8207 /* DRAP should not coexist with stack_realign_fp */
8208 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8210 ix86_compute_frame_layout (&frame);
8212 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
8213 DRAP is needed and stack realignment is really needed after reload. */
8214 if (crtl->drap_reg && crtl->stack_realign_needed)
8216 rtx x, y;
8217 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8218 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8219 ? 0 : UNITS_PER_WORD);
8221 gcc_assert (stack_realign_drap);
8223 /* Grab the argument pointer. */
8224 x = plus_constant (stack_pointer_rtx,
8225 (UNITS_PER_WORD + param_ptr_offset));
8226 y = crtl->drap_reg;
8228 /* Only need to push the parameter pointer reg if it is a callee-saved
8229 reg (i.e. not in call_used_regs). */
8230 if (!call_used_regs[REGNO (crtl->drap_reg)])
8232 /* Push arg pointer reg */
8233 insn = emit_insn (gen_push (y));
8234 RTX_FRAME_RELATED_P (insn) = 1;
8237 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
8238 RTX_FRAME_RELATED_P (insn) = 1;
8240 /* Align the stack. */
8241 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8242 stack_pointer_rtx,
8243 GEN_INT (-align_bytes)));
8244 RTX_FRAME_RELATED_P (insn) = 1;
8246 /* Replicate the return address on the stack so that the return
8247 address can be reached via the (argp - 1) slot. This is needed
8248 to implement the macro RETURN_ADDR_RTX and the intrinsic function
8249 expand_builtin_return_addr, etc. */
8250 x = crtl->drap_reg;
8251 x = gen_frame_mem (Pmode,
8252 plus_constant (x, -UNITS_PER_WORD));
8253 insn = emit_insn (gen_push (x));
8254 RTX_FRAME_RELATED_P (insn) = 1;
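/* With a call-clobbered DRAP register the sequence built above is
   roughly (illustrative; 32-bit, 16-byte alignment, DRAP in %ecx):
	leal	4(%esp), %ecx
	andl	$-16, %esp
	pushl	-4(%ecx)
   i.e. grab the argument pointer, align the stack, and replicate the
   return address below the new stack top. */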
8257 /* Note: AT&T enter does NOT have reversed args. Enter is probably
8258 slower on all targets. Also sdb doesn't like it. */
8260 if (frame_pointer_needed)
8262 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8263 RTX_FRAME_RELATED_P (insn) = 1;
8265 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8266 RTX_FRAME_RELATED_P (insn) = 1;
8269 if (stack_realign_fp)
8271 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8272 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8274 /* Align the stack. */
8275 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8276 stack_pointer_rtx,
8277 GEN_INT (-align_bytes)));
8278 RTX_FRAME_RELATED_P (insn) = 1;
8281 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
8283 if (!frame.save_regs_using_mov)
8284 ix86_emit_save_regs ();
8285 else
8286 allocate += frame.nregs * UNITS_PER_WORD;
8288 /* When using the red zone we may start register saving before allocating
8289 the stack frame, saving one cycle of the prologue. However, avoid doing
8290 this if we have to probe the stack, since at least on x86_64
8291 the stack probe can turn into a call that clobbers
8292 a red zone location. */
8293 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8294 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8295 ix86_emit_save_regs_using_mov ((frame_pointer_needed
8296 && !crtl->stack_realign_needed)
8297 ? hard_frame_pointer_rtx
8298 : stack_pointer_rtx,
8299 -frame.nregs * UNITS_PER_WORD);
8301 if (allocate == 0)
8303 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8304 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8305 GEN_INT (-allocate), -1);
8306 else
8308 /* Only valid for Win32. */
8309 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8310 bool eax_live;
8311 rtx t;
8313 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
8315 if (cfun->machine->call_abi == MS_ABI)
8316 eax_live = false;
8317 else
8318 eax_live = ix86_eax_live_at_start_p ();
8320 if (eax_live)
8322 emit_insn (gen_push (eax));
8323 allocate -= UNITS_PER_WORD;
8326 emit_move_insn (eax, GEN_INT (allocate));
8328 if (TARGET_64BIT)
8329 insn = gen_allocate_stack_worker_64 (eax, eax);
8330 else
8331 insn = gen_allocate_stack_worker_32 (eax, eax);
8332 insn = emit_insn (insn);
8333 RTX_FRAME_RELATED_P (insn) = 1;
8334 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8335 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8336 add_reg_note (insn, REG_FRAME_RELATED_EXPR, t);
8338 if (eax_live)
8340 if (frame_pointer_needed)
8341 t = plus_constant (hard_frame_pointer_rtx,
8342 allocate
8343 - frame.to_allocate
8344 - frame.nregs * UNITS_PER_WORD);
8345 else
8346 t = plus_constant (stack_pointer_rtx, allocate);
8347 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
8351 if (frame.save_regs_using_mov
8352 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8353 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8355 if (!frame_pointer_needed
8356 || !frame.to_allocate
8357 || crtl->stack_realign_needed)
8358 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8359 frame.to_allocate
8360 + frame.nsseregs * 16 + frame.padding0);
8361 else
8362 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8363 -frame.nregs * UNITS_PER_WORD);
8365 if (!frame_pointer_needed
8366 || !frame.to_allocate
8367 || crtl->stack_realign_needed)
8368 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8369 frame.to_allocate);
8370 else
8371 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8372 - frame.nregs * UNITS_PER_WORD
8373 - frame.nsseregs * 16
8374 - frame.padding0);
8376 pic_reg_used = false;
8377 if (pic_offset_table_rtx
8378 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8379 || crtl->profile))
8381 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8383 if (alt_pic_reg_used != INVALID_REGNUM)
8384 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8386 pic_reg_used = true;
8389 if (pic_reg_used)
8391 if (TARGET_64BIT)
8393 if (ix86_cmodel == CM_LARGE_PIC)
8395 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8396 rtx label = gen_label_rtx ();
8397 emit_label (label);
8398 LABEL_PRESERVE_P (label) = 1;
8399 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8400 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8401 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8402 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8403 pic_offset_table_rtx, tmp_reg));
8405 else
8406 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8408 else
8409 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8412 /* In the pic_reg_used case, make sure that the got load isn't deleted
8413 when mcount needs it. A blockage to avoid call movement across the
8414 mcount call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
8415 note. */
8416 if (crtl->profile && pic_reg_used)
8417 emit_insn (gen_prologue_use (pic_offset_table_rtx));
8419 if (crtl->drap_reg && !crtl->stack_realign_needed)
8421 /* vDRAP is set up, but after reload it turns out stack realignment
8422 isn't necessary; here we emit the prologue to set up DRAP
8423 without the stack realignment adjustment. */
8424 int drap_bp_offset = UNITS_PER_WORD * 2;
8425 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8426 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8429 /* Prevent instructions from being scheduled into the register save push
8430 sequence when access to the redzone area is done through the frame pointer.
8431 The offset between the frame pointer and the stack pointer is calculated
8432 relative to the value of the stack pointer at the end of the function
8433 prologue, and moving instructions that access the redzone area via the frame
8434 pointer inside the push sequence violates this assumption. */
8435 if (frame_pointer_needed && frame.red_zone_size)
8436 emit_insn (gen_memory_blockage ());
8438 /* Emit cld instruction if stringops are used in the function. */
8439 if (TARGET_CLD && ix86_current_function_needs_cld)
8440 emit_insn (gen_cld ());
8443 /* Emit code to restore saved registers using MOV insns. First register
8444 is restored from POINTER + OFFSET. */
8445 static void
8446 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8447 int maybe_eh_return)
8449 int regno;
8450 rtx base_address = gen_rtx_MEM (Pmode, pointer);
8452 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8453 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8455 /* Ensure that adjust_address won't be forced to produce pointer
8456 out of range allowed by x86-64 instruction set. */
8457 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8459 rtx r11;
8461 r11 = gen_rtx_REG (DImode, R11_REG);
8462 emit_move_insn (r11, GEN_INT (offset));
8463 emit_insn (gen_adddi3 (r11, r11, pointer));
8464 base_address = gen_rtx_MEM (Pmode, r11);
8465 offset = 0;
8467 emit_move_insn (gen_rtx_REG (Pmode, regno),
8468 adjust_address (base_address, Pmode, offset));
8469 offset += UNITS_PER_WORD;
8473 /* Emit code to restore saved SSE registers using MOV insns. The first
8474 register is restored from POINTER + OFFSET. */
8475 static void
8476 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8477 int maybe_eh_return)
8479 int regno;
8480 rtx base_address = gen_rtx_MEM (TImode, pointer);
8481 rtx mem;
8483 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8484 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8486 /* Ensure that adjust_address won't be forced to produce pointer
8487 out of range allowed by x86-64 instruction set. */
8488 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8490 rtx r11;
8492 r11 = gen_rtx_REG (DImode, R11_REG);
8493 emit_move_insn (r11, GEN_INT (offset));
8494 emit_insn (gen_adddi3 (r11, r11, pointer));
8495 base_address = gen_rtx_MEM (TImode, r11);
8496 offset = 0;
8498 mem = adjust_address (base_address, TImode, offset);
8499 set_mem_align (mem, 128);
8500 emit_move_insn (gen_rtx_REG (TImode, regno), mem);
8501 offset += 16;
8505 /* Restore function stack, frame, and registers. */
8507 void
8508 ix86_expand_epilogue (int style)
8510 int regno;
8511 int sp_valid;
8512 struct ix86_frame frame;
8513 HOST_WIDE_INT offset;
8515 ix86_finalize_stack_realign_flags ();
8517 /* When stack is realigned, SP must be valid. */
8518 sp_valid = (!frame_pointer_needed
8519 || current_function_sp_is_unchanging
8520 || stack_realign_fp);
8522 ix86_compute_frame_layout (&frame);
8524 /* See the comment about red zone and frame
8525 pointer usage in ix86_expand_prologue. */
8526 if (frame_pointer_needed && frame.red_zone_size)
8527 emit_insn (gen_memory_blockage ());
8529 /* Calculate start of saved registers relative to ebp. Special care
8530 must be taken for the normal return case of a function using
8531 eh_return: the eax and edx registers are marked as saved, but not
8532 restored along this path. */
8533 offset = frame.nregs;
8534 if (crtl->calls_eh_return && style != 2)
8535 offset -= 2;
8536 offset *= -UNITS_PER_WORD;
8537 offset -= frame.nsseregs * 16 + frame.padding0;
8539 /* If we're only restoring one register and sp is not valid, then
8540 use a move instruction to restore the register, since it's
8541 less work than reloading sp and popping the register.
8543 The default code results in a stack adjustment using an add/lea instruction,
8544 while this code results in a LEAVE instruction (or discrete equivalent),
8545 so it is profitable in some other cases as well, especially when there
8546 are no registers to restore. We also use this code when TARGET_USE_LEAVE
8547 and there is exactly one register to pop. This heuristic may need some
8548 tuning in the future. */
8549 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
8550 || (TARGET_EPILOGUE_USING_MOVE
8551 && cfun->machine->use_fast_prologue_epilogue
8552 && ((frame.nregs + frame.nsseregs) > 1 || frame.to_allocate))
8553 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs) && frame.to_allocate)
8554 || (frame_pointer_needed && TARGET_USE_LEAVE
8555 && cfun->machine->use_fast_prologue_epilogue
8556 && (frame.nregs + frame.nsseregs) == 1)
8557 || crtl->calls_eh_return)
8559 /* Restore registers. We can use ebp or esp to address the memory
8560 locations. If both are available, default to ebp, since offsets
8561 are known to be small. The only exception is esp pointing directly
8562 to the end of the block of saved registers, where we may simplify
8563 the addressing mode.
8565 If we are realigning the stack with bp and sp, the register restores
8566 can't be addressed by bp; sp must be used instead. */
8568 if (!frame_pointer_needed
8569 || (sp_valid && !frame.to_allocate)
8570 || stack_realign_fp)
8572 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8573 frame.to_allocate, style == 2);
8574 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8575 frame.to_allocate
8576 + frame.nsseregs * 16
8577 + frame.padding0, style == 2);
8579 else
8581 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
8582 offset, style == 2);
8583 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8584 offset
8585 + frame.nsseregs * 16
8586 + frame.padding0, style == 2);
8589 /* eh_return epilogues need %ecx added to the stack pointer. */
8590 if (style == 2)
8592 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8594 /* Stack realignment doesn't work with eh_return. */
8595 gcc_assert (!crtl->stack_realign_needed);
8597 if (frame_pointer_needed)
8599 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8600 tmp = plus_constant (tmp, UNITS_PER_WORD);
8601 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8603 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8604 emit_move_insn (hard_frame_pointer_rtx, tmp);
8606 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8607 const0_rtx, style);
8609 else
8611 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8612 tmp = plus_constant (tmp, (frame.to_allocate
8613 + frame.nregs * UNITS_PER_WORD
8614 + frame.nsseregs * 16
8615 + frame.padding0));
8616 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8619 else if (!frame_pointer_needed)
8620 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8621 GEN_INT (frame.to_allocate
8622 + frame.nregs * UNITS_PER_WORD
8623 + frame.nsseregs * 16
8624 + frame.padding0),
8625 style);
8626 /* If not an i386, mov & pop is faster than "leave". */
8627 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8628 || !cfun->machine->use_fast_prologue_epilogue)
8629 emit_insn ((*ix86_gen_leave) ());
8630 else
8632 pro_epilogue_adjust_stack (stack_pointer_rtx,
8633 hard_frame_pointer_rtx,
8634 const0_rtx, style);
8636 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
8639 else
8641 /* The first step is to deallocate the stack frame so that we can
8642 pop the registers.
8644 If we realign the stack with the frame pointer, then the stack pointer
8645 can't be recovered via lea $offset(%bp), %sp, because
8646 there is a padding area between bp and sp for the realignment.
8647 "add $to_allocate, %sp" must be used instead. */
8648 if (!sp_valid)
8650 gcc_assert (frame_pointer_needed);
8651 gcc_assert (!stack_realign_fp);
8652 pro_epilogue_adjust_stack (stack_pointer_rtx,
8653 hard_frame_pointer_rtx,
8654 GEN_INT (offset), style);
8655 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8656 frame.to_allocate, style == 2);
8657 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8658 GEN_INT (frame.nsseregs * 16), style);
8660 else if (frame.to_allocate || frame.nsseregs)
8662 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8663 frame.to_allocate,
8664 style == 2);
8665 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8666 GEN_INT (frame.to_allocate
8667 + frame.nsseregs * 16
8668 + frame.padding0), style);
8671 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8672 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8673 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
8674 if (frame_pointer_needed)
8676 /* Leave results in shorter dependency chains on CPUs that are
8677 able to grok it fast. */
8678 if (TARGET_USE_LEAVE)
8679 emit_insn ((*ix86_gen_leave) ());
8680 else
8682 /* If stack realignment really happened, recovering the stack
8683 pointer from the hard frame pointer is a must, if not using
8684 leave. */
8685 if (stack_realign_fp)
8686 pro_epilogue_adjust_stack (stack_pointer_rtx,
8687 hard_frame_pointer_rtx,
8688 const0_rtx, style);
8689 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
8694 if (crtl->drap_reg && crtl->stack_realign_needed)
8696 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8697 ? 0 : UNITS_PER_WORD);
8698 gcc_assert (stack_realign_drap);
8699 emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8700 crtl->drap_reg,
8701 GEN_INT (-(UNITS_PER_WORD
8702 + param_ptr_offset))));
8703 if (!call_used_regs[REGNO (crtl->drap_reg)])
8704 emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
8708 /* Sibcall epilogues don't want a return instruction. */
8709 if (style == 0)
8710 return;
8712 if (crtl->args.pops_args && crtl->args.size)
8714 rtx popc = GEN_INT (crtl->args.pops_args);
8716 /* The i386 can only pop 64K bytes with "ret". If asked to pop more,
8717 pop the return address, do an explicit add, and jump indirectly to
8718 the caller. */
8720 if (crtl->args.pops_args >= 65536)
8722 rtx ecx = gen_rtx_REG (SImode, CX_REG);
8724 /* There is no "pascal" calling convention in any 64bit ABI. */
8725 gcc_assert (!TARGET_64BIT);
8727 emit_insn (gen_popsi1 (ecx));
8728 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
8729 emit_jump_insn (gen_return_indirect_internal (ecx));
8731 else
8732 emit_jump_insn (gen_return_pop_internal (popc));
8734 else
8735 emit_jump_insn (gen_return_internal ());
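/* The common case above thus emits "ret $N" (N == pops_args), while the
   >= 64K case emits roughly "popl %ecx; addl $N, %esp; jmp *%ecx"
   (illustrative). */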
8738 /* Reset from the function's potential modifications. */
8740 static void
8741 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8742 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8744 if (pic_offset_table_rtx)
8745 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
8746 #if TARGET_MACHO
8747 /* Mach-O doesn't support labels at the end of objects, so if
8748 it looks like we might want one, insert a NOP. */
8750 rtx insn = get_last_insn ();
8751 while (insn
8752 && NOTE_P (insn)
8753 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
8754 insn = PREV_INSN (insn);
8755 if (insn
8756 && (LABEL_P (insn)
8757 || (NOTE_P (insn)
8758 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
8759 fputs ("\tnop\n", file);
8761 #endif
8765 /* Extract the parts of an RTL expression that is a valid memory address
8766 for an instruction. Return 0 if the structure of the address is
8767 grossly off. Return -1 if the address contains ASHIFT, so it is not
8768 strictly valid, but is still used for computing the length of an lea
8769 instruction. */
8770 int
8771 ix86_decompose_address (rtx addr, struct ix86_address *out)
8773 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
8774 rtx base_reg, index_reg;
8775 HOST_WIDE_INT scale = 1;
8776 rtx scale_rtx = NULL_RTX;
8777 int retval = 1;
8778 enum ix86_address_seg seg = SEG_DEFAULT;
8780 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
8781 base = addr;
8782 else if (GET_CODE (addr) == PLUS)
8784 rtx addends[4], op;
8785 int n = 0, i;
8787 op = addr;
8790 if (n >= 4)
8791 return 0;
8792 addends[n++] = XEXP (op, 1);
8793 op = XEXP (op, 0);
8795 while (GET_CODE (op) == PLUS);
8796 if (n >= 4)
8797 return 0;
8798 addends[n] = op;
8800 for (i = n; i >= 0; --i)
8802 op = addends[i];
8803 switch (GET_CODE (op))
8805 case MULT:
8806 if (index)
8807 return 0;
8808 index = XEXP (op, 0);
8809 scale_rtx = XEXP (op, 1);
8810 break;
8812 case UNSPEC:
8813 if (XINT (op, 1) == UNSPEC_TP
8814 && TARGET_TLS_DIRECT_SEG_REFS
8815 && seg == SEG_DEFAULT)
8816 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
8817 else
8818 return 0;
8819 break;
8821 case REG:
8822 case SUBREG:
8823 if (!base)
8824 base = op;
8825 else if (!index)
8826 index = op;
8827 else
8828 return 0;
8829 break;
8831 case CONST:
8832 case CONST_INT:
8833 case SYMBOL_REF:
8834 case LABEL_REF:
8835 if (disp)
8836 return 0;
8837 disp = op;
8838 break;
8840 default:
8841 return 0;
8845 else if (GET_CODE (addr) == MULT)
8847 index = XEXP (addr, 0); /* index*scale */
8848 scale_rtx = XEXP (addr, 1);
8850 else if (GET_CODE (addr) == ASHIFT)
8852 rtx tmp;
8854 /* We're called for lea too, which implements ashift on occasion. */
8855 index = XEXP (addr, 0);
8856 tmp = XEXP (addr, 1);
8857 if (!CONST_INT_P (tmp))
8858 return 0;
8859 scale = INTVAL (tmp);
8860 if ((unsigned HOST_WIDE_INT) scale > 3)
8861 return 0;
8862 scale = 1 << scale;
8863 retval = -1;
8865 else
8866 disp = addr; /* displacement */
8868 /* Extract the integral value of scale. */
8869 if (scale_rtx)
8871 if (!CONST_INT_P (scale_rtx))
8872 return 0;
8873 scale = INTVAL (scale_rtx);
8876 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
8877 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
8879 /* Allow arg pointer and stack pointer as index if there is no scaling. */
8880 if (base_reg && index_reg && scale == 1
8881 && (index_reg == arg_pointer_rtx
8882 || index_reg == frame_pointer_rtx
8883 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
8885 rtx tmp;
8886 tmp = base, base = index, index = tmp;
8887 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
8890 /* Special case: %ebp cannot be encoded as a base without a displacement. */
8891 if ((base_reg == hard_frame_pointer_rtx
8892 || base_reg == frame_pointer_rtx
8893 || base_reg == arg_pointer_rtx) && !disp)
8894 disp = const0_rtx;
8896 /* Special case: on K6, [%esi] makes the instruction vector decoded.
8897 Avoid this by transforming to [%esi+0].
8898 Reload calls address legitimization without cfun defined, so we need
8899 to test cfun for being non-NULL. */
8900 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
8901 && base_reg && !index_reg && !disp
8902 && REG_P (base_reg)
8903 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
8904 disp = const0_rtx;
8906 /* Special case: encode reg+reg instead of reg*2. */
8907 if (!base && index && scale && scale == 2)
8908 base = index, base_reg = index_reg, scale = 1;
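/* I.e. (%reg,%reg) encodes shorter than (,%reg,2), which would need an
   explicit zero displacement (see the special case below). */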
8910 /* Special case: scaling cannot be encoded without base or displacement. */
8911 if (!base && !disp && index && scale != 1)
8912 disp = const0_rtx;
8914 out->base = base;
8915 out->index = index;
8916 out->disp = disp;
8917 out->scale = scale;
8918 out->seg = seg;
8920 return retval;
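/* A worked example (illustrative): the address
     (plus (plus (mult (reg B) (const_int 4)) (reg A)) (const_int 16))
   decomposes into base == A, index == B, scale == 4, disp == 16,
   i.e. the 16(%A,%B,4) addressing form. */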
8923 /* Return the cost of the memory address X.
8924 For i386, it is better to use a complex address than let gcc copy
8925 the address into a reg and make a new pseudo. But not if the address
8926 requires two regs - that would mean more pseudos with longer
8927 lifetimes. */
8928 static int
8929 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8931 struct ix86_address parts;
8932 int cost = 1;
8933 int ok = ix86_decompose_address (x, &parts);
8935 gcc_assert (ok);
8937 if (parts.base && GET_CODE (parts.base) == SUBREG)
8938 parts.base = SUBREG_REG (parts.base);
8939 if (parts.index && GET_CODE (parts.index) == SUBREG)
8940 parts.index = SUBREG_REG (parts.index);
8942 /* Attempt to minimize number of registers in the address. */
8943 if ((parts.base
8944 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
8945 || (parts.index
8946 && (!REG_P (parts.index)
8947 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
8948 cost++;
8950 if (parts.base
8951 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
8952 && parts.index
8953 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
8954 && parts.base != parts.index)
8955 cost++;
8957 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to
8958 00_xxx_100b, since its predecode logic can't detect the length of such
8959 instructions and decoding degenerates to vector decoded. Increase the
8960 cost of such addresses here. The penalty is minimally 2 cycles. It may
8961 be worthwhile to split such addresses or even refuse them at all.
8963 The following addressing modes are affected:
8964 [base+scale*index]
8965 [scale*index+disp]
8966 [base+index]
8968 The first and last cases may be avoidable by explicitly coding the zero
8969 into the memory address, but I don't have an AMD-K6 machine handy to
8970 check this theory. */
8972 if (TARGET_K6
8973 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
8974 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
8975 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
8976 cost += 10;
8978 return cost;
8981 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
8982 this is used to form addresses to local data when -fPIC is in
8983 use. */
8985 static bool
8986 darwin_local_data_pic (rtx disp)
8988 return (GET_CODE (disp) == UNSPEC
8989 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
8992 /* Determine if a given RTX is a valid constant. We already know this
8993 satisfies CONSTANT_P. */
8995 bool
8996 legitimate_constant_p (rtx x)
8998 switch (GET_CODE (x))
9000 case CONST:
9001 x = XEXP (x, 0);
9003 if (GET_CODE (x) == PLUS)
9005 if (!CONST_INT_P (XEXP (x, 1)))
9006 return false;
9007 x = XEXP (x, 0);
9010 if (TARGET_MACHO && darwin_local_data_pic (x))
9011 return true;
9013 /* Only some unspecs are valid as "constants". */
9014 if (GET_CODE (x) == UNSPEC)
9015 switch (XINT (x, 1))
9017 case UNSPEC_GOT:
9018 case UNSPEC_GOTOFF:
9019 case UNSPEC_PLTOFF:
9020 return TARGET_64BIT;
9021 case UNSPEC_TPOFF:
9022 case UNSPEC_NTPOFF:
9023 x = XVECEXP (x, 0, 0);
9024 return (GET_CODE (x) == SYMBOL_REF
9025 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9026 case UNSPEC_DTPOFF:
9027 x = XVECEXP (x, 0, 0);
9028 return (GET_CODE (x) == SYMBOL_REF
9029 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
9030 default:
9031 return false;
9034 /* We must have drilled down to a symbol. */
9035 if (GET_CODE (x) == LABEL_REF)
9036 return true;
9037 if (GET_CODE (x) != SYMBOL_REF)
9038 return false;
9039 /* FALLTHRU */
9041 case SYMBOL_REF:
9042 /* TLS symbols are never valid. */
9043 if (SYMBOL_REF_TLS_MODEL (x))
9044 return false;
9046 /* DLLIMPORT symbols are never valid. */
9047 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9048 && SYMBOL_REF_DLLIMPORT_P (x))
9049 return false;
9050 break;
9052 case CONST_DOUBLE:
9053 if (GET_MODE (x) == TImode
9054 && x != CONST0_RTX (TImode)
9055 && !TARGET_64BIT)
9056 return false;
9057 break;
9059 case CONST_VECTOR:
9060 if (!standard_sse_constant_p (x))
9061 return false;
9063 default:
9064 break;
9067 /* Otherwise we handle everything else in the move patterns. */
9068 return true;
9071 /* Determine if it's legal to put X into the constant pool. This
9072 is not possible for the address of thread-local symbols, which
9073 is checked above. */
9075 static bool
9076 ix86_cannot_force_const_mem (rtx x)
9078 /* We can always put integral constants and vectors in memory. */
9079 switch (GET_CODE (x))
9081 case CONST_INT:
9082 case CONST_DOUBLE:
9083 case CONST_VECTOR:
9084 return false;
9086 default:
9087 break;
9089 return !legitimate_constant_p (x);
9092 /* Determine if a given RTX is a valid constant address. */
9094 bool
9095 constant_address_p (rtx x)
9097 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
9100 /* Nonzero if the constant value X is a legitimate general operand
9101 when generating PIC code. It is given that flag_pic is on and
9102 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
9104 bool
9105 legitimate_pic_operand_p (rtx x)
9107 rtx inner;
9109 switch (GET_CODE (x))
9111 case CONST:
9112 inner = XEXP (x, 0);
9113 if (GET_CODE (inner) == PLUS
9114 && CONST_INT_P (XEXP (inner, 1)))
9115 inner = XEXP (inner, 0);
9117 /* Only some unspecs are valid as "constants". */
9118 if (GET_CODE (inner) == UNSPEC)
9119 switch (XINT (inner, 1))
9121 case UNSPEC_GOT:
9122 case UNSPEC_GOTOFF:
9123 case UNSPEC_PLTOFF:
9124 return TARGET_64BIT;
9125 case UNSPEC_TPOFF:
9126 x = XVECEXP (inner, 0, 0);
9127 return (GET_CODE (x) == SYMBOL_REF
9128 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9129 case UNSPEC_MACHOPIC_OFFSET:
9130 return legitimate_pic_address_disp_p (x);
9131 default:
9132 return false;
9134 /* FALLTHRU */
9136 case SYMBOL_REF:
9137 case LABEL_REF:
9138 return legitimate_pic_address_disp_p (x);
9140 default:
9141 return true;
9145 /* Determine if a given CONST RTX is a valid memory displacement
9146 in PIC mode. */
9149 legitimate_pic_address_disp_p (rtx disp)
9151 bool saw_plus;
9153 /* In 64bit mode we can allow direct addresses of symbols and labels
9154 when they are not dynamic symbols. */
9155 if (TARGET_64BIT)
9157 rtx op0 = disp, op1;
9159 switch (GET_CODE (disp))
9161 case LABEL_REF:
9162 return true;
9164 case CONST:
9165 if (GET_CODE (XEXP (disp, 0)) != PLUS)
9166 break;
9167 op0 = XEXP (XEXP (disp, 0), 0);
9168 op1 = XEXP (XEXP (disp, 0), 1);
9169 if (!CONST_INT_P (op1)
9170 || INTVAL (op1) >= 16*1024*1024
9171 || INTVAL (op1) < -16*1024*1024)
9172 break;
9173 if (GET_CODE (op0) == LABEL_REF)
9174 return true;
9175 if (GET_CODE (op0) != SYMBOL_REF)
9176 break;
9177 /* FALLTHRU */
9179 case SYMBOL_REF:
9180 /* TLS references should always be enclosed in UNSPEC. */
9181 if (SYMBOL_REF_TLS_MODEL (op0))
9182 return false;
9183 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
9184 && ix86_cmodel != CM_LARGE_PIC)
9185 return true;
9186 break;
9188 default:
9189 break;
9192 if (GET_CODE (disp) != CONST)
9193 return 0;
9194 disp = XEXP (disp, 0);
9196 if (TARGET_64BIT)
9198 /* It is unsafe to allow PLUS expressions here; this limits the allowed
9199 distance of GOT-relative references. We should not need these anyway. */
9200 if (GET_CODE (disp) != UNSPEC
9201 || (XINT (disp, 1) != UNSPEC_GOTPCREL
9202 && XINT (disp, 1) != UNSPEC_GOTOFF
9203 && XINT (disp, 1) != UNSPEC_PLTOFF))
9204 return 0;
9206 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
9207 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
9208 return 0;
9209 return 1;
9212 saw_plus = false;
9213 if (GET_CODE (disp) == PLUS)
9215 if (!CONST_INT_P (XEXP (disp, 1)))
9216 return 0;
9217 disp = XEXP (disp, 0);
9218 saw_plus = true;
9221 if (TARGET_MACHO && darwin_local_data_pic (disp))
9222 return 1;
9224 if (GET_CODE (disp) != UNSPEC)
9225 return 0;
9227 switch (XINT (disp, 1))
9229 case UNSPEC_GOT:
9230 if (saw_plus)
9231 return false;
9232 /* We need to check for both symbols and labels because VxWorks loads
9233 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
9234 details. */
9235 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9236 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
9237 case UNSPEC_GOTOFF:
9238 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
9239 While the ABI also specifies a 32bit relocation, we don't produce it
9240 in the small PIC model at all. */
9241 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9242 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
9243 && !TARGET_64BIT)
9244 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
9245 return false;
9246 case UNSPEC_GOTTPOFF:
9247 case UNSPEC_GOTNTPOFF:
9248 case UNSPEC_INDNTPOFF:
9249 if (saw_plus)
9250 return false;
9251 disp = XVECEXP (disp, 0, 0);
9252 return (GET_CODE (disp) == SYMBOL_REF
9253 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
9254 case UNSPEC_NTPOFF:
9255 disp = XVECEXP (disp, 0, 0);
9256 return (GET_CODE (disp) == SYMBOL_REF
9257 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
9258 case UNSPEC_DTPOFF:
9259 disp = XVECEXP (disp, 0, 0);
9260 return (GET_CODE (disp) == SYMBOL_REF
9261 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
9264 return 0;
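/* For illustration, the displacements accepted above take forms such as
   (const (unspec [foo] UNSPEC_GOTOFF)) for foo@GOTOFF or
   (const (unspec [foo] UNSPEC_GOT)) for foo@GOT; a wrapping
   (plus ... (const_int N)) is stripped first, and the TLS unspecs
   additionally check the symbol's TLS model.  */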
9267 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
9268 memory address for an instruction. The MODE argument is the machine mode
9269 for the MEM expression that wants to use this address.
9271 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
9272 convert common non-canonical forms to canonical form so that they will
9273 be recognized. */
9276 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9277 rtx addr, int strict)
9279 struct ix86_address parts;
9280 rtx base, index, disp;
9281 HOST_WIDE_INT scale;
9282 const char *reason = NULL;
9283 rtx reason_rtx = NULL_RTX;
9285 if (ix86_decompose_address (addr, &parts) <= 0)
9287 reason = "decomposition failed";
9288 goto report_error;
9291 base = parts.base;
9292 index = parts.index;
9293 disp = parts.disp;
9294 scale = parts.scale;
9296 /* Validate base register.
9298 Don't allow SUBREG's that span more than a word here. It can lead to spill
9299 failures when the base is one word out of a two word structure, which is
9300 represented internally as a DImode int. */
9302 if (base)
9304 rtx reg;
9305 reason_rtx = base;
9307 if (REG_P (base))
9308 reg = base;
9309 else if (GET_CODE (base) == SUBREG
9310 && REG_P (SUBREG_REG (base))
9311 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9312 <= UNITS_PER_WORD)
9313 reg = SUBREG_REG (base);
9314 else
9316 reason = "base is not a register";
9317 goto report_error;
9320 if (GET_MODE (base) != Pmode)
9322 reason = "base is not in Pmode";
9323 goto report_error;
9326 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9327 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9329 reason = "base is not valid";
9330 goto report_error;
9334 /* Validate index register.
9336 Don't allow SUBREG's that span more than a word here -- same as above. */
9338 if (index)
9340 rtx reg;
9341 reason_rtx = index;
9343 if (REG_P (index))
9344 reg = index;
9345 else if (GET_CODE (index) == SUBREG
9346 && REG_P (SUBREG_REG (index))
9347 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9348 <= UNITS_PER_WORD)
9349 reg = SUBREG_REG (index);
9350 else
9352 reason = "index is not a register";
9353 goto report_error;
9356 if (GET_MODE (index) != Pmode)
9358 reason = "index is not in Pmode";
9359 goto report_error;
9362 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9363 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9365 reason = "index is not valid";
9366 goto report_error;
9370 /* Validate scale factor. */
9371 if (scale != 1)
9373 reason_rtx = GEN_INT (scale);
9374 if (!index)
9376 reason = "scale without index";
9377 goto report_error;
9380 if (scale != 2 && scale != 4 && scale != 8)
9382 reason = "scale is not a valid multiplier";
9383 goto report_error;
9387 /* Validate displacement. */
9388 if (disp)
9390 reason_rtx = disp;
9392 if (GET_CODE (disp) == CONST
9393 && GET_CODE (XEXP (disp, 0)) == UNSPEC
9394 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9395 switch (XINT (XEXP (disp, 0), 1))
9397 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit when
9398 used. While the ABI also specifies 32bit relocations, we don't produce
9399 them at all and use IP-relative addressing instead. */
9400 case UNSPEC_GOT:
9401 case UNSPEC_GOTOFF:
9402 gcc_assert (flag_pic);
9403 if (!TARGET_64BIT)
9404 goto is_legitimate_pic;
9405 reason = "64bit address unspec";
9406 goto report_error;
9408 case UNSPEC_GOTPCREL:
9409 gcc_assert (flag_pic);
9410 goto is_legitimate_pic;
9412 case UNSPEC_GOTTPOFF:
9413 case UNSPEC_GOTNTPOFF:
9414 case UNSPEC_INDNTPOFF:
9415 case UNSPEC_NTPOFF:
9416 case UNSPEC_DTPOFF:
9417 break;
9419 default:
9420 reason = "invalid address unspec";
9421 goto report_error;
9424 else if (SYMBOLIC_CONST (disp)
9425 && (flag_pic
9426 || (TARGET_MACHO
9427 #if TARGET_MACHO
9428 && MACHOPIC_INDIRECT
9429 && !machopic_operand_p (disp)
9430 #endif
9434 is_legitimate_pic:
9435 if (TARGET_64BIT && (index || base))
9437 /* foo@dtpoff(%rX) is ok. */
9438 if (GET_CODE (disp) != CONST
9439 || GET_CODE (XEXP (disp, 0)) != PLUS
9440 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9441 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9442 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9443 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9445 reason = "non-constant pic memory reference";
9446 goto report_error;
9449 else if (! legitimate_pic_address_disp_p (disp))
9451 reason = "displacement is an invalid pic construct";
9452 goto report_error;
9455 /* This code used to verify that a symbolic pic displacement
9456 includes the pic_offset_table_rtx register.
9458 While this is a good idea, unfortunately these constructs may
9459 be created by the "adds using lea" optimization for incorrect
9460 code like:
9462 int a;
9463 int foo(int i)
9465 return *(&a+i);
9468 This code is nonsensical, but it results in addressing the
9469 GOT table with pic_offset_table_rtx as the base. We can't
9470 easily refuse it, since it gets matched by the
9471 "addsi3" pattern, which is later split into an lea when the
9472 output register differs from the input. While this
9473 could be handled by a separate addsi pattern for this case
9474 that never results in an lea, disabling this test seems to be
9475 the easier and correct fix for the crash. */
9477 else if (GET_CODE (disp) != LABEL_REF
9478 && !CONST_INT_P (disp)
9479 && (GET_CODE (disp) != CONST
9480 || !legitimate_constant_p (disp))
9481 && (GET_CODE (disp) != SYMBOL_REF
9482 || !legitimate_constant_p (disp)))
9484 reason = "displacement is not constant";
9485 goto report_error;
9487 else if (TARGET_64BIT
9488 && !x86_64_immediate_operand (disp, VOIDmode))
9490 reason = "displacement is out of range";
9491 goto report_error;
9495 /* Everything looks valid. */
9496 return TRUE;
9498 report_error:
9499 return FALSE;
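/* For illustration, an address such as
   (plus (reg %ebx) (plus (mult (reg %eax) (const_int 4)) (const_int 16)))
   decomposes into base %ebx, index %eax, scale 4 and displacement 16 and
   passes the checks above, while a scale of 3 or a scale without an
   index is rejected with the corresponding reason string.  */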
9502 /* Return a unique alias set for the GOT. */
9504 static alias_set_type
9505 ix86_GOT_alias_set (void)
9507 static alias_set_type set = -1;
9508 if (set == -1)
9509 set = new_alias_set ();
9510 return set;
9513 /* Return a legitimate reference for ORIG (an address) using the
9514 register REG. If REG is 0, a new pseudo is generated.
9516 There are two types of references that must be handled:
9518 1. Global data references must load the address from the GOT, via
9519 the PIC reg. An insn is emitted to do this load, and the reg is
9520 returned.
9522 2. Static data references, constant pool addresses, and code labels
9523 compute the address as an offset from the GOT, whose base is in
9524 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9525 differentiate them from global data objects. The returned
9526 address is the PIC reg + an unspec constant.
9528 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
9529 reg also appears in the address. */
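/* For illustration (32-bit PIC, AT&T syntax): a global symbol is
   reached through a GOT load such as "movl foo@GOT(%ebx), %eax", while
   a local symbol or label is formed as an offset from the PIC base,
   e.g. "leal bar@GOTOFF(%ebx), %eax".  The exact sequences emitted
   below also depend on the target and code model.  */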
9531 static rtx
9532 legitimize_pic_address (rtx orig, rtx reg)
9534 rtx addr = orig;
9535 rtx new_rtx = orig;
9536 rtx base;
9538 #if TARGET_MACHO
9539 if (TARGET_MACHO && !TARGET_64BIT)
9541 if (reg == 0)
9542 reg = gen_reg_rtx (Pmode);
9543 /* Use the generic Mach-O PIC machinery. */
9544 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
9546 #endif
9548 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
9549 new_rtx = addr;
9550 else if (TARGET_64BIT
9551 && ix86_cmodel != CM_SMALL_PIC
9552 && gotoff_operand (addr, Pmode))
9554 rtx tmpreg;
9555 /* This symbol may be referenced via a displacement from the PIC
9556 base address (@GOTOFF). */
9558 if (reload_in_progress)
9559 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9560 if (GET_CODE (addr) == CONST)
9561 addr = XEXP (addr, 0);
9562 if (GET_CODE (addr) == PLUS)
9564 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9565 UNSPEC_GOTOFF);
9566 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9568 else
9569 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9570 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9571 if (!reg)
9572 tmpreg = gen_reg_rtx (Pmode);
9573 else
9574 tmpreg = reg;
9575 emit_move_insn (tmpreg, new_rtx);
9577 if (reg != 0)
9579 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9580 tmpreg, 1, OPTAB_DIRECT);
9581 new_rtx = reg;
9583 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
9585 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9587 /* This symbol may be referenced via a displacement from the PIC
9588 base address (@GOTOFF). */
9590 if (reload_in_progress)
9591 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9592 if (GET_CODE (addr) == CONST)
9593 addr = XEXP (addr, 0);
9594 if (GET_CODE (addr) == PLUS)
9596 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9597 UNSPEC_GOTOFF);
9598 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9600 else
9601 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9602 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9603 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9605 if (reg != 0)
9607 emit_move_insn (reg, new_rtx);
9608 new_rtx = reg;
9611 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9612 /* We can't use @GOTOFF for text labels on VxWorks;
9613 see gotoff_operand. */
9614 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
9616 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9618 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9619 return legitimize_dllimport_symbol (addr, true);
9620 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9621 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9622 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9624 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9625 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
9629 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
9631 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9632 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9633 new_rtx = gen_const_mem (Pmode, new_rtx);
9634 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9636 if (reg == 0)
9637 reg = gen_reg_rtx (Pmode);
9638 /* Use gen_movsi directly; otherwise the address is loaded
9639 into a register for CSE. We don't want to CSE these addresses;
9640 instead we CSE the addresses from the GOT table, so skip this. */
9641 emit_insn (gen_movsi (reg, new_rtx));
9642 new_rtx = reg;
9644 else
9646 /* This symbol must be referenced via a load from the
9647 Global Offset Table (@GOT). */
9649 if (reload_in_progress)
9650 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9651 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9652 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9653 if (TARGET_64BIT)
9654 new_rtx = force_reg (Pmode, new_rtx);
9655 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9656 new_rtx = gen_const_mem (Pmode, new_rtx);
9657 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9659 if (reg == 0)
9660 reg = gen_reg_rtx (Pmode);
9661 emit_move_insn (reg, new_rtx);
9662 new_rtx = reg;
9665 else
9667 if (CONST_INT_P (addr)
9668 && !x86_64_immediate_operand (addr, VOIDmode))
9670 if (reg)
9672 emit_move_insn (reg, addr);
9673 new_rtx = reg;
9675 else
9676 new_rtx = force_reg (Pmode, addr);
9678 else if (GET_CODE (addr) == CONST)
9680 addr = XEXP (addr, 0);
9682 /* We must match the stuff we generated before. Assume the only
9683 unspecs that can get here are ours. Not that we could do
9684 anything with them anyway.... */
9685 if (GET_CODE (addr) == UNSPEC
9686 || (GET_CODE (addr) == PLUS
9687 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9688 return orig;
9689 gcc_assert (GET_CODE (addr) == PLUS);
9691 if (GET_CODE (addr) == PLUS)
9693 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
9695 /* Check first to see if this is a constant offset from a @GOTOFF
9696 symbol reference. */
9697 if (gotoff_operand (op0, Pmode)
9698 && CONST_INT_P (op1))
9700 if (!TARGET_64BIT)
9702 if (reload_in_progress)
9703 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9704 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
9705 UNSPEC_GOTOFF);
9706 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
9707 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9708 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9710 if (reg != 0)
9712 emit_move_insn (reg, new_rtx);
9713 new_rtx = reg;
9716 else
9718 if (INTVAL (op1) < -16*1024*1024
9719 || INTVAL (op1) >= 16*1024*1024)
9721 if (!x86_64_immediate_operand (op1, Pmode))
9722 op1 = force_reg (Pmode, op1);
9723 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
9727 else
9729 base = legitimize_pic_address (XEXP (addr, 0), reg);
9730 new_rtx = legitimize_pic_address (XEXP (addr, 1),
9731 base == reg ? NULL_RTX : reg);
9733 if (CONST_INT_P (new_rtx))
9734 new_rtx = plus_constant (base, INTVAL (new_rtx));
9735 else
9737 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
9739 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
9740 new_rtx = XEXP (new_rtx, 1);
9742 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
9747 return new_rtx;
9750 /* Load the thread pointer. If TO_REG is true, force it into a register. */
9752 static rtx
9753 get_thread_pointer (int to_reg)
9755 rtx tp, reg, insn;
9757 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
9758 if (!to_reg)
9759 return tp;
9761 reg = gen_reg_rtx (Pmode);
9762 insn = gen_rtx_SET (VOIDmode, reg, tp);
9763 insn = emit_insn (insn);
9765 return reg;
9768 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
9769 false if we expect this to be used for a memory address and true if
9770 we expect to load the address into a register. */
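/* For illustration (GNU TLS): local-exec roughly reduces to adding
   foo@ntpoff to the thread pointer read from %gs:0, while initial-exec
   first loads the offset from the GOT via a foo@gotntpoff reference.
   The sequences built below vary with TARGET_64BIT, TARGET_GNU2_TLS
   and flag_pic.  */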
9772 static rtx
9773 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
9775 rtx dest, base, off, pic, tp;
9776 int type;
9778 switch (model)
9780 case TLS_MODEL_GLOBAL_DYNAMIC:
9781 dest = gen_reg_rtx (Pmode);
9782 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9784 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9786 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
9788 start_sequence ();
9789 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
9790 insns = get_insns ();
9791 end_sequence ();
9793 RTL_CONST_CALL_P (insns) = 1;
9794 emit_libcall_block (insns, dest, rax, x);
9796 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9797 emit_insn (gen_tls_global_dynamic_64 (dest, x));
9798 else
9799 emit_insn (gen_tls_global_dynamic_32 (dest, x));
9801 if (TARGET_GNU2_TLS)
9803 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
9805 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9807 break;
9809 case TLS_MODEL_LOCAL_DYNAMIC:
9810 base = gen_reg_rtx (Pmode);
9811 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9813 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9815 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
9817 start_sequence ();
9818 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
9819 insns = get_insns ();
9820 end_sequence ();
9822 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
9823 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
9824 RTL_CONST_CALL_P (insns) = 1;
9825 emit_libcall_block (insns, base, rax, note);
9827 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9828 emit_insn (gen_tls_local_dynamic_base_64 (base));
9829 else
9830 emit_insn (gen_tls_local_dynamic_base_32 (base));
9832 if (TARGET_GNU2_TLS)
9834 rtx x = ix86_tls_module_base ();
9836 set_unique_reg_note (get_last_insn (), REG_EQUIV,
9837 gen_rtx_MINUS (Pmode, x, tp));
9840 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
9841 off = gen_rtx_CONST (Pmode, off);
9843 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
9845 if (TARGET_GNU2_TLS)
9847 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
9849 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9852 break;
9854 case TLS_MODEL_INITIAL_EXEC:
9855 if (TARGET_64BIT)
9857 pic = NULL;
9858 type = UNSPEC_GOTNTPOFF;
9860 else if (flag_pic)
9862 if (reload_in_progress)
9863 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9864 pic = pic_offset_table_rtx;
9865 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
9867 else if (!TARGET_ANY_GNU_TLS)
9869 pic = gen_reg_rtx (Pmode);
9870 emit_insn (gen_set_got (pic));
9871 type = UNSPEC_GOTTPOFF;
9873 else
9875 pic = NULL;
9876 type = UNSPEC_INDNTPOFF;
9879 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
9880 off = gen_rtx_CONST (Pmode, off);
9881 if (pic)
9882 off = gen_rtx_PLUS (Pmode, pic, off);
9883 off = gen_const_mem (Pmode, off);
9884 set_mem_alias_set (off, ix86_GOT_alias_set ());
9886 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9888 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9889 off = force_reg (Pmode, off);
9890 return gen_rtx_PLUS (Pmode, base, off);
9892 else
9894 base = get_thread_pointer (true);
9895 dest = gen_reg_rtx (Pmode);
9896 emit_insn (gen_subsi3 (dest, base, off));
9898 break;
9900 case TLS_MODEL_LOCAL_EXEC:
9901 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
9902 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9903 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
9904 off = gen_rtx_CONST (Pmode, off);
9906 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9908 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9909 return gen_rtx_PLUS (Pmode, base, off);
9911 else
9913 base = get_thread_pointer (true);
9914 dest = gen_reg_rtx (Pmode);
9915 emit_insn (gen_subsi3 (dest, base, off));
9917 break;
9919 default:
9920 gcc_unreachable ();
9923 return dest;
9926 /* Create or return the unique __imp_DECL dllimport symbol corresponding
9927 to symbol DECL. */
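/* For illustration, a reference to a dllimported foo is redirected
   through its import slot: the decl built below is named "__imp__foo"
   on targets with a user label prefix ("__imp_foo" otherwise, or for
   fastcall symbols), and its DECL_RTL is a memory reference to that
   slot.  */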
9929 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
9930 htab_t dllimport_map;
9932 static tree
9933 get_dllimport_decl (tree decl)
9935 struct tree_map *h, in;
9936 void **loc;
9937 const char *name;
9938 const char *prefix;
9939 size_t namelen, prefixlen;
9940 char *imp_name;
9941 tree to;
9942 rtx rtl;
9944 if (!dllimport_map)
9945 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
9947 in.hash = htab_hash_pointer (decl);
9948 in.base.from = decl;
9949 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
9950 h = (struct tree_map *) *loc;
9951 if (h)
9952 return h->to;
9954 *loc = h = GGC_NEW (struct tree_map);
9955 h->hash = in.hash;
9956 h->base.from = decl;
9957 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
9958 DECL_ARTIFICIAL (to) = 1;
9959 DECL_IGNORED_P (to) = 1;
9960 DECL_EXTERNAL (to) = 1;
9961 TREE_READONLY (to) = 1;
9963 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
9964 name = targetm.strip_name_encoding (name);
9965 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
9966 ? "*__imp_" : "*__imp__";
9967 namelen = strlen (name);
9968 prefixlen = strlen (prefix);
9969 imp_name = (char *) alloca (namelen + prefixlen + 1);
9970 memcpy (imp_name, prefix, prefixlen);
9971 memcpy (imp_name + prefixlen, name, namelen + 1);
9973 name = ggc_alloc_string (imp_name, namelen + prefixlen);
9974 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
9975 SET_SYMBOL_REF_DECL (rtl, to);
9976 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
9978 rtl = gen_const_mem (Pmode, rtl);
9979 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
9981 SET_DECL_RTL (to, rtl);
9982 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
9984 return to;
9987 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
9988 true if we require the result be a register. */
9990 static rtx
9991 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
9993 tree imp_decl;
9994 rtx x;
9996 gcc_assert (SYMBOL_REF_DECL (symbol));
9997 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
9999 x = DECL_RTL (imp_decl);
10000 if (want_reg)
10001 x = force_reg (Pmode, x);
10002 return x;
10005 /* Try machine-dependent ways of modifying an illegitimate address
10006 to be legitimate. If we find one, return the new, valid address.
10007 This macro is used in only one place: `memory_address' in explow.c.
10009 OLDX is the address as it was before break_out_memory_refs was called.
10010 In some cases it is useful to look at this to decide what needs to be done.
10012 MODE and WIN are passed so that this macro can use
10013 GO_IF_LEGITIMATE_ADDRESS.
10015 It is always safe for this macro to do nothing. It exists to recognize
10016 opportunities to optimize the output.
10018 For the 80386, we handle X+REG by loading X into a register R and
10019 using R+REG. R will go in a general reg and indexing will be used.
10020 However, if REG is a broken-out memory address or multiplication,
10021 nothing needs to be done because REG can certainly go in a general reg.
10023 When -fpic is used, special handling is needed for symbolic references.
10024 See comments by legitimize_pic_address in i386.c for details. */
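/* For illustration, (plus (ashift (reg %eax) (const_int 2)) (reg %ebx))
   is canonicalized below into
   (plus (mult (reg %eax) (const_int 4)) (reg %ebx)), the scaled-index
   form that GO_IF_LEGITIMATE_ADDRESS recognizes.  */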
10027 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
10029 int changed = 0;
10030 unsigned log;
10032 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
10033 if (log)
10034 return legitimize_tls_address (x, (enum tls_model) log, false);
10035 if (GET_CODE (x) == CONST
10036 && GET_CODE (XEXP (x, 0)) == PLUS
10037 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10038 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
10040 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
10041 (enum tls_model) log, false);
10042 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10045 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10047 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
10048 return legitimize_dllimport_symbol (x, true);
10049 if (GET_CODE (x) == CONST
10050 && GET_CODE (XEXP (x, 0)) == PLUS
10051 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10052 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
10054 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
10055 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10059 if (flag_pic && SYMBOLIC_CONST (x))
10060 return legitimize_pic_address (x, 0);
10062 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
10063 if (GET_CODE (x) == ASHIFT
10064 && CONST_INT_P (XEXP (x, 1))
10065 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
10067 changed = 1;
10068 log = INTVAL (XEXP (x, 1));
10069 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
10070 GEN_INT (1 << log));
10073 if (GET_CODE (x) == PLUS)
10075 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
10077 if (GET_CODE (XEXP (x, 0)) == ASHIFT
10078 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10079 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
10081 changed = 1;
10082 log = INTVAL (XEXP (XEXP (x, 0), 1));
10083 XEXP (x, 0) = gen_rtx_MULT (Pmode,
10084 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
10085 GEN_INT (1 << log));
10088 if (GET_CODE (XEXP (x, 1)) == ASHIFT
10089 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10090 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
10092 changed = 1;
10093 log = INTVAL (XEXP (XEXP (x, 1), 1));
10094 XEXP (x, 1) = gen_rtx_MULT (Pmode,
10095 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
10096 GEN_INT (1 << log));
10099 /* Put multiply first if it isn't already. */
10100 if (GET_CODE (XEXP (x, 1)) == MULT)
10102 rtx tmp = XEXP (x, 0);
10103 XEXP (x, 0) = XEXP (x, 1);
10104 XEXP (x, 1) = tmp;
10105 changed = 1;
10108 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
10109 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
10110 created by virtual register instantiation, register elimination, and
10111 similar optimizations. */
10112 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
10114 changed = 1;
10115 x = gen_rtx_PLUS (Pmode,
10116 gen_rtx_PLUS (Pmode, XEXP (x, 0),
10117 XEXP (XEXP (x, 1), 0)),
10118 XEXP (XEXP (x, 1), 1));
10121 /* Canonicalize
10122 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
10123 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
10124 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
10125 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10126 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
10127 && CONSTANT_P (XEXP (x, 1)))
10129 rtx constant;
10130 rtx other = NULL_RTX;
10132 if (CONST_INT_P (XEXP (x, 1)))
10134 constant = XEXP (x, 1);
10135 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
10137 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
10139 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
10140 other = XEXP (x, 1);
10142 else
10143 constant = 0;
10145 if (constant)
10147 changed = 1;
10148 x = gen_rtx_PLUS (Pmode,
10149 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
10150 XEXP (XEXP (XEXP (x, 0), 1), 0)),
10151 plus_constant (other, INTVAL (constant)));
10155 if (changed && legitimate_address_p (mode, x, FALSE))
10156 return x;
10158 if (GET_CODE (XEXP (x, 0)) == MULT)
10160 changed = 1;
10161 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
10164 if (GET_CODE (XEXP (x, 1)) == MULT)
10166 changed = 1;
10167 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
10170 if (changed
10171 && REG_P (XEXP (x, 1))
10172 && REG_P (XEXP (x, 0)))
10173 return x;
10175 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
10177 changed = 1;
10178 x = legitimize_pic_address (x, 0);
10181 if (changed && legitimate_address_p (mode, x, FALSE))
10182 return x;
10184 if (REG_P (XEXP (x, 0)))
10186 rtx temp = gen_reg_rtx (Pmode);
10187 rtx val = force_operand (XEXP (x, 1), temp);
10188 if (val != temp)
10189 emit_move_insn (temp, val);
10191 XEXP (x, 1) = temp;
10192 return x;
10195 else if (REG_P (XEXP (x, 1)))
10197 rtx temp = gen_reg_rtx (Pmode);
10198 rtx val = force_operand (XEXP (x, 0), temp);
10199 if (val != temp)
10200 emit_move_insn (temp, val);
10202 XEXP (x, 0) = temp;
10203 return x;
10207 return x;
10210 /* Print an integer constant expression in assembler syntax. Addition
10211 and subtraction are the only arithmetic that may appear in these
10212 expressions. FILE is the stdio stream to write to, X is the rtx, and
10213 CODE is the operand print code from the output string. */
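/* For illustration, (const (unspec [foo] UNSPEC_GOTOFF)) is printed as
   "foo@GOTOFF" and (plus (symbol_ref "foo") (const_int 4)) as "foo+4";
   only the additive arithmetic handled below may appear here.  */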
10215 static void
10216 output_pic_addr_const (FILE *file, rtx x, int code)
10218 char buf[256];
10220 switch (GET_CODE (x))
10222 case PC:
10223 gcc_assert (flag_pic);
10224 putc ('.', file);
10225 break;
10227 case SYMBOL_REF:
10228 if (! TARGET_MACHO || TARGET_64BIT)
10229 output_addr_const (file, x);
10230 else
10232 const char *name = XSTR (x, 0);
10234 /* Mark the decl as referenced so that cgraph will
10235 output the function. */
10236 if (SYMBOL_REF_DECL (x))
10237 mark_decl_referenced (SYMBOL_REF_DECL (x));
10239 #if TARGET_MACHO
10240 if (MACHOPIC_INDIRECT
10241 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
10242 name = machopic_indirection_name (x, /*stub_p=*/true);
10243 #endif
10244 assemble_name (file, name);
10246 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
10247 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
10248 fputs ("@PLT", file);
10249 break;
10251 case LABEL_REF:
10252 x = XEXP (x, 0);
10253 /* FALLTHRU */
10254 case CODE_LABEL:
10255 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10256 assemble_name (asm_out_file, buf);
10257 break;
10259 case CONST_INT:
10260 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10261 break;
10263 case CONST:
10264 /* This used to output parentheses around the expression,
10265 but that does not work on the 386 (either ATT or BSD assembler). */
10266 output_pic_addr_const (file, XEXP (x, 0), code);
10267 break;
10269 case CONST_DOUBLE:
10270 if (GET_MODE (x) == VOIDmode)
10272 /* We can use %d if the number is <32 bits and positive. */
10273 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10274 fprintf (file, "0x%lx%08lx",
10275 (unsigned long) CONST_DOUBLE_HIGH (x),
10276 (unsigned long) CONST_DOUBLE_LOW (x));
10277 else
10278 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10280 else
10281 /* We can't handle floating point constants;
10282 PRINT_OPERAND must handle them. */
10283 output_operand_lossage ("floating constant misused");
10284 break;
10286 case PLUS:
10287 /* Some assemblers need integer constants to appear first. */
10288 if (CONST_INT_P (XEXP (x, 0)))
10290 output_pic_addr_const (file, XEXP (x, 0), code);
10291 putc ('+', file);
10292 output_pic_addr_const (file, XEXP (x, 1), code);
10294 else
10296 gcc_assert (CONST_INT_P (XEXP (x, 1)));
10297 output_pic_addr_const (file, XEXP (x, 1), code);
10298 putc ('+', file);
10299 output_pic_addr_const (file, XEXP (x, 0), code);
10301 break;
10303 case MINUS:
10304 if (!TARGET_MACHO)
10305 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10306 output_pic_addr_const (file, XEXP (x, 0), code);
10307 putc ('-', file);
10308 output_pic_addr_const (file, XEXP (x, 1), code);
10309 if (!TARGET_MACHO)
10310 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
10311 break;
10313 case UNSPEC:
10314 gcc_assert (XVECLEN (x, 0) == 1);
10315 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10316 switch (XINT (x, 1))
10318 case UNSPEC_GOT:
10319 fputs ("@GOT", file);
10320 break;
10321 case UNSPEC_GOTOFF:
10322 fputs ("@GOTOFF", file);
10323 break;
10324 case UNSPEC_PLTOFF:
10325 fputs ("@PLTOFF", file);
10326 break;
10327 case UNSPEC_GOTPCREL:
10328 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10329 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10330 break;
10331 case UNSPEC_GOTTPOFF:
10332 /* FIXME: This might be @TPOFF in Sun ld too. */
10333 fputs ("@GOTTPOFF", file);
10334 break;
10335 case UNSPEC_TPOFF:
10336 fputs ("@TPOFF", file);
10337 break;
10338 case UNSPEC_NTPOFF:
10339 if (TARGET_64BIT)
10340 fputs ("@TPOFF", file);
10341 else
10342 fputs ("@NTPOFF", file);
10343 break;
10344 case UNSPEC_DTPOFF:
10345 fputs ("@DTPOFF", file);
10346 break;
10347 case UNSPEC_GOTNTPOFF:
10348 if (TARGET_64BIT)
10349 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10350 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
10351 else
10352 fputs ("@GOTNTPOFF", file);
10353 break;
10354 case UNSPEC_INDNTPOFF:
10355 fputs ("@INDNTPOFF", file);
10356 break;
10357 #if TARGET_MACHO
10358 case UNSPEC_MACHOPIC_OFFSET:
10359 putc ('-', file);
10360 machopic_output_function_base_name (file);
10361 break;
10362 #endif
10363 default:
10364 output_operand_lossage ("invalid UNSPEC as operand");
10365 break;
10367 break;
10369 default:
10370 output_operand_lossage ("invalid expression as operand");
10374 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10375 We need to emit DTP-relative relocations. */
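/* For illustration, a 4-byte request emits roughly ".long foo@DTPOFF"
   and an 8-byte request ".long foo@DTPOFF, 0", with ASM_LONG supplying
   the 32-bit integer directive.  */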
10377 static void ATTRIBUTE_UNUSED
10378 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
10380 fputs (ASM_LONG, file);
10381 output_addr_const (file, x);
10382 fputs ("@DTPOFF", file);
10383 switch (size)
10385 case 4:
10386 break;
10387 case 8:
10388 fputs (", 0", file);
10389 break;
10390 default:
10391 gcc_unreachable ();
10395 /* Return true if X is a representation of the PIC register. This copes
10396 with calls from ix86_find_base_term, where the register might have
10397 been replaced by a cselib value. */
10399 static bool
10400 ix86_pic_register_p (rtx x)
10402 if (GET_CODE (x) == VALUE)
10403 return (pic_offset_table_rtx
10404 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
10405 else
10406 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
10409 /* In the name of slightly smaller debug output, and to cater to
10410 general assembler lossage, recognize PIC+GOTOFF and turn it back
10411 into a direct symbol reference.
10413 On Darwin, this is necessary to avoid a crash, because Darwin
10414 has a different PIC label for each routine but the DWARF debugging
10415 information is not associated with any particular routine, so it's
10416 necessary to remove references to the PIC label from RTL stored by
10417 the DWARF output code. */
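/* For illustration, (plus (reg %ebx) (const (unspec [foo]
   UNSPEC_GOTOFF))) is turned back into the plain (symbol_ref "foo"),
   with any register and constant addends re-applied around the
   recovered symbol.  */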
10419 static rtx
10420 ix86_delegitimize_address (rtx orig_x)
10422 rtx x = orig_x;
10423 /* reg_addend is NULL or a multiple of some register. */
10424 rtx reg_addend = NULL_RTX;
10425 /* const_addend is NULL or a const_int. */
10426 rtx const_addend = NULL_RTX;
10427 /* This is the result, or NULL. */
10428 rtx result = NULL_RTX;
10430 if (MEM_P (x))
10431 x = XEXP (x, 0);
10433 if (TARGET_64BIT)
10435 if (GET_CODE (x) != CONST
10436 || GET_CODE (XEXP (x, 0)) != UNSPEC
10437 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10438 || !MEM_P (orig_x))
10439 return orig_x;
10440 return XVECEXP (XEXP (x, 0), 0, 0);
10443 if (GET_CODE (x) != PLUS
10444 || GET_CODE (XEXP (x, 1)) != CONST)
10445 return orig_x;
10447 if (ix86_pic_register_p (XEXP (x, 0)))
10448 /* %ebx + GOT/GOTOFF */
10450 else if (GET_CODE (XEXP (x, 0)) == PLUS)
10452 /* %ebx + %reg * scale + GOT/GOTOFF */
10453 reg_addend = XEXP (x, 0);
10454 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10455 reg_addend = XEXP (reg_addend, 1);
10456 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10457 reg_addend = XEXP (reg_addend, 0);
10458 else
10459 return orig_x;
10460 if (!REG_P (reg_addend)
10461 && GET_CODE (reg_addend) != MULT
10462 && GET_CODE (reg_addend) != ASHIFT)
10463 return orig_x;
10465 else
10466 return orig_x;
10468 x = XEXP (XEXP (x, 1), 0);
10469 if (GET_CODE (x) == PLUS
10470 && CONST_INT_P (XEXP (x, 1)))
10472 const_addend = XEXP (x, 1);
10473 x = XEXP (x, 0);
10476 if (GET_CODE (x) == UNSPEC
10477 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10478 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10479 result = XVECEXP (x, 0, 0);
10481 if (TARGET_MACHO && darwin_local_data_pic (x)
10482 && !MEM_P (orig_x))
10483 result = XVECEXP (x, 0, 0);
10485 if (! result)
10486 return orig_x;
10488 if (const_addend)
10489 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10490 if (reg_addend)
10491 result = gen_rtx_PLUS (Pmode, reg_addend, result);
10492 return result;
10495 /* If X is a machine specific address (i.e. a symbol or label being
10496 referenced as a displacement from the GOT implemented using an
10497 UNSPEC), then return the base term. Otherwise return X. */
10500 ix86_find_base_term (rtx x)
10502 rtx term;
10504 if (TARGET_64BIT)
10506 if (GET_CODE (x) != CONST)
10507 return x;
10508 term = XEXP (x, 0);
10509 if (GET_CODE (term) == PLUS
10510 && (CONST_INT_P (XEXP (term, 1))
10511 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10512 term = XEXP (term, 0);
10513 if (GET_CODE (term) != UNSPEC
10514 || XINT (term, 1) != UNSPEC_GOTPCREL)
10515 return x;
10517 return XVECEXP (term, 0, 0);
10520 return ix86_delegitimize_address (x);
10523 static void
10524 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10525 int fp, FILE *file)
10527 const char *suffix;
10529 if (mode == CCFPmode || mode == CCFPUmode)
10531 enum rtx_code second_code, bypass_code;
10532 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
10533 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
10534 code = ix86_fp_compare_code_to_integer (code);
10535 mode = CCmode;
10537 if (reverse)
10538 code = reverse_condition (code);
10540 switch (code)
10542 case EQ:
10543 switch (mode)
10545 case CCAmode:
10546 suffix = "a";
10547 break;
10549 case CCCmode:
10550 suffix = "c";
10551 break;
10553 case CCOmode:
10554 suffix = "o";
10555 break;
10557 case CCSmode:
10558 suffix = "s";
10559 break;
10561 default:
10562 suffix = "e";
10564 break;
10565 case NE:
10566 switch (mode)
10568 case CCAmode:
10569 suffix = "na";
10570 break;
10572 case CCCmode:
10573 suffix = "nc";
10574 break;
10576 case CCOmode:
10577 suffix = "no";
10578 break;
10580 case CCSmode:
10581 suffix = "ns";
10582 break;
10584 default:
10585 suffix = "ne";
10587 break;
10588 case GT:
10589 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10590 suffix = "g";
10591 break;
10592 case GTU:
10593 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10594 Those same assemblers have the same but opposite lossage on cmov. */
10595 if (mode == CCmode)
10596 suffix = fp ? "nbe" : "a";
10597 else if (mode == CCCmode)
10598 suffix = "b";
10599 else
10600 gcc_unreachable ();
10601 break;
10602 case LT:
10603 switch (mode)
10605 case CCNOmode:
10606 case CCGOCmode:
10607 suffix = "s";
10608 break;
10610 case CCmode:
10611 case CCGCmode:
10612 suffix = "l";
10613 break;
10615 default:
10616 gcc_unreachable ();
10618 break;
10619 case LTU:
10620 gcc_assert (mode == CCmode || mode == CCCmode);
10621 suffix = "b";
10622 break;
10623 case GE:
10624 switch (mode)
10626 case CCNOmode:
10627 case CCGOCmode:
10628 suffix = "ns";
10629 break;
10631 case CCmode:
10632 case CCGCmode:
10633 suffix = "ge";
10634 break;
10636 default:
10637 gcc_unreachable ();
10639 break;
10640 case GEU:
10641 /* ??? As above. */
10642 gcc_assert (mode == CCmode || mode == CCCmode);
10643 suffix = fp ? "nb" : "ae";
10644 break;
10645 case LE:
10646 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10647 suffix = "le";
10648 break;
10649 case LEU:
10650 /* ??? As above. */
10651 if (mode == CCmode)
10652 suffix = "be";
10653 else if (mode == CCCmode)
10654 suffix = fp ? "nb" : "ae";
10655 else
10656 gcc_unreachable ();
10657 break;
10658 case UNORDERED:
10659 suffix = fp ? "u" : "p";
10660 break;
10661 case ORDERED:
10662 suffix = fp ? "nu" : "np";
10663 break;
10664 default:
10665 gcc_unreachable ();
10667 fputs (suffix, file);
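/* For illustration, (GT, CCGCmode) prints "g" and its reversed form
   "le", while (GTU, CCmode) prints "a" ("nbe" for fcmov); FP compare
   modes are first mapped to integer conditions above.  */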
10670 /* Print the name of register X to FILE based on its machine mode and number.
10671 If CODE is 'w', pretend the mode is HImode.
10672 If CODE is 'b', pretend the mode is QImode.
10673 If CODE is 'k', pretend the mode is SImode.
10674 If CODE is 'q', pretend the mode is DImode.
10675 If CODE is 'x', pretend the mode is V4SFmode.
10676 If CODE is 't', pretend the mode is V8SFmode.
10677 If CODE is 'h', pretend the reg is the 'high' byte register.
10678 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
10679 If CODE is 'd', duplicate the operand for AVX instruction.
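/* For illustration, print_reg on (reg:SI ax) prints "al" with code 'b',
   "ah" with code 'h', "eax" with code 'k' and, on a 64-bit target,
   "rax" with code 'q' (AT&T dialect prepends the '%').  */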
10682 void
10683 print_reg (rtx x, int code, FILE *file)
10685 const char *reg;
10686 bool duplicated = code == 'd' && TARGET_AVX;
10688 gcc_assert (x == pc_rtx
10689 || (REGNO (x) != ARG_POINTER_REGNUM
10690 && REGNO (x) != FRAME_POINTER_REGNUM
10691 && REGNO (x) != FLAGS_REG
10692 && REGNO (x) != FPSR_REG
10693 && REGNO (x) != FPCR_REG));
10695 if (ASSEMBLER_DIALECT == ASM_ATT)
10696 putc ('%', file);
10698 if (x == pc_rtx)
10700 gcc_assert (TARGET_64BIT);
10701 fputs ("rip", file);
10702 return;
10705 if (code == 'w' || MMX_REG_P (x))
10706 code = 2;
10707 else if (code == 'b')
10708 code = 1;
10709 else if (code == 'k')
10710 code = 4;
10711 else if (code == 'q')
10712 code = 8;
10713 else if (code == 'y')
10714 code = 3;
10715 else if (code == 'h')
10716 code = 0;
10717 else if (code == 'x')
10718 code = 16;
10719 else if (code == 't')
10720 code = 32;
10721 else
10722 code = GET_MODE_SIZE (GET_MODE (x));
10724 /* Irritatingly, the AMD extended registers use a different naming
10725 convention from the normal registers. */
10726 if (REX_INT_REG_P (x))
10728 gcc_assert (TARGET_64BIT);
10729 switch (code)
10731 case 0:
10732 error ("extended registers have no high halves");
10733 break;
10734 case 1:
10735 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
10736 break;
10737 case 2:
10738 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
10739 break;
10740 case 4:
10741 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
10742 break;
10743 case 8:
10744 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
10745 break;
10746 default:
10747 error ("unsupported operand size for extended register");
10748 break;
10750 return;
10753 reg = NULL;
10754 switch (code)
10756 case 3:
10757 if (STACK_TOP_P (x))
10759 reg = "st(0)";
10760 break;
10762 /* FALLTHRU */
10763 case 8:
10764 case 4:
10765 case 12:
10766 if (! ANY_FP_REG_P (x))
10767 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
10768 /* FALLTHRU */
10769 case 16:
10770 case 2:
10771 normal:
10772 reg = hi_reg_name[REGNO (x)];
10773 break;
10774 case 1:
10775 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
10776 goto normal;
10777 reg = qi_reg_name[REGNO (x)];
10778 break;
10779 case 0:
10780 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
10781 goto normal;
10782 reg = qi_high_reg_name[REGNO (x)];
10783 break;
10784 case 32:
10785 if (SSE_REG_P (x))
10787 gcc_assert (!duplicated);
10788 putc ('y', file);
10789 fputs (hi_reg_name[REGNO (x)] + 1, file);
10790 return;
10792 break;
10793 default:
10794 gcc_unreachable ();
10797 fputs (reg, file);
10798 if (duplicated)
10800 if (ASSEMBLER_DIALECT == ASM_ATT)
10801 fprintf (file, ", %%%s", reg);
10802 else
10803 fprintf (file, ", %s", reg);
10807 /* Locate some local-dynamic symbol still in use by this function
10808 so that we can print its name in some tls_local_dynamic_base
10809 pattern. */
10811 static int
10812 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
10814 rtx x = *px;
10816 if (GET_CODE (x) == SYMBOL_REF
10817 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
10819 cfun->machine->some_ld_name = XSTR (x, 0);
10820 return 1;
10823 return 0;
10826 static const char *
10827 get_some_local_dynamic_name (void)
10829 rtx insn;
10831 if (cfun->machine->some_ld_name)
10832 return cfun->machine->some_ld_name;
10834 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
10835 if (INSN_P (insn)
10836 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
10837 return cfun->machine->some_ld_name;
10839 gcc_unreachable ();
10842 /* Meaning of CODE:
10843 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
10844 C -- print opcode suffix for set/cmov insn.
10845 c -- like C, but print reversed condition
10846 E,e -- likewise, but for compare-and-branch fused insn.
10847 F,f -- likewise, but for floating-point.
10848 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
10849 otherwise nothing
10850 R -- print the prefix for register names.
10851 z -- print the opcode suffix for the size of the current operand.
10852 * -- print a star (in certain assembler syntax)
10853 A -- print an absolute memory reference.
10854 w -- print the operand as if it's a "word" (HImode) even if it isn't.
10855 s -- print a shift double count, followed by the assembler's argument
10856 delimiter.
10857 b -- print the QImode name of the register for the indicated operand.
10858 %b0 would print %al if operands[0] is reg 0.
10859 w -- likewise, print the HImode name of the register.
10860 k -- likewise, print the SImode name of the register.
10861 q -- likewise, print the DImode name of the register.
10862 x -- likewise, print the V4SFmode name of the register.
10863 t -- likewise, print the V8SFmode name of the register.
10864 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
10865 y -- print "st(0)" instead of "st" as a register.
10866 d -- print duplicated register operand for AVX instruction.
10867 D -- print condition for SSE cmp instruction.
10868 P -- if PIC, print an @PLT suffix.
10869 X -- don't print any sort of PIC '@' suffix for a symbol.
10870 & -- print some in-use local-dynamic symbol name.
10871 H -- print a memory address offset by 8; used for sse high-parts
10872 Y -- print condition for SSE5 com* instruction.
10873 + -- print a branch hint as 'cs' or 'ds' prefix
10874 ; -- print a semicolon (after prefixes due to bug in older gas).
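/* For illustration, "%k1" in an insn template prints the SImode name of
   operand 1 (e.g. "%eax" in AT&T syntax), while "%z0" appends the size
   suffix for operand 0, such as 'l' for SImode.  */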
10877 void
10878 print_operand (FILE *file, rtx x, int code)
10880 if (code)
10882 switch (code)
10884 case '*':
10885 if (ASSEMBLER_DIALECT == ASM_ATT)
10886 putc ('*', file);
10887 return;
10889 case '&':
10890 assemble_name (file, get_some_local_dynamic_name ());
10891 return;
10893 case 'A':
10894 switch (ASSEMBLER_DIALECT)
10896 case ASM_ATT:
10897 putc ('*', file);
10898 break;
10900 case ASM_INTEL:
10901 /* Intel syntax. For absolute addresses, registers should not
10902 be surrounded by brackets. */
10903 if (!REG_P (x))
10905 putc ('[', file);
10906 PRINT_OPERAND (file, x, 0);
10907 putc (']', file);
10908 return;
10910 break;
10912 default:
10913 gcc_unreachable ();
10916 PRINT_OPERAND (file, x, 0);
10917 return;
10920 case 'L':
10921 if (ASSEMBLER_DIALECT == ASM_ATT)
10922 putc ('l', file);
10923 return;
10925 case 'W':
10926 if (ASSEMBLER_DIALECT == ASM_ATT)
10927 putc ('w', file);
10928 return;
10930 case 'B':
10931 if (ASSEMBLER_DIALECT == ASM_ATT)
10932 putc ('b', file);
10933 return;
10935 case 'Q':
10936 if (ASSEMBLER_DIALECT == ASM_ATT)
10937 putc ('l', file);
10938 return;
10940 case 'S':
10941 if (ASSEMBLER_DIALECT == ASM_ATT)
10942 putc ('s', file);
10943 return;
10945 case 'T':
10946 if (ASSEMBLER_DIALECT == ASM_ATT)
10947 putc ('t', file);
10948 return;
10950 case 'z':
10951 /* 387 opcodes don't get size suffixes if the operands are
10952 registers. */
10953 if (STACK_REG_P (x))
10954 return;
10956 /* Likewise if using Intel opcodes. */
10957 if (ASSEMBLER_DIALECT == ASM_INTEL)
10958 return;
10960 /* This is the size of the op, derived from the size of the operand. */
10961 switch (GET_MODE_SIZE (GET_MODE (x)))
10963 case 1:
10964 putc ('b', file);
10965 return;
10967 case 2:
10968 if (MEM_P (x))
10970 #ifdef HAVE_GAS_FILDS_FISTS
10971 putc ('s', file);
10972 #endif
10973 return;
10975 else
10976 putc ('w', file);
10977 return;
10979 case 4:
10980 if (GET_MODE (x) == SFmode)
10982 putc ('s', file);
10983 return;
10985 else
10986 putc ('l', file);
10987 return;
10989 case 12:
10990 case 16:
10991 putc ('t', file);
10992 return;
10994 case 8:
10995 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
10997 if (MEM_P (x))
10999 #ifdef GAS_MNEMONICS
11000 putc ('q', file);
11001 #else
11002 putc ('l', file);
11003 putc ('l', file);
11004 #endif
11006 else
11007 putc ('q', file);
11009 else
11010 putc ('l', file);
11011 return;
11013 default:
11014 gcc_unreachable ();
11017 case 'd':
11018 case 'b':
11019 case 'w':
11020 case 'k':
11021 case 'q':
11022 case 'h':
11023 case 't':
11024 case 'y':
11025 case 'x':
11026 case 'X':
11027 case 'P':
11028 break;
11030 case 's':
11031 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
11033 PRINT_OPERAND (file, x, 0);
11034 fputs (", ", file);
11036 return;
11038 case 'D':
11039 /* A little bit of braindamage here: the SSE compare instructions
11040 use completely different names for the comparisons than the
11041 fp conditional moves do. */
11042 if (TARGET_AVX)
11044 switch (GET_CODE (x))
11046 case EQ:
11047 fputs ("eq", file);
11048 break;
11049 case UNEQ:
11050 fputs ("eq_us", file);
11051 break;
11052 case LT:
11053 fputs ("lt", file);
11054 break;
11055 case UNLT:
11056 fputs ("nge", file);
11057 break;
11058 case LE:
11059 fputs ("le", file);
11060 break;
11061 case UNLE:
11062 fputs ("ngt", file);
11063 break;
11064 case UNORDERED:
11065 fputs ("unord", file);
11066 break;
11067 case NE:
11068 fputs ("neq", file);
11069 break;
11070 case LTGT:
11071 fputs ("neq_oq", file);
11072 break;
11073 case GE:
11074 fputs ("ge", file);
11075 break;
11076 case UNGE:
11077 fputs ("nlt", file);
11078 break;
11079 case GT:
11080 fputs ("gt", file);
11081 break;
11082 case UNGT:
11083 fputs ("nle", file);
11084 break;
11085 case ORDERED:
11086 fputs ("ord", file);
11087 break;
11088 default:
11089 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11090 return;
11093 else
11095 switch (GET_CODE (x))
11097 case EQ:
11098 case UNEQ:
11099 fputs ("eq", file);
11100 break;
11101 case LT:
11102 case UNLT:
11103 fputs ("lt", file);
11104 break;
11105 case LE:
11106 case UNLE:
11107 fputs ("le", file);
11108 break;
11109 case UNORDERED:
11110 fputs ("unord", file);
11111 break;
11112 case NE:
11113 case LTGT:
11114 fputs ("neq", file);
11115 break;
11116 case UNGE:
11117 case GE:
11118 fputs ("nlt", file);
11119 break;
11120 case UNGT:
11121 case GT:
11122 fputs ("nle", file);
11123 break;
11124 case ORDERED:
11125 fputs ("ord", file);
11126 break;
11127 default:
11128 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11129 return;
11132 return;
11133 case 'O':
11134 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11135 if (ASSEMBLER_DIALECT == ASM_ATT)
11137 switch (GET_MODE (x))
11139 case HImode: putc ('w', file); break;
11140 case SImode:
11141 case SFmode: putc ('l', file); break;
11142 case DImode:
11143 case DFmode: putc ('q', file); break;
11144 default: gcc_unreachable ();
11146 putc ('.', file);
11148 #endif
11149 return;
11150 case 'C':
11151 if (!COMPARISON_P (x))
11153 output_operand_lossage ("operand is neither a constant nor a "
11154 "condition code, invalid operand code "
11155 "'C'");
11156 return;
11158 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
11159 return;
11160 case 'F':
11161 if (!COMPARISON_P (x))
11163 output_operand_lossage ("operand is neither a constant nor a "
11164 "condition code, invalid operand code "
11165 "'F'");
11166 return;
11168 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11169 if (ASSEMBLER_DIALECT == ASM_ATT)
11170 putc ('.', file);
11171 #endif
11172 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
11173 return;
11175 /* Like above, but reverse condition */
11176 case 'c':
11177 /* Check to see if argument to %c is really a constant
11178 and not a condition code which needs to be reversed. */
11179 if (!COMPARISON_P (x))
11181 output_operand_lossage ("operand is neither a constant nor a "
11182 "condition code, invalid operand "
11183 "code 'c'");
11184 return;
11186 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
11187 return;
11188 case 'f':
11189 if (!COMPARISON_P (x))
11191 output_operand_lossage ("operand is neither a constant nor a "
11192 "condition code, invalid operand "
11193 "code 'f'");
11194 return;
11196 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11197 if (ASSEMBLER_DIALECT == ASM_ATT)
11198 putc ('.', file);
11199 #endif
11200 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
11201 return;
11203 case 'E':
11204 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
11205 return;
11207 case 'e':
11208 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
11209 return;
11211 case 'H':
11212 /* It doesn't actually matter what mode we use here, as we're
11213 only going to use this for printing. */
11214 x = adjust_address_nv (x, DImode, 8);
11215 break;
11217 case '+':
11219 rtx x;
11221 if (!optimize
11222 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
11223 return;
11225 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
11226 if (x)
11228 int pred_val = INTVAL (XEXP (x, 0));
11230 if (pred_val < REG_BR_PROB_BASE * 45 / 100
11231 || pred_val > REG_BR_PROB_BASE * 55 / 100)
11233 int taken = pred_val > REG_BR_PROB_BASE / 2;
11234 int cputaken = final_forward_branch_p (current_output_insn) == 0;
11236 /* Emit hints only in the cases where the default branch
11237 prediction heuristics would fail. */
11238 if (taken != cputaken)
11240 /* We use 3e (DS) prefix for taken branches and
11241 2e (CS) prefix for not taken branches. */
11242 if (taken)
11243 fputs ("ds ; ", file);
11244 else
11245 fputs ("cs ; ", file);
11249 return;
11252 case 'Y':
11253 switch (GET_CODE (x))
11255 case NE:
11256 fputs ("neq", file);
11257 break;
11258 case EQ:
11259 fputs ("eq", file);
11260 break;
11261 case GE:
11262 case GEU:
11263 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11264 break;
11265 case GT:
11266 case GTU:
11267 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11268 break;
11269 case LE:
11270 case LEU:
11271 fputs ("le", file);
11272 break;
11273 case LT:
11274 case LTU:
11275 fputs ("lt", file);
11276 break;
11277 case UNORDERED:
11278 fputs ("unord", file);
11279 break;
11280 case ORDERED:
11281 fputs ("ord", file);
11282 break;
11283 case UNEQ:
11284 fputs ("ueq", file);
11285 break;
11286 case UNGE:
11287 fputs ("nlt", file);
11288 break;
11289 case UNGT:
11290 fputs ("nle", file);
11291 break;
11292 case UNLE:
11293 fputs ("ule", file);
11294 break;
11295 case UNLT:
11296 fputs ("ult", file);
11297 break;
11298 case LTGT:
11299 fputs ("une", file);
11300 break;
11301 default:
11302 output_operand_lossage ("operand is not a condition code, invalid operand code 'Y'");
11303 return;
11305 return;
11307 case ';':
11308 #if TARGET_MACHO
11309 fputs (" ; ", file);
11310 #else
11311 fputc (' ', file);
11312 #endif
11313 return;
11315 default:
11316 output_operand_lossage ("invalid operand code '%c'", code);
11320 if (REG_P (x))
11321 print_reg (x, code, file);
11323 else if (MEM_P (x))
11325 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
11326 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11327 && GET_MODE (x) != BLKmode)
11329 const char * size;
11330 switch (GET_MODE_SIZE (GET_MODE (x)))
11332 case 1: size = "BYTE"; break;
11333 case 2: size = "WORD"; break;
11334 case 4: size = "DWORD"; break;
11335 case 8: size = "QWORD"; break;
11336 case 12: size = "XWORD"; break;
11337 case 16:
11338 if (GET_MODE (x) == XFmode)
11339 size = "XWORD";
11340 else
11341 size = "XMMWORD";
11342 break;
11343 default:
11344 gcc_unreachable ();
11347 /* Check for explicit size override (codes 'b', 'w' and 'k') */
11348 if (code == 'b')
11349 size = "BYTE";
11350 else if (code == 'w')
11351 size = "WORD";
11352 else if (code == 'k')
11353 size = "DWORD";
11355 fputs (size, file);
11356 fputs (" PTR ", file);
11359 x = XEXP (x, 0);
11360 /* Avoid (%rip) for call operands. */
11361 if (CONSTANT_ADDRESS_P (x) && code == 'P'
11362 && !CONST_INT_P (x))
11363 output_addr_const (file, x);
11364 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
11365 output_operand_lossage ("invalid constraints for operand");
11366 else
11367 output_address (x);
11370 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
11372 REAL_VALUE_TYPE r;
11373 long l;
11375 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11376 REAL_VALUE_TO_TARGET_SINGLE (r, l);
11378 if (ASSEMBLER_DIALECT == ASM_ATT)
11379 putc ('$', file);
11380 fprintf (file, "0x%08lx", (long unsigned int) l);
11383 /* These float cases don't actually occur as immediate operands. */
11384 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
11386 char dstr[30];
11388 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11389 fprintf (file, "%s", dstr);
11392 else if (GET_CODE (x) == CONST_DOUBLE
11393 && GET_MODE (x) == XFmode)
11395 char dstr[30];
11397 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11398 fprintf (file, "%s", dstr);
11401 else
11403 /* We have patterns that allow zero sets of memory, for instance.
11404 In 64-bit mode, we should probably support all 8-byte vectors,
11405 since we can in fact encode that into an immediate. */
11406 if (GET_CODE (x) == CONST_VECTOR)
11408 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
11409 x = const0_rtx;
11412 if (code != 'P')
11414 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
11416 if (ASSEMBLER_DIALECT == ASM_ATT)
11417 putc ('$', file);
11419 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
11420 || GET_CODE (x) == LABEL_REF)
11422 if (ASSEMBLER_DIALECT == ASM_ATT)
11423 putc ('$', file);
11424 else
11425 fputs ("OFFSET FLAT:", file);
11428 if (CONST_INT_P (x))
11429 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11430 else if (flag_pic)
11431 output_pic_addr_const (file, x, code);
11432 else
11433 output_addr_const (file, x);
11437 /* Print a memory operand whose address is ADDR. */
11439 void
11440 print_operand_address (FILE *file, rtx addr)
11442 struct ix86_address parts;
11443 rtx base, index, disp;
11444 int scale;
11445 int ok = ix86_decompose_address (addr, &parts);
11447 gcc_assert (ok);
11449 base = parts.base;
11450 index = parts.index;
11451 disp = parts.disp;
11452 scale = parts.scale;
11454 switch (parts.seg)
11456 case SEG_DEFAULT:
11457 break;
11458 case SEG_FS:
11459 case SEG_GS:
11460 if (ASSEMBLER_DIALECT == ASM_ATT)
11461 putc ('%', file);
11462 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11463 break;
11464 default:
11465 gcc_unreachable ();
11468 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
11469 if (TARGET_64BIT && !base && !index)
11471 rtx symbol = disp;
11473 if (GET_CODE (disp) == CONST
11474 && GET_CODE (XEXP (disp, 0)) == PLUS
11475 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11476 symbol = XEXP (XEXP (disp, 0), 0);
11478 if (GET_CODE (symbol) == LABEL_REF
11479 || (GET_CODE (symbol) == SYMBOL_REF
11480 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11481 base = pc_rtx;
11483 if (!base && !index)
11485 /* A displacement-only address requires special attention. */
11487 if (CONST_INT_P (disp))
11489 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11490 fputs ("ds:", file);
11491 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11493 else if (flag_pic)
11494 output_pic_addr_const (file, disp, 0);
11495 else
11496 output_addr_const (file, disp);
11498 else
11500 if (ASSEMBLER_DIALECT == ASM_ATT)
11502 if (disp)
11504 if (flag_pic)
11505 output_pic_addr_const (file, disp, 0);
11506 else if (GET_CODE (disp) == LABEL_REF)
11507 output_asm_label (disp);
11508 else
11509 output_addr_const (file, disp);
11512 putc ('(', file);
11513 if (base)
11514 print_reg (base, 0, file);
11515 if (index)
11517 putc (',', file);
11518 print_reg (index, 0, file);
11519 if (scale != 1)
11520 fprintf (file, ",%d", scale);
11522 putc (')', file);
11524 else
11526 rtx offset = NULL_RTX;
11528 if (disp)
11530 /* Pull out the offset of a symbol; print any symbol itself. */
11531 if (GET_CODE (disp) == CONST
11532 && GET_CODE (XEXP (disp, 0)) == PLUS
11533 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11535 offset = XEXP (XEXP (disp, 0), 1);
11536 disp = gen_rtx_CONST (VOIDmode,
11537 XEXP (XEXP (disp, 0), 0));
11540 if (flag_pic)
11541 output_pic_addr_const (file, disp, 0);
11542 else if (GET_CODE (disp) == LABEL_REF)
11543 output_asm_label (disp);
11544 else if (CONST_INT_P (disp))
11545 offset = disp;
11546 else
11547 output_addr_const (file, disp);
11550 putc ('[', file);
11551 if (base)
11553 print_reg (base, 0, file);
11554 if (offset)
11556 if (INTVAL (offset) >= 0)
11557 putc ('+', file);
11558 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11561 else if (offset)
11562 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11563 else
11564 putc ('0', file);
11566 if (index)
11568 putc ('+', file);
11569 print_reg (index, 0, file);
11570 if (scale != 1)
11571 fprintf (file, "*%d", scale);
11573 putc (']', file);
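/* Illustrative example of the two dialects handled above: an address
   with base %ebx, index %esi, scale 4 and displacement 12 prints as
	AT&T:	12(%ebx,%esi,4)
	Intel:	[ebx+esi*4+12]
   and a RIP-relative symbol in 64-bit mode prints as foo(%rip) in
   AT&T syntax.  */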
11578 bool
11579 output_addr_const_extra (FILE *file, rtx x)
11581 rtx op;
11583 if (GET_CODE (x) != UNSPEC)
11584 return false;
11586 op = XVECEXP (x, 0, 0);
11587 switch (XINT (x, 1))
11589 case UNSPEC_GOTTPOFF:
11590 output_addr_const (file, op);
11591 /* FIXME: This might be @TPOFF in Sun ld. */
11592 fputs ("@GOTTPOFF", file);
11593 break;
11594 case UNSPEC_TPOFF:
11595 output_addr_const (file, op);
11596 fputs ("@TPOFF", file);
11597 break;
11598 case UNSPEC_NTPOFF:
11599 output_addr_const (file, op);
11600 if (TARGET_64BIT)
11601 fputs ("@TPOFF", file);
11602 else
11603 fputs ("@NTPOFF", file);
11604 break;
11605 case UNSPEC_DTPOFF:
11606 output_addr_const (file, op);
11607 fputs ("@DTPOFF", file);
11608 break;
11609 case UNSPEC_GOTNTPOFF:
11610 output_addr_const (file, op);
11611 if (TARGET_64BIT)
11612 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11613 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
11614 else
11615 fputs ("@GOTNTPOFF", file);
11616 break;
11617 case UNSPEC_INDNTPOFF:
11618 output_addr_const (file, op);
11619 fputs ("@INDNTPOFF", file);
11620 break;
11621 #if TARGET_MACHO
11622 case UNSPEC_MACHOPIC_OFFSET:
11623 output_addr_const (file, op);
11624 putc ('-', file);
11625 machopic_output_function_base_name (file);
11626 break;
11627 #endif
11629 default:
11630 return false;
11633 return true;
11636 /* Split one or more DImode RTL references into pairs of SImode
11637 references. The RTL can be REG, offsettable MEM, integer constant, or
11638 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11639 split and "num" is its length. lo_half and hi_half are output arrays
11640 that parallel "operands". */
11642 void
11643 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11645 while (num--)
11647 rtx op = operands[num];
11649 /* simplify_subreg refuses to split volatile memory addresses,
11650 but we still have to handle them. */
11651 if (MEM_P (op))
11653 lo_half[num] = adjust_address (op, SImode, 0);
11654 hi_half[num] = adjust_address (op, SImode, 4);
11656 else
11658 lo_half[num] = simplify_gen_subreg (SImode, op,
11659 GET_MODE (op) == VOIDmode
11660 ? DImode : GET_MODE (op), 0);
11661 hi_half[num] = simplify_gen_subreg (SImode, op,
11662 GET_MODE (op) == VOIDmode
11663 ? DImode : GET_MODE (op), 4);
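/* Worked example (illustrative): splitting the DImode constant
   0x100000002 gives lo_half = 2 (bits 0-31, offset 0) and hi_half = 1
   (bits 32-63, offset 4); a DImode MEM at (addr) likewise splits into
   SImode MEMs at (addr) and (addr+4), matching the little-endian
   layout.  */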
11667 /* Split one or more TImode RTL references into pairs of DImode
11668 references. The RTL can be REG, offsettable MEM, integer constant, or
11669 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
11670 split and "num" is its length. lo_half and hi_half are output arrays
11671 that parallel "operands". */
11673 void
11674 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11676 while (num--)
11678 rtx op = operands[num];
11680 /* simplify_subreg refuses to split volatile memory addresses, but we
11681 still have to handle them. */
11682 if (MEM_P (op))
11684 lo_half[num] = adjust_address (op, DImode, 0);
11685 hi_half[num] = adjust_address (op, DImode, 8);
11687 else
11689 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
11690 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
11695 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
11696 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
11697 is the expression of the binary operation. The output may either be
11698 emitted here, or returned to the caller, like all output_* functions.
11700 There is no guarantee that the operands are the same mode, as they
11701 might be within FLOAT or FLOAT_EXTEND expressions. */
11703 #ifndef SYSV386_COMPAT
11704 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
11705 wants to fix the assemblers because that causes incompatibility
11706 with gcc. No-one wants to fix gcc because that causes
11707 incompatibility with assemblers... You can use the option of
11708 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
11709 #define SYSV386_COMPAT 1
11710 #endif
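/* Illustrative reading of the output templates used below: in a
   template, the text before '|' is the AT&T spelling and the text
   after it the Intel spelling.  So with SYSV386_COMPAT defined,
   "{p\t%0, %2|rp\t%2, %0}" appended to "fsub" prints fsubp for AT&T
   assemblers but fsubrp for Intel ones, reflecting the direction
   reversal described above.  */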
11712 const char *
11713 output_387_binary_op (rtx insn, rtx *operands)
11715 static char buf[40];
11716 const char *p;
11717 const char *ssep;
11718 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
11720 #ifdef ENABLE_CHECKING
11721 /* Even if we do not want to check the inputs, this documents input
11722 constraints. Which helps in understanding the following code. */
11723 if (STACK_REG_P (operands[0])
11724 && ((REG_P (operands[1])
11725 && REGNO (operands[0]) == REGNO (operands[1])
11726 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
11727 || (REG_P (operands[2])
11728 && REGNO (operands[0]) == REGNO (operands[2])
11729 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
11730 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
11731 ; /* ok */
11732 else
11733 gcc_assert (is_sse);
11734 #endif
11736 switch (GET_CODE (operands[3]))
11738 case PLUS:
11739 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11740 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11741 p = "fiadd";
11742 else
11743 p = "fadd";
11744 ssep = "vadd";
11745 break;
11747 case MINUS:
11748 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11749 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11750 p = "fisub";
11751 else
11752 p = "fsub";
11753 ssep = "vsub";
11754 break;
11756 case MULT:
11757 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11758 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11759 p = "fimul";
11760 else
11761 p = "fmul";
11762 ssep = "vmul";
11763 break;
11765 case DIV:
11766 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11767 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11768 p = "fidiv";
11769 else
11770 p = "fdiv";
11771 ssep = "vdiv";
11772 break;
11774 default:
11775 gcc_unreachable ();
11778 if (is_sse)
11780 if (TARGET_AVX)
11782 strcpy (buf, ssep);
11783 if (GET_MODE (operands[0]) == SFmode)
11784 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
11785 else
11786 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
11788 else
11790 strcpy (buf, ssep + 1);
11791 if (GET_MODE (operands[0]) == SFmode)
11792 strcat (buf, "ss\t{%2, %0|%0, %2}");
11793 else
11794 strcat (buf, "sd\t{%2, %0|%0, %2}");
11796 return buf;
11798 strcpy (buf, p);
11800 switch (GET_CODE (operands[3]))
11802 case MULT:
11803 case PLUS:
11804 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
11806 rtx temp = operands[2];
11807 operands[2] = operands[1];
11808 operands[1] = temp;
11811 /* We know operands[0] == operands[1]. */
11813 if (MEM_P (operands[2]))
11815 p = "%z2\t%2";
11816 break;
11819 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11821 if (STACK_TOP_P (operands[0]))
11822 /* How is it that we are storing to a dead operand[2]?
11823 Well, presumably operands[1] is dead too. We can't
11824 store the result to st(0) as st(0) gets popped on this
11825 instruction. Instead store to operands[2] (which I
11826 think has to be st(1)). st(1) will be popped later.
11827 gcc <= 2.8.1 didn't have this check and generated
11828 assembly code that the Unixware assembler rejected. */
11829 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11830 else
11831 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11832 break;
11835 if (STACK_TOP_P (operands[0]))
11836 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11837 else
11838 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11839 break;
11841 case MINUS:
11842 case DIV:
11843 if (MEM_P (operands[1]))
11845 p = "r%z1\t%1";
11846 break;
11849 if (MEM_P (operands[2]))
11851 p = "%z2\t%2";
11852 break;
11855 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11857 #if SYSV386_COMPAT
11858 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
11859 derived assemblers, confusingly reverse the direction of
11860 the operation for fsub{r} and fdiv{r} when the
11861 destination register is not st(0). The Intel assembler
11862 doesn't have this brain damage. Read !SYSV386_COMPAT to
11863 figure out what the hardware really does. */
11864 if (STACK_TOP_P (operands[0]))
11865 p = "{p\t%0, %2|rp\t%2, %0}";
11866 else
11867 p = "{rp\t%2, %0|p\t%0, %2}";
11868 #else
11869 if (STACK_TOP_P (operands[0]))
11870 /* As above for fmul/fadd, we can't store to st(0). */
11871 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11872 else
11873 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11874 #endif
11875 break;
11878 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
11880 #if SYSV386_COMPAT
11881 if (STACK_TOP_P (operands[0]))
11882 p = "{rp\t%0, %1|p\t%1, %0}";
11883 else
11884 p = "{p\t%1, %0|rp\t%0, %1}";
11885 #else
11886 if (STACK_TOP_P (operands[0]))
11887 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
11888 else
11889 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
11890 #endif
11891 break;
11894 if (STACK_TOP_P (operands[0]))
11896 if (STACK_TOP_P (operands[1]))
11897 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11898 else
11899 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
11900 break;
11902 else if (STACK_TOP_P (operands[1]))
11904 #if SYSV386_COMPAT
11905 p = "{\t%1, %0|r\t%0, %1}";
11906 #else
11907 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
11908 #endif
11910 else
11912 #if SYSV386_COMPAT
11913 p = "{r\t%2, %0|\t%0, %2}";
11914 #else
11915 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11916 #endif
11918 break;
11920 default:
11921 gcc_unreachable ();
11924 strcat (buf, p);
11925 return buf;
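/* Illustrative example: for a DFmode SSE add, the code above returns
   "vaddsd\t{%2, %1, %0|%0, %1, %2}" (three-operand AVX form) or, on
   non-AVX targets, "addsd\t{%2, %0|%0, %2}", skipping the leading
   'v' via ssep + 1.  */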
11928 /* Return needed mode for entity in optimize_mode_switching pass. */
11930 static int
11931 ix86_mode_needed (int entity, rtx insn)
11933 enum attr_i387_cw mode;
11935 /* The mode UNINITIALIZED is used to store the control word after a
11936 function call or ASM pattern. The mode ANY specifies that the function
11937 has no requirements on the control word and makes no changes in the
11938 bits we are interested in. */
11940 if (CALL_P (insn)
11941 || (NONJUMP_INSN_P (insn)
11942 && (asm_noperands (PATTERN (insn)) >= 0
11943 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
11944 return I387_CW_UNINITIALIZED;
11946 if (recog_memoized (insn) < 0)
11947 return I387_CW_ANY;
11949 mode = get_attr_i387_cw (insn);
11951 switch (entity)
11953 case I387_TRUNC:
11954 if (mode == I387_CW_TRUNC)
11955 return mode;
11956 break;
11958 case I387_FLOOR:
11959 if (mode == I387_CW_FLOOR)
11960 return mode;
11961 break;
11963 case I387_CEIL:
11964 if (mode == I387_CW_CEIL)
11965 return mode;
11966 break;
11968 case I387_MASK_PM:
11969 if (mode == I387_CW_MASK_PM)
11970 return mode;
11971 break;
11973 default:
11974 gcc_unreachable ();
11977 return I387_CW_ANY;
11980 /* Output code to initialize the control word copies used by trunc?f?i
11981 and rounding patterns. The current control word is saved, and a copy
11982 adjusted for MODE is stored in the corresponding stack slot. */
11984 void
11985 emit_i387_cw_initialization (int mode)
11987 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
11988 rtx new_mode;
11990 enum ix86_stack_slot slot;
11992 rtx reg = gen_reg_rtx (HImode);
11994 emit_insn (gen_x86_fnstcw_1 (stored_mode));
11995 emit_move_insn (reg, copy_rtx (stored_mode));
11997 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
11998 || optimize_function_for_size_p (cfun))
12000 switch (mode)
12002 case I387_CW_TRUNC:
12003 /* round toward zero (truncate) */
12004 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
12005 slot = SLOT_CW_TRUNC;
12006 break;
12008 case I387_CW_FLOOR:
12009 /* round down toward -oo */
12010 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12011 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
12012 slot = SLOT_CW_FLOOR;
12013 break;
12015 case I387_CW_CEIL:
12016 /* round up toward +oo */
12017 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12018 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
12019 slot = SLOT_CW_CEIL;
12020 break;
12022 case I387_CW_MASK_PM:
12023 /* mask precision exception for nearbyint() */
12024 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12025 slot = SLOT_CW_MASK_PM;
12026 break;
12028 default:
12029 gcc_unreachable ();
12032 else
12034 switch (mode)
12036 case I387_CW_TRUNC:
12037 /* round toward zero (truncate) */
12038 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
12039 slot = SLOT_CW_TRUNC;
12040 break;
12042 case I387_CW_FLOOR:
12043 /* round down toward -oo */
12044 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
12045 slot = SLOT_CW_FLOOR;
12046 break;
12048 case I387_CW_CEIL:
12049 /* round up toward +oo */
12050 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
12051 slot = SLOT_CW_CEIL;
12052 break;
12054 case I387_CW_MASK_PM:
12055 /* mask precision exception for nearbyint() */
12056 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12057 slot = SLOT_CW_MASK_PM;
12058 break;
12060 default:
12061 gcc_unreachable ();
12065 gcc_assert (slot < MAX_386_STACK_LOCALS);
12067 new_mode = assign_386_stack_local (HImode, slot);
12068 emit_move_insn (new_mode, reg);
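/* Illustrative summary (a sketch, not emitted literally): bits 10-11
   of the x87 control word form the rounding-control field, so the
   masks above select 0x0c00 = round toward zero, 0x0400 = round down,
   and 0x0800 = round up, while 0x0020 masks the precision exception.
   A hand-written equivalent of the truncation setup would be:
	fnstcw	saved		; store the current control word
	orw	$0x0c00, saved
	fldcw	saved		; rounding now truncates
   where "saved" stands for the stack slot used above.  */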
12071 /* Output code for INSN to convert a float to a signed int. OPERANDS
12072 are the insn operands. The output may be [HSD]Imode and the input
12073 operand may be [SDX]Fmode. */
12075 const char *
12076 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
12078 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12079 int dimode_p = GET_MODE (operands[0]) == DImode;
12080 int round_mode = get_attr_i387_cw (insn);
12082 /* Jump through a hoop or two for DImode, since the hardware has no
12083 non-popping instruction. We used to do this a different way, but
12084 that was somewhat fragile and broke with post-reload splitters. */
12085 if ((dimode_p || fisttp) && !stack_top_dies)
12086 output_asm_insn ("fld\t%y1", operands);
12088 gcc_assert (STACK_TOP_P (operands[1]));
12089 gcc_assert (MEM_P (operands[0]));
12090 gcc_assert (GET_MODE (operands[1]) != TFmode);
12092 if (fisttp)
12093 output_asm_insn ("fisttp%z0\t%0", operands);
12094 else
12096 if (round_mode != I387_CW_ANY)
12097 output_asm_insn ("fldcw\t%3", operands);
12098 if (stack_top_dies || dimode_p)
12099 output_asm_insn ("fistp%z0\t%0", operands);
12100 else
12101 output_asm_insn ("fist%z0\t%0", operands);
12102 if (round_mode != I387_CW_ANY)
12103 output_asm_insn ("fldcw\t%2", operands);
12106 return "";
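/* Illustrative expansion (assuming a DImode result whose input stays
   live, no fisttp, and a rounding mode that must change; %2/%3 are
   the control-word slots from the insn pattern):
	fld	%y1	; duplicate st(0), since fistp will pop
	fldcw	%3	; load the truncating control word
	fistp%z0 %0	; convert, store and pop
	fldcw	%2	; restore the original control word  */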
12109 /* Output code for x87 ffreep insn. The OPNO argument, which may only
12110 have the values zero or one, indicates the ffreep insn's operand
12111 from the OPERANDS array. */
12113 static const char *
12114 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
12116 if (TARGET_USE_FFREEP)
12117 #if HAVE_AS_IX86_FFREEP
12118 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
12119 #else
12121 static char retval[] = ".word\t0xc_df";
12122 int regno = REGNO (operands[opno]);
12124 gcc_assert (FP_REGNO_P (regno));
12126 retval[9] = '0' + (regno - FIRST_STACK_REG);
12127 return retval;
12129 #endif
12131 return opno ? "fstp\t%y1" : "fstp\t%y0";
12135 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
12136 should be used. UNORDERED_P is true when fucom should be used. */
12138 const char *
12139 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
12141 int stack_top_dies;
12142 rtx cmp_op0, cmp_op1;
12143 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
12145 if (eflags_p)
12147 cmp_op0 = operands[0];
12148 cmp_op1 = operands[1];
12150 else
12152 cmp_op0 = operands[1];
12153 cmp_op1 = operands[2];
12156 if (is_sse)
12158 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
12159 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
12160 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
12161 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
12163 if (GET_MODE (operands[0]) == SFmode)
12164 if (unordered_p)
12165 return &ucomiss[TARGET_AVX ? 0 : 1];
12166 else
12167 return &comiss[TARGET_AVX ? 0 : 1];
12168 else
12169 if (unordered_p)
12170 return &ucomisd[TARGET_AVX ? 0 : 1];
12171 else
12172 return &comisd[TARGET_AVX ? 0 : 1];
12175 gcc_assert (STACK_TOP_P (cmp_op0));
12177 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12179 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
12181 if (stack_top_dies)
12183 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
12184 return output_387_ffreep (operands, 1);
12186 else
12187 return "ftst\n\tfnstsw\t%0";
12190 if (STACK_REG_P (cmp_op1)
12191 && stack_top_dies
12192 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
12193 && REGNO (cmp_op1) != FIRST_STACK_REG)
12195 /* If the top of the 387 stack dies, and the other operand is
12196 also a stack register that dies, then this must be an
12197 `fcompp' float compare. */
12199 if (eflags_p)
12201 /* There is no double popping fcomi variant. Fortunately,
12202 eflags is immune from the fstp's cc clobbering. */
12203 if (unordered_p)
12204 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
12205 else
12206 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
12207 return output_387_ffreep (operands, 0);
12209 else
12211 if (unordered_p)
12212 return "fucompp\n\tfnstsw\t%0";
12213 else
12214 return "fcompp\n\tfnstsw\t%0";
12217 else
12219 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
12221 static const char * const alt[16] =
12223 "fcom%z2\t%y2\n\tfnstsw\t%0",
12224 "fcomp%z2\t%y2\n\tfnstsw\t%0",
12225 "fucom%z2\t%y2\n\tfnstsw\t%0",
12226 "fucomp%z2\t%y2\n\tfnstsw\t%0",
12228 "ficom%z2\t%y2\n\tfnstsw\t%0",
12229 "ficomp%z2\t%y2\n\tfnstsw\t%0",
12230 NULL,
12231 NULL,
12233 "fcomi\t{%y1, %0|%0, %y1}",
12234 "fcomip\t{%y1, %0|%0, %y1}",
12235 "fucomi\t{%y1, %0|%0, %y1}",
12236 "fucomip\t{%y1, %0|%0, %y1}",
12238 NULL,
12239 NULL,
12240 NULL,
12241 NULL
12244 int mask;
12245 const char *ret;
12247 mask = eflags_p << 3;
12248 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
12249 mask |= unordered_p << 1;
12250 mask |= stack_top_dies;
12252 gcc_assert (mask < 16);
12253 ret = alt[mask];
12254 gcc_assert (ret);
12256 return ret;
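/* Worked example of the encoding above (illustrative): fucomip with a
   dying stack top is eflags_p=1, integer operand=0, unordered_p=1,
   stack_top_dies=1, giving mask = 8 + 2 + 1 = 11, which indexes
   alt[11] = "fucomip\t{%y1, %0|%0, %y1}".  */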
12260 void
12261 ix86_output_addr_vec_elt (FILE *file, int value)
12263 const char *directive = ASM_LONG;
12265 #ifdef ASM_QUAD
12266 if (TARGET_64BIT)
12267 directive = ASM_QUAD;
12268 #else
12269 gcc_assert (!TARGET_64BIT);
12270 #endif
12272 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
12275 void
12276 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12278 const char *directive = ASM_LONG;
12280 #ifdef ASM_QUAD
12281 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12282 directive = ASM_QUAD;
12283 #else
12284 gcc_assert (!TARGET_64BIT);
12285 #endif
12286 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
12287 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12288 fprintf (file, "%s%s%d-%s%d\n",
12289 directive, LPREFIX, value, LPREFIX, rel);
12290 else if (HAVE_AS_GOTOFF_IN_DATA)
12291 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
12292 #if TARGET_MACHO
12293 else if (TARGET_MACHO)
12295 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
12296 machopic_output_function_base_name (file);
12297 fprintf(file, "\n");
12299 #endif
12300 else
12301 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
12302 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
12305 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
12306 for the target. */
12308 void
12309 ix86_expand_clear (rtx dest)
12311 rtx tmp;
12313 /* We play register width games, which are only valid after reload. */
12314 gcc_assert (reload_completed);
12316 /* Avoid HImode and its attendant prefix byte. */
12317 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
12318 dest = gen_rtx_REG (SImode, REGNO (dest));
12319 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
12321 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
12322 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
12324 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12325 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12328 emit_insn (tmp);
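/* For illustration: clearing %eax this way emits
	xorl	%eax, %eax	; 2 bytes, clobbers flags
   instead of
	movl	$0, %eax	; 5 bytes, leaves flags intact
   whenever the predicate above allows the xor form.  */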
12331 /* X is an unchanging MEM. If it is a constant pool reference, return
12332 the constant pool rtx, else NULL. */
12334 rtx
12335 maybe_get_pool_constant (rtx x)
12337 x = ix86_delegitimize_address (XEXP (x, 0));
12339 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
12340 return get_pool_constant (x);
12342 return NULL_RTX;
12345 void
12346 ix86_expand_move (enum machine_mode mode, rtx operands[])
12348 rtx op0, op1;
12349 enum tls_model model;
12351 op0 = operands[0];
12352 op1 = operands[1];
12354 if (GET_CODE (op1) == SYMBOL_REF)
12356 model = SYMBOL_REF_TLS_MODEL (op1);
12357 if (model)
12359 op1 = legitimize_tls_address (op1, model, true);
12360 op1 = force_operand (op1, op0);
12361 if (op1 == op0)
12362 return;
12364 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12365 && SYMBOL_REF_DLLIMPORT_P (op1))
12366 op1 = legitimize_dllimport_symbol (op1, false);
12368 else if (GET_CODE (op1) == CONST
12369 && GET_CODE (XEXP (op1, 0)) == PLUS
12370 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
12372 rtx addend = XEXP (XEXP (op1, 0), 1);
12373 rtx symbol = XEXP (XEXP (op1, 0), 0);
12374 rtx tmp = NULL;
12376 model = SYMBOL_REF_TLS_MODEL (symbol);
12377 if (model)
12378 tmp = legitimize_tls_address (symbol, model, true);
12379 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12380 && SYMBOL_REF_DLLIMPORT_P (symbol))
12381 tmp = legitimize_dllimport_symbol (symbol, true);
12383 if (tmp)
12385 tmp = force_operand (tmp, NULL);
12386 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
12387 op0, 1, OPTAB_DIRECT);
12388 if (tmp == op0)
12389 return;
12393 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
12395 if (TARGET_MACHO && !TARGET_64BIT)
12397 #if TARGET_MACHO
12398 if (MACHOPIC_PURE)
12400 rtx temp = ((reload_in_progress
12401 || ((op0 && REG_P (op0))
12402 && mode == Pmode))
12403 ? op0 : gen_reg_rtx (Pmode));
12404 op1 = machopic_indirect_data_reference (op1, temp);
12405 op1 = machopic_legitimize_pic_address (op1, mode,
12406 temp == op1 ? 0 : temp);
12408 else if (MACHOPIC_INDIRECT)
12409 op1 = machopic_indirect_data_reference (op1, 0);
12410 if (op0 == op1)
12411 return;
12412 #endif
12414 else
12416 if (MEM_P (op0))
12417 op1 = force_reg (Pmode, op1);
12418 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
12420 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
12421 op1 = legitimize_pic_address (op1, reg);
12422 if (op0 == op1)
12423 return;
12427 else
12429 if (MEM_P (op0)
12430 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12431 || !push_operand (op0, mode))
12432 && MEM_P (op1))
12433 op1 = force_reg (mode, op1);
12435 if (push_operand (op0, mode)
12436 && ! general_no_elim_operand (op1, mode))
12437 op1 = copy_to_mode_reg (mode, op1);
12439 /* Force large constants in 64bit compilation into a register
12440 to get them CSEed. */
12441 if (can_create_pseudo_p ()
12442 && (mode == DImode) && TARGET_64BIT
12443 && immediate_operand (op1, mode)
12444 && !x86_64_zext_immediate_operand (op1, VOIDmode)
12445 && !register_operand (op0, mode)
12446 && optimize)
12447 op1 = copy_to_mode_reg (mode, op1);
12449 if (can_create_pseudo_p ()
12450 && FLOAT_MODE_P (mode)
12451 && GET_CODE (op1) == CONST_DOUBLE)
12453 /* If we are loading a floating point constant to a register,
12454 force the value to memory now, since we'll get better code
12455 out the back end. */
12457 op1 = validize_mem (force_const_mem (mode, op1));
12458 if (!register_operand (op0, mode))
12460 rtx temp = gen_reg_rtx (mode);
12461 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12462 emit_move_insn (op0, temp);
12463 return;
12468 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12471 void
12472 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12474 rtx op0 = operands[0], op1 = operands[1];
12475 unsigned int align = GET_MODE_ALIGNMENT (mode);
12477 /* Force constants other than zero into memory. We do not know how
12478 the instructions used to build constants modify the upper 64 bits
12479 of the register, once we have that information we may be able
12480 to handle some of them more efficiently. */
12481 if (can_create_pseudo_p ()
12482 && register_operand (op0, mode)
12483 && (CONSTANT_P (op1)
12484 || (GET_CODE (op1) == SUBREG
12485 && CONSTANT_P (SUBREG_REG (op1))))
12486 && standard_sse_constant_p (op1) <= 0)
12487 op1 = validize_mem (force_const_mem (mode, op1));
12489 /* We need to check memory alignment for SSE mode since an attribute
12490 can make operands unaligned. */
12491 if (can_create_pseudo_p ()
12492 && SSE_REG_MODE_P (mode)
12493 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12494 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12496 rtx tmp[2];
12498 /* ix86_expand_vector_move_misalign() does not like constants ... */
12499 if (CONSTANT_P (op1)
12500 || (GET_CODE (op1) == SUBREG
12501 && CONSTANT_P (SUBREG_REG (op1))))
12502 op1 = validize_mem (force_const_mem (mode, op1));
12504 /* ... nor both arguments in memory. */
12505 if (!register_operand (op0, mode)
12506 && !register_operand (op1, mode))
12507 op1 = force_reg (mode, op1);
12509 tmp[0] = op0; tmp[1] = op1;
12510 ix86_expand_vector_move_misalign (mode, tmp);
12511 return;
12514 /* Make operand1 a register if it isn't already. */
12515 if (can_create_pseudo_p ()
12516 && !register_operand (op0, mode)
12517 && !register_operand (op1, mode))
12519 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
12520 return;
12523 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12526 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
12527 straight to ix86_expand_vector_move. */
12528 /* Code generation for scalar reg-reg moves of single and double precision data:
12529 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12530 movaps reg, reg
12531 else
12532 movss reg, reg
12533 if (x86_sse_partial_reg_dependency == true)
12534 movapd reg, reg
12535 else
12536 movsd reg, reg
12538 Code generation for scalar loads of double precision data:
12539 if (x86_sse_split_regs == true)
12540 movlpd mem, reg (gas syntax)
12541 else
12542 movsd mem, reg
12544 Code generation for unaligned packed loads of single precision data
12545 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12546 if (x86_sse_unaligned_move_optimal)
12547 movups mem, reg
12549 if (x86_sse_partial_reg_dependency == true)
12551 xorps reg, reg
12552 movlps mem, reg
12553 movhps mem+8, reg
12555 else
12557 movlps mem, reg
12558 movhps mem+8, reg
12561 Code generation for unaligned packed loads of double precision data
12562 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12563 if (x86_sse_unaligned_move_optimal)
12564 movupd mem, reg
12566 if (x86_sse_split_regs == true)
12568 movlpd mem, reg
12569 movhpd mem+8, reg
12571 else
12573 movsd mem, reg
12574 movhpd mem+8, reg
12578 void
12579 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
12581 rtx op0, op1, m;
12583 op0 = operands[0];
12584 op1 = operands[1];
12586 if (TARGET_AVX)
12588 switch (GET_MODE_CLASS (mode))
12590 case MODE_VECTOR_INT:
12591 case MODE_INT:
12592 switch (GET_MODE_SIZE (mode))
12594 case 16:
12595 op0 = gen_lowpart (V16QImode, op0);
12596 op1 = gen_lowpart (V16QImode, op1);
12597 emit_insn (gen_avx_movdqu (op0, op1));
12598 break;
12599 case 32:
12600 op0 = gen_lowpart (V32QImode, op0);
12601 op1 = gen_lowpart (V32QImode, op1);
12602 emit_insn (gen_avx_movdqu256 (op0, op1));
12603 break;
12604 default:
12605 gcc_unreachable ();
12607 break;
12608 case MODE_VECTOR_FLOAT:
12609 op0 = gen_lowpart (mode, op0);
12610 op1 = gen_lowpart (mode, op1);
12612 switch (mode)
12614 case V4SFmode:
12615 emit_insn (gen_avx_movups (op0, op1));
12616 break;
12617 case V8SFmode:
12618 emit_insn (gen_avx_movups256 (op0, op1));
12619 break;
12620 case V2DFmode:
12621 emit_insn (gen_avx_movupd (op0, op1));
12622 break;
12623 case V4DFmode:
12624 emit_insn (gen_avx_movupd256 (op0, op1));
12625 break;
12626 default:
12627 gcc_unreachable ();
12629 break;
12631 default:
12632 gcc_unreachable ();
12635 return;
12638 if (MEM_P (op1))
12640 /* If we're optimizing for size, movups is the smallest. */
12641 if (optimize_insn_for_size_p ())
12643 op0 = gen_lowpart (V4SFmode, op0);
12644 op1 = gen_lowpart (V4SFmode, op1);
12645 emit_insn (gen_sse_movups (op0, op1));
12646 return;
12649 /* ??? If we have typed data, then it would appear that using
12650 movdqu is the only way to get unaligned data loaded with
12651 integer type. */
12652 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12654 op0 = gen_lowpart (V16QImode, op0);
12655 op1 = gen_lowpart (V16QImode, op1);
12656 emit_insn (gen_sse2_movdqu (op0, op1));
12657 return;
12660 if (TARGET_SSE2 && mode == V2DFmode)
12662 rtx zero;
12664 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12666 op0 = gen_lowpart (V2DFmode, op0);
12667 op1 = gen_lowpart (V2DFmode, op1);
12668 emit_insn (gen_sse2_movupd (op0, op1));
12669 return;
12672 /* When SSE registers are split into halves, we can avoid
12673 writing to the top half twice. */
12674 if (TARGET_SSE_SPLIT_REGS)
12676 emit_clobber (op0);
12677 zero = op0;
12679 else
12681 /* ??? Not sure about the best option for the Intel chips.
12682 The following would seem to satisfy; the register is
12683 entirely cleared, breaking the dependency chain. We
12684 then store to the upper half, with a dependency depth
12685 of one. A rumor has it that Intel recommends two movsd
12686 followed by an unpacklpd, but this is unconfirmed. And
12687 given that the dependency depth of the unpacklpd would
12688 still be one, I'm not sure why this would be better. */
12689 zero = CONST0_RTX (V2DFmode);
12692 m = adjust_address (op1, DFmode, 0);
12693 emit_insn (gen_sse2_loadlpd (op0, zero, m));
12694 m = adjust_address (op1, DFmode, 8);
12695 emit_insn (gen_sse2_loadhpd (op0, op0, m));
12697 else
12699 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12701 op0 = gen_lowpart (V4SFmode, op0);
12702 op1 = gen_lowpart (V4SFmode, op1);
12703 emit_insn (gen_sse_movups (op0, op1));
12704 return;
12707 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
12708 emit_move_insn (op0, CONST0_RTX (mode));
12709 else
12710 emit_clobber (op0);
12712 if (mode != V4SFmode)
12713 op0 = gen_lowpart (V4SFmode, op0);
12714 m = adjust_address (op1, V2SFmode, 0);
12715 emit_insn (gen_sse_loadlps (op0, op0, m));
12716 m = adjust_address (op1, V2SFmode, 8);
12717 emit_insn (gen_sse_loadhps (op0, op0, m));
12720 else if (MEM_P (op0))
12722 /* If we're optimizing for size, movups is the smallest. */
12723 if (optimize_insn_for_size_p ())
12725 op0 = gen_lowpart (V4SFmode, op0);
12726 op1 = gen_lowpart (V4SFmode, op1);
12727 emit_insn (gen_sse_movups (op0, op1));
12728 return;
12731 /* ??? Similar to above, only less clear because of quote
12732 typeless stores unquote. */
12733 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
12734 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12736 op0 = gen_lowpart (V16QImode, op0);
12737 op1 = gen_lowpart (V16QImode, op1);
12738 emit_insn (gen_sse2_movdqu (op0, op1));
12739 return;
12742 if (TARGET_SSE2 && mode == V2DFmode)
12744 m = adjust_address (op0, DFmode, 0);
12745 emit_insn (gen_sse2_storelpd (m, op1));
12746 m = adjust_address (op0, DFmode, 8);
12747 emit_insn (gen_sse2_storehpd (m, op1));
12749 else
12751 if (mode != V4SFmode)
12752 op1 = gen_lowpart (V4SFmode, op1);
12753 m = adjust_address (op0, V2SFmode, 0);
12754 emit_insn (gen_sse_storelps (m, op1));
12755 m = adjust_address (op0, V2SFmode, 8);
12756 emit_insn (gen_sse_storehps (m, op1));
12759 else
12760 gcc_unreachable ();
12763 /* Expand a push in MODE. This is some mode for which we do not support
12764 proper push instructions, at least from the registers that we expect
12765 the value to live in. */
12767 void
12768 ix86_expand_push (enum machine_mode mode, rtx x)
12770 rtx tmp;
12772 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
12773 GEN_INT (-GET_MODE_SIZE (mode)),
12774 stack_pointer_rtx, 1, OPTAB_DIRECT);
12775 if (tmp != stack_pointer_rtx)
12776 emit_move_insn (stack_pointer_rtx, tmp);
12778 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
12780 /* When we push an operand onto the stack, it has to be aligned at least
12781 at the function argument boundary. However since we don't have
12782 the argument type, we can't determine the actual argument
12783 boundary. */
12784 emit_move_insn (tmp, x);
12787 /* Helper function of ix86_fixup_binary_operands to canonicalize
12788 operand order. Returns true if the operands should be swapped. */
12790 static bool
12791 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
12792 rtx operands[])
12794 rtx dst = operands[0];
12795 rtx src1 = operands[1];
12796 rtx src2 = operands[2];
12798 /* If the operation is not commutative, we can't do anything. */
12799 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
12800 return false;
12802 /* Highest priority is that src1 should match dst. */
12803 if (rtx_equal_p (dst, src1))
12804 return false;
12805 if (rtx_equal_p (dst, src2))
12806 return true;
12808 /* Next highest priority is that immediate constants come second. */
12809 if (immediate_operand (src2, mode))
12810 return false;
12811 if (immediate_operand (src1, mode))
12812 return true;
12814 /* Lowest priority is that memory references should come second. */
12815 if (MEM_P (src2))
12816 return false;
12817 if (MEM_P (src1))
12818 return true;
12820 return false;
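/* Illustrative example: for dst = src1 + src2 with dst == src2, the
   operands are swapped so the two-address insn can use its
   dst-matches-src1 form; similarly (reg + $imm) is preferred over
   ($imm + reg), and (reg + mem) over (mem + reg).  */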
12824 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
12825 destination to use for the operation. If different from the true
12826 destination in operands[0], a copy operation will be required. */
12828 rtx
12829 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
12830 rtx operands[])
12832 rtx dst = operands[0];
12833 rtx src1 = operands[1];
12834 rtx src2 = operands[2];
12836 /* Canonicalize operand order. */
12837 if (ix86_swap_binary_operands_p (code, mode, operands))
12839 rtx temp;
12841 /* It is invalid to swap operands of different modes. */
12842 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
12844 temp = src1;
12845 src1 = src2;
12846 src2 = temp;
12849 /* The source operands cannot both be in memory. */
12850 if (MEM_P (src1) && MEM_P (src2))
12852 /* Optimization: Only read from memory once. */
12853 if (rtx_equal_p (src1, src2))
12855 src2 = force_reg (mode, src2);
12856 src1 = src2;
12858 else
12859 src2 = force_reg (mode, src2);
12862 /* If the destination is memory, and we do not have matching source
12863 operands, do things in registers. */
12864 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12865 dst = gen_reg_rtx (mode);
12867 /* Source 1 cannot be a constant. */
12868 if (CONSTANT_P (src1))
12869 src1 = force_reg (mode, src1);
12871 /* Source 1 cannot be a non-matching memory. */
12872 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12873 src1 = force_reg (mode, src1);
12875 operands[1] = src1;
12876 operands[2] = src2;
12877 return dst;
12880 /* Similarly, but assume that the destination has already been
12881 set up properly. */
12883 void
12884 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
12885 enum machine_mode mode, rtx operands[])
12887 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
12888 gcc_assert (dst == operands[0]);
12891 /* Attempt to expand a binary operator. Make the expansion closer to the
12892 actual machine than just general_operand, which will allow 3 separate
12893 memory references (one output, two input) in a single insn. */
12895 void
12896 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
12897 rtx operands[])
12899 rtx src1, src2, dst, op, clob;
12901 dst = ix86_fixup_binary_operands (code, mode, operands);
12902 src1 = operands[1];
12903 src2 = operands[2];
12905 /* Emit the instruction. */
12907 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
12908 if (reload_in_progress)
12910 /* Reload doesn't know about the flags register, and doesn't know that
12911 it doesn't want to clobber it. We can only do this with PLUS. */
12912 gcc_assert (code == PLUS);
12913 emit_insn (op);
12915 else
12917 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12918 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12921 /* Fix up the destination if needed. */
12922 if (dst != operands[0])
12923 emit_move_insn (operands[0], dst);
12926 /* Return TRUE or FALSE depending on whether the binary operator meets the
12927 appropriate constraints. */
12929 int
12930 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
12931 rtx operands[3])
12933 rtx dst = operands[0];
12934 rtx src1 = operands[1];
12935 rtx src2 = operands[2];
12937 /* The source operands cannot both be in memory. */
12938 if (MEM_P (src1) && MEM_P (src2))
12939 return 0;
12941 /* Canonicalize operand order for commutative operators. */
12942 if (ix86_swap_binary_operands_p (code, mode, operands))
12944 rtx temp = src1;
12945 src1 = src2;
12946 src2 = temp;
12949 /* If the destination is memory, we must have a matching source operand. */
12950 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12951 return 0;
12953 /* Source 1 cannot be a constant. */
12954 if (CONSTANT_P (src1))
12955 return 0;
12957 /* Source 1 cannot be a non-matching memory. */
12958 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12959 return 0;
12961 return 1;
12964 /* Attempt to expand a unary operator. Make the expansion closer to the
12965 actual machine than just general_operand, which will allow 2 separate
12966 memory references (one output, one input) in a single insn. */
12968 void
12969 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
12970 rtx operands[])
12972 int matching_memory;
12973 rtx src, dst, op, clob;
12975 dst = operands[0];
12976 src = operands[1];
12978 /* If the destination is memory, and we do not have matching source
12979 operands, do things in registers. */
12980 matching_memory = 0;
12981 if (MEM_P (dst))
12983 if (rtx_equal_p (dst, src))
12984 matching_memory = 1;
12985 else
12986 dst = gen_reg_rtx (mode);
12989 /* When the source operand is in memory, the destination must match. */
12990 if (MEM_P (src) && !matching_memory)
12991 src = force_reg (mode, src);
12993 /* Emit the instruction. */
12995 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
12996 if (reload_in_progress || code == NOT)
12998 /* Reload doesn't know about the flags register, and doesn't know that
12999 it doesn't want to clobber it. */
13000 gcc_assert (code == NOT);
13001 emit_insn (op);
13003 else
13005 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13006 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13009 /* Fix up the destination if needed. */
13010 if (dst != operands[0])
13011 emit_move_insn (operands[0], dst);
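/* Design note (illustrative): NOT is emitted without the flags
   clobber because one's complement does not modify EFLAGS, whereas
   e.g. NEG gets the (clobber (reg:CC FLAGS_REG)) parallel added
   above.  */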
13014 #define LEA_SEARCH_THRESHOLD 12
13016 /* Search backward for non-agu definition of register number REGNO1
13017 or register number REGNO2 in INSN's basic block until
13018 1. Pass LEA_SEARCH_THRESHOLD instructions, or
13019 2. Reach BB boundary, or
13020 3. Reach agu definition.
13021 Returns the distance between the non-agu definition point and INSN.
13022 If no definition point, returns -1. */
13024 static int
13025 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
13026 rtx insn)
13028 basic_block bb = BLOCK_FOR_INSN (insn);
13029 int distance = 0;
13030 df_ref *def_rec;
13031 enum attr_type insn_type;
13033 if (insn != BB_HEAD (bb))
13035 rtx prev = PREV_INSN (insn);
13036 while (prev && distance < LEA_SEARCH_THRESHOLD)
13038 if (INSN_P (prev))
13040 distance++;
13041 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13042 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13043 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13044 && (regno1 == DF_REF_REGNO (*def_rec)
13045 || regno2 == DF_REF_REGNO (*def_rec)))
13047 insn_type = get_attr_type (prev);
13048 if (insn_type != TYPE_LEA)
13049 goto done;
13052 if (prev == BB_HEAD (bb))
13053 break;
13054 prev = PREV_INSN (prev);
13058 if (distance < LEA_SEARCH_THRESHOLD)
13060 edge e;
13061 edge_iterator ei;
13062 bool simple_loop = false;
13064 FOR_EACH_EDGE (e, ei, bb->preds)
13065 if (e->src == bb)
13067 simple_loop = true;
13068 break;
13071 if (simple_loop)
13073 rtx prev = BB_END (bb);
13074 while (prev
13075 && prev != insn
13076 && distance < LEA_SEARCH_THRESHOLD)
13078 if (INSN_P (prev))
13080 distance++;
13081 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13082 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13083 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13084 && (regno1 == DF_REF_REGNO (*def_rec)
13085 || regno2 == DF_REF_REGNO (*def_rec)))
13087 insn_type = get_attr_type (prev);
13088 if (insn_type != TYPE_LEA)
13089 goto done;
13092 prev = PREV_INSN (prev);
13097 distance = -1;
13099 done:
13100 /* get_attr_type may modify recog data. We want to make sure
13101 that recog data is valid for instruction INSN, on which
13102 distance_non_agu_define is called. INSN is unchanged here. */
13103 extract_insn_cached (insn);
13104 return distance;
13107 /* Return the distance between INSN and the next insn that uses
13108 register number REGNO0 in a memory address. Return -1 if no such
13109 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
13111 static int
13112 distance_agu_use (unsigned int regno0, rtx insn)
13114 basic_block bb = BLOCK_FOR_INSN (insn);
13115 int distance = 0;
13116 df_ref *def_rec;
13117 df_ref *use_rec;
13119 if (insn != BB_END (bb))
13121 rtx next = NEXT_INSN (insn);
13122 while (next && distance < LEA_SEARCH_THRESHOLD)
13124 if (INSN_P (next))
13126 distance++;
13128 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13129 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13130 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13131 && regno0 == DF_REF_REGNO (*use_rec))
13133 /* Return DISTANCE if OP0 is used in a memory
13134 address in NEXT. */
13135 return distance;
13138 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13139 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13140 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13141 && regno0 == DF_REF_REGNO (*def_rec))
13143 /* Return -1 if OP0 is set in NEXT. */
13144 return -1;
13147 if (next == BB_END (bb))
13148 break;
13149 next = NEXT_INSN (next);
13153 if (distance < LEA_SEARCH_THRESHOLD)
13155 edge e;
13156 edge_iterator ei;
13157 bool simple_loop = false;
13159 FOR_EACH_EDGE (e, ei, bb->succs)
13160 if (e->dest == bb)
13162 simple_loop = true;
13163 break;
13166 if (simple_loop)
13168 rtx next = BB_HEAD (bb);
13169 while (next
13170 && next != insn
13171 && distance < LEA_SEARCH_THRESHOLD)
13173 if (INSN_P (next))
13175 distance++;
13177 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13178 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13179 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13180 && regno0 == DF_REF_REGNO (*use_rec))
13182 /* Return DISTANCE if OP0 is used in a memory
13183 address in NEXT. */
13184 return distance;
13187 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13188 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13189 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13190 && regno0 == DF_REF_REGNO (*def_rec))
13192 /* Return -1 if OP0 is set in NEXT. */
13193 return -1;
13197 next = NEXT_INSN (next);
13202 return -1;
13205 /* Define this macro to tune LEA priority vs. ADD; it takes effect when
13206 there is a dilemma of choosing between LEA and ADD.
13207 Negative value: ADD is preferred over LEA.
13208 Zero: Neutral.
13209 Positive value: LEA is preferred over ADD. */
13210 #define IX86_LEA_PRIORITY 2
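/* Illustrative example of the tradeoff (Atom-style tuning): for
   a = a + b whose result feeds an address soon afterwards,
	lea	(%eax,%ebx), %eax	; runs on the AGU, no flags
   can beat
	addl	%ebx, %eax		; runs on the ALU, writes flags
   while with no nearby address use the ADD form is preferred.  */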
13212 /* Return true if it is ok to optimize an ADD operation to LEA
13213 operation to avoid flag register consumption. For processors
13214 like ATOM, if the destination register of the LEA holds an actual
13215 address which will be used soon, LEA is better; otherwise ADD
13216 is better. */
13218 bool
13219 ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13220 rtx insn, rtx operands[])
13222 unsigned int regno0 = true_regnum (operands[0]);
13223 unsigned int regno1 = true_regnum (operands[1]);
13224 unsigned int regno2;
13226 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
13227 return regno0 != regno1;
13229 regno2 = true_regnum (operands[2]);
13231 /* If a = b + c and a != b and a != c, we must use the lea form. */
13232 if (regno0 != regno1 && regno0 != regno2)
13233 return true;
13234 else
13236 int dist_define, dist_use;
13237 dist_define = distance_non_agu_define (regno1, regno2, insn);
13238 if (dist_define <= 0)
13239 return true;
13241 /* If this insn has both backward non-agu dependence and forward
13242 agu dependence, the one with the shorter distance takes effect. */
13243 dist_use = distance_agu_use (regno0, insn);
13244 if (dist_use <= 0
13245 || (dist_define + IX86_LEA_PRIORITY) < dist_use)
13246 return false;
13248 return true;
13252 /* Return true if destination reg of SET_BODY is shift count of
13253 USE_BODY. */
13255 static bool
13256 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
13258 rtx set_dest;
13259 rtx shift_rtx;
13260 int i;
13262 /* Retrieve destination of SET_BODY. */
13263 switch (GET_CODE (set_body))
13265 case SET:
13266 set_dest = SET_DEST (set_body);
13267 if (!set_dest || !REG_P (set_dest))
13268 return false;
13269 break;
13270 case PARALLEL:
13271 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
13272 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
13273 use_body))
13274 return true;
13275 default:
13276 return false;
13277 break;
13280 /* Retrieve shift count of USE_BODY. */
13281 switch (GET_CODE (use_body))
13283 case SET:
13284 shift_rtx = XEXP (use_body, 1);
13285 break;
13286 case PARALLEL:
13287 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
13288 if (ix86_dep_by_shift_count_body (set_body,
13289 XVECEXP (use_body, 0, i)))
13290 return true;
13291 default:
13292 return false;
13293 break;
13296 if (shift_rtx
13297 && (GET_CODE (shift_rtx) == ASHIFT
13298 || GET_CODE (shift_rtx) == LSHIFTRT
13299 || GET_CODE (shift_rtx) == ASHIFTRT
13300 || GET_CODE (shift_rtx) == ROTATE
13301 || GET_CODE (shift_rtx) == ROTATERT))
13303 rtx shift_count = XEXP (shift_rtx, 1);
13305 /* Return true if shift count is dest of SET_BODY. */
13306 if (REG_P (shift_count)
13307 && true_regnum (set_dest) == true_regnum (shift_count))
13308 return true;
13311 return false;
13314 /* Return true if destination reg of SET_INSN is shift count of
13315 USE_INSN. */
13317 bool
13318 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
13320 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
13321 PATTERN (use_insn));
13324 /* Return TRUE or FALSE depending on whether the unary operator meets the
13325 appropriate constraints. */
13327 int
13328 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13329 enum machine_mode mode ATTRIBUTE_UNUSED,
13330 rtx operands[2] ATTRIBUTE_UNUSED)
13332 /* If one of the operands is in memory, source and destination must match. */
13333 if ((MEM_P (operands[0])
13334 || MEM_P (operands[1]))
13335 && ! rtx_equal_p (operands[0], operands[1]))
13336 return FALSE;
13337 return TRUE;
13340 /* Post-reload splitter for converting an SF or DFmode value in an
13341 SSE register into an unsigned SImode. */
13343 void
13344 ix86_split_convert_uns_si_sse (rtx operands[])
13346 enum machine_mode vecmode;
13347 rtx value, large, zero_or_two31, input, two31, x;
13349 large = operands[1];
13350 zero_or_two31 = operands[2];
13351 input = operands[3];
13352 two31 = operands[4];
13353 vecmode = GET_MODE (large);
13354 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
13356 /* Load up the value into the low element. We must ensure that the other
13357 elements are valid floats -- zero is the easiest such value. */
13358 if (MEM_P (input))
13360 if (vecmode == V4SFmode)
13361 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
13362 else
13363 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
13365 else
13367 input = gen_rtx_REG (vecmode, REGNO (input));
13368 emit_move_insn (value, CONST0_RTX (vecmode));
13369 if (vecmode == V4SFmode)
13370 emit_insn (gen_sse_movss (value, value, input));
13371 else
13372 emit_insn (gen_sse2_movsd (value, value, input));
13375 emit_move_insn (large, two31);
13376 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
13378 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
13379 emit_insn (gen_rtx_SET (VOIDmode, large, x));
13381 x = gen_rtx_AND (vecmode, zero_or_two31, large);
13382 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
13384 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
13385 emit_insn (gen_rtx_SET (VOIDmode, value, x));
13387 large = gen_rtx_REG (V4SImode, REGNO (large));
13388 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
13390 x = gen_rtx_REG (V4SImode, REGNO (value));
13391 if (vecmode == V4SFmode)
13392 emit_insn (gen_sse2_cvttps2dq (x, value));
13393 else
13394 emit_insn (gen_sse2_cvttpd2dq (x, value));
13395 value = x;
13397 emit_insn (gen_xorv4si3 (value, value, large));
13400 /* Convert an unsigned DImode value into a DFmode, using only SSE.
13401 Expects the 64-bit DImode to be supplied in a pair of integral
13402 registers. Requires SSE2; will use SSE3 if available. For x86_32,
13403 -mfpmath=sse, !optimize_size only. */
13405 void
13406 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
13408 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
13409 rtx int_xmm, fp_xmm;
13410 rtx biases, exponents;
13411 rtx x;
13413 int_xmm = gen_reg_rtx (V4SImode);
13414 if (TARGET_INTER_UNIT_MOVES)
13415 emit_insn (gen_movdi_to_sse (int_xmm, input));
13416 else if (TARGET_SSE_SPLIT_REGS)
13418 emit_clobber (int_xmm);
13419 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
13421 else
13423 x = gen_reg_rtx (V2DImode);
13424 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
13425 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
13428 x = gen_rtx_CONST_VECTOR (V4SImode,
13429 gen_rtvec (4, GEN_INT (0x43300000UL),
13430 GEN_INT (0x45300000UL),
13431 const0_rtx, const0_rtx));
13432 exponents = validize_mem (force_const_mem (V4SImode, x));
13434 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
13435 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
13437 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
13438 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
13439 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
13440 (0x1.0p84 + double(fp_value_hi_xmm)).
13441 Note these exponents differ by 32. */
13443 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
13445 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
13446 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
13447 real_ldexp (&bias_lo_rvt, &dconst1, 52);
13448 real_ldexp (&bias_hi_rvt, &dconst1, 84);
13449 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
13450 x = const_double_from_real_value (bias_hi_rvt, DFmode);
13451 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
13452 biases = validize_mem (force_const_mem (V2DFmode, biases));
13453 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
13455 /* Add the upper and lower DFmode values together. */
13456 if (TARGET_SSE3)
13457 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
13458 else
13460 x = copy_to_mode_reg (V2DFmode, fp_xmm);
13461 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
13462 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
13465 ix86_expand_vector_extract (false, target, fp_xmm, 0);
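/* (Annotation.)  Worked example of the bias trick, assuming input
   hi:lo = 1:2, i.e. the unsigned value 2^32 + 2: punpckldq builds the
   doubles 0x4330000000000002 (= 0x1.0p52 + 2; the ulp of 2^52 is 1)
   and 0x4530000000000001 (= 0x1.0p84 + 1 * 2^32; the ulp of 2^84 is
   2^32).  Subtracting the 0x1.0p52 / 0x1.0p84 biases leaves exactly
   2.0 and 4294967296.0, and the final add yields 4294967298.0.  */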
13468 /* Not used, but eases macroization of patterns. */
13469 void
13470 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
13471 rtx input ATTRIBUTE_UNUSED)
13473 gcc_unreachable ();
13476 /* Convert an unsigned SImode value into a DFmode. Only currently used
13477 for SSE, but applicable anywhere. */
13479 void
13480 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
13482 REAL_VALUE_TYPE TWO31r;
13483 rtx x, fp;
13485 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
13486 NULL, 1, OPTAB_DIRECT);
13488 fp = gen_reg_rtx (DFmode);
13489 emit_insn (gen_floatsidf2 (fp, x));
13491 real_ldexp (&TWO31r, &dconst1, 31);
13492 x = const_double_from_real_value (TWO31r, DFmode);
13494 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
13495 if (x != target)
13496 emit_move_insn (target, x);
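/* (Annotation.)  The sequence above computes (double) u as
   (double) (int) (u + 0x80000000) + 0x1.0p31: the wrapping add biases
   u into signed range, and adding 2^31.0 back is exact because the
   DFmode significand has 53 bits.  E.g. u = 0xffffffff gives
   (int) 0x7fffffff -> 2147483647.0, plus 2147483648.0 = 4294967295.0.  */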
13499 /* Convert a signed DImode value into a DFmode. Only used for SSE in
13500 32-bit mode; otherwise we have a direct convert instruction. */
13502 void
13503 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
13505 REAL_VALUE_TYPE TWO32r;
13506 rtx fp_lo, fp_hi, x;
13508 fp_lo = gen_reg_rtx (DFmode);
13509 fp_hi = gen_reg_rtx (DFmode);
13511 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
13513 real_ldexp (&TWO32r, &dconst1, 32);
13514 x = const_double_from_real_value (TWO32r, DFmode);
13515 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
13517 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
13519 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
13520 0, OPTAB_DIRECT);
13521 if (x != target)
13522 emit_move_insn (target, x);
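/* (Annotation.)  I.e. (double) (int64) x
   == (double) hi (x) * 0x1.0p32 + (double) (unsigned) lo (x);
   the signed high word carries the sign of the whole value, while the
   low word always contributes an unsigned quantity in [0, 2^32).  */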
13525 /* Convert an unsigned SImode value into a SFmode, using only SSE.
13526 For x86_32, -mfpmath=sse, !optimize_size only. */
13527 void
13528 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
13530 REAL_VALUE_TYPE ONE16r;
13531 rtx fp_hi, fp_lo, int_hi, int_lo, x;
13533 real_ldexp (&ONE16r, &dconst1, 16);
13534 x = const_double_from_real_value (ONE16r, SFmode);
13535 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
13536 NULL, 0, OPTAB_DIRECT);
13537 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
13538 NULL, 0, OPTAB_DIRECT);
13539 fp_hi = gen_reg_rtx (SFmode);
13540 fp_lo = gen_reg_rtx (SFmode);
13541 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
13542 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
13543 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
13544 0, OPTAB_DIRECT);
13545 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
13546 0, OPTAB_DIRECT);
13547 if (!rtx_equal_p (target, fp_hi))
13548 emit_move_insn (target, fp_hi);
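/* (Annotation.)  The split above computes (float) u
   == (float) (u >> 16) * 0x1.0p16 + (float) (u & 0xffff); both halves
   fit in 16 bits and therefore convert exactly through the signed
   cvtsi2ss, so only the final multiply and add can round.  */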
13551 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
13552 then replicate the value for all elements of the vector
13553 register. */
13555 static rtx
13556 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
13558 rtvec v;
13559 switch (mode)
13561 case SImode:
13562 gcc_assert (vect);
13563 v = gen_rtvec (4, value, value, value, value);
13564 return gen_rtx_CONST_VECTOR (V4SImode, v);
13566 case DImode:
13567 gcc_assert (vect);
13568 v = gen_rtvec (2, value, value);
13569 return gen_rtx_CONST_VECTOR (V2DImode, v);
13571 case SFmode:
13572 if (vect)
13573 v = gen_rtvec (4, value, value, value, value);
13574 else
13575 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
13576 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13577 return gen_rtx_CONST_VECTOR (V4SFmode, v);
13579 case DFmode:
13580 if (vect)
13581 v = gen_rtvec (2, value, value);
13582 else
13583 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
13584 return gen_rtx_CONST_VECTOR (V2DFmode, v);
13586 default:
13587 gcc_unreachable ();
13591 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
13592 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
13593 for an SSE register. If VECT is true, then replicate the mask for
13594 all elements of the vector register. If INVERT is true, then create
13595 a mask excluding the sign bit. */
13597 static rtx
13598 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
13600 enum machine_mode vec_mode, imode;
13601 HOST_WIDE_INT hi, lo;
13602 int shift = 63;
13603 rtx v;
13604 rtx mask;
13606 /* Find the sign bit, sign extended to 2*HWI. */
13607 switch (mode)
13609 case SImode:
13610 case SFmode:
13611 imode = SImode;
13612 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
13613 lo = 0x80000000, hi = lo < 0;
13614 break;
13616 case DImode:
13617 case DFmode:
13618 imode = DImode;
13619 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
13620 if (HOST_BITS_PER_WIDE_INT >= 64)
13621 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
13622 else
13623 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13624 break;
13626 case TImode:
13627 case TFmode:
13628 vec_mode = VOIDmode;
13629 if (HOST_BITS_PER_WIDE_INT >= 64)
13631 imode = TImode;
13632 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
13634 else
13636 rtvec vec;
13638 imode = DImode;
13639 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13641 if (invert)
13643 lo = ~lo, hi = ~hi;
13644 v = constm1_rtx;
13646 else
13647 v = const0_rtx;
13649 mask = immed_double_const (lo, hi, imode);
13651 vec = gen_rtvec (2, v, mask);
13652 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
13653 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
13655 return v;
13657 break;
13659 default:
13660 gcc_unreachable ();
13663 if (invert)
13664 lo = ~lo, hi = ~hi;
13666 /* Force this value into the low part of a fp vector constant. */
13667 mask = immed_double_const (lo, hi, imode);
13668 mask = gen_lowpart (mode, mask);
13670 if (vec_mode == VOIDmode)
13671 return force_reg (mode, mask);
13673 v = ix86_build_const_vector (mode, vect, mask);
13674 return force_reg (vec_mode, v);
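/* (Annotation.)  Examples of the masks built above: for SFmode with
   VECT set, invert == false yields { 0x80000000 x4 } in V4SFmode
   (used with XOR for negation and in copysign), while invert == true
   yields the complement { 0x7fffffff x4 } (used with AND to clear the
   sign bit for ABS).  */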
13677 /* Generate code for floating point ABS or NEG. */
13679 void
13680 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
13681 rtx operands[])
13683 rtx mask, set, use, clob, dst, src;
13684 bool use_sse = false;
13685 bool vector_mode = VECTOR_MODE_P (mode);
13686 enum machine_mode elt_mode = mode;
13688 if (vector_mode)
13690 elt_mode = GET_MODE_INNER (mode);
13691 use_sse = true;
13693 else if (mode == TFmode)
13694 use_sse = true;
13695 else if (TARGET_SSE_MATH)
13696 use_sse = SSE_FLOAT_MODE_P (mode);
13698 /* NEG and ABS performed with SSE use bitwise mask operations.
13699 Create the appropriate mask now. */
13700 if (use_sse)
13701 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
13702 else
13703 mask = NULL_RTX;
13705 dst = operands[0];
13706 src = operands[1];
13708 if (vector_mode)
13710 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
13711 set = gen_rtx_SET (VOIDmode, dst, set);
13712 emit_insn (set);
13714 else
13716 set = gen_rtx_fmt_e (code, mode, src);
13717 set = gen_rtx_SET (VOIDmode, dst, set);
13718 if (mask)
13720 use = gen_rtx_USE (VOIDmode, mask);
13721 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13722 emit_insn (gen_rtx_PARALLEL (VOIDmode,
13723 gen_rtvec (3, set, use, clob)));
13725 else
13726 emit_insn (set);
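/* (Annotation.)  Bitwise view of the SSE path above:
     NEG:  dst = src ^ sign-bit-mask    (flip the sign bit)
     ABS:  dst = src & ~sign-bit-mask   (clear the sign bit)
   which is why INVERT is passed as (code == ABS) when the mask is
   built.  */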
13730 /* Expand a copysign operation. Special case operand 0 being a constant. */
13732 void
13733 ix86_expand_copysign (rtx operands[])
13735 enum machine_mode mode;
13736 rtx dest, op0, op1, mask, nmask;
13738 dest = operands[0];
13739 op0 = operands[1];
13740 op1 = operands[2];
13742 mode = GET_MODE (dest);
13744 if (GET_CODE (op0) == CONST_DOUBLE)
13746 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
13748 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
13749 op0 = simplify_unary_operation (ABS, mode, op0, mode);
13751 if (mode == SFmode || mode == DFmode)
13753 enum machine_mode vmode;
13755 vmode = mode == SFmode ? V4SFmode : V2DFmode;
13757 if (op0 == CONST0_RTX (mode))
13758 op0 = CONST0_RTX (vmode);
13759 else
13761 rtvec v;
13763 if (mode == SFmode)
13764 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
13765 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13766 else
13767 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
13769 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
13772 else if (op0 != CONST0_RTX (mode))
13773 op0 = force_reg (mode, op0);
13775 mask = ix86_build_signbit_mask (mode, 0, 0);
13777 if (mode == SFmode)
13778 copysign_insn = gen_copysignsf3_const;
13779 else if (mode == DFmode)
13780 copysign_insn = gen_copysigndf3_const;
13781 else
13782 copysign_insn = gen_copysigntf3_const;
13784 emit_insn (copysign_insn (dest, op0, op1, mask));
13786 else
13788 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
13790 nmask = ix86_build_signbit_mask (mode, 0, 1);
13791 mask = ix86_build_signbit_mask (mode, 0, 0);
13793 if (mode == SFmode)
13794 copysign_insn = gen_copysignsf3_var;
13795 else if (mode == DFmode)
13796 copysign_insn = gen_copysigndf3_var;
13797 else
13798 copysign_insn = gen_copysigntf3_var;
13800 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
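/* (Annotation.)  Both forms above implement the identity
     copysign (x, y) = abs (x) | (y & sign-mask),
   i.e. (x & ~sign-mask) | (y & sign-mask).  The constant-op0 form
   folds abs (x) at expand time, so its splitter only needs an AND
   with the sign mask followed by an IOR; the variable form needs the
   extra ~sign-mask (NMASK) operand as well.  */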
13804 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
13805 be a constant, and so has already been expanded into a vector constant. */
13807 void
13808 ix86_split_copysign_const (rtx operands[])
13810 enum machine_mode mode, vmode;
13811 rtx dest, op0, op1, mask, x;
13813 dest = operands[0];
13814 op0 = operands[1];
13815 op1 = operands[2];
13816 mask = operands[3];
13818 mode = GET_MODE (dest);
13819 vmode = GET_MODE (mask);
13821 dest = simplify_gen_subreg (vmode, dest, mode, 0);
13822 x = gen_rtx_AND (vmode, dest, mask);
13823 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13825 if (op0 != CONST0_RTX (vmode))
13827 x = gen_rtx_IOR (vmode, dest, op0);
13828 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13832 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
13833 so we have to do two masks. */
13835 void
13836 ix86_split_copysign_var (rtx operands[])
13838 enum machine_mode mode, vmode;
13839 rtx dest, scratch, op0, op1, mask, nmask, x;
13841 dest = operands[0];
13842 scratch = operands[1];
13843 op0 = operands[2];
13844 op1 = operands[3];
13845 nmask = operands[4];
13846 mask = operands[5];
13848 mode = GET_MODE (dest);
13849 vmode = GET_MODE (mask);
13851 if (rtx_equal_p (op0, op1))
13853 /* Shouldn't happen often (it's useless, obviously), but when it does
13854 we'd generate incorrect code if we continue below. */
13855 emit_move_insn (dest, op0);
13856 return;
13859 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
13861 gcc_assert (REGNO (op1) == REGNO (scratch));
13863 x = gen_rtx_AND (vmode, scratch, mask);
13864 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13866 dest = mask;
13867 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13868 x = gen_rtx_NOT (vmode, dest);
13869 x = gen_rtx_AND (vmode, x, op0);
13870 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13872 else
13874 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
13876 x = gen_rtx_AND (vmode, scratch, mask);
13878 else /* alternative 2,4 */
13880 gcc_assert (REGNO (mask) == REGNO (scratch));
13881 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
13882 x = gen_rtx_AND (vmode, scratch, op1);
13884 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13886 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
13888 dest = simplify_gen_subreg (vmode, op0, mode, 0);
13889 x = gen_rtx_AND (vmode, dest, nmask);
13891 else /* alternative 3,4 */
13893 gcc_assert (REGNO (nmask) == REGNO (dest));
13894 dest = nmask;
13895 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13896 x = gen_rtx_AND (vmode, dest, op0);
13898 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13901 x = gen_rtx_IOR (vmode, dest, scratch);
13902 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13905 /* Return TRUE or FALSE depending on whether the first SET in INSN
13906 has source and destination with matching CC modes, and that the
13907 CC mode is at least as constrained as REQ_MODE. */
13909 int
13910 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
13912 rtx set;
13913 enum machine_mode set_mode;
13915 set = PATTERN (insn);
13916 if (GET_CODE (set) == PARALLEL)
13917 set = XVECEXP (set, 0, 0);
13918 gcc_assert (GET_CODE (set) == SET);
13919 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
13921 set_mode = GET_MODE (SET_DEST (set));
13922 switch (set_mode)
13924 case CCNOmode:
13925 if (req_mode != CCNOmode
13926 && (req_mode != CCmode
13927 || XEXP (SET_SRC (set), 1) != const0_rtx))
13928 return 0;
13929 break;
13930 case CCmode:
13931 if (req_mode == CCGCmode)
13932 return 0;
13933 /* FALLTHRU */
13934 case CCGCmode:
13935 if (req_mode == CCGOCmode || req_mode == CCNOmode)
13936 return 0;
13937 /* FALLTHRU */
13938 case CCGOCmode:
13939 if (req_mode == CCZmode)
13940 return 0;
13941 /* FALLTHRU */
13942 case CCAmode:
13943 case CCCmode:
13944 case CCOmode:
13945 case CCSmode:
13946 case CCZmode:
13947 break;
13949 default:
13950 gcc_unreachable ();
13953 return (GET_MODE (SET_SRC (set)) == set_mode);
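/* (Annotation.)  The cascaded fall-throughs above encode the ordering
   CCZmode < CCGOCmode < CCGCmode < CCmode, from fewest guaranteed
   flags (ZF only) to all flags.  A pattern that only guarantees
   REQ_MODE semantics is rejected whenever the insn's declared CC mode
   sits strictly above REQ_MODE in this ordering.  */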
13956 /* Generate insn patterns to do an integer compare of OPERANDS. */
13958 static rtx
13959 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
13961 enum machine_mode cmpmode;
13962 rtx tmp, flags;
13964 cmpmode = SELECT_CC_MODE (code, op0, op1);
13965 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
13967 /* This is very simple, but making the interface the same as in the
13968 FP case makes the rest of the code easier. */
13969 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
13970 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
13972 /* Return the test that should be put into the flags user, i.e.
13973 the bcc, scc, or cmov instruction. */
13974 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
13977 /* Figure out whether to use ordered or unordered fp comparisons.
13978 Return the appropriate mode to use. */
13980 enum machine_mode
13981 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
13983 /* ??? In order to make all comparisons reversible, we do all comparisons
13984 non-trapping when compiling for IEEE. Once gcc is able to distinguish
13985 between trapping and nontrapping forms of comparisons, we can make inequality
13986 comparisons trapping again, since that results in better code when using
13987 FCOM based compares. */
13988 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
13991 enum machine_mode
13992 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
13994 enum machine_mode mode = GET_MODE (op0);
13996 if (SCALAR_FLOAT_MODE_P (mode))
13998 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
13999 return ix86_fp_compare_mode (code);
14002 switch (code)
14004 /* Only zero flag is needed. */
14005 case EQ: /* ZF=0 */
14006 case NE: /* ZF!=0 */
14007 return CCZmode;
14008 /* Codes needing carry flag. */
14009 case GEU: /* CF=0 */
14010 case LTU: /* CF=1 */
14011 /* Detect overflow checks. They need just the carry flag. */
14012 if (GET_CODE (op0) == PLUS
14013 && rtx_equal_p (op1, XEXP (op0, 0)))
14014 return CCCmode;
14015 else
14016 return CCmode;
14017 case GTU: /* CF=0 & ZF=0 */
14018 case LEU: /* CF=1 | ZF=1 */
14019 /* Detect overflow checks. They need just the carry flag. */
14020 if (GET_CODE (op0) == MINUS
14021 && rtx_equal_p (op1, XEXP (op0, 0)))
14022 return CCCmode;
14023 else
14024 return CCmode;
14025 /* Codes possibly doable only with sign flag when
14026 comparing against zero. */
14027 case GE: /* SF=OF or SF=0 */
14028 case LT: /* SF<>OF or SF=1 */
14029 if (op1 == const0_rtx)
14030 return CCGOCmode;
14031 else
14032 /* For other cases Carry flag is not required. */
14033 return CCGCmode;
14034 /* Codes doable only with sign flag when comparing
14035 against zero, but we miss jump instruction for it
14036 so we need to use relational tests against overflow
14037 that thus needs to be zero. */
14038 case GT: /* ZF=0 & SF=OF */
14039 case LE: /* ZF=1 | SF<>OF */
14040 if (op1 == const0_rtx)
14041 return CCNOmode;
14042 else
14043 return CCGCmode;
14044 /* The strcmp pattern emits a (use flags), and combine may ask us for
14045 the proper mode. */
14046 case USE:
14047 return CCmode;
14048 default:
14049 gcc_unreachable ();
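/* (Annotation.)  Example for the CCCmode cases above: an overflow
   check spelled "if (a + b < a)" reaches here with op0 = (plus a b)
   and op1 = a, so only the carry flag is meaningful and the addition
   itself can supply the flags without a separate compare.  */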
14053 /* Return the fixed registers used for condition codes. */
14055 static bool
14056 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
14058 *p1 = FLAGS_REG;
14059 *p2 = FPSR_REG;
14060 return true;
14063 /* If two condition code modes are compatible, return a condition code
14064 mode which is compatible with both. Otherwise, return
14065 VOIDmode. */
14067 static enum machine_mode
14068 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
14070 if (m1 == m2)
14071 return m1;
14073 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
14074 return VOIDmode;
14076 if ((m1 == CCGCmode && m2 == CCGOCmode)
14077 || (m1 == CCGOCmode && m2 == CCGCmode))
14078 return CCGCmode;
14080 switch (m1)
14082 default:
14083 gcc_unreachable ();
14085 case CCmode:
14086 case CCGCmode:
14087 case CCGOCmode:
14088 case CCNOmode:
14089 case CCAmode:
14090 case CCCmode:
14091 case CCOmode:
14092 case CCSmode:
14093 case CCZmode:
14094 switch (m2)
14096 default:
14097 return VOIDmode;
14099 case CCmode:
14100 case CCGCmode:
14101 case CCGOCmode:
14102 case CCNOmode:
14103 case CCAmode:
14104 case CCCmode:
14105 case CCOmode:
14106 case CCSmode:
14107 case CCZmode:
14108 return CCmode;
14111 case CCFPmode:
14112 case CCFPUmode:
14113 /* These are only compatible with themselves, which we already
14114 checked above. */
14115 return VOIDmode;
14119 /* Split comparison code CODE into comparisons we can do using branch
14120 instructions. BYPASS_CODE is the comparison code for the branch that
14121 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
14122 is not required, its value is set to UNKNOWN.
14123 We never require more than two branches. */
14125 void
14126 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
14127 enum rtx_code *first_code,
14128 enum rtx_code *second_code)
14130 *first_code = code;
14131 *bypass_code = UNKNOWN;
14132 *second_code = UNKNOWN;
14134 /* The fcomi comparison sets flags as follows:
14136 cmp ZF PF CF
14137 > 0 0 0
14138 < 0 0 1
14139 = 1 0 0
14140 un 1 1 1 */
14142 switch (code)
14144 case GT: /* GTU - CF=0 & ZF=0 */
14145 case GE: /* GEU - CF=0 */
14146 case ORDERED: /* PF=0 */
14147 case UNORDERED: /* PF=1 */
14148 case UNEQ: /* EQ - ZF=1 */
14149 case UNLT: /* LTU - CF=1 */
14150 case UNLE: /* LEU - CF=1 | ZF=1 */
14151 case LTGT: /* EQ - ZF=0 */
14152 break;
14153 case LT: /* LTU - CF=1 - fails on unordered */
14154 *first_code = UNLT;
14155 *bypass_code = UNORDERED;
14156 break;
14157 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
14158 *first_code = UNLE;
14159 *bypass_code = UNORDERED;
14160 break;
14161 case EQ: /* EQ - ZF=1 - fails on unordered */
14162 *first_code = UNEQ;
14163 *bypass_code = UNORDERED;
14164 break;
14165 case NE: /* NE - ZF=0 - fails on unordered */
14166 *first_code = LTGT;
14167 *second_code = UNORDERED;
14168 break;
14169 case UNGE: /* GEU - CF=0 - fails on unordered */
14170 *first_code = GE;
14171 *second_code = UNORDERED;
14172 break;
14173 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
14174 *first_code = GT;
14175 *second_code = UNORDERED;
14176 break;
14177 default:
14178 gcc_unreachable ();
14180 if (!TARGET_IEEE_FP)
14182 *second_code = UNKNOWN;
14183 *bypass_code = UNKNOWN;
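/* (Annotation.)  Example splits under TARGET_IEEE_FP: EQ becomes
   first_code = UNEQ guarded by bypass_code = UNORDERED,

       jp   1f        ; unordered: skip, EQ is false
       je   label     ; ZF/C3 set: equal
     1:

   while NE becomes first_code = LTGT plus second_code = UNORDERED,
   since a NaN operand must also branch to the NE target.  */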
14187 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
14188 All following functions use the number of instructions as the cost metric.
14189 In the future this should be tweaked to compute bytes for optimize_size and
14190 take into account performance of various instructions on various CPUs. */
14191 static int
14192 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
14194 if (!TARGET_IEEE_FP)
14195 return 4;
14196 /* The cost of code output by ix86_expand_fp_compare. */
14197 switch (code)
14199 case UNLE:
14200 case UNLT:
14201 case LTGT:
14202 case GT:
14203 case GE:
14204 case UNORDERED:
14205 case ORDERED:
14206 case UNEQ:
14207 return 4;
14208 break;
14209 case LT:
14210 case NE:
14211 case EQ:
14212 case UNGE:
14213 return 5;
14214 break;
14215 case LE:
14216 case UNGT:
14217 return 6;
14218 break;
14219 default:
14220 gcc_unreachable ();
14224 /* Return cost of comparison done using fcomi operation.
14225 See ix86_fp_comparison_arithmetics_cost for the metrics. */
14226 static int
14227 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
14229 enum rtx_code bypass_code, first_code, second_code;
14230 /* Return an arbitrarily high cost when the instruction is not supported -
14231 this prevents gcc from using it. */
14232 if (!TARGET_CMOVE)
14233 return 1024;
14234 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14235 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
14238 /* Return cost of comparison done using sahf operation.
14239 See ix86_fp_comparison_arithmetics_cost for the metrics. */
14240 static int
14241 ix86_fp_comparison_sahf_cost (enum rtx_code code)
14243 enum rtx_code bypass_code, first_code, second_code;
14244 /* Return an arbitrarily high cost when the instruction is not preferred -
14245 this prevents gcc from using it. */
14246 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
14247 return 1024;
14248 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14249 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
14252 /* Compute cost of the comparison done using any method.
14253 See ix86_fp_comparison_arithmetics_cost for the metrics. */
14254 static int
14255 ix86_fp_comparison_cost (enum rtx_code code)
14257 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
14258 int min;
14260 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
14261 sahf_cost = ix86_fp_comparison_sahf_cost (code);
14263 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
14264 if (min > sahf_cost)
14265 min = sahf_cost;
14266 if (min > fcomi_cost)
14267 min = fcomi_cost;
14268 return min;
14271 /* Return true if we should use an FCOMI instruction for this
14272 fp comparison. */
14274 int
14275 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
14277 enum rtx_code swapped_code = swap_condition (code);
14279 return ((ix86_fp_comparison_cost (code)
14280 == ix86_fp_comparison_fcomi_cost (code))
14281 || (ix86_fp_comparison_cost (swapped_code)
14282 == ix86_fp_comparison_fcomi_cost (swapped_code)));
14285 /* Swap, force into registers, or otherwise massage the two operands
14286 to a fp comparison. The operands are updated in place; the new
14287 comparison code is returned. */
14289 static enum rtx_code
14290 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
14292 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
14293 rtx op0 = *pop0, op1 = *pop1;
14294 enum machine_mode op_mode = GET_MODE (op0);
14295 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
14297 /* All of the unordered compare instructions only work on registers.
14298 The same is true of the fcomi compare instructions. The XFmode
14299 compare instructions require registers except when comparing
14300 against zero or when converting operand 1 from fixed point to
14301 floating point. */
14303 if (!is_sse
14304 && (fpcmp_mode == CCFPUmode
14305 || (op_mode == XFmode
14306 && ! (standard_80387_constant_p (op0) == 1
14307 || standard_80387_constant_p (op1) == 1)
14308 && GET_CODE (op1) != FLOAT)
14309 || ix86_use_fcomi_compare (code)))
14311 op0 = force_reg (op_mode, op0);
14312 op1 = force_reg (op_mode, op1);
14314 else
14316 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
14317 things around if they appear profitable, otherwise force op0
14318 into a register. */
14320 if (standard_80387_constant_p (op0) == 0
14321 || (MEM_P (op0)
14322 && ! (standard_80387_constant_p (op1) == 0
14323 || MEM_P (op1))))
14325 rtx tmp;
14326 tmp = op0, op0 = op1, op1 = tmp;
14327 code = swap_condition (code);
14330 if (!REG_P (op0))
14331 op0 = force_reg (op_mode, op0);
14333 if (CONSTANT_P (op1))
14335 int tmp = standard_80387_constant_p (op1);
14336 if (tmp == 0)
14337 op1 = validize_mem (force_const_mem (op_mode, op1));
14338 else if (tmp == 1)
14340 if (TARGET_CMOVE)
14341 op1 = force_reg (op_mode, op1);
14343 else
14344 op1 = force_reg (op_mode, op1);
14348 /* Try to rearrange the comparison to make it cheaper. */
14349 if (ix86_fp_comparison_cost (code)
14350 > ix86_fp_comparison_cost (swap_condition (code))
14351 && (REG_P (op1) || can_create_pseudo_p ()))
14353 rtx tmp;
14354 tmp = op0, op0 = op1, op1 = tmp;
14355 code = swap_condition (code);
14356 if (!REG_P (op0))
14357 op0 = force_reg (op_mode, op0);
14360 *pop0 = op0;
14361 *pop1 = op1;
14362 return code;
14365 /* Convert comparison codes we use to represent FP comparison to integer
14366 code that will result in proper branch. Return UNKNOWN if no such code
14367 is available. */
14369 enum rtx_code
14370 ix86_fp_compare_code_to_integer (enum rtx_code code)
14372 switch (code)
14374 case GT:
14375 return GTU;
14376 case GE:
14377 return GEU;
14378 case ORDERED:
14379 case UNORDERED:
14380 return code;
14381 break;
14382 case UNEQ:
14383 return EQ;
14384 break;
14385 case UNLT:
14386 return LTU;
14387 break;
14388 case UNLE:
14389 return LEU;
14390 break;
14391 case LTGT:
14392 return NE;
14393 break;
14394 default:
14395 return UNKNOWN;
14399 /* Generate insn patterns to do a floating point compare of OPERANDS. */
14401 static rtx
14402 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
14403 rtx *second_test, rtx *bypass_test)
14405 enum machine_mode fpcmp_mode, intcmp_mode;
14406 rtx tmp, tmp2;
14407 int cost = ix86_fp_comparison_cost (code);
14408 enum rtx_code bypass_code, first_code, second_code;
14410 fpcmp_mode = ix86_fp_compare_mode (code);
14411 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
14413 if (second_test)
14414 *second_test = NULL_RTX;
14415 if (bypass_test)
14416 *bypass_test = NULL_RTX;
14418 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14420 /* Do fcomi/sahf based test when profitable. */
14421 if (ix86_fp_comparison_arithmetics_cost (code) > cost
14422 && (bypass_code == UNKNOWN || bypass_test)
14423 && (second_code == UNKNOWN || second_test))
14425 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14426 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
14427 tmp);
14428 if (TARGET_CMOVE)
14429 emit_insn (tmp);
14430 else
14432 gcc_assert (TARGET_SAHF);
14434 if (!scratch)
14435 scratch = gen_reg_rtx (HImode);
14436 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
14438 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
14441 /* The FP codes work out to act like unsigned. */
14442 intcmp_mode = fpcmp_mode;
14443 code = first_code;
14444 if (bypass_code != UNKNOWN)
14445 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
14446 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14447 const0_rtx);
14448 if (second_code != UNKNOWN)
14449 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
14450 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14451 const0_rtx);
14453 else
14455 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
14456 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14457 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
14458 if (!scratch)
14459 scratch = gen_reg_rtx (HImode);
14460 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
14462 /* In the unordered case, we have to check C2 for NaN's, which
14463 doesn't happen to work out to anything nice combination-wise.
14464 So do some bit twiddling on the value we've got in AH to come
14465 up with an appropriate set of condition codes. */
14467 intcmp_mode = CCNOmode;
14468 switch (code)
14470 case GT:
14471 case UNGT:
14472 if (code == GT || !TARGET_IEEE_FP)
14474 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14475 code = EQ;
14477 else
14479 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14480 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14481 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
14482 intcmp_mode = CCmode;
14483 code = GEU;
14485 break;
14486 case LT:
14487 case UNLT:
14488 if (code == LT && TARGET_IEEE_FP)
14490 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14491 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
14492 intcmp_mode = CCmode;
14493 code = EQ;
14495 else
14497 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
14498 code = NE;
14500 break;
14501 case GE:
14502 case UNGE:
14503 if (code == GE || !TARGET_IEEE_FP)
14505 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
14506 code = EQ;
14508 else
14510 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14511 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14512 GEN_INT (0x01)));
14513 code = NE;
14515 break;
14516 case LE:
14517 case UNLE:
14518 if (code == LE && TARGET_IEEE_FP)
14520 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14521 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14522 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14523 intcmp_mode = CCmode;
14524 code = LTU;
14526 else
14528 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14529 code = NE;
14531 break;
14532 case EQ:
14533 case UNEQ:
14534 if (code == EQ && TARGET_IEEE_FP)
14536 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14537 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14538 intcmp_mode = CCmode;
14539 code = EQ;
14541 else
14543 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14544 code = NE;
14545 break;
14547 break;
14548 case NE:
14549 case LTGT:
14550 if (code == NE && TARGET_IEEE_FP)
14552 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14553 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14554 GEN_INT (0x40)));
14555 code = NE;
14557 else
14559 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14560 code = EQ;
14562 break;
14564 case UNORDERED:
14565 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14566 code = NE;
14567 break;
14568 case ORDERED:
14569 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14570 code = EQ;
14571 break;
14573 default:
14574 gcc_unreachable ();
14578 /* Return the test that should be put into the flags user, i.e.
14579 the bcc, scc, or cmov instruction. */
14580 return gen_rtx_fmt_ee (code, VOIDmode,
14581 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14582 const0_rtx);
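/* (Annotation.)  Key to the magic constants in the fnstsw path above:
   once the status word lands in AH, C0 (like CF) is bit 0 = 0x01, C2
   (unordered, like PF) is bit 2 = 0x04, and C3 (like ZF) is bit 6 =
   0x40; 0x45 therefore tests all three at once.  E.g. GT checks
   (AH & 0x45) == 0 (not less, not equal, not unordered), while the
   IEEE UNGT path uses the decrement trick so that both 0x00 (greater)
   and 0x45 (unordered) compare GEU against 0x44.  */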
14585 rtx
14586 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
14588 rtx op0, op1, ret;
14589 op0 = ix86_compare_op0;
14590 op1 = ix86_compare_op1;
14592 if (second_test)
14593 *second_test = NULL_RTX;
14594 if (bypass_test)
14595 *bypass_test = NULL_RTX;
14597 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC)
14598 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1);
14600 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
14602 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
14603 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14604 second_test, bypass_test);
14606 else
14607 ret = ix86_expand_int_compare (code, op0, op1);
14609 return ret;
14612 /* Return true if the CODE will result in nontrivial jump sequence. */
14613 bool
14614 ix86_fp_jump_nontrivial_p (enum rtx_code code)
14616 enum rtx_code bypass_code, first_code, second_code;
14617 if (!TARGET_CMOVE)
14618 return true;
14619 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14620 return bypass_code != UNKNOWN || second_code != UNKNOWN;
14623 void
14624 ix86_expand_branch (enum rtx_code code, rtx label)
14626 rtx tmp;
14628 switch (GET_MODE (ix86_compare_op0))
14630 case QImode:
14631 case HImode:
14632 case SImode:
14633 simple:
14634 tmp = ix86_expand_compare (code, NULL, NULL);
14635 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14636 gen_rtx_LABEL_REF (VOIDmode, label),
14637 pc_rtx);
14638 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14639 return;
14641 case SFmode:
14642 case DFmode:
14643 case XFmode:
14645 rtvec vec;
14646 int use_fcomi;
14647 enum rtx_code bypass_code, first_code, second_code;
14649 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
14650 &ix86_compare_op1);
14652 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14654 /* Check whether we will use the natural sequence with one jump. If
14655 so, we can expand jump early. Otherwise delay expansion by
14656 creating compound insn to not confuse optimizers. */
14657 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
14659 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
14660 gen_rtx_LABEL_REF (VOIDmode, label),
14661 pc_rtx, NULL_RTX, NULL_RTX);
14663 else
14665 tmp = gen_rtx_fmt_ee (code, VOIDmode,
14666 ix86_compare_op0, ix86_compare_op1);
14667 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14668 gen_rtx_LABEL_REF (VOIDmode, label),
14669 pc_rtx);
14670 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
14672 use_fcomi = ix86_use_fcomi_compare (code);
14673 vec = rtvec_alloc (3 + !use_fcomi);
14674 RTVEC_ELT (vec, 0) = tmp;
14675 RTVEC_ELT (vec, 1)
14676 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
14677 RTVEC_ELT (vec, 2)
14678 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
14679 if (! use_fcomi)
14680 RTVEC_ELT (vec, 3)
14681 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
14683 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
14685 return;
14688 case DImode:
14689 if (TARGET_64BIT)
14690 goto simple;
14691 case TImode:
14692 /* Expand DImode branch into multiple compare+branch. */
14694 rtx lo[2], hi[2], label2;
14695 enum rtx_code code1, code2, code3;
14696 enum machine_mode submode;
14698 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
14700 tmp = ix86_compare_op0;
14701 ix86_compare_op0 = ix86_compare_op1;
14702 ix86_compare_op1 = tmp;
14703 code = swap_condition (code);
14705 if (GET_MODE (ix86_compare_op0) == DImode)
14707 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
14708 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
14709 submode = SImode;
14711 else
14713 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
14714 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
14715 submode = DImode;
14718 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
14719 avoid two branches. This costs one extra insn, so disable when
14720 optimizing for size. */
14722 if ((code == EQ || code == NE)
14723 && (!optimize_insn_for_size_p ()
14724 || hi[1] == const0_rtx || lo[1] == const0_rtx))
14726 rtx xor0, xor1;
14728 xor1 = hi[0];
14729 if (hi[1] != const0_rtx)
14730 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
14731 NULL_RTX, 0, OPTAB_WIDEN);
14733 xor0 = lo[0];
14734 if (lo[1] != const0_rtx)
14735 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
14736 NULL_RTX, 0, OPTAB_WIDEN);
14738 tmp = expand_binop (submode, ior_optab, xor1, xor0,
14739 NULL_RTX, 0, OPTAB_WIDEN);
14741 ix86_compare_op0 = tmp;
14742 ix86_compare_op1 = const0_rtx;
14743 ix86_expand_branch (code, label);
14744 return;
14747 /* Otherwise, if we are doing less-than or greater-or-equal-than,
14748 op1 is a constant and the low word is zero, then we can just
14749 examine the high word. Similarly for low word -1 and
14750 less-or-equal-than or greater-than. */
14752 if (CONST_INT_P (hi[1]))
14753 switch (code)
14755 case LT: case LTU: case GE: case GEU:
14756 if (lo[1] == const0_rtx)
14758 ix86_compare_op0 = hi[0];
14759 ix86_compare_op1 = hi[1];
14760 ix86_expand_branch (code, label);
14761 return;
14763 break;
14764 case LE: case LEU: case GT: case GTU:
14765 if (lo[1] == constm1_rtx)
14767 ix86_compare_op0 = hi[0];
14768 ix86_compare_op1 = hi[1];
14769 ix86_expand_branch (code, label);
14770 return;
14772 break;
14773 default:
14774 break;
14777 /* Otherwise, we need two or three jumps. */
14779 label2 = gen_label_rtx ();
14781 code1 = code;
14782 code2 = swap_condition (code);
14783 code3 = unsigned_condition (code);
14785 switch (code)
14787 case LT: case GT: case LTU: case GTU:
14788 break;
14790 case LE: code1 = LT; code2 = GT; break;
14791 case GE: code1 = GT; code2 = LT; break;
14792 case LEU: code1 = LTU; code2 = GTU; break;
14793 case GEU: code1 = GTU; code2 = LTU; break;
14795 case EQ: code1 = UNKNOWN; code2 = NE; break;
14796 case NE: code2 = UNKNOWN; break;
14798 default:
14799 gcc_unreachable ();
14803 * a < b =>
14804 * if (hi(a) < hi(b)) goto true;
14805 * if (hi(a) > hi(b)) goto false;
14806 * if (lo(a) < lo(b)) goto true;
14807 * false:
14810 ix86_compare_op0 = hi[0];
14811 ix86_compare_op1 = hi[1];
14813 if (code1 != UNKNOWN)
14814 ix86_expand_branch (code1, label);
14815 if (code2 != UNKNOWN)
14816 ix86_expand_branch (code2, label2);
14818 ix86_compare_op0 = lo[0];
14819 ix86_compare_op1 = lo[1];
14820 ix86_expand_branch (code3, label);
14822 if (code2 != UNKNOWN)
14823 emit_label (label2);
14824 return;
14827 default:
14828 /* If we have already emitted a compare insn, go straight to simple.
14829 ix86_expand_compare won't emit anything if ix86_compare_emitted
14830 is non-NULL. */
14831 gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC);
14832 goto simple;
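/* (Annotation.)  Example of the wide-branch lowering above: a 32-bit
   signed "a < b" on DImode operands becomes

       cmpl  hi(b), hi(a)
       jl    label       ; code1: high words decide
       jg    1f          ; code2: high words decide the other way
       cmpl  lo(b), lo(a)
       jb    label       ; code3: equal high words, compare low unsigned
     1:

   which is why code3 is always the unsigned variant of CODE.  */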
14836 /* Split branch based on floating point condition. */
14837 void
14838 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
14839 rtx target1, rtx target2, rtx tmp, rtx pushed)
14841 rtx second, bypass;
14842 rtx label = NULL_RTX;
14843 rtx condition;
14844 int bypass_probability = -1, second_probability = -1, probability = -1;
14845 rtx i;
14847 if (target2 != pc_rtx)
14849 rtx tmp = target2;
14850 code = reverse_condition_maybe_unordered (code);
14851 target2 = target1;
14852 target1 = tmp;
14855 condition = ix86_expand_fp_compare (code, op1, op2,
14856 tmp, &second, &bypass);
14858 /* Remove pushed operand from stack. */
14859 if (pushed)
14860 ix86_free_from_memory (GET_MODE (pushed));
14862 if (split_branch_probability >= 0)
14864 /* Distribute the probabilities across the jumps.
14865 Assume that BYPASS and SECOND always test
14866 for UNORDERED. */
14867 probability = split_branch_probability;
14869 /* A value of 1 is low enough that there is no need for the probability
14870 to be updated. Later we may run some experiments and see
14871 if unordered values are more frequent in practice. */
14872 if (bypass)
14873 bypass_probability = 1;
14874 if (second)
14875 second_probability = 1;
14877 if (bypass != NULL_RTX)
14879 label = gen_label_rtx ();
14880 i = emit_jump_insn (gen_rtx_SET
14881 (VOIDmode, pc_rtx,
14882 gen_rtx_IF_THEN_ELSE (VOIDmode,
14883 bypass,
14884 gen_rtx_LABEL_REF (VOIDmode,
14885 label),
14886 pc_rtx)));
14887 if (bypass_probability >= 0)
14888 add_reg_note (i, REG_BR_PROB, GEN_INT (bypass_probability));
14890 i = emit_jump_insn (gen_rtx_SET
14891 (VOIDmode, pc_rtx,
14892 gen_rtx_IF_THEN_ELSE (VOIDmode,
14893 condition, target1, target2)));
14894 if (probability >= 0)
14895 add_reg_note (i, REG_BR_PROB, GEN_INT (probability));
14896 if (second != NULL_RTX)
14898 i = emit_jump_insn (gen_rtx_SET
14899 (VOIDmode, pc_rtx,
14900 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
14901 target2)));
14902 if (second_probability >= 0)
14903 add_reg_note (i, REG_BR_PROB, GEN_INT (second_probability));
14905 if (label != NULL_RTX)
14906 emit_label (label);
14909 int
14910 ix86_expand_setcc (enum rtx_code code, rtx dest)
14912 rtx ret, tmp, tmpreg, equiv;
14913 rtx second_test, bypass_test;
14915 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
14916 return 0; /* FAIL */
14918 gcc_assert (GET_MODE (dest) == QImode);
14920 ret = ix86_expand_compare (code, &second_test, &bypass_test);
14921 PUT_MODE (ret, QImode);
14923 tmp = dest;
14924 tmpreg = dest;
14926 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
14927 if (bypass_test || second_test)
14929 rtx test = second_test;
14930 int bypass = 0;
14931 rtx tmp2 = gen_reg_rtx (QImode);
14932 if (bypass_test)
14934 gcc_assert (!second_test);
14935 test = bypass_test;
14936 bypass = 1;
14937 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
14939 PUT_MODE (test, QImode);
14940 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
14942 if (bypass)
14943 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
14944 else
14945 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
14948 /* Attach a REG_EQUAL note describing the comparison result. */
14949 if (ix86_compare_op0 && ix86_compare_op1)
14951 equiv = simplify_gen_relational (code, QImode,
14952 GET_MODE (ix86_compare_op0),
14953 ix86_compare_op0, ix86_compare_op1);
14954 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
14957 return 1; /* DONE */
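/* (Annotation.)  Combination rule used above: a SECOND test is an
   alternative way for the condition to hold, so its setcc byte is
   IORed into the result; a BYPASS test detects when the primary test
   is not meaningful, so its *reversed* setcc byte is ANDed in.  */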
14960 /* Expand comparison setting or clearing carry flag. Return true when
14961 successful and set pop for the operation. */
14962 static bool
14963 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
14965 enum machine_mode mode =
14966 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
14968 /* Do not handle DImode compares that go through the special path. */
14969 if (mode == (TARGET_64BIT ? TImode : DImode))
14970 return false;
14972 if (SCALAR_FLOAT_MODE_P (mode))
14974 rtx second_test = NULL, bypass_test = NULL;
14975 rtx compare_op, compare_seq;
14977 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14979 /* Shortcut: the following common codes never translate
14980 into carry flag compares. */
14981 if (code == EQ || code == NE || code == UNEQ || code == LTGT
14982 || code == ORDERED || code == UNORDERED)
14983 return false;
14985 /* These comparisons require zero flag; swap operands so they won't. */
14986 if ((code == GT || code == UNLE || code == LE || code == UNGT)
14987 && !TARGET_IEEE_FP)
14989 rtx tmp = op0;
14990 op0 = op1;
14991 op1 = tmp;
14992 code = swap_condition (code);
14995 /* Try to expand the comparison and verify that we end up with
14996 a carry flag based comparison. This fails only when we decide to
14997 expand the comparison using arithmetic, which is not a very common
14998 scenario. */
14999 start_sequence ();
15000 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
15001 &second_test, &bypass_test);
15002 compare_seq = get_insns ();
15003 end_sequence ();
15005 if (second_test || bypass_test)
15006 return false;
15008 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15009 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15010 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
15011 else
15012 code = GET_CODE (compare_op);
15014 if (code != LTU && code != GEU)
15015 return false;
15017 emit_insn (compare_seq);
15018 *pop = compare_op;
15019 return true;
15022 if (!INTEGRAL_MODE_P (mode))
15023 return false;
15025 switch (code)
15027 case LTU:
15028 case GEU:
15029 break;
15031 /* Convert a==0 into (unsigned)a<1. */
15032 case EQ:
15033 case NE:
15034 if (op1 != const0_rtx)
15035 return false;
15036 op1 = const1_rtx;
15037 code = (code == EQ ? LTU : GEU);
15038 break;
15040 /* Convert a>b into b<a or a>=b+1. */
15041 case GTU:
15042 case LEU:
15043 if (CONST_INT_P (op1))
15045 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
15046 /* Bail out on overflow. We can still swap the operands, but that
15047 would force loading of the constant into a register. */
15048 if (op1 == const0_rtx
15049 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
15050 return false;
15051 code = (code == GTU ? GEU : LTU);
15053 else
15055 rtx tmp = op1;
15056 op1 = op0;
15057 op0 = tmp;
15058 code = (code == GTU ? LTU : GEU);
15060 break;
15062 /* Convert a>=0 into (unsigned)a<0x80000000. */
15063 case LT:
15064 case GE:
15065 if (mode == DImode || op1 != const0_rtx)
15066 return false;
15067 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15068 code = (code == LT ? GEU : LTU);
15069 break;
15070 case LE:
15071 case GT:
15072 if (mode == DImode || op1 != constm1_rtx)
15073 return false;
15074 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15075 code = (code == LE ? GEU : LTU);
15076 break;
15078 default:
15079 return false;
15081 /* Swapping operands may cause a constant to appear as the first operand. */
15082 if (!nonimmediate_operand (op0, VOIDmode))
15084 if (!can_create_pseudo_p ())
15085 return false;
15086 op0 = force_reg (mode, op0);
15088 ix86_compare_op0 = op0;
15089 ix86_compare_op1 = op1;
15090 *pop = ix86_expand_compare (code, NULL, NULL);
15091 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
15092 return true;
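/* (Annotation.)  Examples of the integer conversions above:
     a == 0            ->  (unsigned) a < 1            (LTU)
     (unsigned) a > 42 ->  (unsigned) a >= 43          (GEU)
     a >= 0            ->  (unsigned) a < 0x80000000   (LTU)
   All results are LTU/GEU, i.e. conditions readable from the carry
   flag alone, which is what the sbb-based sequences require.  */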
15095 int
15096 ix86_expand_int_movcc (rtx operands[])
15098 enum rtx_code code = GET_CODE (operands[1]), compare_code;
15099 rtx compare_seq, compare_op;
15100 rtx second_test, bypass_test;
15101 enum machine_mode mode = GET_MODE (operands[0]);
15102 bool sign_bit_compare_p = false;
15104 start_sequence ();
15105 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15106 compare_seq = get_insns ();
15107 end_sequence ();
15109 compare_code = GET_CODE (compare_op);
15111 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
15112 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
15113 sign_bit_compare_p = true;
15115 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
15116 HImode insns, we'd be swallowed in word prefix ops. */
15118 if ((mode != HImode || TARGET_FAST_PREFIX)
15119 && (mode != (TARGET_64BIT ? TImode : DImode))
15120 && CONST_INT_P (operands[2])
15121 && CONST_INT_P (operands[3]))
15123 rtx out = operands[0];
15124 HOST_WIDE_INT ct = INTVAL (operands[2]);
15125 HOST_WIDE_INT cf = INTVAL (operands[3]);
15126 HOST_WIDE_INT diff;
15128 diff = ct - cf;
15129 /* Sign bit compares are better done using shifts than by using
15130 sbb. */
15131 if (sign_bit_compare_p
15132 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15133 ix86_compare_op1, &compare_op))
15135 /* Detect overlap between destination and compare sources. */
15136 rtx tmp = out;
15138 if (!sign_bit_compare_p)
15140 bool fpcmp = false;
15142 compare_code = GET_CODE (compare_op);
15144 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15145 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15147 fpcmp = true;
15148 compare_code = ix86_fp_compare_code_to_integer (compare_code);
15151 /* To simplify the rest of the code, restrict to the GEU case. */
15152 if (compare_code == LTU)
15154 HOST_WIDE_INT tmp = ct;
15155 ct = cf;
15156 cf = tmp;
15157 compare_code = reverse_condition (compare_code);
15158 code = reverse_condition (code);
15160 else
15162 if (fpcmp)
15163 PUT_CODE (compare_op,
15164 reverse_condition_maybe_unordered
15165 (GET_CODE (compare_op)));
15166 else
15167 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
15169 diff = ct - cf;
15171 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
15172 || reg_overlap_mentioned_p (out, ix86_compare_op1))
15173 tmp = gen_reg_rtx (mode);
15175 if (mode == DImode)
15176 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
15177 else
15178 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
15180 else
15182 if (code == GT || code == GE)
15183 code = reverse_condition (code);
15184 else
15186 HOST_WIDE_INT tmp = ct;
15187 ct = cf;
15188 cf = tmp;
15189 diff = ct - cf;
15191 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
15192 ix86_compare_op1, VOIDmode, 0, -1);
15195 if (diff == 1)
15198 * cmpl op0,op1
15199 * sbbl dest,dest
15200 * [addl dest, ct]
15202 * Size 5 - 8.
15204 if (ct)
15205 tmp = expand_simple_binop (mode, PLUS,
15206 tmp, GEN_INT (ct),
15207 copy_rtx (tmp), 1, OPTAB_DIRECT);
15209 else if (cf == -1)
15212 * cmpl op0,op1
15213 * sbbl dest,dest
15214 * orl $ct, dest
15216 * Size 8.
15218 tmp = expand_simple_binop (mode, IOR,
15219 tmp, GEN_INT (ct),
15220 copy_rtx (tmp), 1, OPTAB_DIRECT);
15222 else if (diff == -1 && ct)
15225 * cmpl op0,op1
15226 * sbbl dest,dest
15227 * notl dest
15228 * [addl dest, cf]
15230 * Size 8 - 11.
15232 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15233 if (cf)
15234 tmp = expand_simple_binop (mode, PLUS,
15235 copy_rtx (tmp), GEN_INT (cf),
15236 copy_rtx (tmp), 1, OPTAB_DIRECT);
15238 else
15241 * cmpl op0,op1
15242 * sbbl dest,dest
15243 * [notl dest]
15244 * andl cf - ct, dest
15245 * [addl dest, ct]
15247 * Size 8 - 11.
15250 if (cf == 0)
15252 cf = ct;
15253 ct = 0;
15254 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15257 tmp = expand_simple_binop (mode, AND,
15258 copy_rtx (tmp),
15259 gen_int_mode (cf - ct, mode),
15260 copy_rtx (tmp), 1, OPTAB_DIRECT);
15261 if (ct)
15262 tmp = expand_simple_binop (mode, PLUS,
15263 copy_rtx (tmp), GEN_INT (ct),
15264 copy_rtx (tmp), 1, OPTAB_DIRECT);
15267 if (!rtx_equal_p (tmp, out))
15268 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
15270 return 1; /* DONE */
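/* (Annotation.)  The sbb idiom underlying the cases above: after a
   compare that leaves the condition in the carry flag,
   "sbb %reg, %reg" computes reg - reg - CF = -CF, i.e. -1 when the
   carry is set and 0 otherwise; the optional add/or/not/and steps
   that follow then map the {-1, 0} mask onto the required {ct, cf}
   pair without any branch.  */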
15273 if (diff < 0)
15275 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15277 HOST_WIDE_INT tmp;
15278 tmp = ct, ct = cf, cf = tmp;
15279 diff = -diff;
15281 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15283 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15285 /* We may be reversing an unordered compare to a normal compare, which
15286 is not valid in general (we may convert a non-trapping condition
15287 to a trapping one); however, on i386 we currently emit all
15288 comparisons unordered. */
15289 compare_code = reverse_condition_maybe_unordered (compare_code);
15290 code = reverse_condition_maybe_unordered (code);
15292 else
15294 compare_code = reverse_condition (compare_code);
15295 code = reverse_condition (code);
15299 compare_code = UNKNOWN;
15300 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
15301 && CONST_INT_P (ix86_compare_op1))
15303 if (ix86_compare_op1 == const0_rtx
15304 && (code == LT || code == GE))
15305 compare_code = code;
15306 else if (ix86_compare_op1 == constm1_rtx)
15308 if (code == LE)
15309 compare_code = LT;
15310 else if (code == GT)
15311 compare_code = GE;
15315 /* Optimize dest = (op0 < 0) ? -1 : cf. */
15316 if (compare_code != UNKNOWN
15317 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
15318 && (cf == -1 || ct == -1))
15320 /* If lea code below could be used, only optimize
15321 if it results in a 2 insn sequence. */
15323 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
15324 || diff == 3 || diff == 5 || diff == 9)
15325 || (compare_code == LT && ct == -1)
15326 || (compare_code == GE && cf == -1))
15329 * notl op1 (if necessary)
15330 * sarl $31, op1
15331 * orl cf, op1
15333 if (ct != -1)
15335 cf = ct;
15336 ct = -1;
15337 code = reverse_condition (code);
15340 out = emit_store_flag (out, code, ix86_compare_op0,
15341 ix86_compare_op1, VOIDmode, 0, -1);
15343 out = expand_simple_binop (mode, IOR,
15344 out, GEN_INT (cf),
15345 out, 1, OPTAB_DIRECT);
15346 if (out != operands[0])
15347 emit_move_insn (operands[0], out);
15349 return 1; /* DONE */
15354 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
15355 || diff == 3 || diff == 5 || diff == 9)
15356 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
15357 && (mode != DImode
15358 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
15361 * xorl dest,dest
15362 * cmpl op1,op2
15363 * setcc dest
15364 * lea cf(dest*(ct-cf)),dest
15366 * Size 14.
15368 * This also catches the degenerate setcc-only case.
15371 rtx tmp;
15372 int nops;
15374 out = emit_store_flag (out, code, ix86_compare_op0,
15375 ix86_compare_op1, VOIDmode, 0, 1);
15377 nops = 0;
15378 /* On x86_64 the lea instruction operates on Pmode, so we need
15379 to get the arithmetic done in the proper mode to match. */
15380 if (diff == 1)
15381 tmp = copy_rtx (out);
15382 else
15384 rtx out1;
15385 out1 = copy_rtx (out);
15386 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
15387 nops++;
15388 if (diff & 1)
15390 tmp = gen_rtx_PLUS (mode, tmp, out1);
15391 nops++;
15394 if (cf != 0)
15396 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
15397 nops++;
15399 if (!rtx_equal_p (tmp, out))
15401 if (nops == 1)
15402 out = force_operand (tmp, copy_rtx (out));
15403 else
15404 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
15406 if (!rtx_equal_p (out, operands[0]))
15407 emit_move_insn (operands[0], copy_rtx (out));
15409 return 1; /* DONE */
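/* (Annotation.)  Why diff is restricted to {1, 2, 3, 4, 5, 8, 9}
   above: lea computes base + index * scale with scale in {1, 2, 4, 8},
   and can reuse the setcc result as both base and index, so
   dest * diff + cf is a single lea exactly for those multipliers
   (e.g. diff == 9 is dest + dest * 8).  */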
15413 * General case: Jumpful:
15414 * xorl dest,dest cmpl op1, op2
15415 * cmpl op1, op2 movl ct, dest
15416 * setcc dest jcc 1f
15417 * decl dest movl cf, dest
15418 * andl (cf-ct),dest 1:
15419 * addl ct,dest
15421 * Size 20. Size 14.
15423 * This is reasonably steep, but branch mispredict costs are
15424 * high on modern cpus, so consider failing only if optimizing
15425 * for space.
15428 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15429 && BRANCH_COST (optimize_insn_for_speed_p (),
15430 false) >= 2)
15432 if (cf == 0)
15434 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15436 cf = ct;
15437 ct = 0;
15439 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15441 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15443 /* We may be reversing an unordered compare to a normal compare,
15444 which is not valid in general (we may convert a non-trapping
15445 condition to a trapping one); however, on i386 we currently
15446 emit all comparisons unordered. */
15447 code = reverse_condition_maybe_unordered (code);
15449 else
15451 code = reverse_condition (code);
15452 if (compare_code != UNKNOWN)
15453 compare_code = reverse_condition (compare_code);
15457 if (compare_code != UNKNOWN)
15459 /* notl op1 (if needed)
15460 sarl $31, op1
15461 andl (cf-ct), op1
15462 addl ct, op1
15464 For x < 0 (resp. x <= -1) there will be no notl,
15465 so if possible swap the constants to get rid of the
15466 complement.
15467 True/false will be -1/0 while code below (store flag
15468 followed by decrement) is 0/-1, so the constants need
15469 to be exchanged once more. */
15471 if (compare_code == GE || !cf)
15473 code = reverse_condition (code);
15474 compare_code = LT;
15476 else
15478 HOST_WIDE_INT tmp = cf;
15479 cf = ct;
15480 ct = tmp;
15483 out = emit_store_flag (out, code, ix86_compare_op0,
15484 ix86_compare_op1, VOIDmode, 0, -1);
15486 else
15488 out = emit_store_flag (out, code, ix86_compare_op0,
15489 ix86_compare_op1, VOIDmode, 0, 1);
15491 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
15492 copy_rtx (out), 1, OPTAB_DIRECT);
15495 out = expand_simple_binop (mode, AND, copy_rtx (out),
15496 gen_int_mode (cf - ct, mode),
15497 copy_rtx (out), 1, OPTAB_DIRECT);
15498 if (ct)
15499 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
15500 copy_rtx (out), 1, OPTAB_DIRECT);
15501 if (!rtx_equal_p (out, operands[0]))
15502 emit_move_insn (operands[0], copy_rtx (out));
15504 return 1; /* DONE */
15508 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15510 /* Try a few more things with specific constants and a variable. */
15512 optab op;
15513 rtx var, orig_out, out, tmp;
15515 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
15516 return 0; /* FAIL */
15518 /* If one of the two operands is an interesting constant, load a
15519 constant with the above and mask it in with a logical operation. */
15521 if (CONST_INT_P (operands[2]))
15523 var = operands[3];
15524 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
15525 operands[3] = constm1_rtx, op = and_optab;
15526 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
15527 operands[3] = const0_rtx, op = ior_optab;
15528 else
15529 return 0; /* FAIL */
15531 else if (CONST_INT_P (operands[3]))
15533 var = operands[2];
15534 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
15535 operands[2] = constm1_rtx, op = and_optab;
15536 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
15537 operands[2] = const0_rtx, op = ior_optab;
15538 else
15539 return 0; /* FAIL */
15541 else
15542 return 0; /* FAIL */
15544 orig_out = operands[0];
15545 tmp = gen_reg_rtx (mode);
15546 operands[0] = tmp;
15548 /* Recurse to get the constant loaded. */
15549 if (ix86_expand_int_movcc (operands) == 0)
15550 return 0; /* FAIL */
15552 /* Mask in the interesting variable. */
15553 out = expand_binop (mode, op, var, tmp, orig_out, 0,
15554 OPTAB_WIDEN);
15555 if (!rtx_equal_p (out, orig_out))
15556 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15558 return 1; /* DONE */
15562 * For comparison with above,
15564 * movl cf,dest
15565 * movl ct,tmp
15566 * cmpl op1,op2
15567 * cmovcc tmp,dest
15569 * Size 15.
15572 if (! nonimmediate_operand (operands[2], mode))
15573 operands[2] = force_reg (mode, operands[2]);
15574 if (! nonimmediate_operand (operands[3], mode))
15575 operands[3] = force_reg (mode, operands[3]);
15577 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15579 rtx tmp = gen_reg_rtx (mode);
15580 emit_move_insn (tmp, operands[3]);
15581 operands[3] = tmp;
15583 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15585 rtx tmp = gen_reg_rtx (mode);
15586 emit_move_insn (tmp, operands[2]);
15587 operands[2] = tmp;
15590 if (! register_operand (operands[2], VOIDmode)
15591 && (mode == QImode
15592 || ! register_operand (operands[3], VOIDmode)))
15593 operands[2] = force_reg (mode, operands[2]);
15595 if (mode == QImode
15596 && ! register_operand (operands[3], VOIDmode))
15597 operands[3] = force_reg (mode, operands[3]);
15599 emit_insn (compare_seq);
15600 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15601 gen_rtx_IF_THEN_ELSE (mode,
15602 compare_op, operands[2],
15603 operands[3])));
15604 if (bypass_test)
15605 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15606 gen_rtx_IF_THEN_ELSE (mode,
15607 bypass_test,
15608 copy_rtx (operands[3]),
15609 copy_rtx (operands[0]))));
15610 if (second_test)
15611 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15612 gen_rtx_IF_THEN_ELSE (mode,
15613 second_test,
15614 copy_rtx (operands[2]),
15615 copy_rtx (operands[0]))));
15617 return 1; /* DONE */
15620 /* Swap, force into registers, or otherwise massage the two operands
15621 to an sse comparison with a mask result. Thus we differ a bit from
15622 ix86_prepare_fp_compare_args which expects to produce a flags result.
15624 The DEST operand exists to help determine whether to commute commutative
15625 operators. The POP0/POP1 operands are updated in place. The new
15626 comparison code is returned, or UNKNOWN if not implementable. */
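/* For example, GE is not directly available as an SSE comparison
   operator, so "a >= b" is rewritten below as "b <= a", which maps
   onto the CMPLE family of instructions.  */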
15628 static enum rtx_code
15629 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
15630 rtx *pop0, rtx *pop1)
15632 rtx tmp;
15634 switch (code)
15636 case LTGT:
15637 case UNEQ:
15638 /* We have no LTGT as an operator. We could implement it with
15639 NE & ORDERED, but this requires an extra temporary. It's
15640 not clear that it's worth it. */
15641 return UNKNOWN;
15643 case LT:
15644 case LE:
15645 case UNGT:
15646 case UNGE:
15647 /* These are supported directly. */
15648 break;
15650 case EQ:
15651 case NE:
15652 case UNORDERED:
15653 case ORDERED:
15654 /* For commutative operators, try to canonicalize the destination
15655 operand to be first in the comparison - this helps reload to
15656 avoid extra moves. */
15657 if (!dest || !rtx_equal_p (dest, *pop1))
15658 break;
15659 /* FALLTHRU */
15661 case GE:
15662 case GT:
15663 case UNLE:
15664 case UNLT:
15665 /* These are not supported directly. Swap the comparison operands
15666 to transform into something that is supported. */
15667 tmp = *pop0;
15668 *pop0 = *pop1;
15669 *pop1 = tmp;
15670 code = swap_condition (code);
15671 break;
15673 default:
15674 gcc_unreachable ();
15677 return code;
15680 /* Detect conditional moves that exactly match min/max operational
15681 semantics. Note that this is IEEE safe, as long as we don't
15682 interchange the operands.
15684 Returns FALSE if this conditional move doesn't match a MIN/MAX,
15685 and TRUE if the operation is successful and instructions are emitted. */
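/* E.g. "x < y ? x : y" maps directly onto minss/minps: those insns
   return the second operand when the comparison is false or unordered,
   which matches the C semantics of the conditional (a NaN selects y).
   That is why the operands must not be interchanged here.  */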
15687 static bool
15688 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
15689 rtx cmp_op1, rtx if_true, rtx if_false)
15691 enum machine_mode mode;
15692 bool is_min;
15693 rtx tmp;
15695 if (code == LT)
15697 else if (code == UNGE)
15699 tmp = if_true;
15700 if_true = if_false;
15701 if_false = tmp;
15703 else
15704 return false;
15706 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
15707 is_min = true;
15708 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
15709 is_min = false;
15710 else
15711 return false;
15713 mode = GET_MODE (dest);
15715 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
15716 but MODE may be a vector mode and thus not appropriate. */
15717 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
15719 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
15720 rtvec v;
15722 if_true = force_reg (mode, if_true);
15723 v = gen_rtvec (2, if_true, if_false);
15724 tmp = gen_rtx_UNSPEC (mode, v, u);
15726 else
15728 code = is_min ? SMIN : SMAX;
15729 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
15732 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
15733 return true;
/* Expand an SSE vector comparison.  Return the register with the result.  */
15738 static rtx
15739 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
15740 rtx op_true, rtx op_false)
15742 enum machine_mode mode = GET_MODE (dest);
15743 rtx x;
15745 cmp_op0 = force_reg (mode, cmp_op0);
15746 if (!nonimmediate_operand (cmp_op1, mode))
15747 cmp_op1 = force_reg (mode, cmp_op1);
15749 if (optimize
15750 || reg_overlap_mentioned_p (dest, op_true)
15751 || reg_overlap_mentioned_p (dest, op_false))
15752 dest = gen_reg_rtx (mode);
15754 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
15755 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15757 return dest;
15760 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
15761 operations. This is used for both scalar and vector conditional moves. */
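/* Without SSE5's pcmov, the general case below computes the classic
   blend  dest = (cmp & op_true) | (~cmp & op_false),
   where CMP is an all-ones/all-zeros comparison mask.  */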
15763 static void
15764 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
15766 enum machine_mode mode = GET_MODE (dest);
15767 rtx t2, t3, x;
15769 if (op_false == CONST0_RTX (mode))
15771 op_true = force_reg (mode, op_true);
15772 x = gen_rtx_AND (mode, cmp, op_true);
15773 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15775 else if (op_true == CONST0_RTX (mode))
15777 op_false = force_reg (mode, op_false);
15778 x = gen_rtx_NOT (mode, cmp);
15779 x = gen_rtx_AND (mode, x, op_false);
15780 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15782 else if (TARGET_SSE5)
15784 rtx pcmov = gen_rtx_SET (mode, dest,
15785 gen_rtx_IF_THEN_ELSE (mode, cmp,
15786 op_true,
15787 op_false));
15788 emit_insn (pcmov);
15790 else
15792 op_true = force_reg (mode, op_true);
15793 op_false = force_reg (mode, op_false);
15795 t2 = gen_reg_rtx (mode);
15796 if (optimize)
15797 t3 = gen_reg_rtx (mode);
15798 else
15799 t3 = dest;
15801 x = gen_rtx_AND (mode, op_true, cmp);
15802 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
15804 x = gen_rtx_NOT (mode, cmp);
15805 x = gen_rtx_AND (mode, x, op_false);
15806 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
15808 x = gen_rtx_IOR (mode, t3, t2);
15809 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15813 /* Expand a floating-point conditional move. Return true if successful. */
15816 ix86_expand_fp_movcc (rtx operands[])
15818 enum machine_mode mode = GET_MODE (operands[0]);
15819 enum rtx_code code = GET_CODE (operands[1]);
15820 rtx tmp, compare_op, second_test, bypass_test;
15822 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
15824 enum machine_mode cmode;
15826 /* Since we've no cmove for sse registers, don't force bad register
15827 allocation just to gain access to it. Deny movcc when the
15828 comparison mode doesn't match the move mode. */
15829 cmode = GET_MODE (ix86_compare_op0);
15830 if (cmode == VOIDmode)
15831 cmode = GET_MODE (ix86_compare_op1);
15832 if (cmode != mode)
15833 return 0;
15835 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15836 &ix86_compare_op0,
15837 &ix86_compare_op1);
15838 if (code == UNKNOWN)
15839 return 0;
15841 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
15842 ix86_compare_op1, operands[2],
15843 operands[3]))
15844 return 1;
15846 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
15847 ix86_compare_op1, operands[2], operands[3]);
15848 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
15849 return 1;
15855 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15857 /* The floating point conditional move instructions don't directly
15858 support signed integer comparisons. */
15860 if (!fcmov_comparison_operator (compare_op, VOIDmode))
15862 gcc_assert (!second_test && !bypass_test);
15863 tmp = gen_reg_rtx (QImode);
15864 ix86_expand_setcc (code, tmp);
15865 code = NE;
15866 ix86_compare_op0 = tmp;
15867 ix86_compare_op1 = const0_rtx;
15868 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15870 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15872 tmp = gen_reg_rtx (mode);
15873 emit_move_insn (tmp, operands[3]);
15874 operands[3] = tmp;
15876 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15878 tmp = gen_reg_rtx (mode);
15879 emit_move_insn (tmp, operands[2]);
15880 operands[2] = tmp;
15883 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15884 gen_rtx_IF_THEN_ELSE (mode, compare_op,
15885 operands[2], operands[3])));
15886 if (bypass_test)
15887 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15888 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
15889 operands[3], operands[0])));
15890 if (second_test)
15891 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15892 gen_rtx_IF_THEN_ELSE (mode, second_test,
15893 operands[2], operands[0])));
15895 return 1;
15898 /* Expand a floating-point vector conditional move; a vcond operation
15899 rather than a movcc operation. */
15901 bool
15902 ix86_expand_fp_vcond (rtx operands[])
15904 enum rtx_code code = GET_CODE (operands[3]);
15905 rtx cmp;
15907 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15908 &operands[4], &operands[5]);
15909 if (code == UNKNOWN)
15910 return false;
15912 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
15913 operands[5], operands[1], operands[2]))
15914 return true;
15916 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
15917 operands[1], operands[2]);
15918 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
15919 return true;
15922 /* Expand a signed/unsigned integral vector conditional move. */
15924 bool
15925 ix86_expand_int_vcond (rtx operands[])
15927 enum machine_mode mode = GET_MODE (operands[0]);
15928 enum rtx_code code = GET_CODE (operands[3]);
15929 bool negate = false;
15930 rtx x, cop0, cop1;
15932 cop0 = operands[4];
15933 cop1 = operands[5];
15935 /* SSE5 supports all of the comparisons on all vector int types. */
15936 if (!TARGET_SSE5)
15938 /* Canonicalize the comparison to EQ, GT, GTU. */
15939 switch (code)
15941 case EQ:
15942 case GT:
15943 case GTU:
15944 break;
15946 case NE:
15947 case LE:
15948 case LEU:
15949 code = reverse_condition (code);
15950 negate = true;
15951 break;
15953 case GE:
15954 case GEU:
15955 code = reverse_condition (code);
15956 negate = true;
15957 /* FALLTHRU */
15959 case LT:
15960 case LTU:
15961 code = swap_condition (code);
15962 x = cop0, cop0 = cop1, cop1 = x;
15963 break;
15965 default:
15966 gcc_unreachable ();
/* Only SSE4.1/SSE4.2 support V2DImode comparisons.  */
15970 if (mode == V2DImode)
15972 switch (code)
15974 case EQ:
15975 /* SSE4.1 supports EQ. */
15976 if (!TARGET_SSE4_1)
15977 return false;
15978 break;
15980 case GT:
15981 case GTU:
15982 /* SSE4.2 supports GT/GTU. */
15983 if (!TARGET_SSE4_2)
15984 return false;
15985 break;
15987 default:
15988 gcc_unreachable ();
  /* Unsigned parallel compare is not supported by the hardware.  Play
     some tricks to turn this into a signed comparison against 0.  */
  if (code == GTU)
    {
      cop0 = force_reg (mode, cop0);

      switch (mode)
	{
	case V4SImode:
	case V2DImode:
	  {
	    rtx t1, t2, mask;

	    /* There is no unsigned greater-than compare insn, but
	       flipping the sign bit of both operands turns the
	       unsigned comparison into a signed one:
		 x GTU y  <==>  (x ^ msb) GT (y ^ msb).  */
	    mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
					    true, false);

	    t1 = gen_reg_rtx (mode);
	    emit_insn ((mode == V4SImode
			? gen_xorv4si3
			: gen_xorv2di3) (t1, cop0, mask));

	    t2 = gen_reg_rtx (mode);
	    emit_insn ((mode == V4SImode
			? gen_xorv4si3
			: gen_xorv2di3) (t2, force_reg (mode, cop1), mask));

	    x = t1;
	    cop1 = t2;
	    code = GT;
	    break;
	  }

	case V16QImode:
	case V8HImode:
	  /* Perform a parallel unsigned saturating subtraction; the
	     result is nonzero exactly when cop0 GTU cop1, so compare
	     it against zero for equality and flip the selected arms.  */
	  x = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (VOIDmode, x,
				  gen_rtx_US_MINUS (mode, cop0, cop1)));

	  code = EQ;
	  negate = !negate;
	  cop1 = CONST0_RTX (mode);
	  break;

	default:
	  gcc_unreachable ();
	}

      cop0 = x;
    }
16050 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
16051 operands[1+negate], operands[2-negate]);
16053 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
16054 operands[2-negate]);
16055 return true;
16058 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
16059 true if we should do zero extension, else sign extension. HIGH_P is
16060 true if we want the N/2 high elements, else the low elements. */
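/* Illustrative sketch of what gets emitted for a signed V8HImode low
   unpack:

	se = (0 > op1) ? -1 : 0 per element	(pcmpgtw against zero)
	dest = interleave_low (op1, se)		(punpcklwd)

   so each 16-bit element is followed by a word holding its sign,
   yielding sign-extended 32-bit elements; for UNSIGNED_P, SE is
   simply zero.  */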
16062 void
16063 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16065 enum machine_mode imode = GET_MODE (operands[1]);
16066 rtx (*unpack)(rtx, rtx, rtx);
16067 rtx se, dest;
16069 switch (imode)
16071 case V16QImode:
16072 if (high_p)
16073 unpack = gen_vec_interleave_highv16qi;
16074 else
16075 unpack = gen_vec_interleave_lowv16qi;
16076 break;
16077 case V8HImode:
16078 if (high_p)
16079 unpack = gen_vec_interleave_highv8hi;
16080 else
16081 unpack = gen_vec_interleave_lowv8hi;
16082 break;
16083 case V4SImode:
16084 if (high_p)
16085 unpack = gen_vec_interleave_highv4si;
16086 else
16087 unpack = gen_vec_interleave_lowv4si;
16088 break;
16089 default:
16090 gcc_unreachable ();
16093 dest = gen_lowpart (imode, operands[0]);
16095 if (unsigned_p)
16096 se = force_reg (imode, CONST0_RTX (imode));
16097 else
16098 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
16099 operands[1], pc_rtx, pc_rtx);
16101 emit_insn (unpack (dest, operands[1], se));
16104 /* This function performs the same task as ix86_expand_sse_unpack,
16105 but with SSE4.1 instructions. */
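/* E.g. for V8HImode this uses pmovsxwd/pmovzxwd directly on the low
   half; for HIGH_P the upper 8 bytes are first shifted down with a
   64-bit psrldq, as done below.  */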
16107 void
16108 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16110 enum machine_mode imode = GET_MODE (operands[1]);
16111 rtx (*unpack)(rtx, rtx);
16112 rtx src, dest;
16114 switch (imode)
16116 case V16QImode:
16117 if (unsigned_p)
16118 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
16119 else
16120 unpack = gen_sse4_1_extendv8qiv8hi2;
16121 break;
16122 case V8HImode:
16123 if (unsigned_p)
16124 unpack = gen_sse4_1_zero_extendv4hiv4si2;
16125 else
16126 unpack = gen_sse4_1_extendv4hiv4si2;
16127 break;
16128 case V4SImode:
16129 if (unsigned_p)
16130 unpack = gen_sse4_1_zero_extendv2siv2di2;
16131 else
16132 unpack = gen_sse4_1_extendv2siv2di2;
16133 break;
16134 default:
16135 gcc_unreachable ();
16138 dest = operands[0];
16139 if (high_p)
16141 /* Shift higher 8 bytes to lower 8 bytes. */
16142 src = gen_reg_rtx (imode);
16143 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
16144 gen_lowpart (TImode, operands[1]),
16145 GEN_INT (64)));
16147 else
16148 src = operands[1];
16150 emit_insn (unpack (dest, src));
/* This function performs the same task as ix86_expand_sse_unpack,
   but with SSE5 instructions.  */
16156 void
16157 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16159 enum machine_mode imode = GET_MODE (operands[1]);
16160 int pperm_bytes[16];
16161 int i;
16162 int h = (high_p) ? 8 : 0;
16163 int h2;
16164 int sign_extend;
16165 rtvec v = rtvec_alloc (16);
16166 rtvec vs;
16167 rtx x, p;
16168 rtx op0 = operands[0], op1 = operands[1];
16170 switch (imode)
16172 case V16QImode:
16173 vs = rtvec_alloc (8);
16174 h2 = (high_p) ? 8 : 0;
16175 for (i = 0; i < 8; i++)
16177 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
16178 pperm_bytes[2*i+1] = ((unsigned_p)
16179 ? PPERM_ZERO
16180 : PPERM_SIGN | PPERM_SRC2 | i | h);
16183 for (i = 0; i < 16; i++)
16184 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16186 for (i = 0; i < 8; i++)
16187 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16189 p = gen_rtx_PARALLEL (VOIDmode, vs);
16190 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16191 if (unsigned_p)
16192 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
16193 else
16194 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
16195 break;
16197 case V8HImode:
16198 vs = rtvec_alloc (4);
16199 h2 = (high_p) ? 4 : 0;
16200 for (i = 0; i < 4; i++)
16202 sign_extend = ((unsigned_p)
16203 ? PPERM_ZERO
16204 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
16205 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
16206 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
16207 pperm_bytes[4*i+2] = sign_extend;
16208 pperm_bytes[4*i+3] = sign_extend;
16211 for (i = 0; i < 16; i++)
16212 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16214 for (i = 0; i < 4; i++)
16215 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16217 p = gen_rtx_PARALLEL (VOIDmode, vs);
16218 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16219 if (unsigned_p)
16220 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
16221 else
16222 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
16223 break;
16225 case V4SImode:
16226 vs = rtvec_alloc (2);
16227 h2 = (high_p) ? 2 : 0;
16228 for (i = 0; i < 2; i++)
16230 sign_extend = ((unsigned_p)
16231 ? PPERM_ZERO
16232 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
16233 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
16234 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
16235 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
16236 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
16237 pperm_bytes[8*i+4] = sign_extend;
16238 pperm_bytes[8*i+5] = sign_extend;
16239 pperm_bytes[8*i+6] = sign_extend;
16240 pperm_bytes[8*i+7] = sign_extend;
16243 for (i = 0; i < 16; i++)
16244 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16246 for (i = 0; i < 2; i++)
16247 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16249 p = gen_rtx_PARALLEL (VOIDmode, vs);
16250 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16251 if (unsigned_p)
16252 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
16253 else
16254 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
16255 break;
16257 default:
16258 gcc_unreachable ();
16261 return;
/* Pack OPERANDS[1] into the low half and OPERANDS[2] into the high half
   of the next narrower integer vector type, keeping the low-order bits
   of each element.  */
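/* As I read the PPERM_* encodings, each selector byte picks one of 32
   input bytes (PPERM_SRC2 selects from the second source) together
   with an operation: PPERM_SRC copies the byte, PPERM_SIGN replicates
   its sign bit, and PPERM_ZERO produces zero.  The control vectors
   built below therefore gather the low byte(s) of each element of
   OPERANDS[1] into the low half of the result and of OPERANDS[2] into
   the high half.  */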
16266 void
16267 ix86_expand_sse5_pack (rtx operands[3])
16269 enum machine_mode imode = GET_MODE (operands[0]);
16270 int pperm_bytes[16];
16271 int i;
16272 rtvec v = rtvec_alloc (16);
16273 rtx x;
16274 rtx op0 = operands[0];
16275 rtx op1 = operands[1];
16276 rtx op2 = operands[2];
16278 switch (imode)
16280 case V16QImode:
16281 for (i = 0; i < 8; i++)
16283 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
16284 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
16287 for (i = 0; i < 16; i++)
16288 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16290 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16291 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
16292 break;
16294 case V8HImode:
16295 for (i = 0; i < 4; i++)
16297 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
16298 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
16299 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
16300 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
16303 for (i = 0; i < 16; i++)
16304 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16306 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16307 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
16308 break;
16310 case V4SImode:
16311 for (i = 0; i < 2; i++)
16313 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
16314 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
16315 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
16316 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
16317 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
16318 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
16319 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
16320 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
16323 for (i = 0; i < 16; i++)
16324 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16326 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16327 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
16328 break;
16330 default:
16331 gcc_unreachable ();
16334 return;
/* Expand a conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by a conditional move can be
   done by generic code.  */
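/* As an illustrative example (pseudo-assembly, AT&T style): for an
   unsigned comparison,

	x = (a < b) ? x + 1 : x;

   can come out as

	cmpl	%ebx, %eax	(CF = a <u b, with a in %eax, b in %ebx)
	adcl	$0, %ecx	(x += CF, with x in %ecx)

   using the carry flag directly instead of setcc plus cmov.  */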
16341 ix86_expand_int_addcc (rtx operands[])
16343 enum rtx_code code = GET_CODE (operands[1]);
16344 rtx compare_op;
16345 rtx val = const0_rtx;
16346 bool fpcmp = false;
16347 enum machine_mode mode = GET_MODE (operands[0]);
16349 if (operands[3] != const1_rtx
16350 && operands[3] != constm1_rtx)
16351 return 0;
16352 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16353 ix86_compare_op1, &compare_op))
16354 return 0;
16355 code = GET_CODE (compare_op);
16357 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16358 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16360 fpcmp = true;
16361 code = ix86_fp_compare_code_to_integer (code);
16364 if (code != LTU)
16366 val = constm1_rtx;
16367 if (fpcmp)
16368 PUT_CODE (compare_op,
16369 reverse_condition_maybe_unordered
16370 (GET_CODE (compare_op)));
16371 else
16372 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
16374 PUT_MODE (compare_op, mode);
16376 /* Construct either adc or sbb insn. */
16377 if ((code == LTU) == (operands[3] == constm1_rtx))
16379 switch (GET_MODE (operands[0]))
16381 case QImode:
16382 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
16383 break;
16384 case HImode:
16385 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
16386 break;
16387 case SImode:
16388 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
16389 break;
16390 case DImode:
16391 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16392 break;
16393 default:
16394 gcc_unreachable ();
16397 else
16399 switch (GET_MODE (operands[0]))
16401 case QImode:
16402 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
16403 break;
16404 case HImode:
16405 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
16406 break;
16407 case SImode:
16408 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
16409 break;
16410 case DImode:
16411 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16412 break;
16413 default:
16414 gcc_unreachable ();
16417 return 1; /* DONE */
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating-point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  At most four parts are generated.  */
16426 static int
16427 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
16429 int size;
16431 if (!TARGET_64BIT)
16432 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
16433 else
16434 size = (GET_MODE_SIZE (mode) + 4) / 8;
16436 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
16437 gcc_assert (size >= 2 && size <= 4);
/* Optimize constant pool references to immediates.  This is used by
   fp moves, which force all constants to memory to allow combining.  */
16441 if (MEM_P (operand) && MEM_READONLY_P (operand))
16443 rtx tmp = maybe_get_pool_constant (operand);
16444 if (tmp)
16445 operand = tmp;
16448 if (MEM_P (operand) && !offsettable_memref_p (operand))
/* The only non-offsettable memories we handle are pushes.  */
16451 int ok = push_operand (operand, VOIDmode);
16453 gcc_assert (ok);
16455 operand = copy_rtx (operand);
16456 PUT_MODE (operand, Pmode);
16457 parts[0] = parts[1] = parts[2] = parts[3] = operand;
16458 return size;
16461 if (GET_CODE (operand) == CONST_VECTOR)
16463 enum machine_mode imode = int_mode_for_mode (mode);
16464 /* Caution: if we looked through a constant pool memory above,
16465 the operand may actually have a different mode now. That's
16466 ok, since we want to pun this all the way back to an integer. */
16467 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
16468 gcc_assert (operand != NULL);
16469 mode = imode;
16472 if (!TARGET_64BIT)
16474 if (mode == DImode)
16475 split_di (&operand, 1, &parts[0], &parts[1]);
16476 else
16478 int i;
16480 if (REG_P (operand))
16482 gcc_assert (reload_completed);
16483 for (i = 0; i < size; i++)
16484 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
16486 else if (offsettable_memref_p (operand))
16488 operand = adjust_address (operand, SImode, 0);
16489 parts[0] = operand;
16490 for (i = 1; i < size; i++)
16491 parts[i] = adjust_address (operand, SImode, 4 * i);
16493 else if (GET_CODE (operand) == CONST_DOUBLE)
16495 REAL_VALUE_TYPE r;
16496 long l[4];
16498 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16499 switch (mode)
16501 case TFmode:
16502 real_to_target (l, &r, mode);
16503 parts[3] = gen_int_mode (l[3], SImode);
16504 parts[2] = gen_int_mode (l[2], SImode);
16505 break;
16506 case XFmode:
16507 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
16508 parts[2] = gen_int_mode (l[2], SImode);
16509 break;
16510 case DFmode:
16511 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16512 break;
16513 default:
16514 gcc_unreachable ();
16516 parts[1] = gen_int_mode (l[1], SImode);
16517 parts[0] = gen_int_mode (l[0], SImode);
16519 else
16520 gcc_unreachable ();
16523 else
16525 if (mode == TImode)
16526 split_ti (&operand, 1, &parts[0], &parts[1]);
16527 if (mode == XFmode || mode == TFmode)
16529 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
16530 if (REG_P (operand))
16532 gcc_assert (reload_completed);
16533 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
16534 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
16536 else if (offsettable_memref_p (operand))
16538 operand = adjust_address (operand, DImode, 0);
16539 parts[0] = operand;
16540 parts[1] = adjust_address (operand, upper_mode, 8);
16542 else if (GET_CODE (operand) == CONST_DOUBLE)
16544 REAL_VALUE_TYPE r;
16545 long l[4];
16547 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16548 real_to_target (l, &r, mode);
16550 /* Do not use shift by 32 to avoid warning on 32bit systems. */
16551 if (HOST_BITS_PER_WIDE_INT >= 64)
16552 parts[0]
16553 = gen_int_mode
16554 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16555 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16556 DImode);
16557 else
16558 parts[0] = immed_double_const (l[0], l[1], DImode);
16560 if (upper_mode == SImode)
16561 parts[1] = gen_int_mode (l[2], SImode);
16562 else if (HOST_BITS_PER_WIDE_INT >= 64)
16563 parts[1]
16564 = gen_int_mode
16565 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16566 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16567 DImode);
16568 else
16569 parts[1] = immed_double_const (l[2], l[3], DImode);
16571 else
16572 gcc_unreachable ();
16576 return size;
/* Emit insns to perform a move or push of DI, DF, XF, and TF values;
   all required insns are emitted here.  Operands 2-5 receive the
   destination parts in the correct order; operands 6-9 receive the
   source parts.  */
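/* For instance (an illustrative case, not generated verbatim): moving
   a DImode value on a 32-bit target where the destination's low word
   is also the source's address register must copy high-word-first,

	movl	4(%eax), %edx
	movl	(%eax), %eax

   which is what the collision handling below arranges.  */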
16584 void
16585 ix86_split_long_move (rtx operands[])
16587 rtx part[2][4];
16588 int nparts, i, j;
16589 int push = 0;
16590 int collisions = 0;
16591 enum machine_mode mode = GET_MODE (operands[0]);
16592 bool collisionparts[4];
/* The DFmode expanders may ask us to move a double; for a 64-bit
   target this is a single move.  By handling that case here we
   simplify the i386.md splitters.  */
16597 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
/* Optimize constant pool references to immediates.  This is used by
   fp moves, which force all constants to memory to allow combining.  */
16602 if (MEM_P (operands[1])
16603 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16604 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16605 operands[1] = get_pool_constant (XEXP (operands[1], 0));
16606 if (push_operand (operands[0], VOIDmode))
16608 operands[0] = copy_rtx (operands[0]);
16609 PUT_MODE (operands[0], Pmode);
16611 else
16612 operands[0] = gen_lowpart (DImode, operands[0]);
16613 operands[1] = gen_lowpart (DImode, operands[1]);
16614 emit_move_insn (operands[0], operands[1]);
16615 return;
/* The only non-offsettable memory we handle is a push.  */
16619 if (push_operand (operands[0], VOIDmode))
16620 push = 1;
16621 else
16622 gcc_assert (!MEM_P (operands[0])
16623 || offsettable_memref_p (operands[0]));
16625 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16626 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
/* When emitting a push, take care with source operands on the stack.  */
16629 if (push && MEM_P (operands[1])
16630 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
16631 for (i = 0; i < nparts - 1; i++)
16632 part[1][i] = change_address (part[1][i],
16633 GET_MODE (part[1][i]),
16634 XEXP (part[1][i + 1], 0));
/* We need to do the copy in the right order in case an address
   register of the source overlaps the destination.  */
16638 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
16640 rtx tmp;
16642 for (i = 0; i < nparts; i++)
16644 collisionparts[i]
16645 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
16646 if (collisionparts[i])
16647 collisions++;
16650 /* Collision in the middle part can be handled by reordering. */
16651 if (collisions == 1 && nparts == 3 && collisionparts [1])
16653 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16654 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16656 else if (collisions == 1
16657 && nparts == 4
16658 && (collisionparts [1] || collisionparts [2]))
16660 if (collisionparts [1])
16662 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16663 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16665 else
16667 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
16668 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
/* If there are more collisions, we can't handle them by reordering.
   Do an lea to the last part and use only one colliding move.  */
16674 else if (collisions > 1)
16676 rtx base;
16678 collisions = 1;
16680 base = part[0][nparts - 1];
16682 /* Handle the case when the last part isn't valid for lea.
16683 Happens in 64-bit mode storing the 12-byte XFmode. */
16684 if (GET_MODE (base) != Pmode)
16685 base = gen_rtx_REG (Pmode, REGNO (base));
16687 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
16688 part[1][0] = replace_equiv_address (part[1][0], base);
16689 for (i = 1; i < nparts; i++)
16691 tmp = plus_constant (base, UNITS_PER_WORD * i);
16692 part[1][i] = replace_equiv_address (part[1][i], tmp);
16697 if (push)
16699 if (!TARGET_64BIT)
16701 if (nparts == 3)
16703 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
16704 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
16705 emit_move_insn (part[0][2], part[1][2]);
16707 else if (nparts == 4)
16709 emit_move_insn (part[0][3], part[1][3]);
16710 emit_move_insn (part[0][2], part[1][2]);
16713 else
/* In 64-bit mode we don't have a 32-bit push available.  If this is a
   register, that is OK: we just use the larger counterpart.  We also
   retype memory; this comes from an attempt to avoid a REX prefix on
   moving the second half of a TFmode value.  */
16719 if (GET_MODE (part[1][1]) == SImode)
16721 switch (GET_CODE (part[1][1]))
16723 case MEM:
16724 part[1][1] = adjust_address (part[1][1], DImode, 0);
16725 break;
16727 case REG:
16728 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16729 break;
16731 default:
16732 gcc_unreachable ();
16735 if (GET_MODE (part[1][0]) == SImode)
16736 part[1][0] = part[1][1];
16739 emit_move_insn (part[0][1], part[1][1]);
16740 emit_move_insn (part[0][0], part[1][0]);
16741 return;
/* Choose the correct order so we do not overwrite the source before it is copied.  */
16745 if ((REG_P (part[0][0])
16746 && REG_P (part[1][1])
16747 && (REGNO (part[0][0]) == REGNO (part[1][1])
16748 || (nparts == 3
16749 && REGNO (part[0][0]) == REGNO (part[1][2]))
16750 || (nparts == 4
16751 && REGNO (part[0][0]) == REGNO (part[1][3]))))
16752 || (collisions > 0
16753 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
16755 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16757 operands[2 + i] = part[0][j];
16758 operands[6 + i] = part[1][j];
16761 else
16763 for (i = 0; i < nparts; i++)
16765 operands[2 + i] = part[0][i];
16766 operands[6 + i] = part[1][i];
16770 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
16771 if (optimize_insn_for_size_p ())
16773 for (j = 0; j < nparts - 1; j++)
16774 if (CONST_INT_P (operands[6 + j])
16775 && operands[6 + j] != const0_rtx
16776 && REG_P (operands[2 + j]))
16777 for (i = j; i < nparts - 1; i++)
16778 if (CONST_INT_P (operands[7 + i])
16779 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
16780 operands[7 + i] = operands[2 + j];
16783 for (i = 0; i < nparts; i++)
16784 emit_move_insn (operands[2 + i], operands[6 + i]);
16786 return;
16789 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16790 left shift by a constant, either using a single shift or
16791 a sequence of add instructions. */
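/* E.g. a shift left by 1 is emitted as a single "add reg, reg" (x + x
   == x << 1); small constant counts may likewise become an add chain
   when, per the cost tables, that is no more expensive than a constant
   shift.  */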
16793 static void
16794 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
16796 if (count == 1)
16798 emit_insn ((mode == DImode
16799 ? gen_addsi3
16800 : gen_adddi3) (operand, operand, operand));
16802 else if (!optimize_insn_for_size_p ()
16803 && count * ix86_cost->add <= ix86_cost->shift_const)
16805 int i;
16806 for (i=0; i<count; i++)
16808 emit_insn ((mode == DImode
16809 ? gen_addsi3
16810 : gen_adddi3) (operand, operand, operand));
16813 else
16814 emit_insn ((mode == DImode
16815 ? gen_ashlsi3
16816 : gen_ashldi3) (operand, operand, GEN_INT (count)));
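/* Split a double-word left shift into word-sized operations.  For a
   constant count this is roughly (illustrative, DImode on 32-bit)

	shldl	$CNT, %low, %high
	sall	$CNT, %low

   or plain moves when CNT >= 32; variable counts additionally need a
   fixup (cmov-based when SCRATCH is available, a branchy pattern
   otherwise) for counts past the word size.  */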
16819 void
16820 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16822 rtx low[2], high[2];
16823 int count;
16824 const int single_width = mode == DImode ? 32 : 64;
16826 if (CONST_INT_P (operands[2]))
16828 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16829 count = INTVAL (operands[2]) & (single_width * 2 - 1);
16831 if (count >= single_width)
16833 emit_move_insn (high[0], low[1]);
16834 emit_move_insn (low[0], const0_rtx);
16836 if (count > single_width)
16837 ix86_expand_ashl_const (high[0], count - single_width, mode);
16839 else
16841 if (!rtx_equal_p (operands[0], operands[1]))
16842 emit_move_insn (operands[0], operands[1]);
16843 emit_insn ((mode == DImode
16844 ? gen_x86_shld
16845 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
16846 ix86_expand_ashl_const (low[0], count, mode);
16848 return;
16851 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16853 if (operands[1] == const1_rtx)
/* Assuming we've chosen QImode-capable registers, 1 << N can be
   done with two 32/64-bit shifts, no branches, no cmoves.  */
16857 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
16859 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
16861 ix86_expand_clear (low[0]);
16862 ix86_expand_clear (high[0]);
16863 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
16865 d = gen_lowpart (QImode, low[0]);
16866 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16867 s = gen_rtx_EQ (QImode, flags, const0_rtx);
16868 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16870 d = gen_lowpart (QImode, high[0]);
16871 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16872 s = gen_rtx_NE (QImode, flags, const0_rtx);
16873 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16876 /* Otherwise, we can get the same results by manually performing
16877 a bit extract operation on bit 5/6, and then performing the two
16878 shifts. The two methods of getting 0/1 into low/high are exactly
16879 the same size. Avoiding the shift in the bit extract case helps
16880 pentium4 a bit; no one else seems to care much either way. */
16881 else
16883 rtx x;
16885 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
16886 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
16887 else
16888 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
16889 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
16891 emit_insn ((mode == DImode
16892 ? gen_lshrsi3
16893 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
16894 emit_insn ((mode == DImode
16895 ? gen_andsi3
16896 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
16897 emit_move_insn (low[0], high[0]);
16898 emit_insn ((mode == DImode
16899 ? gen_xorsi3
16900 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
16903 emit_insn ((mode == DImode
16904 ? gen_ashlsi3
16905 : gen_ashldi3) (low[0], low[0], operands[2]));
16906 emit_insn ((mode == DImode
16907 ? gen_ashlsi3
16908 : gen_ashldi3) (high[0], high[0], operands[2]));
16909 return;
16912 if (operands[1] == constm1_rtx)
16914 /* For -1 << N, we can avoid the shld instruction, because we
16915 know that we're shifting 0...31/63 ones into a -1. */
16916 emit_move_insn (low[0], constm1_rtx);
16917 if (optimize_insn_for_size_p ())
16918 emit_move_insn (high[0], low[0]);
16919 else
16920 emit_move_insn (high[0], constm1_rtx);
16922 else
16924 if (!rtx_equal_p (operands[0], operands[1]))
16925 emit_move_insn (operands[0], operands[1]);
16927 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16928 emit_insn ((mode == DImode
16929 ? gen_x86_shld
16930 : gen_x86_64_shld) (high[0], low[0], operands[2]));
16933 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
16935 if (TARGET_CMOVE && scratch)
16937 ix86_expand_clear (scratch);
16938 emit_insn ((mode == DImode
16939 ? gen_x86_shift_adj_1
16940 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
16941 scratch));
16943 else
16944 emit_insn ((mode == DImode
16945 ? gen_x86_shift_adj_2
16946 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
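/* Split a double-word arithmetic right shift into word-sized
   operations, mirroring ix86_split_ashl above: shrd plus sar for
   constant counts (with the sign propagated from the high word when
   the count reaches the word size), and a cmov- or branch-based fixup
   for variable counts.  */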
16949 void
16950 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
16952 rtx low[2], high[2];
16953 int count;
16954 const int single_width = mode == DImode ? 32 : 64;
16956 if (CONST_INT_P (operands[2]))
16958 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16959 count = INTVAL (operands[2]) & (single_width * 2 - 1);
16961 if (count == single_width * 2 - 1)
16963 emit_move_insn (high[0], high[1]);
16964 emit_insn ((mode == DImode
16965 ? gen_ashrsi3
16966 : gen_ashrdi3) (high[0], high[0],
16967 GEN_INT (single_width - 1)));
16968 emit_move_insn (low[0], high[0]);
16971 else if (count >= single_width)
16973 emit_move_insn (low[0], high[1]);
16974 emit_move_insn (high[0], low[0]);
16975 emit_insn ((mode == DImode
16976 ? gen_ashrsi3
16977 : gen_ashrdi3) (high[0], high[0],
16978 GEN_INT (single_width - 1)));
16979 if (count > single_width)
16980 emit_insn ((mode == DImode
16981 ? gen_ashrsi3
16982 : gen_ashrdi3) (low[0], low[0],
16983 GEN_INT (count - single_width)));
16985 else
16987 if (!rtx_equal_p (operands[0], operands[1]))
16988 emit_move_insn (operands[0], operands[1]);
16989 emit_insn ((mode == DImode
16990 ? gen_x86_shrd
16991 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16992 emit_insn ((mode == DImode
16993 ? gen_ashrsi3
16994 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
16997 else
16999 if (!rtx_equal_p (operands[0], operands[1]))
17000 emit_move_insn (operands[0], operands[1]);
17002 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17004 emit_insn ((mode == DImode
17005 ? gen_x86_shrd
17006 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17007 emit_insn ((mode == DImode
17008 ? gen_ashrsi3
17009 : gen_ashrdi3) (high[0], high[0], operands[2]));
17011 if (TARGET_CMOVE && scratch)
17013 emit_move_insn (scratch, high[0]);
17014 emit_insn ((mode == DImode
17015 ? gen_ashrsi3
17016 : gen_ashrdi3) (scratch, scratch,
17017 GEN_INT (single_width - 1)));
17018 emit_insn ((mode == DImode
17019 ? gen_x86_shift_adj_1
17020 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17021 scratch));
17023 else
17024 emit_insn ((mode == DImode
17025 ? gen_x86_shift_adj_3
17026 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
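/* Split a double-word logical right shift into word-sized operations;
   like the arithmetic variant above, except that the vacated high word
   is cleared rather than filled with the sign.  */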
17030 void
17031 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
17033 rtx low[2], high[2];
17034 int count;
17035 const int single_width = mode == DImode ? 32 : 64;
17037 if (CONST_INT_P (operands[2]))
17039 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17040 count = INTVAL (operands[2]) & (single_width * 2 - 1);
17042 if (count >= single_width)
17044 emit_move_insn (low[0], high[1]);
17045 ix86_expand_clear (high[0]);
17047 if (count > single_width)
17048 emit_insn ((mode == DImode
17049 ? gen_lshrsi3
17050 : gen_lshrdi3) (low[0], low[0],
17051 GEN_INT (count - single_width)));
17053 else
17055 if (!rtx_equal_p (operands[0], operands[1]))
17056 emit_move_insn (operands[0], operands[1]);
17057 emit_insn ((mode == DImode
17058 ? gen_x86_shrd
17059 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17060 emit_insn ((mode == DImode
17061 ? gen_lshrsi3
17062 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
17065 else
17067 if (!rtx_equal_p (operands[0], operands[1]))
17068 emit_move_insn (operands[0], operands[1]);
17070 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17072 emit_insn ((mode == DImode
17073 ? gen_x86_shrd
17074 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17075 emit_insn ((mode == DImode
17076 ? gen_lshrsi3
17077 : gen_lshrdi3) (high[0], high[0], operands[2]));
17079 /* Heh. By reversing the arguments, we can reuse this pattern. */
17080 if (TARGET_CMOVE && scratch)
17082 ix86_expand_clear (scratch);
17083 emit_insn ((mode == DImode
17084 ? gen_x86_shift_adj_1
17085 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17086 scratch));
17088 else
17089 emit_insn ((mode == DImode
17090 ? gen_x86_shift_adj_2
17091 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
/* Predict the just-emitted jump instruction to be taken with probability PROB.  */
17096 static void
17097 predict_jump (int prob)
17099 rtx insn = get_last_insn ();
17100 gcc_assert (JUMP_P (insn));
17101 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
/* Helper function for the string operations below.  Test whether
   VARIABLE is aligned to VALUE bytes; if so, jump to the returned
   label.  */
17106 static rtx
17107 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
17109 rtx label = gen_label_rtx ();
17110 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
17111 if (GET_MODE (variable) == DImode)
17112 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
17113 else
17114 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
17115 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
17116 1, label);
17117 if (epilogue)
17118 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17119 else
17120 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17121 return label;
/* Decrease COUNTREG by VALUE.  */
17125 static void
17126 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
17128 if (GET_MODE (countreg) == DImode)
17129 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
17130 else
17131 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
/* Zero-extend the possibly-SImode EXP into a Pmode register.  */
17136 ix86_zero_extend_to_Pmode (rtx exp)
17138 rtx r;
17139 if (GET_MODE (exp) == VOIDmode)
17140 return force_reg (Pmode, exp);
17141 if (GET_MODE (exp) == Pmode)
17142 return copy_to_mode_reg (Pmode, exp);
17143 r = gen_reg_rtx (Pmode);
17144 emit_insn (gen_zero_extendsidi2 (r, exp));
17145 return r;
/* Divide COUNTREG by SCALE (a power of two).  */
17149 static rtx
17150 scale_counter (rtx countreg, int scale)
17152 rtx sc;
17153 rtx piece_size_mask;
17155 if (scale == 1)
17156 return countreg;
17157 if (CONST_INT_P (countreg))
17158 return GEN_INT (INTVAL (countreg) / scale);
17159 gcc_assert (REG_P (countreg));
17161 piece_size_mask = GEN_INT (scale - 1);
17162 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
17163 GEN_INT (exact_log2 (scale)),
17164 NULL, 1, OPTAB_DIRECT);
17165 return sc;
17168 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
17169 DImode for constant loop counts. */
17171 static enum machine_mode
17172 counter_mode (rtx count_exp)
17174 if (GET_MODE (count_exp) != VOIDmode)
17175 return GET_MODE (count_exp);
17176 if (GET_CODE (count_exp) != CONST_INT)
17177 return Pmode;
17178 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
17179 return DImode;
17180 return SImode;
/* When SRCPTR is non-NULL, output a simple loop to move memory pointed
   to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
   the overall size is COUNT, specified in bytes.  When SRCPTR is NULL,
   output the equivalent loop to set memory to VALUE (assumed to be in
   MODE).

   The size is rounded down to a whole number of the chunk size moved
   at once.  SRCMEM and DESTMEM provide the MEM rtxes to feed proper
   aliasing info.  */
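/* A sketch of the control flow emitted below (illustrative):

	size = count & -(chunk * unroll)
	if (size == 0) goto out		-- only emitted when chunk * unroll
					   is 1 byte; larger chunks appear to
					   rely on the caller guaranteeing a
					   nonzero size
	iter = 0
   top:
	copy/set one chunk at dest+iter (and src+iter), UNROLL times
	iter += chunk * unroll
	if (iter < size) goto top
	dest += iter; src += iter
   out:
*/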
17192 static void
17193 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
17194 rtx destptr, rtx srcptr, rtx value,
17195 rtx count, enum machine_mode mode, int unroll,
17196 int expected_size)
17198 rtx out_label, top_label, iter, tmp;
17199 enum machine_mode iter_mode = counter_mode (count);
17200 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
17201 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
17202 rtx size;
17203 rtx x_addr;
17204 rtx y_addr;
17205 int i;
17207 top_label = gen_label_rtx ();
17208 out_label = gen_label_rtx ();
17209 iter = gen_reg_rtx (iter_mode);
17211 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
17212 NULL, 1, OPTAB_DIRECT);
/* Those two should combine (the AND with the compare against zero).  */
17214 if (piece_size == const1_rtx)
17216 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
17217 true, out_label);
17218 predict_jump (REG_BR_PROB_BASE * 10 / 100);
17220 emit_move_insn (iter, const0_rtx);
17222 emit_label (top_label);
17224 tmp = convert_modes (Pmode, iter_mode, iter, true);
17225 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
17226 destmem = change_address (destmem, mode, x_addr);
17228 if (srcmem)
17230 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
17231 srcmem = change_address (srcmem, mode, y_addr);
/* When unrolling for chips that reorder memory reads and writes,
   we can save registers by using a single temporary.
   Also, using four temporaries is overkill in 32-bit mode.  */
17236 if (!TARGET_64BIT && 0)
17238 for (i = 0; i < unroll; i++)
17240 if (i)
17242 destmem =
17243 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17244 srcmem =
17245 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17247 emit_move_insn (destmem, srcmem);
17250 else
17252 rtx tmpreg[4];
17253 gcc_assert (unroll <= 4);
17254 for (i = 0; i < unroll; i++)
17256 tmpreg[i] = gen_reg_rtx (mode);
17257 if (i)
17259 srcmem =
17260 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17262 emit_move_insn (tmpreg[i], srcmem);
17264 for (i = 0; i < unroll; i++)
17266 if (i)
17268 destmem =
17269 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17271 emit_move_insn (destmem, tmpreg[i]);
17275 else
17276 for (i = 0; i < unroll; i++)
17278 if (i)
17279 destmem =
17280 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17281 emit_move_insn (destmem, value);
17284 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
17285 true, OPTAB_LIB_WIDEN);
17286 if (tmp != iter)
17287 emit_move_insn (iter, tmp);
17289 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
17290 true, top_label);
17291 if (expected_size != -1)
17293 expected_size /= GET_MODE_SIZE (mode) * unroll;
17294 if (expected_size == 0)
17295 predict_jump (0);
17296 else if (expected_size > REG_BR_PROB_BASE)
17297 predict_jump (REG_BR_PROB_BASE - 1);
17298 else
17299 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
17301 else
17302 predict_jump (REG_BR_PROB_BASE * 80 / 100);
17303 iter = ix86_zero_extend_to_Pmode (iter);
17304 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
17305 true, OPTAB_LIB_WIDEN);
17306 if (tmp != destptr)
17307 emit_move_insn (destptr, tmp);
17308 if (srcptr)
17310 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
17311 true, OPTAB_LIB_WIDEN);
17312 if (tmp != srcptr)
17313 emit_move_insn (srcptr, tmp);
17315 emit_label (out_label);
/* Output a "rep; mov" instruction.  The arguments have the same
   meaning as for the previous function.  */
17320 static void
17321 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
17322 rtx destptr, rtx srcptr,
17323 rtx count,
17324 enum machine_mode mode)
17326 rtx destexp;
17327 rtx srcexp;
17328 rtx countreg;
/* If the size is known and divisible by 4, it is shorter to use
   SImode "rep movs" even when the caller asked for QImode.  */
17331 if (mode == QImode && CONST_INT_P (count)
17332 && !(INTVAL (count) & 3))
17333 mode = SImode;
17335 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17336 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17337 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
17338 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
17339 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17340 if (mode != QImode)
17342 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17343 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17344 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17345 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
17346 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17347 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
17349 else
17351 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17352 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
17354 if (CONST_INT_P (count))
17356 count = GEN_INT (INTVAL (count)
17357 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17358 destmem = shallow_copy_rtx (destmem);
17359 srcmem = shallow_copy_rtx (srcmem);
17360 set_mem_size (destmem, count);
17361 set_mem_size (srcmem, count);
17363 else
17365 if (MEM_SIZE (destmem))
17366 set_mem_size (destmem, NULL_RTX);
17367 if (MEM_SIZE (srcmem))
17368 set_mem_size (srcmem, NULL_RTX);
17370 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
17371 destexp, srcexp));
/* Output a "rep; stos" instruction.  The arguments have the same
   meaning as for the previous function.  */
17376 static void
17377 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
17378 rtx count, enum machine_mode mode,
17379 rtx orig_value)
17381 rtx destexp;
17382 rtx countreg;
17384 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17385 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17386 value = force_reg (mode, gen_lowpart (mode, value));
17387 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17388 if (mode != QImode)
17390 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17391 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17392 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17394 else
17395 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17396 if (orig_value == const0_rtx && CONST_INT_P (count))
17398 count = GEN_INT (INTVAL (count)
17399 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17400 destmem = shallow_copy_rtx (destmem);
17401 set_mem_size (destmem, count);
17403 else if (MEM_SIZE (destmem))
17404 set_mem_size (destmem, NULL_RTX);
17405 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
17408 static void
17409 emit_strmov (rtx destmem, rtx srcmem,
17410 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
17412 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
17413 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
17414 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17417 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
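/* E.g. (illustrative) a known remainder of 7 bytes on a 32-bit target
   is copied as one SImode, one HImode and one QImode move, walking the
   countval bits from 0x04 down to 0x01.  */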
17418 static void
17419 expand_movmem_epilogue (rtx destmem, rtx srcmem,
17420 rtx destptr, rtx srcptr, rtx count, int max_size)
17422 rtx src, dest;
17423 if (CONST_INT_P (count))
17425 HOST_WIDE_INT countval = INTVAL (count);
17426 int offset = 0;
17428 if ((countval & 0x10) && max_size > 16)
17430 if (TARGET_64BIT)
17432 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17433 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
17435 else
17436 gcc_unreachable ();
17437 offset += 16;
17439 if ((countval & 0x08) && max_size > 8)
17441 if (TARGET_64BIT)
17442 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17443 else
17445 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17446 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
17448 offset += 8;
17450 if ((countval & 0x04) && max_size > 4)
17452 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17453 offset += 4;
17455 if ((countval & 0x02) && max_size > 2)
17457 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
17458 offset += 2;
17460 if ((countval & 0x01) && max_size > 1)
17462 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
17463 offset += 1;
17465 return;
17467 if (max_size > 8)
17469 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
17470 count, 1, OPTAB_DIRECT);
17471 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
17472 count, QImode, 1, 4);
17473 return;
/* When single stringop insns are available (TARGET_SINGLE_STRINGOP),
   we can cheaply advance the dest and src pointers.  Otherwise we save
   code size by maintaining an offset (zero is readily available from
   the preceding rep operation) and using x86 addressing modes.  */
17480 if (TARGET_SINGLE_STRINGOP)
17482 if (max_size > 4)
17484 rtx label = ix86_expand_aligntest (count, 4, true);
17485 src = change_address (srcmem, SImode, srcptr);
17486 dest = change_address (destmem, SImode, destptr);
17487 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17488 emit_label (label);
17489 LABEL_NUSES (label) = 1;
17491 if (max_size > 2)
17493 rtx label = ix86_expand_aligntest (count, 2, true);
17494 src = change_address (srcmem, HImode, srcptr);
17495 dest = change_address (destmem, HImode, destptr);
17496 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17497 emit_label (label);
17498 LABEL_NUSES (label) = 1;
17500 if (max_size > 1)
17502 rtx label = ix86_expand_aligntest (count, 1, true);
17503 src = change_address (srcmem, QImode, srcptr);
17504 dest = change_address (destmem, QImode, destptr);
17505 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17506 emit_label (label);
17507 LABEL_NUSES (label) = 1;
17510 else
17512 rtx offset = force_reg (Pmode, const0_rtx);
17513 rtx tmp;
17515 if (max_size > 4)
17517 rtx label = ix86_expand_aligntest (count, 4, true);
17518 src = change_address (srcmem, SImode, srcptr);
17519 dest = change_address (destmem, SImode, destptr);
17520 emit_move_insn (dest, src);
17521 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
17522 true, OPTAB_LIB_WIDEN);
17523 if (tmp != offset)
17524 emit_move_insn (offset, tmp);
17525 emit_label (label);
17526 LABEL_NUSES (label) = 1;
17528 if (max_size > 2)
17530 rtx label = ix86_expand_aligntest (count, 2, true);
17531 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17532 src = change_address (srcmem, HImode, tmp);
17533 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17534 dest = change_address (destmem, HImode, tmp);
17535 emit_move_insn (dest, src);
17536 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
17537 true, OPTAB_LIB_WIDEN);
17538 if (tmp != offset)
17539 emit_move_insn (offset, tmp);
17540 emit_label (label);
17541 LABEL_NUSES (label) = 1;
17543 if (max_size > 1)
17545 rtx label = ix86_expand_aligntest (count, 1, true);
17546 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17547 src = change_address (srcmem, QImode, tmp);
17548 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17549 dest = change_address (destmem, QImode, tmp);
17550 emit_move_insn (dest, src);
17551 emit_label (label);
17552 LABEL_NUSES (label) = 1;
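
/* Illustrative sketch, not part of GCC: the constant-count branch above
   decomposes the residual count bit by bit, so each set bit of COUNT
   selects exactly one move of the matching power-of-two width.  The
   hypothetical helper below mirrors that cascade for a 64-bit target,
   using __builtin_memcpy to stand in for the strmov patterns.  */
#if 0
static void
copy_tail_sketch (char *dest, const char *src, unsigned count /* < 16 */)
{
  unsigned offset = 0;
  if (count & 8)
    {
      __builtin_memcpy (dest + offset, src + offset, 8);  /* DImode move */
      offset += 8;
    }
  if (count & 4)
    {
      __builtin_memcpy (dest + offset, src + offset, 4);  /* SImode move */
      offset += 4;
    }
  if (count & 2)
    {
      __builtin_memcpy (dest + offset, src + offset, 2);  /* HImode move */
      offset += 2;
    }
  if (count & 1)
    __builtin_memcpy (dest + offset, src + offset, 1);    /* QImode move */
}
#endif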
17557 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
17558 static void
17559 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17560 rtx count, int max_size)
17562 count =
17563 expand_simple_binop (counter_mode (count), AND, count,
17564 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
17565 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17566 gen_lowpart (QImode, value), count, QImode,
17567 1, max_size / 2);
17570 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
17571 static void
17572 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
17574 rtx dest;
17576 if (CONST_INT_P (count))
17578 HOST_WIDE_INT countval = INTVAL (count);
17579 int offset = 0;
17581 if ((countval & 0x10) && max_size > 16)
17583 if (TARGET_64BIT)
17585 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17586 emit_insn (gen_strset (destptr, dest, value));
17587 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17588 emit_insn (gen_strset (destptr, dest, value));
17590 else
17591 gcc_unreachable ();
17592 offset += 16;
17594 if ((countval & 0x08) && max_size > 8)
17596 if (TARGET_64BIT)
17598 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17599 emit_insn (gen_strset (destptr, dest, value));
17601 else
17603 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17604 emit_insn (gen_strset (destptr, dest, value));
17605 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17606 emit_insn (gen_strset (destptr, dest, value));
17608 offset += 8;
17610 if ((countval & 0x04) && max_size > 4)
17612 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17613 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17614 offset += 4;
17616 if ((countval & 0x02) && max_size > 2)
17618 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17619 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17620 offset += 2;
17622 if ((countval & 0x01) && max_size > 1)
17624 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17625 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17626 offset += 1;
17628 return;
17630 if (max_size > 32)
17632 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
17633 return;
17635 if (max_size > 16)
17637 rtx label = ix86_expand_aligntest (count, 16, true);
17638 if (TARGET_64BIT)
17640 dest = change_address (destmem, DImode, destptr);
17641 emit_insn (gen_strset (destptr, dest, value));
17642 emit_insn (gen_strset (destptr, dest, value));
17644 else
17646 dest = change_address (destmem, SImode, destptr);
17647 emit_insn (gen_strset (destptr, dest, value));
17648 emit_insn (gen_strset (destptr, dest, value));
17649 emit_insn (gen_strset (destptr, dest, value));
17650 emit_insn (gen_strset (destptr, dest, value));
17652 emit_label (label);
17653 LABEL_NUSES (label) = 1;
17655 if (max_size > 8)
17657 rtx label = ix86_expand_aligntest (count, 8, true);
17658 if (TARGET_64BIT)
17660 dest = change_address (destmem, DImode, destptr);
17661 emit_insn (gen_strset (destptr, dest, value));
17663 else
17665 dest = change_address (destmem, SImode, destptr);
17666 emit_insn (gen_strset (destptr, dest, value));
17667 emit_insn (gen_strset (destptr, dest, value));
17669 emit_label (label);
17670 LABEL_NUSES (label) = 1;
17672 if (max_size > 4)
17674 rtx label = ix86_expand_aligntest (count, 4, true);
17675 dest = change_address (destmem, SImode, destptr);
17676 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17677 emit_label (label);
17678 LABEL_NUSES (label) = 1;
17680 if (max_size > 2)
17682 rtx label = ix86_expand_aligntest (count, 2, true);
17683 dest = change_address (destmem, HImode, destptr);
17684 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17685 emit_label (label);
17686 LABEL_NUSES (label) = 1;
17688 if (max_size > 1)
17690 rtx label = ix86_expand_aligntest (count, 1, true);
17691 dest = change_address (destmem, QImode, destptr);
17692 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17693 emit_label (label);
17694 LABEL_NUSES (label) = 1;
17698 /* Copy enough bytes from SRC to DEST to align DEST, known to be aligned by
17699 ALIGN, to DESIRED_ALIGNMENT. */
17700 static void
17701 expand_movmem_prologue (rtx destmem, rtx srcmem,
17702 rtx destptr, rtx srcptr, rtx count,
17703 int align, int desired_alignment)
17705 if (align <= 1 && desired_alignment > 1)
17707 rtx label = ix86_expand_aligntest (destptr, 1, false);
17708 srcmem = change_address (srcmem, QImode, srcptr);
17709 destmem = change_address (destmem, QImode, destptr);
17710 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17711 ix86_adjust_counter (count, 1);
17712 emit_label (label);
17713 LABEL_NUSES (label) = 1;
17715 if (align <= 2 && desired_alignment > 2)
17717 rtx label = ix86_expand_aligntest (destptr, 2, false);
17718 srcmem = change_address (srcmem, HImode, srcptr);
17719 destmem = change_address (destmem, HImode, destptr);
17720 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17721 ix86_adjust_counter (count, 2);
17722 emit_label (label);
17723 LABEL_NUSES (label) = 1;
17725 if (align <= 4 && desired_alignment > 4)
17727 rtx label = ix86_expand_aligntest (destptr, 4, false);
17728 srcmem = change_address (srcmem, SImode, srcptr);
17729 destmem = change_address (destmem, SImode, destptr);
17730 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17731 ix86_adjust_counter (count, 4);
17732 emit_label (label);
17733 LABEL_NUSES (label) = 1;
17735 gcc_assert (desired_alignment <= 8);
17738 /* Copy enough bytes from SRC to DST to align DST to DESIRED_ALIGN.
17739 ALIGN_BYTES is how many bytes need to be copied. */
17740 static rtx
17741 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
17742 int desired_align, int align_bytes)
17744 rtx src = *srcp;
17745 rtx src_size, dst_size;
17746 int off = 0;
17747 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
17748 if (src_align_bytes >= 0)
17749 src_align_bytes = desired_align - src_align_bytes;
17750 src_size = MEM_SIZE (src);
17751 dst_size = MEM_SIZE (dst);
17752 if (align_bytes & 1)
17754 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17755 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
17756 off = 1;
17757 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17759 if (align_bytes & 2)
17761 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17762 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
17763 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17764 set_mem_align (dst, 2 * BITS_PER_UNIT);
17765 if (src_align_bytes >= 0
17766 && (src_align_bytes & 1) == (align_bytes & 1)
17767 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
17768 set_mem_align (src, 2 * BITS_PER_UNIT);
17769 off = 2;
17770 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17772 if (align_bytes & 4)
17774 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17775 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
17776 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17777 set_mem_align (dst, 4 * BITS_PER_UNIT);
17778 if (src_align_bytes >= 0)
17780 unsigned int src_align = 0;
17781 if ((src_align_bytes & 3) == (align_bytes & 3))
17782 src_align = 4;
17783 else if ((src_align_bytes & 1) == (align_bytes & 1))
17784 src_align = 2;
17785 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17786 set_mem_align (src, src_align * BITS_PER_UNIT);
17788 off = 4;
17789 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17791 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17792 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
17793 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17794 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17795 if (src_align_bytes >= 0)
17797 unsigned int src_align = 0;
17798 if ((src_align_bytes & 7) == (align_bytes & 7))
17799 src_align = 8;
17800 else if ((src_align_bytes & 3) == (align_bytes & 3))
17801 src_align = 4;
17802 else if ((src_align_bytes & 1) == (align_bytes & 1))
17803 src_align = 2;
17804 if (src_align > (unsigned int) desired_align)
17805 src_align = desired_align;
17806 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17807 set_mem_align (src, src_align * BITS_PER_UNIT);
17809 if (dst_size)
17810 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17811 if (src_size)
17812 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
17813 *srcp = src;
17814 return dst;
17817 /* Store enough bytes to DEST to align DEST, known to be aligned by ALIGN,
17818 to DESIRED_ALIGNMENT. */
17819 static void
17820 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
17821 int align, int desired_alignment)
17823 if (align <= 1 && desired_alignment > 1)
17825 rtx label = ix86_expand_aligntest (destptr, 1, false);
17826 destmem = change_address (destmem, QImode, destptr);
17827 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
17828 ix86_adjust_counter (count, 1);
17829 emit_label (label);
17830 LABEL_NUSES (label) = 1;
17832 if (align <= 2 && desired_alignment > 2)
17834 rtx label = ix86_expand_aligntest (destptr, 2, false);
17835 destmem = change_address (destmem, HImode, destptr);
17836 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
17837 ix86_adjust_counter (count, 2);
17838 emit_label (label);
17839 LABEL_NUSES (label) = 1;
17841 if (align <= 4 && desired_alignment > 4)
17843 rtx label = ix86_expand_aligntest (destptr, 4, false);
17844 destmem = change_address (destmem, SImode, destptr);
17845 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
17846 ix86_adjust_counter (count, 4);
17847 emit_label (label);
17848 LABEL_NUSES (label) = 1;
17850 gcc_assert (desired_alignment <= 8);
17853 /* Store enough bytes to DST to align DST, known to be aligned by ALIGN, to
17854 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
17855 static rtx
17856 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
17857 int desired_align, int align_bytes)
17859 int off = 0;
17860 rtx dst_size = MEM_SIZE (dst);
17861 if (align_bytes & 1)
17863 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17864 off = 1;
17865 emit_insn (gen_strset (destreg, dst,
17866 gen_lowpart (QImode, value)));
17868 if (align_bytes & 2)
17870 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17871 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17872 set_mem_align (dst, 2 * BITS_PER_UNIT);
17873 off = 2;
17874 emit_insn (gen_strset (destreg, dst,
17875 gen_lowpart (HImode, value)));
17877 if (align_bytes & 4)
17879 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17880 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17881 set_mem_align (dst, 4 * BITS_PER_UNIT);
17882 off = 4;
17883 emit_insn (gen_strset (destreg, dst,
17884 gen_lowpart (SImode, value)));
17886 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17887 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17888 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17889 if (dst_size)
17890 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17891 return dst;
17894 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
17895 static enum stringop_alg
17896 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
17897 int *dynamic_check)
17899 const struct stringop_algs * algs;
17900 bool optimize_for_speed;
17901 /* Algorithms using the rep prefix want at least edi and ecx;
17902 additionally, memset wants eax and memcpy wants esi. Don't
17903 consider such algorithms if the user has appropriated those
17904 registers for their own purposes. */
17905 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
17906 || (memset
17907 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
17909 #define ALG_USABLE_P(alg) (rep_prefix_usable \
17910 || (alg != rep_prefix_1_byte \
17911 && alg != rep_prefix_4_byte \
17912 && alg != rep_prefix_8_byte))
17913 const struct processor_costs *cost;
17915 /* Even if the string operation call is cold, we still might spend a lot
17916 of time processing large blocks. */
17917 if (optimize_function_for_size_p (cfun)
17918 || (optimize_insn_for_size_p ()
17919 && expected_size != -1 && expected_size < 256))
17920 optimize_for_speed = false;
17921 else
17922 optimize_for_speed = true;
17924 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
17926 *dynamic_check = -1;
17927 if (memset)
17928 algs = &cost->memset[TARGET_64BIT != 0];
17929 else
17930 algs = &cost->memcpy[TARGET_64BIT != 0];
17931 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
17932 return stringop_alg;
17933 /* rep; movq or rep; movl is the smallest variant. */
17934 else if (!optimize_for_speed)
17936 if (!count || (count & 3))
17937 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
17938 else
17939 return rep_prefix_usable ? rep_prefix_4_byte : loop;
17941 /* Very tiny blocks are best handled via the loop; REP is expensive to set up. */
17943 else if (expected_size != -1 && expected_size < 4)
17944 return loop_1_byte;
17945 else if (expected_size != -1)
17947 unsigned int i;
17948 enum stringop_alg alg = libcall;
17949 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
17951 /* We get here if the algorithms that were not libcall-based
17952 were rep-prefix based and we are unable to use rep prefixes
17953 based on global register usage. Break out of the loop and
17954 use the heuristic below. */
17955 if (algs->size[i].max == 0)
17956 break;
17957 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
17959 enum stringop_alg candidate = algs->size[i].alg;
17961 if (candidate != libcall && ALG_USABLE_P (candidate))
17962 alg = candidate;
17963 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
17964 last non-libcall inline algorithm. */
17965 if (TARGET_INLINE_ALL_STRINGOPS)
17967 /* When the current size is best copied by a libcall, but we
17968 are still forced to inline, run the heuristic below
17969 that will pick code for medium-sized blocks. */
17970 if (alg != libcall)
17971 return alg;
17972 break;
17974 else if (ALG_USABLE_P (candidate))
17975 return candidate;
17978 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
17980 /* When asked to inline the call anyway, try to pick a meaningful choice.
17981 We look for the maximal size of block that is faster to copy by hand and
17982 take blocks of at most that size, guessing that the average size will
17983 be roughly half of the block.
17985 If this turns out to be bad, we might simply specify the preferred
17986 choice in ix86_costs. */
17987 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17988 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
17990 int max = -1;
17991 enum stringop_alg alg;
17992 int i;
17993 bool any_alg_usable_p = true;
17995 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
17997 enum stringop_alg candidate = algs->size[i].alg;
17998 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
18000 if (candidate != libcall && candidate
18001 && ALG_USABLE_P (candidate))
18002 max = algs->size[i].max;
18004 /* If there aren't any usable algorithms, then recursing on
18005 smaller sizes isn't going to find anything. Just return the
18006 simple byte-at-a-time copy loop. */
18007 if (!any_alg_usable_p)
18009 /* Pick something reasonable. */
18010 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18011 *dynamic_check = 128;
18012 return loop_1_byte;
18014 if (max == -1)
18015 max = 4096;
18016 alg = decide_alg (count, max / 2, memset, dynamic_check);
18017 gcc_assert (*dynamic_check == -1);
18018 gcc_assert (alg != libcall);
18019 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18020 *dynamic_check = max;
18021 return alg;
18023 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
18024 #undef ALG_USABLE_P
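
/* Illustrative sketch, not part of GCC: how a stringop_algs size table is
   walked above.  Each entry means "use ALG for blocks up to MAX bytes";
   max == -1 terminates the table and covers all larger sizes.  The struct
   and function names here are hypothetical stand-ins for the real
   cost-table fields.  */
#if 0
struct size_entry_sketch { int max; int alg; };

static int
pick_alg_sketch (const struct size_entry_sketch *table, int n,
		 int expected_size)
{
  int i;
  for (i = 0; i < n; i++)
    if (table[i].max >= expected_size || table[i].max == -1)
      return table[i].alg;  /* first entry covering the size wins */
  return -1;                /* no usable entry: fall back to a libcall */
}
#endif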
18027 /* Decide on alignment. We know that the operand is already aligned to ALIGN
18028 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
18029 static int
18030 decide_alignment (int align,
18031 enum stringop_alg alg,
18032 int expected_size)
18034 int desired_align = 0;
18035 switch (alg)
18037 case no_stringop:
18038 gcc_unreachable ();
18039 case loop:
18040 case unrolled_loop:
18041 desired_align = GET_MODE_SIZE (Pmode);
18042 break;
18043 case rep_prefix_8_byte:
18044 desired_align = 8;
18045 break;
18046 case rep_prefix_4_byte:
18047 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
18048 copying a whole cache line at once. */
18049 if (TARGET_PENTIUMPRO)
18050 desired_align = 8;
18051 else
18052 desired_align = 4;
18053 break;
18054 case rep_prefix_1_byte:
18055 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
18056 copying a whole cache line at once. */
18057 if (TARGET_PENTIUMPRO)
18058 desired_align = 8;
18059 else
18060 desired_align = 1;
18061 break;
18062 case loop_1_byte:
18063 desired_align = 1;
18064 break;
18065 case libcall:
18066 return 0;
18069 if (optimize_size)
18070 desired_align = 1;
18071 if (desired_align < align)
18072 desired_align = align;
18073 if (expected_size != -1 && expected_size < 4)
18074 desired_align = align;
18075 return desired_align;
18078 /* Return the smallest power of 2 greater than VAL. */
18079 static int
18080 smallest_pow2_greater_than (int val)
18082 int ret = 1;
18083 while (ret <= val)
18084 ret <<= 1;
18085 return ret;
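
/* Worked examples (illustrative): smallest_pow2_greater_than (0) == 1,
   smallest_pow2_greater_than (1) == 2, smallest_pow2_greater_than (4) == 8
   and smallest_pow2_greater_than (5) == 8.  The result is strictly greater
   than VAL, so masking with (result - 1) preserves every bit of VAL.  */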
18088 /* Expand string move (memcpy) operation. Use i386 string operations when
18089 profitable. expand_setmem contains similar code. The code depends upon
18090 architecture, block size and alignment, but always has the same
18091 overall structure:
18093 1) Prologue guard: Conditional that jumps up to epilogues for small
18094 blocks that can be handled by epilogue alone. This is faster but
18095 also needed for correctness, since the prologue assumes the block is larger
18096 than the desired alignment.
18098 Optional dynamic check for size and libcall for large
18099 blocks is emitted here too, with -minline-stringops-dynamically.
18101 2) Prologue: copy first few bytes in order to get destination aligned
18102 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
18103 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
18104 We emit either a jump tree on power of two sized blocks, or a byte loop.
18106 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
18107 with specified algorithm.
18109 4) Epilogue: code copying tail of the block that is too small to be
18110 handled by main body (or up to size guarded by prologue guard). */
18112 int
18113 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
18114 rtx expected_align_exp, rtx expected_size_exp)
18116 rtx destreg;
18117 rtx srcreg;
18118 rtx label = NULL;
18119 rtx tmp;
18120 rtx jump_around_label = NULL;
18121 HOST_WIDE_INT align = 1;
18122 unsigned HOST_WIDE_INT count = 0;
18123 HOST_WIDE_INT expected_size = -1;
18124 int size_needed = 0, epilogue_size_needed;
18125 int desired_align = 0, align_bytes = 0;
18126 enum stringop_alg alg;
18127 int dynamic_check;
18128 bool need_zero_guard = false;
18130 if (CONST_INT_P (align_exp))
18131 align = INTVAL (align_exp);
18132 /* i386 can do misaligned access at a reasonably increased cost. */
18133 if (CONST_INT_P (expected_align_exp)
18134 && INTVAL (expected_align_exp) > align)
18135 align = INTVAL (expected_align_exp);
18136 /* ALIGN is the minimum of destination and source alignment, but we care here
18137 just about destination alignment. */
18138 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
18139 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
18141 if (CONST_INT_P (count_exp))
18142 count = expected_size = INTVAL (count_exp);
18143 if (CONST_INT_P (expected_size_exp) && count == 0)
18144 expected_size = INTVAL (expected_size_exp);
18146 /* Make sure we don't need to care about overflow later on. */
18147 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18148 return 0;
18150 /* Step 0: Decide on preferred algorithm, desired alignment and
18151 size of chunks to be copied by main loop. */
18153 alg = decide_alg (count, expected_size, false, &dynamic_check);
18154 desired_align = decide_alignment (align, alg, expected_size);
18156 if (!TARGET_ALIGN_STRINGOPS)
18157 align = desired_align;
18159 if (alg == libcall)
18160 return 0;
18161 gcc_assert (alg != no_stringop);
18162 if (!count)
18163 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
18164 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18165 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
18166 switch (alg)
18168 case libcall:
18169 case no_stringop:
18170 gcc_unreachable ();
18171 case loop:
18172 need_zero_guard = true;
18173 size_needed = GET_MODE_SIZE (Pmode);
18174 break;
18175 case unrolled_loop:
18176 need_zero_guard = true;
18177 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
18178 break;
18179 case rep_prefix_8_byte:
18180 size_needed = 8;
18181 break;
18182 case rep_prefix_4_byte:
18183 size_needed = 4;
18184 break;
18185 case rep_prefix_1_byte:
18186 size_needed = 1;
18187 break;
18188 case loop_1_byte:
18189 need_zero_guard = true;
18190 size_needed = 1;
18191 break;
18194 epilogue_size_needed = size_needed;
18196 /* Step 1: Prologue guard. */
18198 /* Alignment code needs count to be in register. */
18199 if (CONST_INT_P (count_exp) && desired_align > align)
18201 if (INTVAL (count_exp) > desired_align
18202 && INTVAL (count_exp) > size_needed)
18204 align_bytes
18205 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18206 if (align_bytes <= 0)
18207 align_bytes = 0;
18208 else
18209 align_bytes = desired_align - align_bytes;
18211 if (align_bytes == 0)
18212 count_exp = force_reg (counter_mode (count_exp), count_exp);
18214 gcc_assert (desired_align >= 1 && align >= 1);
18216 /* Ensure that alignment prologue won't copy past end of block. */
18217 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18219 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18220 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18221 Make sure it is a power of 2. */
18222 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18224 if (count)
18226 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18228 /* If main algorithm works on QImode, no epilogue is needed.
18229 For small sizes just don't align anything. */
18230 if (size_needed == 1)
18231 desired_align = align;
18232 else
18233 goto epilogue;
18236 else
18238 label = gen_label_rtx ();
18239 emit_cmp_and_jump_insns (count_exp,
18240 GEN_INT (epilogue_size_needed),
18241 LTU, 0, counter_mode (count_exp), 1, label);
18242 if (expected_size == -1 || expected_size < epilogue_size_needed)
18243 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18244 else
18245 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18249 /* Emit code to decide at runtime whether a library call or inline code
18250 should be used. */
18251 if (dynamic_check != -1)
18253 if (CONST_INT_P (count_exp))
18255 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
18257 emit_block_move_via_libcall (dst, src, count_exp, false);
18258 count_exp = const0_rtx;
18259 goto epilogue;
18262 else
18264 rtx hot_label = gen_label_rtx ();
18265 jump_around_label = gen_label_rtx ();
18266 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18267 LEU, 0, GET_MODE (count_exp), 1, hot_label);
18268 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18269 emit_block_move_via_libcall (dst, src, count_exp, false);
18270 emit_jump (jump_around_label);
18271 emit_label (hot_label);
18275 /* Step 2: Alignment prologue. */
18277 if (desired_align > align)
18279 if (align_bytes == 0)
18281 /* Except for the first move in epilogue, we no longer know
18282 constant offset in aliasing info. It doesn't seem worth
18283 the pain to maintain it for the first move, so throw away
18284 the info early. */
18285 src = change_address (src, BLKmode, srcreg);
18286 dst = change_address (dst, BLKmode, destreg);
18287 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
18288 desired_align);
18290 else
18292 /* If we know how many bytes need to be stored before dst is
18293 sufficiently aligned, maintain aliasing info accurately. */
18294 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
18295 desired_align, align_bytes);
18296 count_exp = plus_constant (count_exp, -align_bytes);
18297 count -= align_bytes;
18299 if (need_zero_guard
18300 && (count < (unsigned HOST_WIDE_INT) size_needed
18301 || (align_bytes == 0
18302 && count < ((unsigned HOST_WIDE_INT) size_needed
18303 + desired_align - align))))
18305 /* It is possible that we copied enough so the main loop will not
18306 execute. */
18307 gcc_assert (size_needed > 1);
18308 if (label == NULL_RTX)
18309 label = gen_label_rtx ();
18310 emit_cmp_and_jump_insns (count_exp,
18311 GEN_INT (size_needed),
18312 LTU, 0, counter_mode (count_exp), 1, label);
18313 if (expected_size == -1
18314 || expected_size < (desired_align - align) / 2 + size_needed)
18315 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18316 else
18317 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18320 if (label && size_needed == 1)
18322 emit_label (label);
18323 LABEL_NUSES (label) = 1;
18324 label = NULL;
18325 epilogue_size_needed = 1;
18327 else if (label == NULL_RTX)
18328 epilogue_size_needed = size_needed;
18330 /* Step 3: Main loop. */
18332 switch (alg)
18334 case libcall:
18335 case no_stringop:
18336 gcc_unreachable ();
18337 case loop_1_byte:
18338 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18339 count_exp, QImode, 1, expected_size);
18340 break;
18341 case loop:
18342 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18343 count_exp, Pmode, 1, expected_size);
18344 break;
18345 case unrolled_loop:
18346 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
18347 registers for 4 temporaries anyway. */
18348 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18349 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
18350 expected_size);
18351 break;
18352 case rep_prefix_8_byte:
18353 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18354 DImode);
18355 break;
18356 case rep_prefix_4_byte:
18357 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18358 SImode);
18359 break;
18360 case rep_prefix_1_byte:
18361 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18362 QImode);
18363 break;
18365 /* Properly adjust the offsets of the src and dest memory for aliasing. */
18366 if (CONST_INT_P (count_exp))
18368 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
18369 (count / size_needed) * size_needed);
18370 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18371 (count / size_needed) * size_needed);
18373 else
18375 src = change_address (src, BLKmode, srcreg);
18376 dst = change_address (dst, BLKmode, destreg);
18379 /* Step 4: Epilogue to copy the remaining bytes. */
18380 epilogue:
18381 if (label)
18383 /* When the main loop is done, COUNT_EXP might hold original count,
18384 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
18385 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
18386 bytes. Compensate if needed. */
18388 if (size_needed < epilogue_size_needed)
18390 tmp =
18391 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18392 GEN_INT (size_needed - 1), count_exp, 1,
18393 OPTAB_DIRECT);
18394 if (tmp != count_exp)
18395 emit_move_insn (count_exp, tmp);
18397 emit_label (label);
18398 LABEL_NUSES (label) = 1;
18401 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18402 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
18403 epilogue_size_needed);
18404 if (jump_around_label)
18405 emit_label (jump_around_label);
18406 return 1;
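
/* Illustrative sketch, not part of GCC: the four-step shape the expander
   above emits, written as plain C and assuming a rep_prefix_4_byte main
   body.  All names are hypothetical.  */
#if 0
static void
movmem_shape_sketch (char *dst, const char *src, unsigned long n)
{
  /* 1) Prologue guard: small blocks go straight to the epilogue.  */
  if (n >= 4)
    {
      /* 2) Prologue: align DST byte by byte (at most 3 bytes here).  */
      while (((unsigned long) dst & 3) != 0)
	*dst++ = *src++, n--;
      /* 3) Main body: 4-byte chunks (rep movsl in the real expansion).  */
      for (; n >= 4; n -= 4, dst += 4, src += 4)
	__builtin_memcpy (dst, src, 4);
    }
  /* 4) Epilogue: the remaining n & 3 bytes (or the whole small block).  */
  while (n--)
    *dst++ = *src++;
}
#endif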
18409 /* Helper function for memset. For a QImode value 0xXY produce
18410 0xXYXYXYXY of the width specified by MODE. This is essentially
18411 a multiplication by 0x01010101, but we can do slightly better than
18412 synth_mult by unwinding the sequence by hand on CPUs with
18413 slow multiply. */
18414 static rtx
18415 promote_duplicated_reg (enum machine_mode mode, rtx val)
18417 enum machine_mode valmode = GET_MODE (val);
18418 rtx tmp;
18419 int nops = mode == DImode ? 3 : 2;
18421 gcc_assert (mode == SImode || mode == DImode);
18422 if (val == const0_rtx)
18423 return copy_to_mode_reg (mode, const0_rtx);
18424 if (CONST_INT_P (val))
18426 HOST_WIDE_INT v = INTVAL (val) & 255;
18428 v |= v << 8;
18429 v |= v << 16;
18430 if (mode == DImode)
18431 v |= (v << 16) << 16;
18432 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
18435 if (valmode == VOIDmode)
18436 valmode = QImode;
18437 if (valmode != QImode)
18438 val = gen_lowpart (QImode, val);
18439 if (mode == QImode)
18440 return val;
18441 if (!TARGET_PARTIAL_REG_STALL)
18442 nops--;
18443 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
18444 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
18445 <= (ix86_cost->shift_const + ix86_cost->add) * nops
18446 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
18448 rtx reg = convert_modes (mode, QImode, val, true);
18449 tmp = promote_duplicated_reg (mode, const1_rtx);
18450 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
18451 OPTAB_DIRECT);
18453 else
18455 rtx reg = convert_modes (mode, QImode, val, true);
18457 if (!TARGET_PARTIAL_REG_STALL)
18458 if (mode == SImode)
18459 emit_insn (gen_movsi_insv_1 (reg, reg));
18460 else
18461 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
18462 else
18464 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
18465 NULL, 1, OPTAB_DIRECT);
18466 reg =
18467 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18469 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
18470 NULL, 1, OPTAB_DIRECT);
18471 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18472 if (mode == SImode)
18473 return reg;
18474 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
18475 NULL, 1, OPTAB_DIRECT);
18476 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18477 return reg;
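
/* Worked example, illustrative: the shifted-OR sequence above computes
   0xXY -> 0xXYXYXYXY, which equals multiplying the byte by 0x01010101
   (by 0x0101010101010101 for DImode, with one extra shift-OR step).  */
#if 0
static unsigned int
dup_byte_sketch (unsigned char byte)
{
  unsigned int v = byte;  /* 0x000000XY */
  v |= v << 8;            /* 0x0000XYXY */
  v |= v << 16;           /* 0xXYXYXYXY */
  return v;
}
#endif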
18481 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
18482 will be needed by the main loop copying SIZE_NEEDED chunks and by the
18483 prologue getting alignment from ALIGN to DESIRED_ALIGN. */
18484 static rtx
18485 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
18487 rtx promoted_val;
18489 if (TARGET_64BIT
18490 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
18491 promoted_val = promote_duplicated_reg (DImode, val);
18492 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
18493 promoted_val = promote_duplicated_reg (SImode, val);
18494 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
18495 promoted_val = promote_duplicated_reg (HImode, val);
18496 else
18497 promoted_val = val;
18499 return promoted_val;
18502 /* Expand string set operation (memset). Use i386 string operations when
18503 profitable. See the ix86_expand_movmem comment for an explanation of the
18504 individual steps performed. */
18505 int
18506 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
18507 rtx expected_align_exp, rtx expected_size_exp)
18509 rtx destreg;
18510 rtx label = NULL;
18511 rtx tmp;
18512 rtx jump_around_label = NULL;
18513 HOST_WIDE_INT align = 1;
18514 unsigned HOST_WIDE_INT count = 0;
18515 HOST_WIDE_INT expected_size = -1;
18516 int size_needed = 0, epilogue_size_needed;
18517 int desired_align = 0, align_bytes = 0;
18518 enum stringop_alg alg;
18519 rtx promoted_val = NULL;
18520 bool force_loopy_epilogue = false;
18521 int dynamic_check;
18522 bool need_zero_guard = false;
18524 if (CONST_INT_P (align_exp))
18525 align = INTVAL (align_exp);
18526 /* i386 can do misaligned access at a reasonably increased cost. */
18527 if (CONST_INT_P (expected_align_exp)
18528 && INTVAL (expected_align_exp) > align)
18529 align = INTVAL (expected_align_exp);
18530 if (CONST_INT_P (count_exp))
18531 count = expected_size = INTVAL (count_exp);
18532 if (CONST_INT_P (expected_size_exp) && count == 0)
18533 expected_size = INTVAL (expected_size_exp);
18535 /* Make sure we don't need to care about overflow later on. */
18536 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18537 return 0;
18539 /* Step 0: Decide on preferred algorithm, desired alignment and
18540 size of chunks to be copied by main loop. */
18542 alg = decide_alg (count, expected_size, true, &dynamic_check);
18543 desired_align = decide_alignment (align, alg, expected_size);
18545 if (!TARGET_ALIGN_STRINGOPS)
18546 align = desired_align;
18548 if (alg == libcall)
18549 return 0;
18550 gcc_assert (alg != no_stringop);
18551 if (!count)
18552 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18553 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18554 switch (alg)
18556 case libcall:
18557 case no_stringop:
18558 gcc_unreachable ();
18559 case loop:
18560 need_zero_guard = true;
18561 size_needed = GET_MODE_SIZE (Pmode);
18562 break;
18563 case unrolled_loop:
18564 need_zero_guard = true;
18565 size_needed = GET_MODE_SIZE (Pmode) * 4;
18566 break;
18567 case rep_prefix_8_byte:
18568 size_needed = 8;
18569 break;
18570 case rep_prefix_4_byte:
18571 size_needed = 4;
18572 break;
18573 case rep_prefix_1_byte:
18574 size_needed = 1;
18575 break;
18576 case loop_1_byte:
18577 need_zero_guard = true;
18578 size_needed = 1;
18579 break;
18581 epilogue_size_needed = size_needed;
18583 /* Step 1: Prologue guard. */
18585 /* Alignment code needs count to be in register. */
18586 if (CONST_INT_P (count_exp) && desired_align > align)
18588 if (INTVAL (count_exp) > desired_align
18589 && INTVAL (count_exp) > size_needed)
18591 align_bytes
18592 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18593 if (align_bytes <= 0)
18594 align_bytes = 0;
18595 else
18596 align_bytes = desired_align - align_bytes;
18598 if (align_bytes == 0)
18600 enum machine_mode mode = SImode;
18601 if (TARGET_64BIT && (count & ~0xffffffff))
18602 mode = DImode;
18603 count_exp = force_reg (mode, count_exp);
18606 /* Do the cheap promotion to allow better CSE across the
18607 main loop and epilogue (i.e., one load of the big constant in
18608 front of all the code). */
18609 if (CONST_INT_P (val_exp))
18610 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18611 desired_align, align);
18612 /* Ensure that alignment prologue won't copy past end of block. */
18613 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18615 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18616 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18617 Make sure it is a power of 2. */
18618 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18620 /* To improve performance of small blocks, we jump around the VAL
18621 promoting code. This means that if the promoted VAL is not constant,
18622 we might not use it in the epilogue and have to use the byte
18623 loop variant. */
18624 if (epilogue_size_needed > 2 && !promoted_val)
18625 force_loopy_epilogue = true;
18626 if (count)
18628 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18630 /* If main algorithm works on QImode, no epilogue is needed.
18631 For small sizes just don't align anything. */
18632 if (size_needed == 1)
18633 desired_align = align;
18634 else
18635 goto epilogue;
18638 else
18640 label = gen_label_rtx ();
18641 emit_cmp_and_jump_insns (count_exp,
18642 GEN_INT (epilogue_size_needed),
18643 LTU, 0, counter_mode (count_exp), 1, label);
18644 if (expected_size == -1 || expected_size <= epilogue_size_needed)
18645 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18646 else
18647 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18650 if (dynamic_check != -1)
18652 rtx hot_label = gen_label_rtx ();
18653 jump_around_label = gen_label_rtx ();
18654 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18655 LEU, 0, counter_mode (count_exp), 1, hot_label);
18656 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18657 set_storage_via_libcall (dst, count_exp, val_exp, false);
18658 emit_jump (jump_around_label);
18659 emit_label (hot_label);
18662 /* Step 2: Alignment prologue. */
18664 /* Do the expensive promotion once we have branched off the small blocks. */
18665 if (!promoted_val)
18666 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18667 desired_align, align);
18668 gcc_assert (desired_align >= 1 && align >= 1);
18670 if (desired_align > align)
18672 if (align_bytes == 0)
18674 /* Except for the first move in epilogue, we no longer know
18675 constant offset in aliasing info. It doesn't seem worth
18676 the pain to maintain it for the first move, so throw away
18677 the info early. */
18678 dst = change_address (dst, BLKmode, destreg);
18679 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
18680 desired_align);
18682 else
18684 /* If we know how many bytes need to be stored before dst is
18685 sufficiently aligned, maintain aliasing info accurately. */
18686 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
18687 desired_align, align_bytes);
18688 count_exp = plus_constant (count_exp, -align_bytes);
18689 count -= align_bytes;
18691 if (need_zero_guard
18692 && (count < (unsigned HOST_WIDE_INT) size_needed
18693 || (align_bytes == 0
18694 && count < ((unsigned HOST_WIDE_INT) size_needed
18695 + desired_align - align))))
18697 /* It is possible that we copied enough so the main loop will not
18698 execute. */
18699 gcc_assert (size_needed > 1);
18700 if (label == NULL_RTX)
18701 label = gen_label_rtx ();
18702 emit_cmp_and_jump_insns (count_exp,
18703 GEN_INT (size_needed),
18704 LTU, 0, counter_mode (count_exp), 1, label);
18705 if (expected_size == -1
18706 || expected_size < (desired_align - align) / 2 + size_needed)
18707 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18708 else
18709 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18712 if (label && size_needed == 1)
18714 emit_label (label);
18715 LABEL_NUSES (label) = 1;
18716 label = NULL;
18717 promoted_val = val_exp;
18718 epilogue_size_needed = 1;
18720 else if (label == NULL_RTX)
18721 epilogue_size_needed = size_needed;
18723 /* Step 3: Main loop. */
18725 switch (alg)
18727 case libcall:
18728 case no_stringop:
18729 gcc_unreachable ();
18730 case loop_1_byte:
18731 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18732 count_exp, QImode, 1, expected_size);
18733 break;
18734 case loop:
18735 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18736 count_exp, Pmode, 1, expected_size);
18737 break;
18738 case unrolled_loop:
18739 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18740 count_exp, Pmode, 4, expected_size);
18741 break;
18742 case rep_prefix_8_byte:
18743 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18744 DImode, val_exp);
18745 break;
18746 case rep_prefix_4_byte:
18747 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18748 SImode, val_exp);
18749 break;
18750 case rep_prefix_1_byte:
18751 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18752 QImode, val_exp);
18753 break;
18755 /* Properly adjust the offsets of the src and dest memory for aliasing. */
18756 if (CONST_INT_P (count_exp))
18757 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18758 (count / size_needed) * size_needed);
18759 else
18760 dst = change_address (dst, BLKmode, destreg);
18762 /* Step 4: Epilogue to copy the remaining bytes. */
18764 if (label)
18766 /* When the main loop is done, COUNT_EXP might hold original count,
18767 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
18768 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
18769 bytes. Compensate if needed. */
18771 if (size_needed < epilogue_size_needed)
18773 tmp =
18774 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18775 GEN_INT (size_needed - 1), count_exp, 1,
18776 OPTAB_DIRECT);
18777 if (tmp != count_exp)
18778 emit_move_insn (count_exp, tmp);
18780 emit_label (label);
18781 LABEL_NUSES (label) = 1;
18783 epilogue:
18784 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18786 if (force_loopy_epilogue)
18787 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
18788 epilogue_size_needed);
18789 else
18790 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
18791 epilogue_size_needed);
18793 if (jump_around_label)
18794 emit_label (jump_around_label);
18795 return 1;
18798 /* Expand the appropriate insns for doing strlen if not just doing
18799 repnz; scasb
18801 out = result, initialized with the start address
18802 align_rtx = alignment of the address.
18803 scratch = scratch register, initialized with the start address when
18804 not aligned, otherwise undefined
18806 This is just the body. It needs the initializations mentioned above and
18807 some address computing at the end. These things are done in i386.md. */
18809 static void
18810 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
18812 int align;
18813 rtx tmp;
18814 rtx align_2_label = NULL_RTX;
18815 rtx align_3_label = NULL_RTX;
18816 rtx align_4_label = gen_label_rtx ();
18817 rtx end_0_label = gen_label_rtx ();
18818 rtx mem;
18819 rtx tmpreg = gen_reg_rtx (SImode);
18820 rtx scratch = gen_reg_rtx (SImode);
18821 rtx cmp;
18823 align = 0;
18824 if (CONST_INT_P (align_rtx))
18825 align = INTVAL (align_rtx);
18827 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
18829 /* Is there a known alignment and is it less than 4? */
18830 if (align < 4)
18832 rtx scratch1 = gen_reg_rtx (Pmode);
18833 emit_move_insn (scratch1, out);
18834 /* Is there a known alignment and is it not 2? */
18835 if (align != 2)
18837 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
18838 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
18840 /* Leave just the 3 lower bits. */
18841 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
18842 NULL_RTX, 0, OPTAB_WIDEN);
18844 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18845 Pmode, 1, align_4_label);
18846 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
18847 Pmode, 1, align_2_label);
18848 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
18849 Pmode, 1, align_3_label);
18851 else
18853 /* Since the alignment is 2, we have to check 2 or 0 bytes;
18854 check whether it is aligned to a 4-byte boundary. */
18856 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
18857 NULL_RTX, 0, OPTAB_WIDEN);
18859 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18860 Pmode, 1, align_4_label);
18863 mem = change_address (src, QImode, out);
18865 /* Now compare the bytes. */
18867 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
18868 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
18869 QImode, 1, end_0_label);
18871 /* Increment the address. */
18872 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18874 /* Not needed with an alignment of 2 */
18875 if (align != 2)
18877 emit_label (align_2_label);
18879 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18880 end_0_label);
18882 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18884 emit_label (align_3_label);
18887 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18888 end_0_label);
18890 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18893 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
18894 align this loop. It only enlarges the program and does not help to
18895 speed it up. */
18896 emit_label (align_4_label);
18898 mem = change_address (src, SImode, out);
18899 emit_move_insn (scratch, mem);
18900 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
18902 /* This formula yields a nonzero result iff one of the bytes is zero.
18903 This saves three branches inside the loop and many cycles. */
18905 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
18906 emit_insn (gen_one_cmplsi2 (scratch, scratch));
18907 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
18908 emit_insn (gen_andsi3 (tmpreg, tmpreg,
18909 gen_int_mode (0x80808080, SImode)));
18910 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
18911 align_4_label);
18913 if (TARGET_CMOVE)
18915 rtx reg = gen_reg_rtx (SImode);
18916 rtx reg2 = gen_reg_rtx (Pmode);
18917 emit_move_insn (reg, tmpreg);
18918 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
18920 /* If zero is not in the first two bytes, move two bytes forward. */
18921 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18922 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18923 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18924 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
18925 gen_rtx_IF_THEN_ELSE (SImode, tmp,
18926 reg,
18927 tmpreg)));
18928 /* Emit lea manually to avoid clobbering of flags. */
18929 emit_insn (gen_rtx_SET (SImode, reg2,
18930 gen_rtx_PLUS (Pmode, out, const2_rtx)));
18932 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18933 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18934 emit_insn (gen_rtx_SET (VOIDmode, out,
18935 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
18936 reg2,
18937 out)));
18940 else
18942 rtx end_2_label = gen_label_rtx ();
18943 /* Is zero in the first two bytes? */
18945 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18946 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18947 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
18948 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
18949 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
18950 pc_rtx);
18951 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
18952 JUMP_LABEL (tmp) = end_2_label;
18954 /* Not in the first two. Move two bytes forward. */
18955 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
18956 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
18958 emit_label (end_2_label);
18962 /* Avoid branch in fixing the byte. */
18963 tmpreg = gen_lowpart (QImode, tmpreg);
18964 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
18965 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
18966 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
18968 emit_label (end_0_label);
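
/* Worked example, illustrative: the zero-byte test emitted above computes
   (x - 0x01010101) & ~x & 0x80808080, which is nonzero iff some byte of x
   is zero: subtracting 1 from a 0x00 byte borrows into its bit 7, while
   ~x masks out bytes whose own bit 7 was already set.  */
#if 0
static int
has_zero_byte_sketch (unsigned int x)
{
  return ((x - 0x01010101U) & ~x & 0x80808080U) != 0;
}
#endif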
18971 /* Expand strlen. */
18973 int
18974 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
18976 rtx addr, scratch1, scratch2, scratch3, scratch4;
18978 /* The generic case of the strlen expander is long. Avoid expanding
18979 it unless TARGET_INLINE_ALL_STRINGOPS. */
18981 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
18982 && !TARGET_INLINE_ALL_STRINGOPS
18983 && !optimize_insn_for_size_p ()
18984 && (!CONST_INT_P (align) || INTVAL (align) < 4))
18985 return 0;
18987 addr = force_reg (Pmode, XEXP (src, 0));
18988 scratch1 = gen_reg_rtx (Pmode);
18990 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
18991 && !optimize_insn_for_size_p ())
18993 /* It seems that some optimizers do not combine a call like
18994 foo(strlen(bar), strlen(bar));
18995 when the move and the subtraction are done here. The length is
18996 calculated just once when these instructions are done inside
18997 output_strlen_unroll(). But since &bar[strlen(bar)] is often used,
18998 and this uses one fewer register for the lifetime of
18999 output_strlen_unroll(), this is better. */
19001 emit_move_insn (out, addr);
19003 ix86_expand_strlensi_unroll_1 (out, src, align);
19005 /* strlensi_unroll_1 returns the address of the zero at the end of
19006 the string, like memchr(), so compute the length by subtracting
19007 the start address. */
19008 emit_insn ((*ix86_gen_sub3) (out, out, addr));
19010 else
19012 rtx unspec;
19014 /* Can't use this if the user has appropriated eax, ecx, or edi. */
19015 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
19016 return false;
19018 scratch2 = gen_reg_rtx (Pmode);
19019 scratch3 = gen_reg_rtx (Pmode);
19020 scratch4 = force_reg (Pmode, constm1_rtx);
19022 emit_move_insn (scratch3, addr);
19023 eoschar = force_reg (QImode, eoschar);
19025 src = replace_equiv_address_nv (src, scratch3);
19027 /* If .md starts supporting :P, this can be done in .md. */
19028 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
19029 scratch4), UNSPEC_SCAS);
19030 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
19031 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
19032 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
19034 return 1;
19037 /* For a given symbol (function), construct code to compute the address of
19038 its PLT entry in the large x86-64 PIC model. */
19039 static rtx
19040 construct_plt_address (rtx symbol)
19042 rtx tmp = gen_reg_rtx (Pmode);
19043 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
19045 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
19046 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
19048 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
19049 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
19050 return tmp;
19053 void
19054 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
19055 rtx callarg2,
19056 rtx pop, int sibcall)
19058 rtx use = NULL, call;
19060 if (pop == const0_rtx)
19061 pop = NULL;
19062 gcc_assert (!TARGET_64BIT || !pop);
19064 if (TARGET_MACHO && !TARGET_64BIT)
19066 #if TARGET_MACHO
19067 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
19068 fnaddr = machopic_indirect_call_target (fnaddr);
19069 #endif
19071 else
19073 /* Static functions and indirect calls don't need the pic register. */
19074 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
19075 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19076 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
19077 use_reg (&use, pic_offset_table_rtx);
19080 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
19082 rtx al = gen_rtx_REG (QImode, AX_REG);
19083 emit_move_insn (al, callarg2);
19084 use_reg (&use, al);
19087 if (ix86_cmodel == CM_LARGE_PIC
19088 && GET_CODE (fnaddr) == MEM
19089 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19090 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
19091 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
19092 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
19094 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19095 fnaddr = gen_rtx_MEM (QImode, fnaddr);
19097 if (sibcall && TARGET_64BIT
19098 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
19100 rtx addr;
19101 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19102 fnaddr = gen_rtx_REG (Pmode, R11_REG);
19103 emit_move_insn (fnaddr, addr);
19104 fnaddr = gen_rtx_MEM (QImode, fnaddr);
19107 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
19108 if (retval)
19109 call = gen_rtx_SET (VOIDmode, retval, call);
19110 if (pop)
19112 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
19113 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
19114 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
19116 if (TARGET_64BIT
19117 && ix86_cfun_abi () == MS_ABI
19118 && (!callarg2 || INTVAL (callarg2) != -2))
19120 /* We need to represent that SI and DI registers are clobbered
19121 by SYSV calls. */
19122 static int clobbered_registers[] = {
19123 XMM6_REG, XMM7_REG, XMM8_REG,
19124 XMM9_REG, XMM10_REG, XMM11_REG,
19125 XMM12_REG, XMM13_REG, XMM14_REG,
19126 XMM15_REG, SI_REG, DI_REG
19128 unsigned int i;
19129 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
19130 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
19131 UNSPEC_MS_TO_SYSV_CALL);
19133 vec[0] = call;
19134 vec[1] = unspec;
19135 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
19136 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
19137 ? TImode : DImode,
19138 gen_rtx_REG
19139 (SSE_REGNO_P (clobbered_registers[i])
19140 ? TImode : DImode,
19141 clobbered_registers[i]));
19143 call = gen_rtx_PARALLEL (VOIDmode,
19144 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
19145 + 2, vec));
19148 call = emit_call_insn (call);
19149 if (use)
19150 CALL_INSN_FUNCTION_USAGE (call) = use;
19154 /* Clear stack slot assignments remembered from previous functions.
19155 This is called from INIT_EXPANDERS once before RTL is emitted for each
19156 function. */
19158 static struct machine_function *
19159 ix86_init_machine_status (void)
19161 struct machine_function *f;
19163 f = GGC_CNEW (struct machine_function);
19164 f->use_fast_prologue_epilogue_nregs = -1;
19165 f->tls_descriptor_call_expanded_p = 0;
19166 f->call_abi = ix86_abi;
19168 return f;
19171 /* Return a MEM corresponding to a stack slot with mode MODE.
19172 Allocate a new slot if necessary.
19174 The RTL for a function can have several slots available: N is
19175 which slot to use. */
19177 rtx
19178 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
19180 struct stack_local_entry *s;
19182 gcc_assert (n < MAX_386_STACK_LOCALS);
19184 /* Virtual slot is valid only before vregs are instantiated. */
19185 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
19187 for (s = ix86_stack_locals; s; s = s->next)
19188 if (s->mode == mode && s->n == n)
19189 return copy_rtx (s->rtl);
19191 s = (struct stack_local_entry *)
19192 ggc_alloc (sizeof (struct stack_local_entry));
19193 s->n = n;
19194 s->mode = mode;
19195 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
19197 s->next = ix86_stack_locals;
19198 ix86_stack_locals = s;
19199 return s->rtl;
19202 /* Construct the SYMBOL_REF for the tls_get_addr function. */
19204 static GTY(()) rtx ix86_tls_symbol;
19205 static rtx
19206 ix86_tls_get_addr (void)
19209 if (!ix86_tls_symbol)
19211 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
19212 (TARGET_ANY_GNU_TLS
19213 && !TARGET_64BIT)
19214 ? "___tls_get_addr"
19215 : "__tls_get_addr");
19218 return ix86_tls_symbol;
19221 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
19223 static GTY(()) rtx ix86_tls_module_base_symbol;
19224 static rtx
19225 ix86_tls_module_base (void)
19228 if (!ix86_tls_module_base_symbol)
19230 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
19231 "_TLS_MODULE_BASE_");
19232 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
19233 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
19236 return ix86_tls_module_base_symbol;
19239 /* Calculate the length of the memory address in the instruction
19240 encoding. Does not include the one-byte modrm, opcode, or prefix. */
19242 int
19243 memory_address_length (rtx addr)
19245 struct ix86_address parts;
19246 rtx base, index, disp;
19247 int len;
19248 int ok;
19250 if (GET_CODE (addr) == PRE_DEC
19251 || GET_CODE (addr) == POST_INC
19252 || GET_CODE (addr) == PRE_MODIFY
19253 || GET_CODE (addr) == POST_MODIFY)
19254 return 0;
19256 ok = ix86_decompose_address (addr, &parts);
19257 gcc_assert (ok);
19259 if (parts.base && GET_CODE (parts.base) == SUBREG)
19260 parts.base = SUBREG_REG (parts.base);
19261 if (parts.index && GET_CODE (parts.index) == SUBREG)
19262 parts.index = SUBREG_REG (parts.index);
19264 base = parts.base;
19265 index = parts.index;
19266 disp = parts.disp;
19267 len = 0;
19269 /* Rule of thumb:
19270 - esp as the base always wants an index,
19271 - ebp as the base always wants a displacement. */
19273 /* Register Indirect. */
19274 if (base && !index && !disp)
19276 /* esp (for its index) and ebp (for its displacement) need
19277 the two-byte modrm form. */
19278 if (addr == stack_pointer_rtx
19279 || addr == arg_pointer_rtx
19280 || addr == frame_pointer_rtx
19281 || addr == hard_frame_pointer_rtx)
19282 len = 1;
19285 /* Direct Addressing. */
19286 else if (disp && !base && !index)
19287 len = 4;
19289 else
19291 /* Find the length of the displacement constant. */
19292 if (disp)
19294 if (base && satisfies_constraint_K (disp))
19295 len = 1;
19296 else
19297 len = 4;
19299 /* ebp always wants a displacement. */
19300 else if (base == hard_frame_pointer_rtx)
19301 len = 1;
19303 /* An index requires the two-byte modrm form.... */
19304 if (index
19305 /* ...like esp, which always wants an index. */
19306 || base == stack_pointer_rtx
19307 || base == arg_pointer_rtx
19308 || base == frame_pointer_rtx)
19309 len += 1;
19312 return len;
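/* Worked examples of the rules above (a sketch; the one-byte modrm itself
   is excluded in every case):

       (%eax)          -> 0   plain register indirect
       (%esp)          -> 1   esp needs the SIB byte
       8(%ebp)         -> 1   ebp needs at least a disp8
       foo             -> 4   absolute disp32
       8(%eax)         -> 1   disp8 satisfies constraint K
       0x1000(%eax)    -> 4   disp32
       8(%eax,%ebx,4)  -> 2   disp8 plus the SIB byte  */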
19315 /* Compute the default value for the "length_immediate" attribute.  When SHORTFORM
19316 is set, expect that the insn has an 8-bit immediate alternative. */
19318 ix86_attr_length_immediate_default (rtx insn, int shortform)
19320 int len = 0;
19321 int i;
19322 extract_insn_cached (insn);
19323 for (i = recog_data.n_operands - 1; i >= 0; --i)
19324 if (CONSTANT_P (recog_data.operand[i]))
19326 gcc_assert (!len);
19327 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
19328 len = 1;
19329 else
19331 switch (get_attr_mode (insn))
19333 case MODE_QI:
19334 len += 1;
19335 break;
19336 case MODE_HI:
19337 len += 2;
19338 break;
19339 case MODE_SI:
19340 len += 4;
19341 break;
19342 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
19343 case MODE_DI:
19344 len += 4;
19345 break;
19346 default:
19347 fatal_insn ("unknown insn mode", insn);
19351 return len;
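/* Example (a sketch): with SHORTFORM set, "addl $8, %eax" has an $8 that
   satisfies constraint K, so the immediate contributes 1 byte, while
   "addl $100000, %eax" needs the full 4-byte SImode immediate.  For
   DImode, "addq $100000, %rax" also contributes 4 bytes, since DImode
   immediates are encoded as 32-bit sign-extended values.  */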
19353 /* Compute default value for "length_address" attribute. */
19355 ix86_attr_length_address_default (rtx insn)
19357 int i;
19359 if (get_attr_type (insn) == TYPE_LEA)
19361 rtx set = PATTERN (insn);
19363 if (GET_CODE (set) == PARALLEL)
19364 set = XVECEXP (set, 0, 0);
19366 gcc_assert (GET_CODE (set) == SET);
19368 return memory_address_length (SET_SRC (set));
19371 extract_insn_cached (insn);
19372 for (i = recog_data.n_operands - 1; i >= 0; --i)
19373 if (MEM_P (recog_data.operand[i]))
19375 return memory_address_length (XEXP (recog_data.operand[i], 0));
19376 break;
19378 return 0;
19381 /* Compute the default value for the "length_vex" attribute.  It includes the
19382 2- or 3-byte VEX prefix and 1 opcode byte. */
19385 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
19386 int has_vex_w)
19388 int i;
19390 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX.W bit requires the
19391 3-byte VEX prefix. */
19392 if (!has_0f_opcode || has_vex_w)
19393 return 3 + 1;
19395 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
19396 if (!TARGET_64BIT)
19397 return 2 + 1;
19399 extract_insn_cached (insn);
19401 for (i = recog_data.n_operands - 1; i >= 0; --i)
19402 if (REG_P (recog_data.operand[i]))
19404 /* The REX.W bit requires the 3-byte VEX prefix. */
19405 if (GET_MODE (recog_data.operand[i]) == DImode)
19406 return 3 + 1;
19408 else
19410 /* REX.X or REX.B bits require the 3-byte VEX prefix. */
19411 if (MEM_P (recog_data.operand[i])
19412 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
19413 return 3 + 1;
19416 return 2 + 1;
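/* Example (a sketch): "vaddps %xmm1, %xmm2, %xmm0" in 32-bit mode can use
   the 2-byte VEX prefix, so the attribute value is 2 + 1 = 3.  In 64-bit
   mode, an insn with a DImode register operand (needing REX.W) or a memory
   operand mentioning an extended register (needing REX.X or REX.B) must
   use the 3-byte VEX prefix, giving 3 + 1 = 4.  */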
19419 /* Return the maximum number of instructions a CPU can issue. */
19421 static int
19422 ix86_issue_rate (void)
19424 switch (ix86_tune)
19426 case PROCESSOR_PENTIUM:
19427 case PROCESSOR_ATOM:
19428 case PROCESSOR_K6:
19429 return 2;
19431 case PROCESSOR_PENTIUMPRO:
19432 case PROCESSOR_PENTIUM4:
19433 case PROCESSOR_ATHLON:
19434 case PROCESSOR_K8:
19435 case PROCESSOR_AMDFAM10:
19436 case PROCESSOR_NOCONA:
19437 case PROCESSOR_GENERIC32:
19438 case PROCESSOR_GENERIC64:
19439 return 3;
19441 case PROCESSOR_CORE2:
19442 return 4;
19444 default:
19445 return 1;
19449 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags set
19450 by DEP_INSN, and nothing else set by DEP_INSN. */
19452 static int
19453 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19455 rtx set, set2;
19457 /* Simplify the test for uninteresting insns. */
19458 if (insn_type != TYPE_SETCC
19459 && insn_type != TYPE_ICMOV
19460 && insn_type != TYPE_FCMOV
19461 && insn_type != TYPE_IBR)
19462 return 0;
19464 if ((set = single_set (dep_insn)) != 0)
19466 set = SET_DEST (set);
19467 set2 = NULL_RTX;
19469 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
19470 && XVECLEN (PATTERN (dep_insn), 0) == 2
19471 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
19472 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
19474 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
19475 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
19477 else
19478 return 0;
19480 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
19481 return 0;
19483 /* This test is true if the dependent insn reads the flags but
19484 not any other potentially set register. */
19485 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
19486 return 0;
19488 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
19489 return 0;
19491 return 1;
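/* Example (a sketch): in the sequence

       cmpl %eax, %ebx    <- DEP_INSN sets only the flags
       jne  .L1           <- INSN reads only the flags

   this returns 1, and the Pentium case of ix86_adjust_cost below uses
   that to model compare/branch pairing by dropping the cost to 0.  */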
19494 /* Return true iff USE_INSN has a memory address with operands set by
19495 SET_INSN. */
19497 bool
19498 ix86_agi_dependent (rtx set_insn, rtx use_insn)
19500 int i;
19501 extract_insn_cached (use_insn);
19502 for (i = recog_data.n_operands - 1; i >= 0; --i)
19503 if (MEM_P (recog_data.operand[i]))
19505 rtx addr = XEXP (recog_data.operand[i], 0);
19506 return modified_in_p (addr, set_insn) != 0;
19508 return false;
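/* Example (a sketch of an address generation interlock):

       movl $buf, %ebx      <- SET_INSN writes %ebx
       movl (%ebx), %eax    <- USE_INSN's memory address uses %ebx

   the MEM address of USE_INSN is modified by SET_INSN, so this returns
   true, and e.g. the Pentium case below charges an extra cycle.  */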
19511 static int
19512 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
19514 enum attr_type insn_type, dep_insn_type;
19515 enum attr_memory memory;
19516 rtx set, set2;
19517 int dep_insn_code_number;
19519 /* Anti and output dependencies have zero cost on all CPUs. */
19520 if (REG_NOTE_KIND (link) != 0)
19521 return 0;
19523 dep_insn_code_number = recog_memoized (dep_insn);
19525 /* If we can't recognize the insns, we can't really do anything. */
19526 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
19527 return cost;
19529 insn_type = get_attr_type (insn);
19530 dep_insn_type = get_attr_type (dep_insn);
19532 switch (ix86_tune)
19534 case PROCESSOR_PENTIUM:
19535 /* Address Generation Interlock adds a cycle of latency. */
19536 if (insn_type == TYPE_LEA)
19538 rtx addr = PATTERN (insn);
19540 if (GET_CODE (addr) == PARALLEL)
19541 addr = XVECEXP (addr, 0, 0);
19543 gcc_assert (GET_CODE (addr) == SET);
19545 addr = SET_SRC (addr);
19546 if (modified_in_p (addr, dep_insn))
19547 cost += 1;
19549 else if (ix86_agi_dependent (dep_insn, insn))
19550 cost += 1;
19552 /* ??? Compares pair with jump/setcc. */
19553 if (ix86_flags_dependent (insn, dep_insn, insn_type))
19554 cost = 0;
19556 /* Floating point stores require the value to be ready one cycle earlier. */
19557 if (insn_type == TYPE_FMOV
19558 && get_attr_memory (insn) == MEMORY_STORE
19559 && !ix86_agi_dependent (dep_insn, insn))
19560 cost += 1;
19561 break;
19563 case PROCESSOR_PENTIUMPRO:
19564 memory = get_attr_memory (insn);
19566 /* INT->FP conversion is expensive. */
19567 if (get_attr_fp_int_src (dep_insn))
19568 cost += 5;
19570 /* There is one cycle extra latency between an FP op and a store. */
19571 if (insn_type == TYPE_FMOV
19572 && (set = single_set (dep_insn)) != NULL_RTX
19573 && (set2 = single_set (insn)) != NULL_RTX
19574 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
19575 && MEM_P (SET_DEST (set2)))
19576 cost += 1;
19578 /* Show the ability of the reorder buffer to hide the latency of a load by
19579 executing it in parallel with the previous instruction, provided the
19580 previous instruction is not needed to compute the address. */
19581 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19582 && !ix86_agi_dependent (dep_insn, insn))
19584 /* Claim moves take one cycle, as the core can issue one load
19585 at a time and the next load can start a cycle later. */
19586 if (dep_insn_type == TYPE_IMOV
19587 || dep_insn_type == TYPE_FMOV)
19588 cost = 1;
19589 else if (cost > 1)
19590 cost--;
19592 break;
19594 case PROCESSOR_K6:
19595 memory = get_attr_memory (insn);
19597 /* The esp dependency is resolved before the instruction is really
19598 finished. */
19599 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
19600 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
19601 return 1;
19603 /* INT->FP conversion is expensive. */
19604 if (get_attr_fp_int_src (dep_insn))
19605 cost += 5;
19607 /* Show the ability of the reorder buffer to hide the latency of a load by
19608 executing it in parallel with the previous instruction, provided the
19609 previous instruction is not needed to compute the address. */
19610 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19611 && !ix86_agi_dependent (dep_insn, insn))
19613 /* Claim moves take one cycle, as the core can issue one load
19614 at a time and the next load can start a cycle later. */
19615 if (dep_insn_type == TYPE_IMOV
19616 || dep_insn_type == TYPE_FMOV)
19617 cost = 1;
19618 else if (cost > 2)
19619 cost -= 2;
19620 else
19621 cost = 1;
19623 break;
19625 case PROCESSOR_ATHLON:
19626 case PROCESSOR_K8:
19627 case PROCESSOR_AMDFAM10:
19628 case PROCESSOR_ATOM:
19629 case PROCESSOR_GENERIC32:
19630 case PROCESSOR_GENERIC64:
19631 memory = get_attr_memory (insn);
19633 /* Show the ability of the reorder buffer to hide the latency of a load by
19634 executing it in parallel with the previous instruction, provided the
19635 previous instruction is not needed to compute the address. */
19636 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19637 && !ix86_agi_dependent (dep_insn, insn))
19639 enum attr_unit unit = get_attr_unit (insn);
19640 int loadcost = 3;
19642 /* Because of the difference between the length of integer and
19643 floating unit pipeline preparation stages, the memory operands
19644 for floating point are cheaper.
19646 ??? For Athlon the difference is most probably 2. */
19647 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
19648 loadcost = 3;
19649 else
19650 loadcost = TARGET_ATHLON ? 2 : 0;
19652 if (cost >= loadcost)
19653 cost -= loadcost;
19654 else
19655 cost = 0;
19658 default:
19659 break;
19662 return cost;
19665 /* How many alternative schedules to try. This should be as wide as the
19666 scheduling freedom in the DFA, but no wider. Making this value too
19667 large results in extra work for the scheduler. */
19669 static int
19670 ia32_multipass_dfa_lookahead (void)
19672 switch (ix86_tune)
19674 case PROCESSOR_PENTIUM:
19675 return 2;
19677 case PROCESSOR_PENTIUMPRO:
19678 case PROCESSOR_K6:
19679 return 1;
19681 default:
19682 return 0;
19687 /* Compute the alignment given to a constant that is being placed in memory.
19688 EXP is the constant and ALIGN is the alignment that the object would
19689 ordinarily have.
19690 The value of this function is used instead of that alignment to align
19691 the object. */
19694 ix86_constant_alignment (tree exp, int align)
19696 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
19697 || TREE_CODE (exp) == INTEGER_CST)
19699 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
19700 return 64;
19701 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
19702 return 128;
19704 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
19705 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
19706 return BITS_PER_WORD;
19708 return align;
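/* Example (a sketch): a DFmode REAL_CST such as 3.14159 is raised to
   64-bit alignment; a constant whose mode satisfies ALIGN_MODE_128
   (e.g. an SSE vector constant) to 128; and, when not optimizing for
   size, a string literal of 31 or more characters is raised to
   BITS_PER_WORD so that block moves start out aligned.  */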
19711 /* Compute the alignment for a static variable.
19712 TYPE is the data type, and ALIGN is the alignment that
19713 the object would ordinarily have. The value of this function is used
19714 instead of that alignment to align the object. */
19717 ix86_data_alignment (tree type, int align)
19719 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
19721 if (AGGREGATE_TYPE_P (type)
19722 && TYPE_SIZE (type)
19723 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19724 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
19725 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
19726 && align < max_align)
19727 align = max_align;
19729 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
19730 to a 16-byte boundary. */
19731 if (TARGET_64BIT)
19733 if (AGGREGATE_TYPE_P (type)
19734 && TYPE_SIZE (type)
19735 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19736 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
19737 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19738 return 128;
19741 if (TREE_CODE (type) == ARRAY_TYPE)
19743 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19744 return 64;
19745 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19746 return 128;
19748 else if (TREE_CODE (type) == COMPLEX_TYPE)
19751 if (TYPE_MODE (type) == DCmode && align < 64)
19752 return 64;
19753 if ((TYPE_MODE (type) == XCmode
19754 || TYPE_MODE (type) == TCmode) && align < 128)
19755 return 128;
19757 else if ((TREE_CODE (type) == RECORD_TYPE
19758 || TREE_CODE (type) == UNION_TYPE
19759 || TREE_CODE (type) == QUAL_UNION_TYPE)
19760 && TYPE_FIELDS (type))
19762 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19763 return 64;
19764 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19765 return 128;
19767 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19768 || TREE_CODE (type) == INTEGER_TYPE)
19770 if (TYPE_MODE (type) == DFmode && align < 64)
19771 return 64;
19772 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19773 return 128;
19776 return align;
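/* Example (a sketch): when not optimizing for size,

       static double table[32];    (2048 bits)

   is an aggregate whose size exceeds max_align, so its alignment is
   raised to max_align (256 bits, unless MAX_OFILE_ALIGNMENT caps it);
   independently, on x86-64 any aggregate of at least 128 bits is raised
   to 128-bit alignment per the ABI rule cited above.  */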
19779 /* Compute the alignment for a local variable or a stack slot. EXP is
19780 the data type or decl itself, MODE is the widest mode available and
19781 ALIGN is the alignment that the object would ordinarily have. The
19782 value of this macro is used instead of that alignment to align the
19783 object. */
19785 unsigned int
19786 ix86_local_alignment (tree exp, enum machine_mode mode,
19787 unsigned int align)
19789 tree type, decl;
19791 if (exp && DECL_P (exp))
19793 type = TREE_TYPE (exp);
19794 decl = exp;
19796 else
19798 type = exp;
19799 decl = NULL;
19802 /* Don't do dynamic stack realignment for long long objects with
19803 -mpreferred-stack-boundary=2. */
19804 if (!TARGET_64BIT
19805 && align == 64
19806 && ix86_preferred_stack_boundary < 64
19807 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
19808 && (!type || !TYPE_USER_ALIGN (type))
19809 && (!decl || !DECL_USER_ALIGN (decl)))
19810 align = 32;
19812 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
19813 register in MODE. We will return the largest alignment of XF
19814 and DF. */
19815 if (!type)
19817 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
19818 align = GET_MODE_ALIGNMENT (DFmode);
19819 return align;
19822 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
19823 to a 16-byte boundary. */
19824 if (TARGET_64BIT)
19826 if (AGGREGATE_TYPE_P (type)
19827 && TYPE_SIZE (type)
19828 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19829 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
19830 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19831 return 128;
19833 if (TREE_CODE (type) == ARRAY_TYPE)
19835 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19836 return 64;
19837 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19838 return 128;
19840 else if (TREE_CODE (type) == COMPLEX_TYPE)
19842 if (TYPE_MODE (type) == DCmode && align < 64)
19843 return 64;
19844 if ((TYPE_MODE (type) == XCmode
19845 || TYPE_MODE (type) == TCmode) && align < 128)
19846 return 128;
19848 else if ((TREE_CODE (type) == RECORD_TYPE
19849 || TREE_CODE (type) == UNION_TYPE
19850 || TREE_CODE (type) == QUAL_UNION_TYPE)
19851 && TYPE_FIELDS (type))
19853 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19854 return 64;
19855 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19856 return 128;
19858 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19859 || TREE_CODE (type) == INTEGER_TYPE)
19862 if (TYPE_MODE (type) == DFmode && align < 64)
19863 return 64;
19864 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19865 return 128;
19867 return align;
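/* Example (a sketch): on 32-bit x86 with -mpreferred-stack-boundary=2,
   a local "long long" (DImode) would ordinarily ask for 64-bit
   alignment, but the code above demotes it to 32 to avoid dynamic stack
   realignment; and a caller-save slot allocated with TYPE == NULL in
   XFmode is given at least DFmode alignment, since the slot may also be
   used to save a DFmode value.  */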
19870 /* Emit RTL insns to initialize the variable parts of a trampoline.
19871 FNADDR is an RTX for the address of the function's pure code.
19872 CXT is an RTX for the static chain value for the function. */
19873 void
19874 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
19876 if (!TARGET_64BIT)
19878 /* Compute offset from the end of the jmp to the target function. */
19879 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
19880 plus_constant (tramp, 10),
19881 NULL_RTX, 1, OPTAB_DIRECT);
19882 emit_move_insn (gen_rtx_MEM (QImode, tramp),
19883 gen_int_mode (0xb9, QImode));
19884 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
19885 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
19886 gen_int_mode (0xe9, QImode));
19887 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
19889 else
19891 int offset = 0;
19892 /* Try to load the address using the shorter movl instead of movabs.
19893 We may want to support movq for kernel mode, but the kernel does not use
19894 trampolines at the moment. */
19895 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
19897 fnaddr = copy_to_mode_reg (DImode, fnaddr);
19898 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19899 gen_int_mode (0xbb41, HImode));
19900 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
19901 gen_lowpart (SImode, fnaddr));
19902 offset += 6;
19904 else
19906 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19907 gen_int_mode (0xbb49, HImode));
19908 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19909 fnaddr);
19910 offset += 10;
19912 /* Load static chain using movabs to r10. */
19913 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19914 gen_int_mode (0xba49, HImode));
19915 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19916 cxt);
19917 offset += 10;
19918 /* Jump to r11. */
19919 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19920 gen_int_mode (0xff49, HImode));
19921 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
19922 gen_int_mode (0xe3, QImode));
19923 offset += 3;
19924 gcc_assert (offset <= TRAMPOLINE_SIZE);
19927 #ifdef ENABLE_EXECUTE_STACK
19928 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
19929 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
19930 #endif
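/* For reference, a sketch of the trampoline bytes emitted above
   (multi-byte immediates are stored little-endian):

   32-bit, 10 bytes:
       b9 <cxt:4>       movl  $cxt, %ecx
       e9 <disp:4>      jmp   fnaddr            (pc-relative)

   64-bit when fnaddr zero-extends, 19 bytes:
       41 bb <imm:4>    movl  $fnaddr, %r11d
       49 ba <imm:8>    movabs $cxt, %r10
       49 ff e3         jmp   *%r11

   64-bit general form, 23 bytes:
       49 bb <imm:8>    movabs $fnaddr, %r11
       49 ba <imm:8>    movabs $cxt, %r10
       49 ff e3         jmp   *%r11  */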
19933 /* Codes for all the SSE/MMX builtins. */
19934 enum ix86_builtins
19936 IX86_BUILTIN_ADDPS,
19937 IX86_BUILTIN_ADDSS,
19938 IX86_BUILTIN_DIVPS,
19939 IX86_BUILTIN_DIVSS,
19940 IX86_BUILTIN_MULPS,
19941 IX86_BUILTIN_MULSS,
19942 IX86_BUILTIN_SUBPS,
19943 IX86_BUILTIN_SUBSS,
19945 IX86_BUILTIN_CMPEQPS,
19946 IX86_BUILTIN_CMPLTPS,
19947 IX86_BUILTIN_CMPLEPS,
19948 IX86_BUILTIN_CMPGTPS,
19949 IX86_BUILTIN_CMPGEPS,
19950 IX86_BUILTIN_CMPNEQPS,
19951 IX86_BUILTIN_CMPNLTPS,
19952 IX86_BUILTIN_CMPNLEPS,
19953 IX86_BUILTIN_CMPNGTPS,
19954 IX86_BUILTIN_CMPNGEPS,
19955 IX86_BUILTIN_CMPORDPS,
19956 IX86_BUILTIN_CMPUNORDPS,
19957 IX86_BUILTIN_CMPEQSS,
19958 IX86_BUILTIN_CMPLTSS,
19959 IX86_BUILTIN_CMPLESS,
19960 IX86_BUILTIN_CMPNEQSS,
19961 IX86_BUILTIN_CMPNLTSS,
19962 IX86_BUILTIN_CMPNLESS,
19963 IX86_BUILTIN_CMPNGTSS,
19964 IX86_BUILTIN_CMPNGESS,
19965 IX86_BUILTIN_CMPORDSS,
19966 IX86_BUILTIN_CMPUNORDSS,
19968 IX86_BUILTIN_COMIEQSS,
19969 IX86_BUILTIN_COMILTSS,
19970 IX86_BUILTIN_COMILESS,
19971 IX86_BUILTIN_COMIGTSS,
19972 IX86_BUILTIN_COMIGESS,
19973 IX86_BUILTIN_COMINEQSS,
19974 IX86_BUILTIN_UCOMIEQSS,
19975 IX86_BUILTIN_UCOMILTSS,
19976 IX86_BUILTIN_UCOMILESS,
19977 IX86_BUILTIN_UCOMIGTSS,
19978 IX86_BUILTIN_UCOMIGESS,
19979 IX86_BUILTIN_UCOMINEQSS,
19981 IX86_BUILTIN_CVTPI2PS,
19982 IX86_BUILTIN_CVTPS2PI,
19983 IX86_BUILTIN_CVTSI2SS,
19984 IX86_BUILTIN_CVTSI642SS,
19985 IX86_BUILTIN_CVTSS2SI,
19986 IX86_BUILTIN_CVTSS2SI64,
19987 IX86_BUILTIN_CVTTPS2PI,
19988 IX86_BUILTIN_CVTTSS2SI,
19989 IX86_BUILTIN_CVTTSS2SI64,
19991 IX86_BUILTIN_MAXPS,
19992 IX86_BUILTIN_MAXSS,
19993 IX86_BUILTIN_MINPS,
19994 IX86_BUILTIN_MINSS,
19996 IX86_BUILTIN_LOADUPS,
19997 IX86_BUILTIN_STOREUPS,
19998 IX86_BUILTIN_MOVSS,
20000 IX86_BUILTIN_MOVHLPS,
20001 IX86_BUILTIN_MOVLHPS,
20002 IX86_BUILTIN_LOADHPS,
20003 IX86_BUILTIN_LOADLPS,
20004 IX86_BUILTIN_STOREHPS,
20005 IX86_BUILTIN_STORELPS,
20007 IX86_BUILTIN_MASKMOVQ,
20008 IX86_BUILTIN_MOVMSKPS,
20009 IX86_BUILTIN_PMOVMSKB,
20011 IX86_BUILTIN_MOVNTPS,
20012 IX86_BUILTIN_MOVNTQ,
20014 IX86_BUILTIN_LOADDQU,
20015 IX86_BUILTIN_STOREDQU,
20017 IX86_BUILTIN_PACKSSWB,
20018 IX86_BUILTIN_PACKSSDW,
20019 IX86_BUILTIN_PACKUSWB,
20021 IX86_BUILTIN_PADDB,
20022 IX86_BUILTIN_PADDW,
20023 IX86_BUILTIN_PADDD,
20024 IX86_BUILTIN_PADDQ,
20025 IX86_BUILTIN_PADDSB,
20026 IX86_BUILTIN_PADDSW,
20027 IX86_BUILTIN_PADDUSB,
20028 IX86_BUILTIN_PADDUSW,
20029 IX86_BUILTIN_PSUBB,
20030 IX86_BUILTIN_PSUBW,
20031 IX86_BUILTIN_PSUBD,
20032 IX86_BUILTIN_PSUBQ,
20033 IX86_BUILTIN_PSUBSB,
20034 IX86_BUILTIN_PSUBSW,
20035 IX86_BUILTIN_PSUBUSB,
20036 IX86_BUILTIN_PSUBUSW,
20038 IX86_BUILTIN_PAND,
20039 IX86_BUILTIN_PANDN,
20040 IX86_BUILTIN_POR,
20041 IX86_BUILTIN_PXOR,
20043 IX86_BUILTIN_PAVGB,
20044 IX86_BUILTIN_PAVGW,
20046 IX86_BUILTIN_PCMPEQB,
20047 IX86_BUILTIN_PCMPEQW,
20048 IX86_BUILTIN_PCMPEQD,
20049 IX86_BUILTIN_PCMPGTB,
20050 IX86_BUILTIN_PCMPGTW,
20051 IX86_BUILTIN_PCMPGTD,
20053 IX86_BUILTIN_PMADDWD,
20055 IX86_BUILTIN_PMAXSW,
20056 IX86_BUILTIN_PMAXUB,
20057 IX86_BUILTIN_PMINSW,
20058 IX86_BUILTIN_PMINUB,
20060 IX86_BUILTIN_PMULHUW,
20061 IX86_BUILTIN_PMULHW,
20062 IX86_BUILTIN_PMULLW,
20064 IX86_BUILTIN_PSADBW,
20065 IX86_BUILTIN_PSHUFW,
20067 IX86_BUILTIN_PSLLW,
20068 IX86_BUILTIN_PSLLD,
20069 IX86_BUILTIN_PSLLQ,
20070 IX86_BUILTIN_PSRAW,
20071 IX86_BUILTIN_PSRAD,
20072 IX86_BUILTIN_PSRLW,
20073 IX86_BUILTIN_PSRLD,
20074 IX86_BUILTIN_PSRLQ,
20075 IX86_BUILTIN_PSLLWI,
20076 IX86_BUILTIN_PSLLDI,
20077 IX86_BUILTIN_PSLLQI,
20078 IX86_BUILTIN_PSRAWI,
20079 IX86_BUILTIN_PSRADI,
20080 IX86_BUILTIN_PSRLWI,
20081 IX86_BUILTIN_PSRLDI,
20082 IX86_BUILTIN_PSRLQI,
20084 IX86_BUILTIN_PUNPCKHBW,
20085 IX86_BUILTIN_PUNPCKHWD,
20086 IX86_BUILTIN_PUNPCKHDQ,
20087 IX86_BUILTIN_PUNPCKLBW,
20088 IX86_BUILTIN_PUNPCKLWD,
20089 IX86_BUILTIN_PUNPCKLDQ,
20091 IX86_BUILTIN_SHUFPS,
20093 IX86_BUILTIN_RCPPS,
20094 IX86_BUILTIN_RCPSS,
20095 IX86_BUILTIN_RSQRTPS,
20096 IX86_BUILTIN_RSQRTPS_NR,
20097 IX86_BUILTIN_RSQRTSS,
20098 IX86_BUILTIN_RSQRTF,
20099 IX86_BUILTIN_SQRTPS,
20100 IX86_BUILTIN_SQRTPS_NR,
20101 IX86_BUILTIN_SQRTSS,
20103 IX86_BUILTIN_UNPCKHPS,
20104 IX86_BUILTIN_UNPCKLPS,
20106 IX86_BUILTIN_ANDPS,
20107 IX86_BUILTIN_ANDNPS,
20108 IX86_BUILTIN_ORPS,
20109 IX86_BUILTIN_XORPS,
20111 IX86_BUILTIN_EMMS,
20112 IX86_BUILTIN_LDMXCSR,
20113 IX86_BUILTIN_STMXCSR,
20114 IX86_BUILTIN_SFENCE,
20116 /* 3DNow! Original */
20117 IX86_BUILTIN_FEMMS,
20118 IX86_BUILTIN_PAVGUSB,
20119 IX86_BUILTIN_PF2ID,
20120 IX86_BUILTIN_PFACC,
20121 IX86_BUILTIN_PFADD,
20122 IX86_BUILTIN_PFCMPEQ,
20123 IX86_BUILTIN_PFCMPGE,
20124 IX86_BUILTIN_PFCMPGT,
20125 IX86_BUILTIN_PFMAX,
20126 IX86_BUILTIN_PFMIN,
20127 IX86_BUILTIN_PFMUL,
20128 IX86_BUILTIN_PFRCP,
20129 IX86_BUILTIN_PFRCPIT1,
20130 IX86_BUILTIN_PFRCPIT2,
20131 IX86_BUILTIN_PFRSQIT1,
20132 IX86_BUILTIN_PFRSQRT,
20133 IX86_BUILTIN_PFSUB,
20134 IX86_BUILTIN_PFSUBR,
20135 IX86_BUILTIN_PI2FD,
20136 IX86_BUILTIN_PMULHRW,
20138 /* 3DNow! Athlon Extensions */
20139 IX86_BUILTIN_PF2IW,
20140 IX86_BUILTIN_PFNACC,
20141 IX86_BUILTIN_PFPNACC,
20142 IX86_BUILTIN_PI2FW,
20143 IX86_BUILTIN_PSWAPDSI,
20144 IX86_BUILTIN_PSWAPDSF,
20146 /* SSE2 */
20147 IX86_BUILTIN_ADDPD,
20148 IX86_BUILTIN_ADDSD,
20149 IX86_BUILTIN_DIVPD,
20150 IX86_BUILTIN_DIVSD,
20151 IX86_BUILTIN_MULPD,
20152 IX86_BUILTIN_MULSD,
20153 IX86_BUILTIN_SUBPD,
20154 IX86_BUILTIN_SUBSD,
20156 IX86_BUILTIN_CMPEQPD,
20157 IX86_BUILTIN_CMPLTPD,
20158 IX86_BUILTIN_CMPLEPD,
20159 IX86_BUILTIN_CMPGTPD,
20160 IX86_BUILTIN_CMPGEPD,
20161 IX86_BUILTIN_CMPNEQPD,
20162 IX86_BUILTIN_CMPNLTPD,
20163 IX86_BUILTIN_CMPNLEPD,
20164 IX86_BUILTIN_CMPNGTPD,
20165 IX86_BUILTIN_CMPNGEPD,
20166 IX86_BUILTIN_CMPORDPD,
20167 IX86_BUILTIN_CMPUNORDPD,
20168 IX86_BUILTIN_CMPEQSD,
20169 IX86_BUILTIN_CMPLTSD,
20170 IX86_BUILTIN_CMPLESD,
20171 IX86_BUILTIN_CMPNEQSD,
20172 IX86_BUILTIN_CMPNLTSD,
20173 IX86_BUILTIN_CMPNLESD,
20174 IX86_BUILTIN_CMPORDSD,
20175 IX86_BUILTIN_CMPUNORDSD,
20177 IX86_BUILTIN_COMIEQSD,
20178 IX86_BUILTIN_COMILTSD,
20179 IX86_BUILTIN_COMILESD,
20180 IX86_BUILTIN_COMIGTSD,
20181 IX86_BUILTIN_COMIGESD,
20182 IX86_BUILTIN_COMINEQSD,
20183 IX86_BUILTIN_UCOMIEQSD,
20184 IX86_BUILTIN_UCOMILTSD,
20185 IX86_BUILTIN_UCOMILESD,
20186 IX86_BUILTIN_UCOMIGTSD,
20187 IX86_BUILTIN_UCOMIGESD,
20188 IX86_BUILTIN_UCOMINEQSD,
20190 IX86_BUILTIN_MAXPD,
20191 IX86_BUILTIN_MAXSD,
20192 IX86_BUILTIN_MINPD,
20193 IX86_BUILTIN_MINSD,
20195 IX86_BUILTIN_ANDPD,
20196 IX86_BUILTIN_ANDNPD,
20197 IX86_BUILTIN_ORPD,
20198 IX86_BUILTIN_XORPD,
20200 IX86_BUILTIN_SQRTPD,
20201 IX86_BUILTIN_SQRTSD,
20203 IX86_BUILTIN_UNPCKHPD,
20204 IX86_BUILTIN_UNPCKLPD,
20206 IX86_BUILTIN_SHUFPD,
20208 IX86_BUILTIN_LOADUPD,
20209 IX86_BUILTIN_STOREUPD,
20210 IX86_BUILTIN_MOVSD,
20212 IX86_BUILTIN_LOADHPD,
20213 IX86_BUILTIN_LOADLPD,
20215 IX86_BUILTIN_CVTDQ2PD,
20216 IX86_BUILTIN_CVTDQ2PS,
20218 IX86_BUILTIN_CVTPD2DQ,
20219 IX86_BUILTIN_CVTPD2PI,
20220 IX86_BUILTIN_CVTPD2PS,
20221 IX86_BUILTIN_CVTTPD2DQ,
20222 IX86_BUILTIN_CVTTPD2PI,
20224 IX86_BUILTIN_CVTPI2PD,
20225 IX86_BUILTIN_CVTSI2SD,
20226 IX86_BUILTIN_CVTSI642SD,
20228 IX86_BUILTIN_CVTSD2SI,
20229 IX86_BUILTIN_CVTSD2SI64,
20230 IX86_BUILTIN_CVTSD2SS,
20231 IX86_BUILTIN_CVTSS2SD,
20232 IX86_BUILTIN_CVTTSD2SI,
20233 IX86_BUILTIN_CVTTSD2SI64,
20235 IX86_BUILTIN_CVTPS2DQ,
20236 IX86_BUILTIN_CVTPS2PD,
20237 IX86_BUILTIN_CVTTPS2DQ,
20239 IX86_BUILTIN_MOVNTI,
20240 IX86_BUILTIN_MOVNTPD,
20241 IX86_BUILTIN_MOVNTDQ,
20243 IX86_BUILTIN_MOVQ128,
20245 /* SSE2 MMX */
20246 IX86_BUILTIN_MASKMOVDQU,
20247 IX86_BUILTIN_MOVMSKPD,
20248 IX86_BUILTIN_PMOVMSKB128,
20250 IX86_BUILTIN_PACKSSWB128,
20251 IX86_BUILTIN_PACKSSDW128,
20252 IX86_BUILTIN_PACKUSWB128,
20254 IX86_BUILTIN_PADDB128,
20255 IX86_BUILTIN_PADDW128,
20256 IX86_BUILTIN_PADDD128,
20257 IX86_BUILTIN_PADDQ128,
20258 IX86_BUILTIN_PADDSB128,
20259 IX86_BUILTIN_PADDSW128,
20260 IX86_BUILTIN_PADDUSB128,
20261 IX86_BUILTIN_PADDUSW128,
20262 IX86_BUILTIN_PSUBB128,
20263 IX86_BUILTIN_PSUBW128,
20264 IX86_BUILTIN_PSUBD128,
20265 IX86_BUILTIN_PSUBQ128,
20266 IX86_BUILTIN_PSUBSB128,
20267 IX86_BUILTIN_PSUBSW128,
20268 IX86_BUILTIN_PSUBUSB128,
20269 IX86_BUILTIN_PSUBUSW128,
20271 IX86_BUILTIN_PAND128,
20272 IX86_BUILTIN_PANDN128,
20273 IX86_BUILTIN_POR128,
20274 IX86_BUILTIN_PXOR128,
20276 IX86_BUILTIN_PAVGB128,
20277 IX86_BUILTIN_PAVGW128,
20279 IX86_BUILTIN_PCMPEQB128,
20280 IX86_BUILTIN_PCMPEQW128,
20281 IX86_BUILTIN_PCMPEQD128,
20282 IX86_BUILTIN_PCMPGTB128,
20283 IX86_BUILTIN_PCMPGTW128,
20284 IX86_BUILTIN_PCMPGTD128,
20286 IX86_BUILTIN_PMADDWD128,
20288 IX86_BUILTIN_PMAXSW128,
20289 IX86_BUILTIN_PMAXUB128,
20290 IX86_BUILTIN_PMINSW128,
20291 IX86_BUILTIN_PMINUB128,
20293 IX86_BUILTIN_PMULUDQ,
20294 IX86_BUILTIN_PMULUDQ128,
20295 IX86_BUILTIN_PMULHUW128,
20296 IX86_BUILTIN_PMULHW128,
20297 IX86_BUILTIN_PMULLW128,
20299 IX86_BUILTIN_PSADBW128,
20300 IX86_BUILTIN_PSHUFHW,
20301 IX86_BUILTIN_PSHUFLW,
20302 IX86_BUILTIN_PSHUFD,
20304 IX86_BUILTIN_PSLLDQI128,
20305 IX86_BUILTIN_PSLLWI128,
20306 IX86_BUILTIN_PSLLDI128,
20307 IX86_BUILTIN_PSLLQI128,
20308 IX86_BUILTIN_PSRAWI128,
20309 IX86_BUILTIN_PSRADI128,
20310 IX86_BUILTIN_PSRLDQI128,
20311 IX86_BUILTIN_PSRLWI128,
20312 IX86_BUILTIN_PSRLDI128,
20313 IX86_BUILTIN_PSRLQI128,
20315 IX86_BUILTIN_PSLLDQ128,
20316 IX86_BUILTIN_PSLLW128,
20317 IX86_BUILTIN_PSLLD128,
20318 IX86_BUILTIN_PSLLQ128,
20319 IX86_BUILTIN_PSRAW128,
20320 IX86_BUILTIN_PSRAD128,
20321 IX86_BUILTIN_PSRLW128,
20322 IX86_BUILTIN_PSRLD128,
20323 IX86_BUILTIN_PSRLQ128,
20325 IX86_BUILTIN_PUNPCKHBW128,
20326 IX86_BUILTIN_PUNPCKHWD128,
20327 IX86_BUILTIN_PUNPCKHDQ128,
20328 IX86_BUILTIN_PUNPCKHQDQ128,
20329 IX86_BUILTIN_PUNPCKLBW128,
20330 IX86_BUILTIN_PUNPCKLWD128,
20331 IX86_BUILTIN_PUNPCKLDQ128,
20332 IX86_BUILTIN_PUNPCKLQDQ128,
20334 IX86_BUILTIN_CLFLUSH,
20335 IX86_BUILTIN_MFENCE,
20336 IX86_BUILTIN_LFENCE,
20338 /* SSE3. */
20339 IX86_BUILTIN_ADDSUBPS,
20340 IX86_BUILTIN_HADDPS,
20341 IX86_BUILTIN_HSUBPS,
20342 IX86_BUILTIN_MOVSHDUP,
20343 IX86_BUILTIN_MOVSLDUP,
20344 IX86_BUILTIN_ADDSUBPD,
20345 IX86_BUILTIN_HADDPD,
20346 IX86_BUILTIN_HSUBPD,
20347 IX86_BUILTIN_LDDQU,
20349 IX86_BUILTIN_MONITOR,
20350 IX86_BUILTIN_MWAIT,
20352 /* SSSE3. */
20353 IX86_BUILTIN_PHADDW,
20354 IX86_BUILTIN_PHADDD,
20355 IX86_BUILTIN_PHADDSW,
20356 IX86_BUILTIN_PHSUBW,
20357 IX86_BUILTIN_PHSUBD,
20358 IX86_BUILTIN_PHSUBSW,
20359 IX86_BUILTIN_PMADDUBSW,
20360 IX86_BUILTIN_PMULHRSW,
20361 IX86_BUILTIN_PSHUFB,
20362 IX86_BUILTIN_PSIGNB,
20363 IX86_BUILTIN_PSIGNW,
20364 IX86_BUILTIN_PSIGND,
20365 IX86_BUILTIN_PALIGNR,
20366 IX86_BUILTIN_PABSB,
20367 IX86_BUILTIN_PABSW,
20368 IX86_BUILTIN_PABSD,
20370 IX86_BUILTIN_PHADDW128,
20371 IX86_BUILTIN_PHADDD128,
20372 IX86_BUILTIN_PHADDSW128,
20373 IX86_BUILTIN_PHSUBW128,
20374 IX86_BUILTIN_PHSUBD128,
20375 IX86_BUILTIN_PHSUBSW128,
20376 IX86_BUILTIN_PMADDUBSW128,
20377 IX86_BUILTIN_PMULHRSW128,
20378 IX86_BUILTIN_PSHUFB128,
20379 IX86_BUILTIN_PSIGNB128,
20380 IX86_BUILTIN_PSIGNW128,
20381 IX86_BUILTIN_PSIGND128,
20382 IX86_BUILTIN_PALIGNR128,
20383 IX86_BUILTIN_PABSB128,
20384 IX86_BUILTIN_PABSW128,
20385 IX86_BUILTIN_PABSD128,
20387 /* AMDFAM10 - SSE4A New Instructions. */
20388 IX86_BUILTIN_MOVNTSD,
20389 IX86_BUILTIN_MOVNTSS,
20390 IX86_BUILTIN_EXTRQI,
20391 IX86_BUILTIN_EXTRQ,
20392 IX86_BUILTIN_INSERTQI,
20393 IX86_BUILTIN_INSERTQ,
20395 /* SSE4.1. */
20396 IX86_BUILTIN_BLENDPD,
20397 IX86_BUILTIN_BLENDPS,
20398 IX86_BUILTIN_BLENDVPD,
20399 IX86_BUILTIN_BLENDVPS,
20400 IX86_BUILTIN_PBLENDVB128,
20401 IX86_BUILTIN_PBLENDW128,
20403 IX86_BUILTIN_DPPD,
20404 IX86_BUILTIN_DPPS,
20406 IX86_BUILTIN_INSERTPS128,
20408 IX86_BUILTIN_MOVNTDQA,
20409 IX86_BUILTIN_MPSADBW128,
20410 IX86_BUILTIN_PACKUSDW128,
20411 IX86_BUILTIN_PCMPEQQ,
20412 IX86_BUILTIN_PHMINPOSUW128,
20414 IX86_BUILTIN_PMAXSB128,
20415 IX86_BUILTIN_PMAXSD128,
20416 IX86_BUILTIN_PMAXUD128,
20417 IX86_BUILTIN_PMAXUW128,
20419 IX86_BUILTIN_PMINSB128,
20420 IX86_BUILTIN_PMINSD128,
20421 IX86_BUILTIN_PMINUD128,
20422 IX86_BUILTIN_PMINUW128,
20424 IX86_BUILTIN_PMOVSXBW128,
20425 IX86_BUILTIN_PMOVSXBD128,
20426 IX86_BUILTIN_PMOVSXBQ128,
20427 IX86_BUILTIN_PMOVSXWD128,
20428 IX86_BUILTIN_PMOVSXWQ128,
20429 IX86_BUILTIN_PMOVSXDQ128,
20431 IX86_BUILTIN_PMOVZXBW128,
20432 IX86_BUILTIN_PMOVZXBD128,
20433 IX86_BUILTIN_PMOVZXBQ128,
20434 IX86_BUILTIN_PMOVZXWD128,
20435 IX86_BUILTIN_PMOVZXWQ128,
20436 IX86_BUILTIN_PMOVZXDQ128,
20438 IX86_BUILTIN_PMULDQ128,
20439 IX86_BUILTIN_PMULLD128,
20441 IX86_BUILTIN_ROUNDPD,
20442 IX86_BUILTIN_ROUNDPS,
20443 IX86_BUILTIN_ROUNDSD,
20444 IX86_BUILTIN_ROUNDSS,
20446 IX86_BUILTIN_PTESTZ,
20447 IX86_BUILTIN_PTESTC,
20448 IX86_BUILTIN_PTESTNZC,
20450 IX86_BUILTIN_VEC_INIT_V2SI,
20451 IX86_BUILTIN_VEC_INIT_V4HI,
20452 IX86_BUILTIN_VEC_INIT_V8QI,
20453 IX86_BUILTIN_VEC_EXT_V2DF,
20454 IX86_BUILTIN_VEC_EXT_V2DI,
20455 IX86_BUILTIN_VEC_EXT_V4SF,
20456 IX86_BUILTIN_VEC_EXT_V4SI,
20457 IX86_BUILTIN_VEC_EXT_V8HI,
20458 IX86_BUILTIN_VEC_EXT_V2SI,
20459 IX86_BUILTIN_VEC_EXT_V4HI,
20460 IX86_BUILTIN_VEC_EXT_V16QI,
20461 IX86_BUILTIN_VEC_SET_V2DI,
20462 IX86_BUILTIN_VEC_SET_V4SF,
20463 IX86_BUILTIN_VEC_SET_V4SI,
20464 IX86_BUILTIN_VEC_SET_V8HI,
20465 IX86_BUILTIN_VEC_SET_V4HI,
20466 IX86_BUILTIN_VEC_SET_V16QI,
20468 IX86_BUILTIN_VEC_PACK_SFIX,
20470 /* SSE4.2. */
20471 IX86_BUILTIN_CRC32QI,
20472 IX86_BUILTIN_CRC32HI,
20473 IX86_BUILTIN_CRC32SI,
20474 IX86_BUILTIN_CRC32DI,
20476 IX86_BUILTIN_PCMPESTRI128,
20477 IX86_BUILTIN_PCMPESTRM128,
20478 IX86_BUILTIN_PCMPESTRA128,
20479 IX86_BUILTIN_PCMPESTRC128,
20480 IX86_BUILTIN_PCMPESTRO128,
20481 IX86_BUILTIN_PCMPESTRS128,
20482 IX86_BUILTIN_PCMPESTRZ128,
20483 IX86_BUILTIN_PCMPISTRI128,
20484 IX86_BUILTIN_PCMPISTRM128,
20485 IX86_BUILTIN_PCMPISTRA128,
20486 IX86_BUILTIN_PCMPISTRC128,
20487 IX86_BUILTIN_PCMPISTRO128,
20488 IX86_BUILTIN_PCMPISTRS128,
20489 IX86_BUILTIN_PCMPISTRZ128,
20491 IX86_BUILTIN_PCMPGTQ,
20493 /* AES instructions */
20494 IX86_BUILTIN_AESENC128,
20495 IX86_BUILTIN_AESENCLAST128,
20496 IX86_BUILTIN_AESDEC128,
20497 IX86_BUILTIN_AESDECLAST128,
20498 IX86_BUILTIN_AESIMC128,
20499 IX86_BUILTIN_AESKEYGENASSIST128,
20501 /* PCLMUL instruction */
20502 IX86_BUILTIN_PCLMULQDQ128,
20504 /* AVX */
20505 IX86_BUILTIN_ADDPD256,
20506 IX86_BUILTIN_ADDPS256,
20507 IX86_BUILTIN_ADDSUBPD256,
20508 IX86_BUILTIN_ADDSUBPS256,
20509 IX86_BUILTIN_ANDPD256,
20510 IX86_BUILTIN_ANDPS256,
20511 IX86_BUILTIN_ANDNPD256,
20512 IX86_BUILTIN_ANDNPS256,
20513 IX86_BUILTIN_BLENDPD256,
20514 IX86_BUILTIN_BLENDPS256,
20515 IX86_BUILTIN_BLENDVPD256,
20516 IX86_BUILTIN_BLENDVPS256,
20517 IX86_BUILTIN_DIVPD256,
20518 IX86_BUILTIN_DIVPS256,
20519 IX86_BUILTIN_DPPS256,
20520 IX86_BUILTIN_HADDPD256,
20521 IX86_BUILTIN_HADDPS256,
20522 IX86_BUILTIN_HSUBPD256,
20523 IX86_BUILTIN_HSUBPS256,
20524 IX86_BUILTIN_MAXPD256,
20525 IX86_BUILTIN_MAXPS256,
20526 IX86_BUILTIN_MINPD256,
20527 IX86_BUILTIN_MINPS256,
20528 IX86_BUILTIN_MULPD256,
20529 IX86_BUILTIN_MULPS256,
20530 IX86_BUILTIN_ORPD256,
20531 IX86_BUILTIN_ORPS256,
20532 IX86_BUILTIN_SHUFPD256,
20533 IX86_BUILTIN_SHUFPS256,
20534 IX86_BUILTIN_SUBPD256,
20535 IX86_BUILTIN_SUBPS256,
20536 IX86_BUILTIN_XORPD256,
20537 IX86_BUILTIN_XORPS256,
20538 IX86_BUILTIN_CMPSD,
20539 IX86_BUILTIN_CMPSS,
20540 IX86_BUILTIN_CMPPD,
20541 IX86_BUILTIN_CMPPS,
20542 IX86_BUILTIN_CMPPD256,
20543 IX86_BUILTIN_CMPPS256,
20544 IX86_BUILTIN_CVTDQ2PD256,
20545 IX86_BUILTIN_CVTDQ2PS256,
20546 IX86_BUILTIN_CVTPD2PS256,
20547 IX86_BUILTIN_CVTPS2DQ256,
20548 IX86_BUILTIN_CVTPS2PD256,
20549 IX86_BUILTIN_CVTTPD2DQ256,
20550 IX86_BUILTIN_CVTPD2DQ256,
20551 IX86_BUILTIN_CVTTPS2DQ256,
20552 IX86_BUILTIN_EXTRACTF128PD256,
20553 IX86_BUILTIN_EXTRACTF128PS256,
20554 IX86_BUILTIN_EXTRACTF128SI256,
20555 IX86_BUILTIN_VZEROALL,
20556 IX86_BUILTIN_VZEROUPPER,
20557 IX86_BUILTIN_VZEROUPPER_REX64,
20558 IX86_BUILTIN_VPERMILVARPD,
20559 IX86_BUILTIN_VPERMILVARPS,
20560 IX86_BUILTIN_VPERMILVARPD256,
20561 IX86_BUILTIN_VPERMILVARPS256,
20562 IX86_BUILTIN_VPERMILPD,
20563 IX86_BUILTIN_VPERMILPS,
20564 IX86_BUILTIN_VPERMILPD256,
20565 IX86_BUILTIN_VPERMILPS256,
20566 IX86_BUILTIN_VPERM2F128PD256,
20567 IX86_BUILTIN_VPERM2F128PS256,
20568 IX86_BUILTIN_VPERM2F128SI256,
20569 IX86_BUILTIN_VBROADCASTSS,
20570 IX86_BUILTIN_VBROADCASTSD256,
20571 IX86_BUILTIN_VBROADCASTSS256,
20572 IX86_BUILTIN_VBROADCASTPD256,
20573 IX86_BUILTIN_VBROADCASTPS256,
20574 IX86_BUILTIN_VINSERTF128PD256,
20575 IX86_BUILTIN_VINSERTF128PS256,
20576 IX86_BUILTIN_VINSERTF128SI256,
20577 IX86_BUILTIN_LOADUPD256,
20578 IX86_BUILTIN_LOADUPS256,
20579 IX86_BUILTIN_STOREUPD256,
20580 IX86_BUILTIN_STOREUPS256,
20581 IX86_BUILTIN_LDDQU256,
20582 IX86_BUILTIN_MOVNTDQ256,
20583 IX86_BUILTIN_MOVNTPD256,
20584 IX86_BUILTIN_MOVNTPS256,
20585 IX86_BUILTIN_LOADDQU256,
20586 IX86_BUILTIN_STOREDQU256,
20587 IX86_BUILTIN_MASKLOADPD,
20588 IX86_BUILTIN_MASKLOADPS,
20589 IX86_BUILTIN_MASKSTOREPD,
20590 IX86_BUILTIN_MASKSTOREPS,
20591 IX86_BUILTIN_MASKLOADPD256,
20592 IX86_BUILTIN_MASKLOADPS256,
20593 IX86_BUILTIN_MASKSTOREPD256,
20594 IX86_BUILTIN_MASKSTOREPS256,
20595 IX86_BUILTIN_MOVSHDUP256,
20596 IX86_BUILTIN_MOVSLDUP256,
20597 IX86_BUILTIN_MOVDDUP256,
20599 IX86_BUILTIN_SQRTPD256,
20600 IX86_BUILTIN_SQRTPS256,
20601 IX86_BUILTIN_SQRTPS_NR256,
20602 IX86_BUILTIN_RSQRTPS256,
20603 IX86_BUILTIN_RSQRTPS_NR256,
20605 IX86_BUILTIN_RCPPS256,
20607 IX86_BUILTIN_ROUNDPD256,
20608 IX86_BUILTIN_ROUNDPS256,
20610 IX86_BUILTIN_UNPCKHPD256,
20611 IX86_BUILTIN_UNPCKLPD256,
20612 IX86_BUILTIN_UNPCKHPS256,
20613 IX86_BUILTIN_UNPCKLPS256,
20615 IX86_BUILTIN_SI256_SI,
20616 IX86_BUILTIN_PS256_PS,
20617 IX86_BUILTIN_PD256_PD,
20618 IX86_BUILTIN_SI_SI256,
20619 IX86_BUILTIN_PS_PS256,
20620 IX86_BUILTIN_PD_PD256,
20622 IX86_BUILTIN_VTESTZPD,
20623 IX86_BUILTIN_VTESTCPD,
20624 IX86_BUILTIN_VTESTNZCPD,
20625 IX86_BUILTIN_VTESTZPS,
20626 IX86_BUILTIN_VTESTCPS,
20627 IX86_BUILTIN_VTESTNZCPS,
20628 IX86_BUILTIN_VTESTZPD256,
20629 IX86_BUILTIN_VTESTCPD256,
20630 IX86_BUILTIN_VTESTNZCPD256,
20631 IX86_BUILTIN_VTESTZPS256,
20632 IX86_BUILTIN_VTESTCPS256,
20633 IX86_BUILTIN_VTESTNZCPS256,
20634 IX86_BUILTIN_PTESTZ256,
20635 IX86_BUILTIN_PTESTC256,
20636 IX86_BUILTIN_PTESTNZC256,
20638 IX86_BUILTIN_MOVMSKPD256,
20639 IX86_BUILTIN_MOVMSKPS256,
20641 /* TFmode support builtins. */
20642 IX86_BUILTIN_INFQ,
20643 IX86_BUILTIN_HUGE_VALQ,
20644 IX86_BUILTIN_FABSQ,
20645 IX86_BUILTIN_COPYSIGNQ,
20647 /* SSE5 instructions */
20648 IX86_BUILTIN_FMADDSS,
20649 IX86_BUILTIN_FMADDSD,
20650 IX86_BUILTIN_FMADDPS,
20651 IX86_BUILTIN_FMADDPD,
20652 IX86_BUILTIN_FMSUBSS,
20653 IX86_BUILTIN_FMSUBSD,
20654 IX86_BUILTIN_FMSUBPS,
20655 IX86_BUILTIN_FMSUBPD,
20656 IX86_BUILTIN_FNMADDSS,
20657 IX86_BUILTIN_FNMADDSD,
20658 IX86_BUILTIN_FNMADDPS,
20659 IX86_BUILTIN_FNMADDPD,
20660 IX86_BUILTIN_FNMSUBSS,
20661 IX86_BUILTIN_FNMSUBSD,
20662 IX86_BUILTIN_FNMSUBPS,
20663 IX86_BUILTIN_FNMSUBPD,
20664 IX86_BUILTIN_PCMOV,
20665 IX86_BUILTIN_PCMOV_V2DI,
20666 IX86_BUILTIN_PCMOV_V4SI,
20667 IX86_BUILTIN_PCMOV_V8HI,
20668 IX86_BUILTIN_PCMOV_V16QI,
20669 IX86_BUILTIN_PCMOV_V4SF,
20670 IX86_BUILTIN_PCMOV_V2DF,
20671 IX86_BUILTIN_PPERM,
20672 IX86_BUILTIN_PERMPS,
20673 IX86_BUILTIN_PERMPD,
20674 IX86_BUILTIN_PMACSSWW,
20675 IX86_BUILTIN_PMACSWW,
20676 IX86_BUILTIN_PMACSSWD,
20677 IX86_BUILTIN_PMACSWD,
20678 IX86_BUILTIN_PMACSSDD,
20679 IX86_BUILTIN_PMACSDD,
20680 IX86_BUILTIN_PMACSSDQL,
20681 IX86_BUILTIN_PMACSSDQH,
20682 IX86_BUILTIN_PMACSDQL,
20683 IX86_BUILTIN_PMACSDQH,
20684 IX86_BUILTIN_PMADCSSWD,
20685 IX86_BUILTIN_PMADCSWD,
20686 IX86_BUILTIN_PHADDBW,
20687 IX86_BUILTIN_PHADDBD,
20688 IX86_BUILTIN_PHADDBQ,
20689 IX86_BUILTIN_PHADDWD,
20690 IX86_BUILTIN_PHADDWQ,
20691 IX86_BUILTIN_PHADDDQ,
20692 IX86_BUILTIN_PHADDUBW,
20693 IX86_BUILTIN_PHADDUBD,
20694 IX86_BUILTIN_PHADDUBQ,
20695 IX86_BUILTIN_PHADDUWD,
20696 IX86_BUILTIN_PHADDUWQ,
20697 IX86_BUILTIN_PHADDUDQ,
20698 IX86_BUILTIN_PHSUBBW,
20699 IX86_BUILTIN_PHSUBWD,
20700 IX86_BUILTIN_PHSUBDQ,
20701 IX86_BUILTIN_PROTB,
20702 IX86_BUILTIN_PROTW,
20703 IX86_BUILTIN_PROTD,
20704 IX86_BUILTIN_PROTQ,
20705 IX86_BUILTIN_PROTB_IMM,
20706 IX86_BUILTIN_PROTW_IMM,
20707 IX86_BUILTIN_PROTD_IMM,
20708 IX86_BUILTIN_PROTQ_IMM,
20709 IX86_BUILTIN_PSHLB,
20710 IX86_BUILTIN_PSHLW,
20711 IX86_BUILTIN_PSHLD,
20712 IX86_BUILTIN_PSHLQ,
20713 IX86_BUILTIN_PSHAB,
20714 IX86_BUILTIN_PSHAW,
20715 IX86_BUILTIN_PSHAD,
20716 IX86_BUILTIN_PSHAQ,
20717 IX86_BUILTIN_FRCZSS,
20718 IX86_BUILTIN_FRCZSD,
20719 IX86_BUILTIN_FRCZPS,
20720 IX86_BUILTIN_FRCZPD,
20721 IX86_BUILTIN_CVTPH2PS,
20722 IX86_BUILTIN_CVTPS2PH,
20724 IX86_BUILTIN_COMEQSS,
20725 IX86_BUILTIN_COMNESS,
20726 IX86_BUILTIN_COMLTSS,
20727 IX86_BUILTIN_COMLESS,
20728 IX86_BUILTIN_COMGTSS,
20729 IX86_BUILTIN_COMGESS,
20730 IX86_BUILTIN_COMUEQSS,
20731 IX86_BUILTIN_COMUNESS,
20732 IX86_BUILTIN_COMULTSS,
20733 IX86_BUILTIN_COMULESS,
20734 IX86_BUILTIN_COMUGTSS,
20735 IX86_BUILTIN_COMUGESS,
20736 IX86_BUILTIN_COMORDSS,
20737 IX86_BUILTIN_COMUNORDSS,
20738 IX86_BUILTIN_COMFALSESS,
20739 IX86_BUILTIN_COMTRUESS,
20741 IX86_BUILTIN_COMEQSD,
20742 IX86_BUILTIN_COMNESD,
20743 IX86_BUILTIN_COMLTSD,
20744 IX86_BUILTIN_COMLESD,
20745 IX86_BUILTIN_COMGTSD,
20746 IX86_BUILTIN_COMGESD,
20747 IX86_BUILTIN_COMUEQSD,
20748 IX86_BUILTIN_COMUNESD,
20749 IX86_BUILTIN_COMULTSD,
20750 IX86_BUILTIN_COMULESD,
20751 IX86_BUILTIN_COMUGTSD,
20752 IX86_BUILTIN_COMUGESD,
20753 IX86_BUILTIN_COMORDSD,
20754 IX86_BUILTIN_COMUNORDSD,
20755 IX86_BUILTIN_COMFALSESD,
20756 IX86_BUILTIN_COMTRUESD,
20758 IX86_BUILTIN_COMEQPS,
20759 IX86_BUILTIN_COMNEPS,
20760 IX86_BUILTIN_COMLTPS,
20761 IX86_BUILTIN_COMLEPS,
20762 IX86_BUILTIN_COMGTPS,
20763 IX86_BUILTIN_COMGEPS,
20764 IX86_BUILTIN_COMUEQPS,
20765 IX86_BUILTIN_COMUNEPS,
20766 IX86_BUILTIN_COMULTPS,
20767 IX86_BUILTIN_COMULEPS,
20768 IX86_BUILTIN_COMUGTPS,
20769 IX86_BUILTIN_COMUGEPS,
20770 IX86_BUILTIN_COMORDPS,
20771 IX86_BUILTIN_COMUNORDPS,
20772 IX86_BUILTIN_COMFALSEPS,
20773 IX86_BUILTIN_COMTRUEPS,
20775 IX86_BUILTIN_COMEQPD,
20776 IX86_BUILTIN_COMNEPD,
20777 IX86_BUILTIN_COMLTPD,
20778 IX86_BUILTIN_COMLEPD,
20779 IX86_BUILTIN_COMGTPD,
20780 IX86_BUILTIN_COMGEPD,
20781 IX86_BUILTIN_COMUEQPD,
20782 IX86_BUILTIN_COMUNEPD,
20783 IX86_BUILTIN_COMULTPD,
20784 IX86_BUILTIN_COMULEPD,
20785 IX86_BUILTIN_COMUGTPD,
20786 IX86_BUILTIN_COMUGEPD,
20787 IX86_BUILTIN_COMORDPD,
20788 IX86_BUILTIN_COMUNORDPD,
20789 IX86_BUILTIN_COMFALSEPD,
20790 IX86_BUILTIN_COMTRUEPD,
20792 IX86_BUILTIN_PCOMEQUB,
20793 IX86_BUILTIN_PCOMNEUB,
20794 IX86_BUILTIN_PCOMLTUB,
20795 IX86_BUILTIN_PCOMLEUB,
20796 IX86_BUILTIN_PCOMGTUB,
20797 IX86_BUILTIN_PCOMGEUB,
20798 IX86_BUILTIN_PCOMFALSEUB,
20799 IX86_BUILTIN_PCOMTRUEUB,
20800 IX86_BUILTIN_PCOMEQUW,
20801 IX86_BUILTIN_PCOMNEUW,
20802 IX86_BUILTIN_PCOMLTUW,
20803 IX86_BUILTIN_PCOMLEUW,
20804 IX86_BUILTIN_PCOMGTUW,
20805 IX86_BUILTIN_PCOMGEUW,
20806 IX86_BUILTIN_PCOMFALSEUW,
20807 IX86_BUILTIN_PCOMTRUEUW,
20808 IX86_BUILTIN_PCOMEQUD,
20809 IX86_BUILTIN_PCOMNEUD,
20810 IX86_BUILTIN_PCOMLTUD,
20811 IX86_BUILTIN_PCOMLEUD,
20812 IX86_BUILTIN_PCOMGTUD,
20813 IX86_BUILTIN_PCOMGEUD,
20814 IX86_BUILTIN_PCOMFALSEUD,
20815 IX86_BUILTIN_PCOMTRUEUD,
20816 IX86_BUILTIN_PCOMEQUQ,
20817 IX86_BUILTIN_PCOMNEUQ,
20818 IX86_BUILTIN_PCOMLTUQ,
20819 IX86_BUILTIN_PCOMLEUQ,
20820 IX86_BUILTIN_PCOMGTUQ,
20821 IX86_BUILTIN_PCOMGEUQ,
20822 IX86_BUILTIN_PCOMFALSEUQ,
20823 IX86_BUILTIN_PCOMTRUEUQ,
20825 IX86_BUILTIN_PCOMEQB,
20826 IX86_BUILTIN_PCOMNEB,
20827 IX86_BUILTIN_PCOMLTB,
20828 IX86_BUILTIN_PCOMLEB,
20829 IX86_BUILTIN_PCOMGTB,
20830 IX86_BUILTIN_PCOMGEB,
20831 IX86_BUILTIN_PCOMFALSEB,
20832 IX86_BUILTIN_PCOMTRUEB,
20833 IX86_BUILTIN_PCOMEQW,
20834 IX86_BUILTIN_PCOMNEW,
20835 IX86_BUILTIN_PCOMLTW,
20836 IX86_BUILTIN_PCOMLEW,
20837 IX86_BUILTIN_PCOMGTW,
20838 IX86_BUILTIN_PCOMGEW,
20839 IX86_BUILTIN_PCOMFALSEW,
20840 IX86_BUILTIN_PCOMTRUEW,
20841 IX86_BUILTIN_PCOMEQD,
20842 IX86_BUILTIN_PCOMNED,
20843 IX86_BUILTIN_PCOMLTD,
20844 IX86_BUILTIN_PCOMLED,
20845 IX86_BUILTIN_PCOMGTD,
20846 IX86_BUILTIN_PCOMGED,
20847 IX86_BUILTIN_PCOMFALSED,
20848 IX86_BUILTIN_PCOMTRUED,
20849 IX86_BUILTIN_PCOMEQQ,
20850 IX86_BUILTIN_PCOMNEQ,
20851 IX86_BUILTIN_PCOMLTQ,
20852 IX86_BUILTIN_PCOMLEQ,
20853 IX86_BUILTIN_PCOMGTQ,
20854 IX86_BUILTIN_PCOMGEQ,
20855 IX86_BUILTIN_PCOMFALSEQ,
20856 IX86_BUILTIN_PCOMTRUEQ,
20858 IX86_BUILTIN_MAX
20861 /* Table for the ix86 builtin decls. */
20862 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
20864 /* Table of all of the builtin functions that are possible with different ISAs
20865 but are waiting to be built until a function is declared to use that
20866 ISA. */
20867 struct builtin_isa GTY(())
20869 tree type; /* builtin type to use in the declaration */
20870 const char *name; /* function name */
20871 int isa; /* isa_flags this builtin is defined for */
20872 bool const_p; /* true if the declaration is constant */
20875 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
20878 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
20879 * of which isa_flags to use in the ix86_builtins_isa array. Stores the
20880 * function decl in the ix86_builtins array. Returns the function decl or
20881 * NULL_TREE if the builtin was not added.
20883 * If the front end has a special hook for builtin functions, delay adding
20884 * builtin functions that aren't in the current ISA until the ISA is changed
20885 * with function-specific optimization. Doing so can save about 300K for the
20886 * default compiler. When the builtin is expanded, check at that time whether
20887 * it is valid.
20889 * If the front end doesn't have a special hook, record all builtins, even if
20890 * they aren't in the current ISA, in case the user uses
20891 * function specific options for a different ISA, so that we don't get scope
20892 * errors if a builtin is added in the middle of a function scope. */
20894 static inline tree
20895 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
20897 tree decl = NULL_TREE;
20899 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
20901 ix86_builtins_isa[(int) code].isa = mask;
20903 if ((mask & ix86_isa_flags) != 0
20904 || (lang_hooks.builtin_function
20905 == lang_hooks.builtin_function_ext_scope))
20908 decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
20909 NULL_TREE);
20910 ix86_builtins[(int) code] = decl;
20911 ix86_builtins_isa[(int) code].type = NULL_TREE;
20913 else
20915 ix86_builtins[(int) code] = NULL_TREE;
20916 ix86_builtins_isa[(int) code].const_p = false;
20917 ix86_builtins_isa[(int) code].type = type;
20918 ix86_builtins_isa[(int) code].name = name;
20922 return decl;
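/* A usage sketch (the type name is illustrative): the builtin tables
   below are walked and each entry registered roughly as

       def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
                          v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   where v4sf_ftype_v4sf_v4sf stands for a FUNCTION_TYPE node built
   elsewhere.  If the builtin's ISA mask is not in ix86_isa_flags and
   immediate registration is not chosen, the decl is only recorded in
   ix86_builtins_isa and materialized later by ix86_add_new_builtins
   once the ISA is enabled.  */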
20925 /* Like def_builtin, but also marks the function decl "const". */
20927 static inline tree
20928 def_builtin_const (int mask, const char *name, tree type,
20929 enum ix86_builtins code)
20931 tree decl = def_builtin (mask, name, type, code);
20932 if (decl)
20933 TREE_READONLY (decl) = 1;
20934 else
20935 ix86_builtins_isa[(int) code].const_p = true;
20937 return decl;
20940 /* Add any new builtin functions for a given ISA that may not have been
20941 declared. This saves a bit of space compared to adding all of the
20942 declarations to the tree whether or not they end up being used. */
20944 static void
20945 ix86_add_new_builtins (int isa)
20947 int i;
20948 tree decl;
20950 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
20952 if ((ix86_builtins_isa[i].isa & isa) != 0
20953 && ix86_builtins_isa[i].type != NULL_TREE)
20955 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
20956 ix86_builtins_isa[i].type,
20957 i, BUILT_IN_MD, NULL,
20958 NULL_TREE);
20960 ix86_builtins[i] = decl;
20961 ix86_builtins_isa[i].type = NULL_TREE;
20962 if (ix86_builtins_isa[i].const_p)
20963 TREE_READONLY (decl) = 1;
20968 /* Bits for builtin_description.flag. */
20970 /* Set when we don't support the comparison natively, and should
20971 swap_comparison in order to support it. */
20972 #define BUILTIN_DESC_SWAP_OPERANDS 1
20974 struct builtin_description
20976 const unsigned int mask;
20977 const enum insn_code icode;
20978 const char *const name;
20979 const enum ix86_builtins code;
20980 const enum rtx_code comparison;
20981 const int flag;
20984 static const struct builtin_description bdesc_comi[] =
20986 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
20987 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
20988 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
20989 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
20990 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
20991 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
20992 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
20993 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
20994 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
20995 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
20996 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
20997 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
20998 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
20999 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
21000 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
21001 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
21002 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
21003 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
21004 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
21005 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
21006 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
21007 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
21008 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
21009 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
21012 static const struct builtin_description bdesc_pcmpestr[] =
21014 /* SSE4.2 */
21015 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
21016 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
21017 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
21018 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
21019 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
21020 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
21021 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
21024 static const struct builtin_description bdesc_pcmpistr[] =
21026 /* SSE4.2 */
21027 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
21028 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
21029 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
21030 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
21031 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
21032 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
21033 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
21036 /* Special builtin types */
21037 enum ix86_special_builtin_type
21039 SPECIAL_FTYPE_UNKNOWN,
21040 VOID_FTYPE_VOID,
21041 V32QI_FTYPE_PCCHAR,
21042 V16QI_FTYPE_PCCHAR,
21043 V8SF_FTYPE_PCV4SF,
21044 V8SF_FTYPE_PCFLOAT,
21045 V4DF_FTYPE_PCV2DF,
21046 V4DF_FTYPE_PCDOUBLE,
21047 V4SF_FTYPE_PCFLOAT,
21048 V2DF_FTYPE_PCDOUBLE,
21049 V8SF_FTYPE_PCV8SF_V8SF,
21050 V4DF_FTYPE_PCV4DF_V4DF,
21051 V4SF_FTYPE_V4SF_PCV2SF,
21052 V4SF_FTYPE_PCV4SF_V4SF,
21053 V2DF_FTYPE_V2DF_PCDOUBLE,
21054 V2DF_FTYPE_PCV2DF_V2DF,
21055 V2DI_FTYPE_PV2DI,
21056 VOID_FTYPE_PV2SF_V4SF,
21057 VOID_FTYPE_PV4DI_V4DI,
21058 VOID_FTYPE_PV2DI_V2DI,
21059 VOID_FTYPE_PCHAR_V32QI,
21060 VOID_FTYPE_PCHAR_V16QI,
21061 VOID_FTYPE_PFLOAT_V8SF,
21062 VOID_FTYPE_PFLOAT_V4SF,
21063 VOID_FTYPE_PDOUBLE_V4DF,
21064 VOID_FTYPE_PDOUBLE_V2DF,
21065 VOID_FTYPE_PDI_DI,
21066 VOID_FTYPE_PINT_INT,
21067 VOID_FTYPE_PV8SF_V8SF_V8SF,
21068 VOID_FTYPE_PV4DF_V4DF_V4DF,
21069 VOID_FTYPE_PV4SF_V4SF_V4SF,
21070 VOID_FTYPE_PV2DF_V2DF_V2DF
21073 /* Builtin types */
21074 enum ix86_builtin_type
21076 FTYPE_UNKNOWN,
21077 FLOAT128_FTYPE_FLOAT128,
21078 FLOAT_FTYPE_FLOAT,
21079 FLOAT128_FTYPE_FLOAT128_FLOAT128,
21080 INT_FTYPE_V8SF_V8SF_PTEST,
21081 INT_FTYPE_V4DI_V4DI_PTEST,
21082 INT_FTYPE_V4DF_V4DF_PTEST,
21083 INT_FTYPE_V4SF_V4SF_PTEST,
21084 INT_FTYPE_V2DI_V2DI_PTEST,
21085 INT_FTYPE_V2DF_V2DF_PTEST,
21086 INT64_FTYPE_V4SF,
21087 INT64_FTYPE_V2DF,
21088 INT_FTYPE_V16QI,
21089 INT_FTYPE_V8QI,
21090 INT_FTYPE_V8SF,
21091 INT_FTYPE_V4DF,
21092 INT_FTYPE_V4SF,
21093 INT_FTYPE_V2DF,
21094 V16QI_FTYPE_V16QI,
21095 V8SI_FTYPE_V8SF,
21096 V8SI_FTYPE_V4SI,
21097 V8HI_FTYPE_V8HI,
21098 V8HI_FTYPE_V16QI,
21099 V8QI_FTYPE_V8QI,
21100 V8SF_FTYPE_V8SF,
21101 V8SF_FTYPE_V8SI,
21102 V8SF_FTYPE_V4SF,
21103 V4SI_FTYPE_V4SI,
21104 V4SI_FTYPE_V16QI,
21105 V4SI_FTYPE_V8SI,
21106 V4SI_FTYPE_V8HI,
21107 V4SI_FTYPE_V4DF,
21108 V4SI_FTYPE_V4SF,
21109 V4SI_FTYPE_V2DF,
21110 V4HI_FTYPE_V4HI,
21111 V4DF_FTYPE_V4DF,
21112 V4DF_FTYPE_V4SI,
21113 V4DF_FTYPE_V4SF,
21114 V4DF_FTYPE_V2DF,
21115 V4SF_FTYPE_V4DF,
21116 V4SF_FTYPE_V4SF,
21117 V4SF_FTYPE_V4SF_VEC_MERGE,
21118 V4SF_FTYPE_V8SF,
21119 V4SF_FTYPE_V4SI,
21120 V4SF_FTYPE_V2DF,
21121 V2DI_FTYPE_V2DI,
21122 V2DI_FTYPE_V16QI,
21123 V2DI_FTYPE_V8HI,
21124 V2DI_FTYPE_V4SI,
21125 V2DF_FTYPE_V2DF,
21126 V2DF_FTYPE_V2DF_VEC_MERGE,
21127 V2DF_FTYPE_V4SI,
21128 V2DF_FTYPE_V4DF,
21129 V2DF_FTYPE_V4SF,
21130 V2DF_FTYPE_V2SI,
21131 V2SI_FTYPE_V2SI,
21132 V2SI_FTYPE_V4SF,
21133 V2SI_FTYPE_V2SF,
21134 V2SI_FTYPE_V2DF,
21135 V2SF_FTYPE_V2SF,
21136 V2SF_FTYPE_V2SI,
21137 V16QI_FTYPE_V16QI_V16QI,
21138 V16QI_FTYPE_V8HI_V8HI,
21139 V8QI_FTYPE_V8QI_V8QI,
21140 V8QI_FTYPE_V4HI_V4HI,
21141 V8HI_FTYPE_V8HI_V8HI,
21142 V8HI_FTYPE_V8HI_V8HI_COUNT,
21143 V8HI_FTYPE_V16QI_V16QI,
21144 V8HI_FTYPE_V4SI_V4SI,
21145 V8HI_FTYPE_V8HI_SI_COUNT,
21146 V8SF_FTYPE_V8SF_V8SF,
21147 V8SF_FTYPE_V8SF_V8SI,
21148 V4SI_FTYPE_V4SI_V4SI,
21149 V4SI_FTYPE_V4SI_V4SI_COUNT,
21150 V4SI_FTYPE_V8HI_V8HI,
21151 V4SI_FTYPE_V4SF_V4SF,
21152 V4SI_FTYPE_V2DF_V2DF,
21153 V4SI_FTYPE_V4SI_SI_COUNT,
21154 V4HI_FTYPE_V4HI_V4HI,
21155 V4HI_FTYPE_V4HI_V4HI_COUNT,
21156 V4HI_FTYPE_V8QI_V8QI,
21157 V4HI_FTYPE_V2SI_V2SI,
21158 V4HI_FTYPE_V4HI_SI_COUNT,
21159 V4DF_FTYPE_V4DF_V4DF,
21160 V4DF_FTYPE_V4DF_V4DI,
21161 V4SF_FTYPE_V4SF_V4SF,
21162 V4SF_FTYPE_V4SF_V4SF_SWAP,
21163 V4SF_FTYPE_V4SF_V4SI,
21164 V4SF_FTYPE_V4SF_V2SI,
21165 V4SF_FTYPE_V4SF_V2DF,
21166 V4SF_FTYPE_V4SF_DI,
21167 V4SF_FTYPE_V4SF_SI,
21168 V2DI_FTYPE_V2DI_V2DI,
21169 V2DI_FTYPE_V2DI_V2DI_COUNT,
21170 V2DI_FTYPE_V16QI_V16QI,
21171 V2DI_FTYPE_V4SI_V4SI,
21172 V2DI_FTYPE_V2DI_V16QI,
21173 V2DI_FTYPE_V2DF_V2DF,
21174 V2DI_FTYPE_V2DI_SI_COUNT,
21175 V2SI_FTYPE_V2SI_V2SI,
21176 V2SI_FTYPE_V2SI_V2SI_COUNT,
21177 V2SI_FTYPE_V4HI_V4HI,
21178 V2SI_FTYPE_V2SF_V2SF,
21179 V2SI_FTYPE_V2SI_SI_COUNT,
21180 V2DF_FTYPE_V2DF_V2DF,
21181 V2DF_FTYPE_V2DF_V2DF_SWAP,
21182 V2DF_FTYPE_V2DF_V4SF,
21183 V2DF_FTYPE_V2DF_V2DI,
21184 V2DF_FTYPE_V2DF_DI,
21185 V2DF_FTYPE_V2DF_SI,
21186 V2SF_FTYPE_V2SF_V2SF,
21187 V1DI_FTYPE_V1DI_V1DI,
21188 V1DI_FTYPE_V1DI_V1DI_COUNT,
21189 V1DI_FTYPE_V8QI_V8QI,
21190 V1DI_FTYPE_V2SI_V2SI,
21191 V1DI_FTYPE_V1DI_SI_COUNT,
21192 UINT64_FTYPE_UINT64_UINT64,
21193 UINT_FTYPE_UINT_UINT,
21194 UINT_FTYPE_UINT_USHORT,
21195 UINT_FTYPE_UINT_UCHAR,
21196 V8HI_FTYPE_V8HI_INT,
21197 V4SI_FTYPE_V4SI_INT,
21198 V4HI_FTYPE_V4HI_INT,
21199 V8SF_FTYPE_V8SF_INT,
21200 V4SI_FTYPE_V8SI_INT,
21201 V4SF_FTYPE_V8SF_INT,
21202 V2DF_FTYPE_V4DF_INT,
21203 V4DF_FTYPE_V4DF_INT,
21204 V4SF_FTYPE_V4SF_INT,
21205 V2DI_FTYPE_V2DI_INT,
21206 V2DI2TI_FTYPE_V2DI_INT,
21207 V2DF_FTYPE_V2DF_INT,
21208 V16QI_FTYPE_V16QI_V16QI_V16QI,
21209 V8SF_FTYPE_V8SF_V8SF_V8SF,
21210 V4DF_FTYPE_V4DF_V4DF_V4DF,
21211 V4SF_FTYPE_V4SF_V4SF_V4SF,
21212 V2DF_FTYPE_V2DF_V2DF_V2DF,
21213 V16QI_FTYPE_V16QI_V16QI_INT,
21214 V8SI_FTYPE_V8SI_V8SI_INT,
21215 V8SI_FTYPE_V8SI_V4SI_INT,
21216 V8HI_FTYPE_V8HI_V8HI_INT,
21217 V8SF_FTYPE_V8SF_V8SF_INT,
21218 V8SF_FTYPE_V8SF_V4SF_INT,
21219 V4SI_FTYPE_V4SI_V4SI_INT,
21220 V4DF_FTYPE_V4DF_V4DF_INT,
21221 V4DF_FTYPE_V4DF_V2DF_INT,
21222 V4SF_FTYPE_V4SF_V4SF_INT,
21223 V2DI_FTYPE_V2DI_V2DI_INT,
21224 V2DI2TI_FTYPE_V2DI_V2DI_INT,
21225 V1DI2DI_FTYPE_V1DI_V1DI_INT,
21226 V2DF_FTYPE_V2DF_V2DF_INT,
21227 V2DI_FTYPE_V2DI_UINT_UINT,
21228 V2DI_FTYPE_V2DI_V2DI_UINT_UINT

/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
};
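
/* The special-argument builtins above perform loads, stores, fences
   and other side effects and are expanded through
   ix86_expand_special_args_builtin; the bdesc_args table below holds
   register-to-register operations expanded generically by
   ix86_expand_args_builtin, driven by the function-type code in the
   last field of each entry.  */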

/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
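
  /* Note that each MMX shift above comes in two flavors sharing one
     insn pattern: the psllwi/pslldi/psllqi style forms take the count
     as a scalar or immediate (the ..._SI_COUNT types), while the plain
     forms take the count in an MMX register (the ..._VnXX_COUNT
     types); only the function-type code tells the expander which
     operand handling to use.  */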

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  /* 3DNow!A */
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
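
  /* SSE cmpps/cmpss only encode the eq/lt/le/unord predicates and
     their negations directly, so the cmpgt and cmpge builtins above
     are expressed as LT/LE with a _SWAP function type: the expander
     exchanges the two operands before emitting the comparison.  */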

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE MMX or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
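
  /* pslldqi128 and psrldqi128 shift the whole 128-bit value by bytes,
     so they map to the TImode patterns sse2_ashlti3 and sse2_lshrti3;
     the V2DI2TI type code tells the expander that the builtin is typed
     on V2DI while the insn itself operates in TImode.  */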

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  /* SSE2 MMX */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  /* SSSE3 */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  /* SSSE3.  */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  /* SSE4.1 and SSE5 */
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21765 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21766 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21767 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
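/* The three ptest rows share a single insn pattern; the RTX comparison code
   in each row selects which flag the builtin materializes: EQ reads ZF
   (ptestz), LTU reads CF (ptestc), and GTU tests both flags clear
   (ptestnzc).  A minimal sketch, assuming <smmintrin.h> and -msse4.1:

     int zf = _mm_testz_si128 (a, b);  /* 1 iff (a & b) == 0, the ptestz path   */
     int cf = _mm_testc_si128 (a, b);  /* 1 iff (~a & b) == 0, the ptestc path  */
*/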
21769 /* SSE4.2 */
21770 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21771 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
21772 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
21773 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
21774 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
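/* Usage sketch for the CRC32 rows above (assuming -msse4.2; the same
   builtins back _mm_crc32_u8 and friends in the SSE4.2 intrinsics header):

     unsigned int crc = ~0U;
     crc = __builtin_ia32_crc32qi (crc, byte);   /* UINT_FTYPE_UINT_UCHAR  */
     crc = __builtin_ia32_crc32si (crc, word);   /* UINT_FTYPE_UINT_UINT   */

   The DI row is additionally gated on OPTION_MASK_ISA_64BIT, so the 64-bit
   accumulator form is only registered on 64-bit targets.  */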
21776 /* SSE4A */
21777 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
21778 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
21779 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
21780 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21782 /* AES */
21783 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
21784 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21786 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21787 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21788 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21789 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21791 /* PCLMUL */
21792 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
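/* Note (a reading of the table layout, stated as an aside): the AES and
   PCLMUL rows carry a null name field.  The declaration loop skips rows
   whose name is 0, so these entries only supply the insn pattern and
   prototype; the user-visible __builtin_ia32_aes* and
   __builtin_ia32_pclmulqdq128 names are presumably attached where those
   builtins are declared separately with their own ISA masks.  */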
21794 /* AVX */
21795 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21796 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21797 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21798 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21799 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21800 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21801 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21802 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21803 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21804 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21805 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21806 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21807 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21808 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21809 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21810 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21811 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21812 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21813 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21814 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21815 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21816 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21817 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21818 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21819 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21820 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21822 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
21823 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
21824 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
21825 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
21827 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21828 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21829 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
21830 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
21831 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21832 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21833 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21834 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21835 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21836 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21837 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21838 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21839 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21840 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
21841 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
21842 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
21843 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
21844 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
21845 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
21846 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21847 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
21848 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21849 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21850 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21851 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21852 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21853 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
21854 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21855 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21856 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21857 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21858 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
21859 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
21860 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
21862 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21863 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21864 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21866 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21867 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21868 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21869 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21870 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21872 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21874 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21875 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21877 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21878 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21879 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21880 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21882 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
21883 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
21884 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
21885 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
21886 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
21887 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
21889 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21890 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21891 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21892 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21893 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21894 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21895 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21896 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21897 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21898 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21899 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21900 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21901 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21902 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21903 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21905 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
21906 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
21907 };
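/* A user-level sketch of how one of the AVX rows above surfaces
   (assuming <immintrin.h> and -mavx):

     __m256d a = _mm256_set1_pd (1.0);
     __m256d b = _mm256_set1_pd (2.0);
     __m256d s = _mm256_add_pd (a, b);   /* expands to __builtin_ia32_addpd256  */
*/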
21909 /* SSE5 */
21910 enum multi_arg_type {
21911 MULTI_ARG_UNKNOWN,
21912 MULTI_ARG_3_SF,
21913 MULTI_ARG_3_DF,
21914 MULTI_ARG_3_DI,
21915 MULTI_ARG_3_SI,
21916 MULTI_ARG_3_SI_DI,
21917 MULTI_ARG_3_HI,
21918 MULTI_ARG_3_HI_SI,
21919 MULTI_ARG_3_QI,
21920 MULTI_ARG_3_PERMPS,
21921 MULTI_ARG_3_PERMPD,
21922 MULTI_ARG_2_SF,
21923 MULTI_ARG_2_DF,
21924 MULTI_ARG_2_DI,
21925 MULTI_ARG_2_SI,
21926 MULTI_ARG_2_HI,
21927 MULTI_ARG_2_QI,
21928 MULTI_ARG_2_DI_IMM,
21929 MULTI_ARG_2_SI_IMM,
21930 MULTI_ARG_2_HI_IMM,
21931 MULTI_ARG_2_QI_IMM,
21932 MULTI_ARG_2_SF_CMP,
21933 MULTI_ARG_2_DF_CMP,
21934 MULTI_ARG_2_DI_CMP,
21935 MULTI_ARG_2_SI_CMP,
21936 MULTI_ARG_2_HI_CMP,
21937 MULTI_ARG_2_QI_CMP,
21938 MULTI_ARG_2_DI_TF,
21939 MULTI_ARG_2_SI_TF,
21940 MULTI_ARG_2_HI_TF,
21941 MULTI_ARG_2_QI_TF,
21942 MULTI_ARG_2_SF_TF,
21943 MULTI_ARG_2_DF_TF,
21944 MULTI_ARG_1_SF,
21945 MULTI_ARG_1_DF,
21946 MULTI_ARG_1_DI,
21947 MULTI_ARG_1_SI,
21948 MULTI_ARG_1_HI,
21949 MULTI_ARG_1_QI,
21950 MULTI_ARG_1_SI_DI,
21951 MULTI_ARG_1_HI_DI,
21952 MULTI_ARG_1_HI_SI,
21953 MULTI_ARG_1_QI_DI,
21954 MULTI_ARG_1_QI_SI,
21955 MULTI_ARG_1_QI_HI,
21956 MULTI_ARG_1_PH2PS,
21957   MULTI_ARG_1_PS2PH
21958 };
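/* Naming sketch (an annotation, not original commentary):
   MULTI_ARG_<n>_<mode> encodes the operand count and element mode of an
   SSE5 builtin; a second mode names a widening result, e.g.
   MULTI_ARG_3_SI_DI for the pmacssdql family, where SImode sources are
   accumulated into DImode results.  The _CMP variants take an RTX
   comparison code, the _TF variants a true/false code, and the _IMM
   variants an immediate shift or rotate count.  */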
21960 static const struct builtin_description bdesc_multi_arg[] =
21961 {
21962 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
21963 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
21964 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
21965 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
21966 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
21967 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
21968 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
21969 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
21970 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
21971 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
21972 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
21973 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
21974 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
21975 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
21976 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
21977 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
21978 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, 0, (int)MULTI_ARG_3_DI },
21979 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
21980 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
21981 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
21982 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
21983 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
21984 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
21985 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
21986 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
21987 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
21988 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
21989 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
21990 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
21991 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
21992 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
21993 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
21994 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
21995 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
21996 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
21997 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
21998 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
21999 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
22000 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
22001 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
22002 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
22003 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
22004 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
22005 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
22006 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
22007 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
22008 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
22009 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
22010 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
22011 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
22012 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
22013 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
22014 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
22015 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
22016 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
22017 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
22018 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
22019 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
22020 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
22021 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
22022 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
22023 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
22024 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
22025 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
22026 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
22027 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
22028 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
22029 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
22030 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
22031 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
22032 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
22033 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
22034 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
22035 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
22036 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
22038 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
22039 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22040 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22041 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
22042 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
22043 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
22044 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
22045 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22046 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22047 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22048 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22049 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22050 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22051 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22052 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22053 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
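/* Note: rows such as __builtin_ia32_comneqss above deliberately reuse
   IX86_BUILTIN_COMNESS.  Two user-visible spellings of the same comparison
   map onto one builtin code and one RTX comparison, so the duplication is
   an alias, not a typo; the same pattern repeats in the SD, PS, PD and
   pcom tables below.  */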
22055 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
22056 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22057 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22058 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
22059 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
22060 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
22061 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
22062 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22063 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22064 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22065 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22066 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22067 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22068 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22069 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22070 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22072 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
22073 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22074 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22075 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
22076 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
22077 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
22078 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
22079 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22080 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22081 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22082 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22083 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22084 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22085 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22086 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22087 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
22089 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
22090 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22091 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22092 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
22093 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
22094 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
22095 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
22096 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22097 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22098 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22099 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22100 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22101 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22102 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22103 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22104 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22106 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
22107 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22108 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22109 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
22110 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
22111 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
22112 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
22114 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
22115 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22116 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22117 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
22118 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
22119 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
22120 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
22122 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
22123 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22124 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22125 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
22126 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
22127 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
22128 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
22130 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22131 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22132 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22133 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
22134 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
22135 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
22136 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
22138 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
22139 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22140 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22141 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
22142 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
22143 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
22144 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
22146 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
22147 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22148 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22149 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
22150 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
22151 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
22152 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
22154 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
22155 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22156 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22157 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
22158 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
22159 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
22160 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
22162 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22163 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22164 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22165 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
22166 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
22167 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
22168 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
22170 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
22171 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
22172 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
22173 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
22174 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
22175 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
22176 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
22177 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
22179 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22180 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22181 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22182 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22183 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22184 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22185 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22186 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22188 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22189 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22190 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22191 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22192 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22193 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22194 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22195 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22196 };
22198 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
22199 not in the current target ISA, so that the user can compile particular
22200 modules with target-specific options that differ from the command-line
22201 options. */
22202 static void
22203 ix86_init_mmx_sse_builtins (void)
22204 {
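  /* A sketch of the use case the comment above describes (hypothetical
     user code, relying on GCC's per-function target attribute):

       __attribute__((target ("sse4.1")))
       __m128d round_down (__m128d x)
       {
         return _mm_floor_pd (x);   /* needs the SSE4.1 round builtins  */
       }

     Even when the command line lacks -msse4.1, the declarations created
     here let such a function compile.  */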
22205 const struct builtin_description * d;
22206 size_t i;
22208 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
22209 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
22210 tree V1DI_type_node
22211 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
22212 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
22213 tree V2DI_type_node
22214 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
22215 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
22216 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
22217 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
22218 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
22219 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
22220 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
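/* Illustration: build_vector_type_for_mode derives the lane count from the
   machine mode, so V4SF_type_node above is the tree-level equivalent of
   "float __attribute__ ((vector_size (16)))", i.e. four floats in one
   V4SFmode register.  */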
22222 tree pchar_type_node = build_pointer_type (char_type_node);
22223 tree pcchar_type_node
22224 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
22225 tree pfloat_type_node = build_pointer_type (float_type_node);
22226 tree pcfloat_type_node
22227 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
22228 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
22229 tree pcv2sf_type_node
22230 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
22231 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
22232 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
22234 /* Comparisons. */
22235 tree int_ftype_v4sf_v4sf
22236 = build_function_type_list (integer_type_node,
22237 V4SF_type_node, V4SF_type_node, NULL_TREE);
22238 tree v4si_ftype_v4sf_v4sf
22239 = build_function_type_list (V4SI_type_node,
22240 V4SF_type_node, V4SF_type_node, NULL_TREE);
22241 /* MMX/SSE/integer conversions. */
22242 tree int_ftype_v4sf
22243 = build_function_type_list (integer_type_node,
22244 V4SF_type_node, NULL_TREE);
22245 tree int64_ftype_v4sf
22246 = build_function_type_list (long_long_integer_type_node,
22247 V4SF_type_node, NULL_TREE);
22248 tree int_ftype_v8qi
22249 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
22250 tree v4sf_ftype_v4sf_int
22251 = build_function_type_list (V4SF_type_node,
22252 V4SF_type_node, integer_type_node, NULL_TREE);
22253 tree v4sf_ftype_v4sf_int64
22254 = build_function_type_list (V4SF_type_node,
22255 V4SF_type_node, long_long_integer_type_node,
22256 NULL_TREE);
22257 tree v4sf_ftype_v4sf_v2si
22258 = build_function_type_list (V4SF_type_node,
22259 V4SF_type_node, V2SI_type_node, NULL_TREE);
22261 /* Miscellaneous. */
22262 tree v8qi_ftype_v4hi_v4hi
22263 = build_function_type_list (V8QI_type_node,
22264 V4HI_type_node, V4HI_type_node, NULL_TREE);
22265 tree v4hi_ftype_v2si_v2si
22266 = build_function_type_list (V4HI_type_node,
22267 V2SI_type_node, V2SI_type_node, NULL_TREE);
22268 tree v4sf_ftype_v4sf_v4sf_int
22269 = build_function_type_list (V4SF_type_node,
22270 V4SF_type_node, V4SF_type_node,
22271 integer_type_node, NULL_TREE);
22272 tree v2si_ftype_v4hi_v4hi
22273 = build_function_type_list (V2SI_type_node,
22274 V4HI_type_node, V4HI_type_node, NULL_TREE);
22275 tree v4hi_ftype_v4hi_int
22276 = build_function_type_list (V4HI_type_node,
22277 V4HI_type_node, integer_type_node, NULL_TREE);
22278 tree v2si_ftype_v2si_int
22279 = build_function_type_list (V2SI_type_node,
22280 V2SI_type_node, integer_type_node, NULL_TREE);
22281 tree v1di_ftype_v1di_int
22282 = build_function_type_list (V1DI_type_node,
22283 V1DI_type_node, integer_type_node, NULL_TREE);
22285 tree void_ftype_void
22286 = build_function_type (void_type_node, void_list_node);
22287 tree void_ftype_unsigned
22288 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22289 tree void_ftype_unsigned_unsigned
22290 = build_function_type_list (void_type_node, unsigned_type_node,
22291 unsigned_type_node, NULL_TREE);
22292 tree void_ftype_pcvoid_unsigned_unsigned
22293 = build_function_type_list (void_type_node, const_ptr_type_node,
22294 unsigned_type_node, unsigned_type_node,
22295 NULL_TREE);
22296 tree unsigned_ftype_void
22297 = build_function_type (unsigned_type_node, void_list_node);
22298 tree v2si_ftype_v4sf
22299 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
22300 /* Loads/stores. */
22301 tree void_ftype_v8qi_v8qi_pchar
22302 = build_function_type_list (void_type_node,
22303 V8QI_type_node, V8QI_type_node,
22304 pchar_type_node, NULL_TREE);
22305 tree v4sf_ftype_pcfloat
22306 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
22307 tree v4sf_ftype_v4sf_pcv2sf
22308 = build_function_type_list (V4SF_type_node,
22309 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
22310 tree void_ftype_pv2sf_v4sf
22311 = build_function_type_list (void_type_node,
22312 pv2sf_type_node, V4SF_type_node, NULL_TREE);
22313 tree void_ftype_pfloat_v4sf
22314 = build_function_type_list (void_type_node,
22315 pfloat_type_node, V4SF_type_node, NULL_TREE);
22316 tree void_ftype_pdi_di
22317 = build_function_type_list (void_type_node,
22318 pdi_type_node, long_long_unsigned_type_node,
22319 NULL_TREE);
22320 tree void_ftype_pv2di_v2di
22321 = build_function_type_list (void_type_node,
22322 pv2di_type_node, V2DI_type_node, NULL_TREE);
22323 /* Normal vector unops. */
22324 tree v4sf_ftype_v4sf
22325 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
22326 tree v16qi_ftype_v16qi
22327 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
22328 tree v8hi_ftype_v8hi
22329 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
22330 tree v4si_ftype_v4si
22331 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
22332 tree v8qi_ftype_v8qi
22333 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
22334 tree v4hi_ftype_v4hi
22335 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
22337 /* Normal vector binops. */
22338 tree v4sf_ftype_v4sf_v4sf
22339 = build_function_type_list (V4SF_type_node,
22340 V4SF_type_node, V4SF_type_node, NULL_TREE);
22341 tree v8qi_ftype_v8qi_v8qi
22342 = build_function_type_list (V8QI_type_node,
22343 V8QI_type_node, V8QI_type_node, NULL_TREE);
22344 tree v4hi_ftype_v4hi_v4hi
22345 = build_function_type_list (V4HI_type_node,
22346 V4HI_type_node, V4HI_type_node, NULL_TREE);
22347 tree v2si_ftype_v2si_v2si
22348 = build_function_type_list (V2SI_type_node,
22349 V2SI_type_node, V2SI_type_node, NULL_TREE);
22350 tree v1di_ftype_v1di_v1di
22351 = build_function_type_list (V1DI_type_node,
22352 V1DI_type_node, V1DI_type_node, NULL_TREE);
22353 tree v1di_ftype_v1di_v1di_int
22354 = build_function_type_list (V1DI_type_node,
22355 V1DI_type_node, V1DI_type_node,
22356 integer_type_node, NULL_TREE);
22357 tree v2si_ftype_v2sf
22358 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
22359 tree v2sf_ftype_v2si
22360 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
22361 tree v2si_ftype_v2si
22362 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
22363 tree v2sf_ftype_v2sf
22364 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
22365 tree v2sf_ftype_v2sf_v2sf
22366 = build_function_type_list (V2SF_type_node,
22367 V2SF_type_node, V2SF_type_node, NULL_TREE);
22368 tree v2si_ftype_v2sf_v2sf
22369 = build_function_type_list (V2SI_type_node,
22370 V2SF_type_node, V2SF_type_node, NULL_TREE);
22371 tree pint_type_node = build_pointer_type (integer_type_node);
22372 tree pdouble_type_node = build_pointer_type (double_type_node);
22373 tree pcdouble_type_node = build_pointer_type (
22374 build_type_variant (double_type_node, 1, 0));
22375 tree int_ftype_v2df_v2df
22376 = build_function_type_list (integer_type_node,
22377 V2DF_type_node, V2DF_type_node, NULL_TREE);
22379 tree void_ftype_pcvoid
22380 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
22381 tree v4sf_ftype_v4si
22382 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
22383 tree v4si_ftype_v4sf
22384 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
22385 tree v2df_ftype_v4si
22386 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
22387 tree v4si_ftype_v2df
22388 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
22389 tree v4si_ftype_v2df_v2df
22390 = build_function_type_list (V4SI_type_node,
22391 V2DF_type_node, V2DF_type_node, NULL_TREE);
22392 tree v2si_ftype_v2df
22393 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
22394 tree v4sf_ftype_v2df
22395 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
22396 tree v2df_ftype_v2si
22397 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
22398 tree v2df_ftype_v4sf
22399 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
22400 tree int_ftype_v2df
22401 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
22402 tree int64_ftype_v2df
22403 = build_function_type_list (long_long_integer_type_node,
22404 V2DF_type_node, NULL_TREE);
22405 tree v2df_ftype_v2df_int
22406 = build_function_type_list (V2DF_type_node,
22407 V2DF_type_node, integer_type_node, NULL_TREE);
22408 tree v2df_ftype_v2df_int64
22409 = build_function_type_list (V2DF_type_node,
22410 V2DF_type_node, long_long_integer_type_node,
22411 NULL_TREE);
22412 tree v4sf_ftype_v4sf_v2df
22413 = build_function_type_list (V4SF_type_node,
22414 V4SF_type_node, V2DF_type_node, NULL_TREE);
22415 tree v2df_ftype_v2df_v4sf
22416 = build_function_type_list (V2DF_type_node,
22417 V2DF_type_node, V4SF_type_node, NULL_TREE);
22418 tree v2df_ftype_v2df_v2df_int
22419 = build_function_type_list (V2DF_type_node,
22420 V2DF_type_node, V2DF_type_node,
22421 integer_type_node,
22422 NULL_TREE);
22423 tree v2df_ftype_v2df_pcdouble
22424 = build_function_type_list (V2DF_type_node,
22425 V2DF_type_node, pcdouble_type_node, NULL_TREE);
22426 tree void_ftype_pdouble_v2df
22427 = build_function_type_list (void_type_node,
22428 pdouble_type_node, V2DF_type_node, NULL_TREE);
22429 tree void_ftype_pint_int
22430 = build_function_type_list (void_type_node,
22431 pint_type_node, integer_type_node, NULL_TREE);
22432 tree void_ftype_v16qi_v16qi_pchar
22433 = build_function_type_list (void_type_node,
22434 V16QI_type_node, V16QI_type_node,
22435 pchar_type_node, NULL_TREE);
22436 tree v2df_ftype_pcdouble
22437 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
22438 tree v2df_ftype_v2df_v2df
22439 = build_function_type_list (V2DF_type_node,
22440 V2DF_type_node, V2DF_type_node, NULL_TREE);
22441 tree v16qi_ftype_v16qi_v16qi
22442 = build_function_type_list (V16QI_type_node,
22443 V16QI_type_node, V16QI_type_node, NULL_TREE);
22444 tree v8hi_ftype_v8hi_v8hi
22445 = build_function_type_list (V8HI_type_node,
22446 V8HI_type_node, V8HI_type_node, NULL_TREE);
22447 tree v4si_ftype_v4si_v4si
22448 = build_function_type_list (V4SI_type_node,
22449 V4SI_type_node, V4SI_type_node, NULL_TREE);
22450 tree v2di_ftype_v2di_v2di
22451 = build_function_type_list (V2DI_type_node,
22452 V2DI_type_node, V2DI_type_node, NULL_TREE);
22453 tree v2di_ftype_v2df_v2df
22454 = build_function_type_list (V2DI_type_node,
22455 V2DF_type_node, V2DF_type_node, NULL_TREE);
22456 tree v2df_ftype_v2df
22457 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
22458 tree v2di_ftype_v2di_int
22459 = build_function_type_list (V2DI_type_node,
22460 V2DI_type_node, integer_type_node, NULL_TREE);
22461 tree v2di_ftype_v2di_v2di_int
22462 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22463 V2DI_type_node, integer_type_node, NULL_TREE);
22464 tree v4si_ftype_v4si_int
22465 = build_function_type_list (V4SI_type_node,
22466 V4SI_type_node, integer_type_node, NULL_TREE);
22467 tree v8hi_ftype_v8hi_int
22468 = build_function_type_list (V8HI_type_node,
22469 V8HI_type_node, integer_type_node, NULL_TREE);
22470 tree v4si_ftype_v8hi_v8hi
22471 = build_function_type_list (V4SI_type_node,
22472 V8HI_type_node, V8HI_type_node, NULL_TREE);
22473 tree v1di_ftype_v8qi_v8qi
22474 = build_function_type_list (V1DI_type_node,
22475 V8QI_type_node, V8QI_type_node, NULL_TREE);
22476 tree v1di_ftype_v2si_v2si
22477 = build_function_type_list (V1DI_type_node,
22478 V2SI_type_node, V2SI_type_node, NULL_TREE);
22479 tree v2di_ftype_v16qi_v16qi
22480 = build_function_type_list (V2DI_type_node,
22481 V16QI_type_node, V16QI_type_node, NULL_TREE);
22482 tree v2di_ftype_v4si_v4si
22483 = build_function_type_list (V2DI_type_node,
22484 V4SI_type_node, V4SI_type_node, NULL_TREE);
22485 tree int_ftype_v16qi
22486 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
22487 tree v16qi_ftype_pcchar
22488 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
22489 tree void_ftype_pchar_v16qi
22490 = build_function_type_list (void_type_node,
22491 pchar_type_node, V16QI_type_node, NULL_TREE);
22493 tree v2di_ftype_v2di_unsigned_unsigned
22494 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22495 unsigned_type_node, unsigned_type_node,
22496 NULL_TREE);
22497 tree v2di_ftype_v2di_v2di_unsigned_unsigned
22498 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
22499 unsigned_type_node, unsigned_type_node,
22500 NULL_TREE);
22501 tree v2di_ftype_v2di_v16qi
22502 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
22503 NULL_TREE);
22504 tree v2df_ftype_v2df_v2df_v2df
22505 = build_function_type_list (V2DF_type_node,
22506 V2DF_type_node, V2DF_type_node,
22507 V2DF_type_node, NULL_TREE);
22508 tree v4sf_ftype_v4sf_v4sf_v4sf
22509 = build_function_type_list (V4SF_type_node,
22510 V4SF_type_node, V4SF_type_node,
22511 V4SF_type_node, NULL_TREE);
22512 tree v8hi_ftype_v16qi
22513 = build_function_type_list (V8HI_type_node, V16QI_type_node,
22514 NULL_TREE);
22515 tree v4si_ftype_v16qi
22516 = build_function_type_list (V4SI_type_node, V16QI_type_node,
22517 NULL_TREE);
22518 tree v2di_ftype_v16qi
22519 = build_function_type_list (V2DI_type_node, V16QI_type_node,
22520 NULL_TREE);
22521 tree v4si_ftype_v8hi
22522 = build_function_type_list (V4SI_type_node, V8HI_type_node,
22523 NULL_TREE);
22524 tree v2di_ftype_v8hi
22525 = build_function_type_list (V2DI_type_node, V8HI_type_node,
22526 NULL_TREE);
22527 tree v2di_ftype_v4si
22528 = build_function_type_list (V2DI_type_node, V4SI_type_node,
22529 NULL_TREE);
22530 tree v2di_ftype_pv2di
22531 = build_function_type_list (V2DI_type_node, pv2di_type_node,
22532 NULL_TREE);
22533 tree v16qi_ftype_v16qi_v16qi_int
22534 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22535 V16QI_type_node, integer_type_node,
22536 NULL_TREE);
22537 tree v16qi_ftype_v16qi_v16qi_v16qi
22538 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22539 V16QI_type_node, V16QI_type_node,
22540 NULL_TREE);
22541 tree v8hi_ftype_v8hi_v8hi_int
22542 = build_function_type_list (V8HI_type_node, V8HI_type_node,
22543 V8HI_type_node, integer_type_node,
22544 NULL_TREE);
22545 tree v4si_ftype_v4si_v4si_int
22546 = build_function_type_list (V4SI_type_node, V4SI_type_node,
22547 V4SI_type_node, integer_type_node,
22548 NULL_TREE);
22549 tree int_ftype_v2di_v2di
22550 = build_function_type_list (integer_type_node,
22551 V2DI_type_node, V2DI_type_node,
22552 NULL_TREE);
22553 tree int_ftype_v16qi_int_v16qi_int_int
22554 = build_function_type_list (integer_type_node,
22555 V16QI_type_node,
22556 integer_type_node,
22557 V16QI_type_node,
22558 integer_type_node,
22559 integer_type_node,
22560 NULL_TREE);
22561 tree v16qi_ftype_v16qi_int_v16qi_int_int
22562 = build_function_type_list (V16QI_type_node,
22563 V16QI_type_node,
22564 integer_type_node,
22565 V16QI_type_node,
22566 integer_type_node,
22567 integer_type_node,
22568 NULL_TREE);
22569 tree int_ftype_v16qi_v16qi_int
22570 = build_function_type_list (integer_type_node,
22571 V16QI_type_node,
22572 V16QI_type_node,
22573 integer_type_node,
22574 NULL_TREE);
22576 /* SSE5 instructions.  */
22577 tree v2di_ftype_v2di_v2di_v2di
22578 = build_function_type_list (V2DI_type_node,
22579 V2DI_type_node,
22580 V2DI_type_node,
22581 V2DI_type_node,
22582 NULL_TREE);
22584 tree v4si_ftype_v4si_v4si_v4si
22585 = build_function_type_list (V4SI_type_node,
22586 V4SI_type_node,
22587 V4SI_type_node,
22588 V4SI_type_node,
22589 NULL_TREE);
22591 tree v4si_ftype_v4si_v4si_v2di
22592 = build_function_type_list (V4SI_type_node,
22593 V4SI_type_node,
22594 V4SI_type_node,
22595 V2DI_type_node,
22596 NULL_TREE);
22598 tree v8hi_ftype_v8hi_v8hi_v8hi
22599 = build_function_type_list (V8HI_type_node,
22600 V8HI_type_node,
22601 V8HI_type_node,
22602 V8HI_type_node,
22603 NULL_TREE);
22605 tree v8hi_ftype_v8hi_v8hi_v4si
22606 = build_function_type_list (V8HI_type_node,
22607 V8HI_type_node,
22608 V8HI_type_node,
22609 V4SI_type_node,
22610 NULL_TREE);
22612 tree v2df_ftype_v2df_v2df_v16qi
22613 = build_function_type_list (V2DF_type_node,
22614 V2DF_type_node,
22615 V2DF_type_node,
22616 V16QI_type_node,
22617 NULL_TREE);
22619 tree v4sf_ftype_v4sf_v4sf_v16qi
22620 = build_function_type_list (V4SF_type_node,
22621 V4SF_type_node,
22622 V4SF_type_node,
22623 V16QI_type_node,
22624 NULL_TREE);
22626 tree v2di_ftype_v2di_si
22627 = build_function_type_list (V2DI_type_node,
22628 V2DI_type_node,
22629 integer_type_node,
22630 NULL_TREE);
22632 tree v4si_ftype_v4si_si
22633 = build_function_type_list (V4SI_type_node,
22634 V4SI_type_node,
22635 integer_type_node,
22636 NULL_TREE);
22638 tree v8hi_ftype_v8hi_si
22639 = build_function_type_list (V8HI_type_node,
22640 V8HI_type_node,
22641 integer_type_node,
22642 NULL_TREE);
22644 tree v16qi_ftype_v16qi_si
22645 = build_function_type_list (V16QI_type_node,
22646 V16QI_type_node,
22647 integer_type_node,
22648 NULL_TREE);
22649 tree v4sf_ftype_v4hi
22650 = build_function_type_list (V4SF_type_node,
22651 V4HI_type_node,
22652 NULL_TREE);
22654 tree v4hi_ftype_v4sf
22655 = build_function_type_list (V4HI_type_node,
22656 V4SF_type_node,
22657 NULL_TREE);
22659 tree v2di_ftype_v2di
22660 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
22662 tree v16qi_ftype_v8hi_v8hi
22663 = build_function_type_list (V16QI_type_node,
22664 V8HI_type_node, V8HI_type_node,
22665 NULL_TREE);
22666 tree v8hi_ftype_v4si_v4si
22667 = build_function_type_list (V8HI_type_node,
22668 V4SI_type_node, V4SI_type_node,
22669 NULL_TREE);
22670 tree v8hi_ftype_v16qi_v16qi
22671 = build_function_type_list (V8HI_type_node,
22672 V16QI_type_node, V16QI_type_node,
22673 NULL_TREE);
22674 tree v4hi_ftype_v8qi_v8qi
22675 = build_function_type_list (V4HI_type_node,
22676 V8QI_type_node, V8QI_type_node,
22677 NULL_TREE);
22678 tree unsigned_ftype_unsigned_uchar
22679 = build_function_type_list (unsigned_type_node,
22680 unsigned_type_node,
22681 unsigned_char_type_node,
22682 NULL_TREE);
22683 tree unsigned_ftype_unsigned_ushort
22684 = build_function_type_list (unsigned_type_node,
22685 unsigned_type_node,
22686 short_unsigned_type_node,
22687 NULL_TREE);
22688 tree unsigned_ftype_unsigned_unsigned
22689 = build_function_type_list (unsigned_type_node,
22690 unsigned_type_node,
22691 unsigned_type_node,
22692 NULL_TREE);
22693 tree uint64_ftype_uint64_uint64
22694 = build_function_type_list (long_long_unsigned_type_node,
22695 long_long_unsigned_type_node,
22696 long_long_unsigned_type_node,
22697 NULL_TREE);
22698 tree float_ftype_float
22699 = build_function_type_list (float_type_node,
22700 float_type_node,
22701 NULL_TREE);
22703 /* AVX builtins.  */
22704 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
22705 V32QImode);
22706 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
22707 V8SImode);
22708 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
22709 V8SFmode);
22710 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
22711 V4DImode);
22712 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
22713 V4DFmode);
22714 tree v8sf_ftype_v8sf
22715 = build_function_type_list (V8SF_type_node,
22716 V8SF_type_node,
22717 NULL_TREE);
22718 tree v8si_ftype_v8sf
22719 = build_function_type_list (V8SI_type_node,
22720 V8SF_type_node,
22721 NULL_TREE);
22722 tree v8sf_ftype_v8si
22723 = build_function_type_list (V8SF_type_node,
22724 V8SI_type_node,
22725 NULL_TREE);
22726 tree v4si_ftype_v4df
22727 = build_function_type_list (V4SI_type_node,
22728 V4DF_type_node,
22729 NULL_TREE);
22730 tree v4df_ftype_v4df
22731 = build_function_type_list (V4DF_type_node,
22732 V4DF_type_node,
22733 NULL_TREE);
22734 tree v4df_ftype_v4si
22735 = build_function_type_list (V4DF_type_node,
22736 V4SI_type_node,
22737 NULL_TREE);
22738 tree v4df_ftype_v4sf
22739 = build_function_type_list (V4DF_type_node,
22740 V4SF_type_node,
22741 NULL_TREE);
22742 tree v4sf_ftype_v4df
22743 = build_function_type_list (V4SF_type_node,
22744 V4DF_type_node,
22745 NULL_TREE);
22746 tree v8sf_ftype_v8sf_v8sf
22747 = build_function_type_list (V8SF_type_node,
22748 V8SF_type_node, V8SF_type_node,
22749 NULL_TREE);
22750 tree v4df_ftype_v4df_v4df
22751 = build_function_type_list (V4DF_type_node,
22752 V4DF_type_node, V4DF_type_node,
22753 NULL_TREE);
22754 tree v8sf_ftype_v8sf_int
22755 = build_function_type_list (V8SF_type_node,
22756 V8SF_type_node, integer_type_node,
22757 NULL_TREE);
22758 tree v4si_ftype_v8si_int
22759 = build_function_type_list (V4SI_type_node,
22760 V8SI_type_node, integer_type_node,
22761 NULL_TREE);
22762 tree v4df_ftype_v4df_int
22763 = build_function_type_list (V4DF_type_node,
22764 V4DF_type_node, integer_type_node,
22765 NULL_TREE);
22766 tree v4sf_ftype_v8sf_int
22767 = build_function_type_list (V4SF_type_node,
22768 V8SF_type_node, integer_type_node,
22769 NULL_TREE);
22770 tree v2df_ftype_v4df_int
22771 = build_function_type_list (V2DF_type_node,
22772 V4DF_type_node, integer_type_node,
22773 NULL_TREE);
22774 tree v8sf_ftype_v8sf_v8sf_int
22775 = build_function_type_list (V8SF_type_node,
22776 V8SF_type_node, V8SF_type_node,
22777 integer_type_node,
22778 NULL_TREE);
22779 tree v8sf_ftype_v8sf_v8sf_v8sf
22780 = build_function_type_list (V8SF_type_node,
22781 V8SF_type_node, V8SF_type_node,
22782 V8SF_type_node,
22783 NULL_TREE);
22784 tree v4df_ftype_v4df_v4df_v4df
22785 = build_function_type_list (V4DF_type_node,
22786 V4DF_type_node, V4DF_type_node,
22787 V4DF_type_node,
22788 NULL_TREE);
22789 tree v8si_ftype_v8si_v8si_int
22790 = build_function_type_list (V8SI_type_node,
22791 V8SI_type_node, V8SI_type_node,
22792 integer_type_node,
22793 NULL_TREE);
22794 tree v4df_ftype_v4df_v4df_int
22795 = build_function_type_list (V4DF_type_node,
22796 V4DF_type_node, V4DF_type_node,
22797 integer_type_node,
22798 NULL_TREE);
22799 tree v8sf_ftype_pcfloat
22800 = build_function_type_list (V8SF_type_node,
22801 pcfloat_type_node,
22802 NULL_TREE);
22803 tree v4df_ftype_pcdouble
22804 = build_function_type_list (V4DF_type_node,
22805 pcdouble_type_node,
22806 NULL_TREE);
22807 tree pcv4sf_type_node
22808 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
22809 tree pcv2df_type_node
22810 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
22811 tree v8sf_ftype_pcv4sf
22812 = build_function_type_list (V8SF_type_node,
22813 pcv4sf_type_node,
22814 NULL_TREE);
22815 tree v4df_ftype_pcv2df
22816 = build_function_type_list (V4DF_type_node,
22817 pcv2df_type_node,
22818 NULL_TREE);
22819 tree v32qi_ftype_pcchar
22820 = build_function_type_list (V32QI_type_node,
22821 pcchar_type_node,
22822 NULL_TREE);
22823 tree void_ftype_pchar_v32qi
22824 = build_function_type_list (void_type_node,
22825 pchar_type_node, V32QI_type_node,
22826 NULL_TREE);
22827 tree v8si_ftype_v8si_v4si_int
22828 = build_function_type_list (V8SI_type_node,
22829 V8SI_type_node, V4SI_type_node,
22830 integer_type_node,
22831 NULL_TREE);
22832 tree pv4di_type_node = build_pointer_type (V4DI_type_node);
22833 tree void_ftype_pv4di_v4di
22834 = build_function_type_list (void_type_node,
22835 pv4di_type_node, V4DI_type_node,
22836 NULL_TREE);
22837 tree v8sf_ftype_v8sf_v4sf_int
22838 = build_function_type_list (V8SF_type_node,
22839 V8SF_type_node, V4SF_type_node,
22840 integer_type_node,
22841 NULL_TREE);
22842 tree v4df_ftype_v4df_v2df_int
22843 = build_function_type_list (V4DF_type_node,
22844 V4DF_type_node, V2DF_type_node,
22845 integer_type_node,
22846 NULL_TREE);
22847 tree void_ftype_pfloat_v8sf
22848 = build_function_type_list (void_type_node,
22849 pfloat_type_node, V8SF_type_node,
22850 NULL_TREE);
22851 tree void_ftype_pdouble_v4df
22852 = build_function_type_list (void_type_node,
22853 pdouble_type_node, V4DF_type_node,
22854 NULL_TREE);
22855 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
22856 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
22857 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
22858 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
22859 tree pcv8sf_type_node
22860 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
22861 tree pcv4df_type_node
22862 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
22863 tree v8sf_ftype_pcv8sf_v8sf
22864 = build_function_type_list (V8SF_type_node,
22865 pcv8sf_type_node, V8SF_type_node,
22866 NULL_TREE);
22867 tree v4df_ftype_pcv4df_v4df
22868 = build_function_type_list (V4DF_type_node,
22869 pcv4df_type_node, V4DF_type_node,
22870 NULL_TREE);
22871 tree v4sf_ftype_pcv4sf_v4sf
22872 = build_function_type_list (V4SF_type_node,
22873 pcv4sf_type_node, V4SF_type_node,
22874 NULL_TREE);
22875 tree v2df_ftype_pcv2df_v2df
22876 = build_function_type_list (V2DF_type_node,
22877 pcv2df_type_node, V2DF_type_node,
22878 NULL_TREE);
22879 tree void_ftype_pv8sf_v8sf_v8sf
22880 = build_function_type_list (void_type_node,
22881 pv8sf_type_node, V8SF_type_node,
22882 V8SF_type_node,
22883 NULL_TREE);
22884 tree void_ftype_pv4df_v4df_v4df
22885 = build_function_type_list (void_type_node,
22886 pv4df_type_node, V4DF_type_node,
22887 V4DF_type_node,
22888 NULL_TREE);
22889 tree void_ftype_pv4sf_v4sf_v4sf
22890 = build_function_type_list (void_type_node,
22891 pv4sf_type_node, V4SF_type_node,
22892 V4SF_type_node,
22893 NULL_TREE);
22894 tree void_ftype_pv2df_v2df_v2df
22895 = build_function_type_list (void_type_node,
22896 pv2df_type_node, V2DF_type_node,
22897 V2DF_type_node,
22898 NULL_TREE);
22899 tree v4df_ftype_v2df
22900 = build_function_type_list (V4DF_type_node,
22901 V2DF_type_node,
22902 NULL_TREE);
22903 tree v8sf_ftype_v4sf
22904 = build_function_type_list (V8SF_type_node,
22905 V4SF_type_node,
22906 NULL_TREE);
22907 tree v8si_ftype_v4si
22908 = build_function_type_list (V8SI_type_node,
22909 V4SI_type_node,
22910 NULL_TREE);
22911 tree v2df_ftype_v4df
22912 = build_function_type_list (V2DF_type_node,
22913 V4DF_type_node,
22914 NULL_TREE);
22915 tree v4sf_ftype_v8sf
22916 = build_function_type_list (V4SF_type_node,
22917 V8SF_type_node,
22918 NULL_TREE);
22919 tree v4si_ftype_v8si
22920 = build_function_type_list (V4SI_type_node,
22921 V8SI_type_node,
22922 NULL_TREE);
22923 tree int_ftype_v4df
22924 = build_function_type_list (integer_type_node,
22925 V4DF_type_node,
22926 NULL_TREE);
22927 tree int_ftype_v8sf
22928 = build_function_type_list (integer_type_node,
22929 V8SF_type_node,
22930 NULL_TREE);
22931 tree int_ftype_v8sf_v8sf
22932 = build_function_type_list (integer_type_node,
22933 V8SF_type_node, V8SF_type_node,
22934 NULL_TREE);
22935 tree int_ftype_v4di_v4di
22936 = build_function_type_list (integer_type_node,
22937 V4DI_type_node, V4DI_type_node,
22938 NULL_TREE);
22939 tree int_ftype_v4df_v4df
22940 = build_function_type_list (integer_type_node,
22941 V4DF_type_node, V4DF_type_node,
22942 NULL_TREE);
22943 tree v8sf_ftype_v8sf_v8si
22944 = build_function_type_list (V8SF_type_node,
22945 V8SF_type_node, V8SI_type_node,
22946 NULL_TREE);
22947 tree v4df_ftype_v4df_v4di
22948 = build_function_type_list (V4DF_type_node,
22949 V4DF_type_node, V4DI_type_node,
22950 NULL_TREE);
22951 tree v4sf_ftype_v4sf_v4si
22952 = build_function_type_list (V4SF_type_node,
22953 V4SF_type_node, V4SI_type_node, NULL_TREE);
22954 tree v2df_ftype_v2df_v2di
22955 = build_function_type_list (V2DF_type_node,
22956 V2DF_type_node, V2DI_type_node, NULL_TREE);
22958 tree ftype;
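/* The loops below walk the builtin description tables and map each
   entry's flag field, an ix86_(special_)builtin_type enumerator, onto
   one of the local type trees declared above.  As a sketch of the
   flow (the entry described here is hypothetical; the real tables,
   bdesc_special_args and bdesc_args, are defined earlier in this
   file): an entry whose name is "__builtin_ia32_example" and whose
   flag is V4SF_FTYPE_V4SF reaches the V4SF_FTYPE_V4SF case, selects
   v4sf_ftype_v4sf, and is registered with
   def_builtin (d->mask, d->name, type, d->code).  */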
22960 /* Add all special builtins with a variable number of operands. */
22961 for (i = 0, d = bdesc_special_args;
22962 i < ARRAY_SIZE (bdesc_special_args);
22963 i++, d++)
22964 {
22965 tree type;
22967 if (d->name == 0)
22968 continue;
22970 switch ((enum ix86_special_builtin_type) d->flag)
22971 {
22972 case VOID_FTYPE_VOID:
22973 type = void_ftype_void;
22974 break;
22975 case V32QI_FTYPE_PCCHAR:
22976 type = v32qi_ftype_pcchar;
22977 break;
22978 case V16QI_FTYPE_PCCHAR:
22979 type = v16qi_ftype_pcchar;
22980 break;
22981 case V8SF_FTYPE_PCV4SF:
22982 type = v8sf_ftype_pcv4sf;
22983 break;
22984 case V8SF_FTYPE_PCFLOAT:
22985 type = v8sf_ftype_pcfloat;
22986 break;
22987 case V4DF_FTYPE_PCV2DF:
22988 type = v4df_ftype_pcv2df;
22989 break;
22990 case V4DF_FTYPE_PCDOUBLE:
22991 type = v4df_ftype_pcdouble;
22992 break;
22993 case V4SF_FTYPE_PCFLOAT:
22994 type = v4sf_ftype_pcfloat;
22995 break;
22996 case V2DI_FTYPE_PV2DI:
22997 type = v2di_ftype_pv2di;
22998 break;
22999 case V2DF_FTYPE_PCDOUBLE:
23000 type = v2df_ftype_pcdouble;
23001 break;
23002 case V8SF_FTYPE_PCV8SF_V8SF:
23003 type = v8sf_ftype_pcv8sf_v8sf;
23004 break;
23005 case V4DF_FTYPE_PCV4DF_V4DF:
23006 type = v4df_ftype_pcv4df_v4df;
23007 break;
23008 case V4SF_FTYPE_V4SF_PCV2SF:
23009 type = v4sf_ftype_v4sf_pcv2sf;
23010 break;
23011 case V4SF_FTYPE_PCV4SF_V4SF:
23012 type = v4sf_ftype_pcv4sf_v4sf;
23013 break;
23014 case V2DF_FTYPE_V2DF_PCDOUBLE:
23015 type = v2df_ftype_v2df_pcdouble;
23016 break;
23017 case V2DF_FTYPE_PCV2DF_V2DF:
23018 type = v2df_ftype_pcv2df_v2df;
23019 break;
23020 case VOID_FTYPE_PV2SF_V4SF:
23021 type = void_ftype_pv2sf_v4sf;
23022 break;
23023 case VOID_FTYPE_PV4DI_V4DI:
23024 type = void_ftype_pv4di_v4di;
23025 break;
23026 case VOID_FTYPE_PV2DI_V2DI:
23027 type = void_ftype_pv2di_v2di;
23028 break;
23029 case VOID_FTYPE_PCHAR_V32QI:
23030 type = void_ftype_pchar_v32qi;
23031 break;
23032 case VOID_FTYPE_PCHAR_V16QI:
23033 type = void_ftype_pchar_v16qi;
23034 break;
23035 case VOID_FTYPE_PFLOAT_V8SF:
23036 type = void_ftype_pfloat_v8sf;
23037 break;
23038 case VOID_FTYPE_PFLOAT_V4SF:
23039 type = void_ftype_pfloat_v4sf;
23040 break;
23041 case VOID_FTYPE_PDOUBLE_V4DF:
23042 type = void_ftype_pdouble_v4df;
23043 break;
23044 case VOID_FTYPE_PDOUBLE_V2DF:
23045 type = void_ftype_pdouble_v2df;
23046 break;
23047 case VOID_FTYPE_PDI_DI:
23048 type = void_ftype_pdi_di;
23049 break;
23050 case VOID_FTYPE_PINT_INT:
23051 type = void_ftype_pint_int;
23052 break;
23053 case VOID_FTYPE_PV8SF_V8SF_V8SF:
23054 type = void_ftype_pv8sf_v8sf_v8sf;
23055 break;
23056 case VOID_FTYPE_PV4DF_V4DF_V4DF:
23057 type = void_ftype_pv4df_v4df_v4df;
23058 break;
23059 case VOID_FTYPE_PV4SF_V4SF_V4SF:
23060 type = void_ftype_pv4sf_v4sf_v4sf;
23061 break;
23062 case VOID_FTYPE_PV2DF_V2DF_V2DF:
23063 type = void_ftype_pv2df_v2df_v2df;
23064 break;
23065 default:
23066 gcc_unreachable ();
23067 }
23069 def_builtin (d->mask, d->name, type, d->code);
23070 }
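/* Once registered, each of these builtins is directly callable from
   user code.  A minimal sketch for a V4SF_FTYPE_PCFLOAT entry such as
   __builtin_ia32_loadups (assuming that entry is present and enabled
   in bdesc_special_args):

     typedef float v4sf __attribute__ ((vector_size (16)));

     v4sf
     load_unaligned (const float *p)
     {
       return __builtin_ia32_loadups (p);
     }
*/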
23072 /* Add all builtins with a variable number of operands. */
23073 for (i = 0, d = bdesc_args;
23074 i < ARRAY_SIZE (bdesc_args);
23075 i++, d++)
23076 {
23077 tree type;
23079 if (d->name == 0)
23080 continue;
23082 switch ((enum ix86_builtin_type) d->flag)
23083 {
23084 case FLOAT_FTYPE_FLOAT:
23085 type = float_ftype_float;
23086 break;
23087 case INT_FTYPE_V8SF_V8SF_PTEST:
23088 type = int_ftype_v8sf_v8sf;
23089 break;
23090 case INT_FTYPE_V4DI_V4DI_PTEST:
23091 type = int_ftype_v4di_v4di;
23092 break;
23093 case INT_FTYPE_V4DF_V4DF_PTEST:
23094 type = int_ftype_v4df_v4df;
23095 break;
23096 case INT_FTYPE_V4SF_V4SF_PTEST:
23097 type = int_ftype_v4sf_v4sf;
23098 break;
23099 case INT_FTYPE_V2DI_V2DI_PTEST:
23100 type = int_ftype_v2di_v2di;
23101 break;
23102 case INT_FTYPE_V2DF_V2DF_PTEST:
23103 type = int_ftype_v2df_v2df;
23104 break;
23105 case INT64_FTYPE_V4SF:
23106 type = int64_ftype_v4sf;
23107 break;
23108 case INT64_FTYPE_V2DF:
23109 type = int64_ftype_v2df;
23110 break;
23111 case INT_FTYPE_V16QI:
23112 type = int_ftype_v16qi;
23113 break;
23114 case INT_FTYPE_V8QI:
23115 type = int_ftype_v8qi;
23116 break;
23117 case INT_FTYPE_V8SF:
23118 type = int_ftype_v8sf;
23119 break;
23120 case INT_FTYPE_V4DF:
23121 type = int_ftype_v4df;
23122 break;
23123 case INT_FTYPE_V4SF:
23124 type = int_ftype_v4sf;
23125 break;
23126 case INT_FTYPE_V2DF:
23127 type = int_ftype_v2df;
23128 break;
23129 case V16QI_FTYPE_V16QI:
23130 type = v16qi_ftype_v16qi;
23131 break;
23132 case V8SI_FTYPE_V8SF:
23133 type = v8si_ftype_v8sf;
23134 break;
23135 case V8SI_FTYPE_V4SI:
23136 type = v8si_ftype_v4si;
23137 break;
23138 case V8HI_FTYPE_V8HI:
23139 type = v8hi_ftype_v8hi;
23140 break;
23141 case V8HI_FTYPE_V16QI:
23142 type = v8hi_ftype_v16qi;
23143 break;
23144 case V8QI_FTYPE_V8QI:
23145 type = v8qi_ftype_v8qi;
23146 break;
23147 case V8SF_FTYPE_V8SF:
23148 type = v8sf_ftype_v8sf;
23149 break;
23150 case V8SF_FTYPE_V8SI:
23151 type = v8sf_ftype_v8si;
23152 break;
23153 case V8SF_FTYPE_V4SF:
23154 type = v8sf_ftype_v4sf;
23155 break;
23156 case V4SI_FTYPE_V4DF:
23157 type = v4si_ftype_v4df;
23158 break;
23159 case V4SI_FTYPE_V4SI:
23160 type = v4si_ftype_v4si;
23161 break;
23162 case V4SI_FTYPE_V16QI:
23163 type = v4si_ftype_v16qi;
23164 break;
23165 case V4SI_FTYPE_V8SI:
23166 type = v4si_ftype_v8si;
23167 break;
23168 case V4SI_FTYPE_V8HI:
23169 type = v4si_ftype_v8hi;
23170 break;
23171 case V4SI_FTYPE_V4SF:
23172 type = v4si_ftype_v4sf;
23173 break;
23174 case V4SI_FTYPE_V2DF:
23175 type = v4si_ftype_v2df;
23176 break;
23177 case V4HI_FTYPE_V4HI:
23178 type = v4hi_ftype_v4hi;
23179 break;
23180 case V4DF_FTYPE_V4DF:
23181 type = v4df_ftype_v4df;
23182 break;
23183 case V4DF_FTYPE_V4SI:
23184 type = v4df_ftype_v4si;
23185 break;
23186 case V4DF_FTYPE_V4SF:
23187 type = v4df_ftype_v4sf;
23188 break;
23189 case V4DF_FTYPE_V2DF:
23190 type = v4df_ftype_v2df;
23191 break;
23192 case V4SF_FTYPE_V4SF:
23193 case V4SF_FTYPE_V4SF_VEC_MERGE:
23194 type = v4sf_ftype_v4sf;
23195 break;
23196 case V4SF_FTYPE_V8SF:
23197 type = v4sf_ftype_v8sf;
23198 break;
23199 case V4SF_FTYPE_V4SI:
23200 type = v4sf_ftype_v4si;
23201 break;
23202 case V4SF_FTYPE_V4DF:
23203 type = v4sf_ftype_v4df;
23204 break;
23205 case V4SF_FTYPE_V2DF:
23206 type = v4sf_ftype_v2df;
23207 break;
23208 case V2DI_FTYPE_V2DI:
23209 type = v2di_ftype_v2di;
23210 break;
23211 case V2DI_FTYPE_V16QI:
23212 type = v2di_ftype_v16qi;
23213 break;
23214 case V2DI_FTYPE_V8HI:
23215 type = v2di_ftype_v8hi;
23216 break;
23217 case V2DI_FTYPE_V4SI:
23218 type = v2di_ftype_v4si;
23219 break;
23220 case V2SI_FTYPE_V2SI:
23221 type = v2si_ftype_v2si;
23222 break;
23223 case V2SI_FTYPE_V4SF:
23224 type = v2si_ftype_v4sf;
23225 break;
23226 case V2SI_FTYPE_V2DF:
23227 type = v2si_ftype_v2df;
23228 break;
23229 case V2SI_FTYPE_V2SF:
23230 type = v2si_ftype_v2sf;
23231 break;
23232 case V2DF_FTYPE_V4DF:
23233 type = v2df_ftype_v4df;
23234 break;
23235 case V2DF_FTYPE_V4SF:
23236 type = v2df_ftype_v4sf;
23237 break;
23238 case V2DF_FTYPE_V2DF:
23239 case V2DF_FTYPE_V2DF_VEC_MERGE:
23240 type = v2df_ftype_v2df;
23241 break;
23242 case V2DF_FTYPE_V2SI:
23243 type = v2df_ftype_v2si;
23244 break;
23245 case V2DF_FTYPE_V4SI:
23246 type = v2df_ftype_v4si;
23247 break;
23248 case V2SF_FTYPE_V2SF:
23249 type = v2sf_ftype_v2sf;
23250 break;
23251 case V2SF_FTYPE_V2SI:
23252 type = v2sf_ftype_v2si;
23253 break;
23254 case V16QI_FTYPE_V16QI_V16QI:
23255 type = v16qi_ftype_v16qi_v16qi;
23256 break;
23257 case V16QI_FTYPE_V8HI_V8HI:
23258 type = v16qi_ftype_v8hi_v8hi;
23259 break;
23260 case V8QI_FTYPE_V8QI_V8QI:
23261 type = v8qi_ftype_v8qi_v8qi;
23262 break;
23263 case V8QI_FTYPE_V4HI_V4HI:
23264 type = v8qi_ftype_v4hi_v4hi;
23265 break;
23266 case V8HI_FTYPE_V8HI_V8HI:
23267 case V8HI_FTYPE_V8HI_V8HI_COUNT:
23268 type = v8hi_ftype_v8hi_v8hi;
23269 break;
23270 case V8HI_FTYPE_V16QI_V16QI:
23271 type = v8hi_ftype_v16qi_v16qi;
23272 break;
23273 case V8HI_FTYPE_V4SI_V4SI:
23274 type = v8hi_ftype_v4si_v4si;
23275 break;
23276 case V8HI_FTYPE_V8HI_SI_COUNT:
23277 type = v8hi_ftype_v8hi_int;
23278 break;
23279 case V8SF_FTYPE_V8SF_V8SF:
23280 type = v8sf_ftype_v8sf_v8sf;
23281 break;
23282 case V8SF_FTYPE_V8SF_V8SI:
23283 type = v8sf_ftype_v8sf_v8si;
23284 break;
23285 case V4SI_FTYPE_V4SI_V4SI:
23286 case V4SI_FTYPE_V4SI_V4SI_COUNT:
23287 type = v4si_ftype_v4si_v4si;
23288 break;
23289 case V4SI_FTYPE_V8HI_V8HI:
23290 type = v4si_ftype_v8hi_v8hi;
23291 break;
23292 case V4SI_FTYPE_V4SF_V4SF:
23293 type = v4si_ftype_v4sf_v4sf;
23294 break;
23295 case V4SI_FTYPE_V2DF_V2DF:
23296 type = v4si_ftype_v2df_v2df;
23297 break;
23298 case V4SI_FTYPE_V4SI_SI_COUNT:
23299 type = v4si_ftype_v4si_int;
23300 break;
23301 case V4HI_FTYPE_V4HI_V4HI:
23302 case V4HI_FTYPE_V4HI_V4HI_COUNT:
23303 type = v4hi_ftype_v4hi_v4hi;
23304 break;
23305 case V4HI_FTYPE_V8QI_V8QI:
23306 type = v4hi_ftype_v8qi_v8qi;
23307 break;
23308 case V4HI_FTYPE_V2SI_V2SI:
23309 type = v4hi_ftype_v2si_v2si;
23310 break;
23311 case V4HI_FTYPE_V4HI_SI_COUNT:
23312 type = v4hi_ftype_v4hi_int;
23313 break;
23314 case V4DF_FTYPE_V4DF_V4DF:
23315 type = v4df_ftype_v4df_v4df;
23316 break;
23317 case V4DF_FTYPE_V4DF_V4DI:
23318 type = v4df_ftype_v4df_v4di;
23319 break;
23320 case V4SF_FTYPE_V4SF_V4SF:
23321 case V4SF_FTYPE_V4SF_V4SF_SWAP:
23322 type = v4sf_ftype_v4sf_v4sf;
23323 break;
23324 case V4SF_FTYPE_V4SF_V4SI:
23325 type = v4sf_ftype_v4sf_v4si;
23326 break;
23327 case V4SF_FTYPE_V4SF_V2SI:
23328 type = v4sf_ftype_v4sf_v2si;
23329 break;
23330 case V4SF_FTYPE_V4SF_V2DF:
23331 type = v4sf_ftype_v4sf_v2df;
23332 break;
23333 case V4SF_FTYPE_V4SF_DI:
23334 type = v4sf_ftype_v4sf_int64;
23335 break;
23336 case V4SF_FTYPE_V4SF_SI:
23337 type = v4sf_ftype_v4sf_int;
23338 break;
23339 case V2DI_FTYPE_V2DI_V2DI:
23340 case V2DI_FTYPE_V2DI_V2DI_COUNT:
23341 type = v2di_ftype_v2di_v2di;
23342 break;
23343 case V2DI_FTYPE_V16QI_V16QI:
23344 type = v2di_ftype_v16qi_v16qi;
23345 break;
23346 case V2DI_FTYPE_V4SI_V4SI:
23347 type = v2di_ftype_v4si_v4si;
23348 break;
23349 case V2DI_FTYPE_V2DI_V16QI:
23350 type = v2di_ftype_v2di_v16qi;
23351 break;
23352 case V2DI_FTYPE_V2DF_V2DF:
23353 type = v2di_ftype_v2df_v2df;
23354 break;
23355 case V2DI_FTYPE_V2DI_SI_COUNT:
23356 type = v2di_ftype_v2di_int;
23357 break;
23358 case V2SI_FTYPE_V2SI_V2SI:
23359 case V2SI_FTYPE_V2SI_V2SI_COUNT:
23360 type = v2si_ftype_v2si_v2si;
23361 break;
23362 case V2SI_FTYPE_V4HI_V4HI:
23363 type = v2si_ftype_v4hi_v4hi;
23364 break;
23365 case V2SI_FTYPE_V2SF_V2SF:
23366 type = v2si_ftype_v2sf_v2sf;
23367 break;
23368 case V2SI_FTYPE_V2SI_SI_COUNT:
23369 type = v2si_ftype_v2si_int;
23370 break;
23371 case V2DF_FTYPE_V2DF_V2DF:
23372 case V2DF_FTYPE_V2DF_V2DF_SWAP:
23373 type = v2df_ftype_v2df_v2df;
23374 break;
23375 case V2DF_FTYPE_V2DF_V4SF:
23376 type = v2df_ftype_v2df_v4sf;
23377 break;
23378 case V2DF_FTYPE_V2DF_V2DI:
23379 type = v2df_ftype_v2df_v2di;
23380 break;
23381 case V2DF_FTYPE_V2DF_DI:
23382 type = v2df_ftype_v2df_int64;
23383 break;
23384 case V2DF_FTYPE_V2DF_SI:
23385 type = v2df_ftype_v2df_int;
23386 break;
23387 case V2SF_FTYPE_V2SF_V2SF:
23388 type = v2sf_ftype_v2sf_v2sf;
23389 break;
23390 case V1DI_FTYPE_V1DI_V1DI:
23391 case V1DI_FTYPE_V1DI_V1DI_COUNT:
23392 type = v1di_ftype_v1di_v1di;
23393 break;
23394 case V1DI_FTYPE_V8QI_V8QI:
23395 type = v1di_ftype_v8qi_v8qi;
23396 break;
23397 case V1DI_FTYPE_V2SI_V2SI:
23398 type = v1di_ftype_v2si_v2si;
23399 break;
23400 case V1DI_FTYPE_V1DI_SI_COUNT:
23401 type = v1di_ftype_v1di_int;
23402 break;
23403 case UINT64_FTYPE_UINT64_UINT64:
23404 type = uint64_ftype_uint64_uint64;
23405 break;
23406 case UINT_FTYPE_UINT_UINT:
23407 type = unsigned_ftype_unsigned_unsigned;
23408 break;
23409 case UINT_FTYPE_UINT_USHORT:
23410 type = unsigned_ftype_unsigned_ushort;
23411 break;
23412 case UINT_FTYPE_UINT_UCHAR:
23413 type = unsigned_ftype_unsigned_uchar;
23414 break;
23415 case V8HI_FTYPE_V8HI_INT:
23416 type = v8hi_ftype_v8hi_int;
23417 break;
23418 case V8SF_FTYPE_V8SF_INT:
23419 type = v8sf_ftype_v8sf_int;
23420 break;
23421 case V4SI_FTYPE_V4SI_INT:
23422 type = v4si_ftype_v4si_int;
23423 break;
23424 case V4SI_FTYPE_V8SI_INT:
23425 type = v4si_ftype_v8si_int;
23426 break;
23427 case V4HI_FTYPE_V4HI_INT:
23428 type = v4hi_ftype_v4hi_int;
23429 break;
23430 case V4DF_FTYPE_V4DF_INT:
23431 type = v4df_ftype_v4df_int;
23432 break;
23433 case V4SF_FTYPE_V4SF_INT:
23434 type = v4sf_ftype_v4sf_int;
23435 break;
23436 case V4SF_FTYPE_V8SF_INT:
23437 type = v4sf_ftype_v8sf_int;
23438 break;
23439 case V2DI_FTYPE_V2DI_INT:
23440 case V2DI2TI_FTYPE_V2DI_INT:
23441 type = v2di_ftype_v2di_int;
23442 break;
23443 case V2DF_FTYPE_V2DF_INT:
23444 type = v2df_ftype_v2df_int;
23445 break;
23446 case V2DF_FTYPE_V4DF_INT:
23447 type = v2df_ftype_v4df_int;
23448 break;
23449 case V16QI_FTYPE_V16QI_V16QI_V16QI:
23450 type = v16qi_ftype_v16qi_v16qi_v16qi;
23451 break;
23452 case V8SF_FTYPE_V8SF_V8SF_V8SF:
23453 type = v8sf_ftype_v8sf_v8sf_v8sf;
23454 break;
23455 case V4DF_FTYPE_V4DF_V4DF_V4DF:
23456 type = v4df_ftype_v4df_v4df_v4df;
23457 break;
23458 case V4SF_FTYPE_V4SF_V4SF_V4SF:
23459 type = v4sf_ftype_v4sf_v4sf_v4sf;
23460 break;
23461 case V2DF_FTYPE_V2DF_V2DF_V2DF:
23462 type = v2df_ftype_v2df_v2df_v2df;
23463 break;
23464 case V16QI_FTYPE_V16QI_V16QI_INT:
23465 type = v16qi_ftype_v16qi_v16qi_int;
23466 break;
23467 case V8SI_FTYPE_V8SI_V8SI_INT:
23468 type = v8si_ftype_v8si_v8si_int;
23469 break;
23470 case V8SI_FTYPE_V8SI_V4SI_INT:
23471 type = v8si_ftype_v8si_v4si_int;
23472 break;
23473 case V8HI_FTYPE_V8HI_V8HI_INT:
23474 type = v8hi_ftype_v8hi_v8hi_int;
23475 break;
23476 case V8SF_FTYPE_V8SF_V8SF_INT:
23477 type = v8sf_ftype_v8sf_v8sf_int;
23478 break;
23479 case V8SF_FTYPE_V8SF_V4SF_INT:
23480 type = v8sf_ftype_v8sf_v4sf_int;
23481 break;
23482 case V4SI_FTYPE_V4SI_V4SI_INT:
23483 type = v4si_ftype_v4si_v4si_int;
23484 break;
23485 case V4DF_FTYPE_V4DF_V4DF_INT:
23486 type = v4df_ftype_v4df_v4df_int;
23487 break;
23488 case V4DF_FTYPE_V4DF_V2DF_INT:
23489 type = v4df_ftype_v4df_v2df_int;
23490 break;
23491 case V4SF_FTYPE_V4SF_V4SF_INT:
23492 type = v4sf_ftype_v4sf_v4sf_int;
23493 break;
23494 case V2DI_FTYPE_V2DI_V2DI_INT:
23495 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23496 type = v2di_ftype_v2di_v2di_int;
23497 break;
23498 case V2DF_FTYPE_V2DF_V2DF_INT:
23499 type = v2df_ftype_v2df_v2df_int;
23500 break;
23501 case V2DI_FTYPE_V2DI_UINT_UINT:
23502 type = v2di_ftype_v2di_unsigned_unsigned;
23503 break;
23504 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23505 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
23506 break;
23507 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23508 type = v1di_ftype_v1di_v1di_int;
23509 break;
23510 default:
23511 gcc_unreachable ();
23512 }
23514 def_builtin_const (d->mask, d->name, type, d->code);
23515 }
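/* Entries in bdesc_args are registered with def_builtin_const, so
   calls to them can be CSEd and hoisted like any other const
   function.  A minimal sketch for a V4SF_FTYPE_V4SF_V4SF entry such
   as __builtin_ia32_addps (assuming that entry is enabled):

     typedef float v4sf __attribute__ ((vector_size (16)));

     v4sf
     add4 (v4sf a, v4sf b)
     {
       return __builtin_ia32_addps (a, b);
     }
*/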
23517 /* pcmpestr[im] insns. */
23518 for (i = 0, d = bdesc_pcmpestr;
23519 i < ARRAY_SIZE (bdesc_pcmpestr);
23520 i++, d++)
23521 {
23522 if (d->code == IX86_BUILTIN_PCMPESTRM128)
23523 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
23524 else
23525 ftype = int_ftype_v16qi_int_v16qi_int_int;
23526 def_builtin_const (d->mask, d->name, ftype, d->code);
23527 }
23529 /* pcmpistr[im] insns. */
23530 for (i = 0, d = bdesc_pcmpistr;
23531 i < ARRAY_SIZE (bdesc_pcmpistr);
23532 i++, d++)
23533 {
23534 if (d->code == IX86_BUILTIN_PCMPISTRM128)
23535 ftype = v16qi_ftype_v16qi_v16qi_int;
23536 else
23537 ftype = int_ftype_v16qi_v16qi_int;
23538 def_builtin_const (d->mask, d->name, ftype, d->code);
23539 }
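/* The pcmpestr/pcmpistr builtins share argument lists and differ only
   in the result: the *M128 variants return the v16qi match mask, the
   rest return the match index.  A sketch using the index form
   (assuming SSE4.2; the final operand must be an immediate):

     typedef char v16qi __attribute__ ((vector_size (16)));

     int
     first_match (v16qi a, v16qi b)
     {
       return __builtin_ia32_pcmpistri128 (a, b, 0);
     }
*/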
23541 /* comi/ucomi insns. */
23542 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23543 if (d->mask == OPTION_MASK_ISA_SSE2)
23544 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
23545 else
23546 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
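/* The comi/ucomi builtins compare the lowest element of two vectors
   and return the comparison result as an int, e.g. (assuming SSE):

     typedef float v4sf __attribute__ ((vector_size (16)));

     int
     lower_lt (v4sf a, v4sf b)
     {
       return __builtin_ia32_comilt (a, b);
     }
*/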
23548 /* SSE */
23549 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
23550 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
23552 /* SSE or 3DNow!A */
23553 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
23555 /* SSE2 */
23556 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
23558 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
23559 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
23561 /* SSE3. */
23562 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
23563 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
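/* __builtin_ia32_stmxcsr and __builtin_ia32_ldmxcsr read and write
   the SSE control/status register.  A sketch that sets flush-to-zero
   (bit 15 of MXCSR; the helper name is ours, not GCC's):

     void
     enable_ftz (void)
     {
       unsigned int mxcsr = __builtin_ia32_stmxcsr ();
       __builtin_ia32_ldmxcsr (mxcsr | 0x8000);
     }
*/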
23565 /* AES */
23566 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
23567 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
23568 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
23569 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
23570 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
23571 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
23573 /* PCLMUL */
23574 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
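/* The AES and PCLMUL builtins all take v2di operands; one AES round,
   for example, is a single call (assuming -maes):

     typedef long long v2di __attribute__ ((vector_size (16)));

     v2di
     aes_round (v2di state, v2di roundkey)
     {
       return __builtin_ia32_aesenc128 (state, roundkey);
     }
*/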
23576 /* AVX */
23577 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
23578 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
23580 /* Access to the vec_init patterns. */
23581 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
23582 integer_type_node, NULL_TREE);
23583 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
23585 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
23586 short_integer_type_node,
23587 short_integer_type_node,
23588 short_integer_type_node, NULL_TREE);
23589 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
23591 ftype = build_function_type_list (V8QI_type_node, char_type_node,
23592 char_type_node, char_type_node,
23593 char_type_node, char_type_node,
23594 char_type_node, char_type_node,
23595 char_type_node, NULL_TREE);
23596 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
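/* The vec_init builtins build an MMX vector from scalars, e.g.
   (assuming -mmmx):

     typedef int v2si __attribute__ ((vector_size (8)));

     v2si
     make_v2si (int lo, int hi)
     {
       return __builtin_ia32_vec_init_v2si (lo, hi);
     }
*/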
23598 /* Access to the vec_extract patterns. */
23599 ftype = build_function_type_list (double_type_node, V2DF_type_node,
23600 integer_type_node, NULL_TREE);
23601 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
23603 ftype = build_function_type_list (long_long_integer_type_node,
23604 V2DI_type_node, integer_type_node,
23605 NULL_TREE);
23606 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
23608 ftype = build_function_type_list (float_type_node, V4SF_type_node,
23609 integer_type_node, NULL_TREE);
23610 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
23612 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
23613 integer_type_node, NULL_TREE);
23614 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
23616 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
23617 integer_type_node, NULL_TREE);
23618 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
23620 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
23621 integer_type_node, NULL_TREE);
23622 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
23624 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
23625 integer_type_node, NULL_TREE);
23626 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
23628 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
23629 integer_type_node, NULL_TREE);
23630 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
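/* The vec_ext builtins extract a single element; the element number
   must be an integer constant, e.g. (assuming SSE):

     typedef float v4sf __attribute__ ((vector_size (16)));

     float
     first_elem (v4sf v)
     {
       return __builtin_ia32_vec_ext_v4sf (v, 0);
     }
*/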
23632 /* Access to the vec_set patterns. */
23633 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
23634 intDI_type_node,
23635 integer_type_node, NULL_TREE);
23636 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
23638 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
23639 float_type_node,
23640 integer_type_node, NULL_TREE);
23641 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
23643 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
23644 intSI_type_node,
23645 integer_type_node, NULL_TREE);
23646 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
23648 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
23649 intHI_type_node,
23650 integer_type_node, NULL_TREE);
23651 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
23653 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
23654 intHI_type_node,
23655 integer_type_node, NULL_TREE);
23656 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
23658 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
23659 intQI_type_node,
23660 integer_type_node, NULL_TREE);
23661 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
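/* The vec_set builtins return a copy of a vector with one element
   replaced, again with a constant element number, e.g. (assuming
   SSE2):

     typedef short v8hi __attribute__ ((vector_size (16)));

     v8hi
     set_elem0 (v8hi v, short s)
     {
       return __builtin_ia32_vec_set_v8hi (v, s, 0);
     }
*/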
23663 /* Add the SSE5 multi-argument instructions. */
23664 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23665 {
23666 tree mtype = NULL_TREE;
23668 if (d->name == 0)
23669 continue;
23671 switch ((enum multi_arg_type) d->flag)
23672 {
23673 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
23674 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
23675 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
23676 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
23677 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
23678 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
23679 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
23680 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
23681 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
23682 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
23683 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
23684 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
23685 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
23686 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
23687 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
23688 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
23689 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
23690 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
23691 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
23692 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
23693 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
23694 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
23695 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
23696 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
23697 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
23698 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
23699 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
23700 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
23701 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
23702 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
23703 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
23704 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
23705 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
23706 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
23707 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
23708 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
23709 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
23710 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
23711 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
23712 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
23713 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
23714 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
23715 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
23716 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
23717 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
23718 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
23719 case MULTI_ARG_UNKNOWN:
23720 default:
23721 gcc_unreachable ();
23722 }
23724 if (mtype)
23725 def_builtin_const (d->mask, d->name, mtype, d->code);
23726 }
23727 }
23729 /* Internal method for ix86_init_builtins. */
23731 static void
23732 ix86_init_builtins_va_builtins_abi (void)
23733 {
23734 tree ms_va_ref, sysv_va_ref;
23735 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
23736 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
23737 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
23738 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
23740 if (!TARGET_64BIT)
23741 return;
23742 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
23743 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
23744 ms_va_ref = build_reference_type (ms_va_list_type_node);
23745 sysv_va_ref =
23746 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
23748 fnvoid_va_end_ms =
23749 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23750 fnvoid_va_start_ms =
23751 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23752 fnvoid_va_end_sysv =
23753 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
23754 fnvoid_va_start_sysv =
23755 build_varargs_function_type_list (void_type_node, sysv_va_ref,
23756 NULL_TREE);
23757 fnvoid_va_copy_ms =
23758 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
23759 NULL_TREE);
23760 fnvoid_va_copy_sysv =
23761 build_function_type_list (void_type_node, sysv_va_ref,
23762 sysv_va_ref, NULL_TREE);
23764 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
23765 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
23766 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
23767 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
23768 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
23769 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
23770 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
23771 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23772 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
23773 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23774 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
23775 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23776 }
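/* With these registered, 64-bit code can walk the variadic arguments
   of a function that uses the other ABI.  A sketch (assuming the
   __builtin_ms_va_list type name is registered elsewhere in this
   file):

     int first_arg (int n, ...) __attribute__ ((ms_abi));

     int
     first_arg (int n, ...)
     {
       __builtin_ms_va_list ap;
       int v;
       __builtin_ms_va_start (ap, n);
       v = __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return v;
     }
*/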
23778 static void
23779 ix86_init_builtins (void)
23780 {
23781 tree float128_type_node = make_node (REAL_TYPE);
23782 tree ftype, decl;
23784 /* The __float80 type. */
23785 if (TYPE_MODE (long_double_type_node) == XFmode)
23786 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
23787 "__float80");
23788 else
23789 {
23790 /* The __float80 type. */
23791 tree float80_type_node = make_node (REAL_TYPE);
23793 TYPE_PRECISION (float80_type_node) = 80;
23794 layout_type (float80_type_node);
23795 (*lang_hooks.types.register_builtin_type) (float80_type_node,
23796 "__float80");
23799 /* The __float128 type. */
23800 TYPE_PRECISION (float128_type_node) = 128;
23801 layout_type (float128_type_node);
23802 (*lang_hooks.types.register_builtin_type) (float128_type_node,
23803 "__float128");
23805 /* TFmode support builtins. */
23806 ftype = build_function_type (float128_type_node, void_list_node);
23807 decl = add_builtin_function ("__builtin_infq", ftype,
23808 IX86_BUILTIN_INFQ, BUILT_IN_MD,
23809 NULL, NULL_TREE);
23810 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
23812 decl = add_builtin_function ("__builtin_huge_valq", ftype,
23813 IX86_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
23814 NULL, NULL_TREE);
23815 ix86_builtins[(int) IX86_BUILTIN_HUGE_VALQ] = decl;
23817 /* We will expand them to normal calls if SSE2 isn't available,
23818 since they are used by libgcc. */
23819 ftype = build_function_type_list (float128_type_node,
23820 float128_type_node,
23821 NULL_TREE);
23822 decl = add_builtin_function ("__builtin_fabsq", ftype,
23823 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
23824 "__fabstf2", NULL_TREE);
23825 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
23826 TREE_READONLY (decl) = 1;
23828 ftype = build_function_type_list (float128_type_node,
23829 float128_type_node,
23830 float128_type_node,
23831 NULL_TREE);
23832 decl = add_builtin_function ("__builtin_copysignq", ftype,
23833 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
23834 "__copysigntf3", NULL_TREE);
23835 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
23836 TREE_READONLY (decl) = 1;
23838 ix86_init_mmx_sse_builtins ();
23839 if (TARGET_64BIT)
23840 ix86_init_builtins_va_builtins_abi ();
23841 }
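/* The TFmode builtins registered above back __float128 support; user
   code can call them directly, e.g.:

     __float128
     magnitude (__float128 x)
     {
       return __builtin_fabsq (x);
     }

   Without SSE2 the call expands to a normal call to __fabstf2 in
   libgcc, via the assembler name passed to add_builtin_function.  */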
23843 /* Errors in the source file can cause expand_expr to return const0_rtx
23844 where we expect a vector. To avoid crashing, use one of the vector
23845 clear instructions. */
23846 static rtx
23847 safe_vector_operand (rtx x, enum machine_mode mode)
23848 {
23849 if (x == const0_rtx)
23850 x = CONST0_RTX (mode);
23851 return x;
23852 }
23854 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
23856 static rtx
23857 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
23858 {
23859 rtx pat;
23860 tree arg0 = CALL_EXPR_ARG (exp, 0);
23861 tree arg1 = CALL_EXPR_ARG (exp, 1);
23862 rtx op0 = expand_normal (arg0);
23863 rtx op1 = expand_normal (arg1);
23864 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23865 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23866 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
23868 if (VECTOR_MODE_P (mode0))
23869 op0 = safe_vector_operand (op0, mode0);
23870 if (VECTOR_MODE_P (mode1))
23871 op1 = safe_vector_operand (op1, mode1);
23873 if (optimize || !target
23874 || GET_MODE (target) != tmode
23875 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23876 target = gen_reg_rtx (tmode);
23878 if (GET_MODE (op1) == SImode && mode1 == TImode)
23879 {
23880 rtx x = gen_reg_rtx (V4SImode);
23881 emit_insn (gen_sse2_loadd (x, op1));
23882 op1 = gen_lowpart (TImode, x);
23883 }
23885 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
23886 op0 = copy_to_mode_reg (mode0, op0);
23887 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
23888 op1 = copy_to_mode_reg (mode1, op1);
23890 pat = GEN_FCN (icode) (target, op0, op1);
23891 if (! pat)
23892 return 0;
23894 emit_insn (pat);
23896 return target;
23897 }
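/* A typical caller pairs an insn pattern with this routine and lets
   it legitimize the operands, e.g. (a sketch; the real dispatch
   happens in ix86_expand_builtin):

     return ix86_expand_binop_builtin (CODE_FOR_addv4sf3, exp, target);

   The SImode-to-TImode fixup above loads a 32-bit operand into a V4SI
   register and takes its TImode lowpart so that it can satisfy a
   TImode operand predicate.  */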
23899 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
23901 static rtx
23902 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
23903 enum multi_arg_type m_type,
23904 enum rtx_code sub_code)
23905 {
23906 rtx pat;
23907 int i;
23908 int nargs;
23909 bool comparison_p = false;
23910 bool tf_p = false;
23911 bool last_arg_constant = false;
23912 int num_memory = 0;
23913 struct {
23914 rtx op;
23915 enum machine_mode mode;
23916 } args[4];
23918 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23920 switch (m_type)
23922 case MULTI_ARG_3_SF:
23923 case MULTI_ARG_3_DF:
23924 case MULTI_ARG_3_DI:
23925 case MULTI_ARG_3_SI:
23926 case MULTI_ARG_3_SI_DI:
23927 case MULTI_ARG_3_HI:
23928 case MULTI_ARG_3_HI_SI:
23929 case MULTI_ARG_3_QI:
23930 case MULTI_ARG_3_PERMPS:
23931 case MULTI_ARG_3_PERMPD:
23932 nargs = 3;
23933 break;
23935 case MULTI_ARG_2_SF:
23936 case MULTI_ARG_2_DF:
23937 case MULTI_ARG_2_DI:
23938 case MULTI_ARG_2_SI:
23939 case MULTI_ARG_2_HI:
23940 case MULTI_ARG_2_QI:
23941 nargs = 2;
23942 break;
23944 case MULTI_ARG_2_DI_IMM:
23945 case MULTI_ARG_2_SI_IMM:
23946 case MULTI_ARG_2_HI_IMM:
23947 case MULTI_ARG_2_QI_IMM:
23948 nargs = 2;
23949 last_arg_constant = true;
23950 break;
23952 case MULTI_ARG_1_SF:
23953 case MULTI_ARG_1_DF:
23954 case MULTI_ARG_1_DI:
23955 case MULTI_ARG_1_SI:
23956 case MULTI_ARG_1_HI:
23957 case MULTI_ARG_1_QI:
23958 case MULTI_ARG_1_SI_DI:
23959 case MULTI_ARG_1_HI_DI:
23960 case MULTI_ARG_1_HI_SI:
23961 case MULTI_ARG_1_QI_DI:
23962 case MULTI_ARG_1_QI_SI:
23963 case MULTI_ARG_1_QI_HI:
23964 case MULTI_ARG_1_PH2PS:
23965 case MULTI_ARG_1_PS2PH:
23966 nargs = 1;
23967 break;
23969 case MULTI_ARG_2_SF_CMP:
23970 case MULTI_ARG_2_DF_CMP:
23971 case MULTI_ARG_2_DI_CMP:
23972 case MULTI_ARG_2_SI_CMP:
23973 case MULTI_ARG_2_HI_CMP:
23974 case MULTI_ARG_2_QI_CMP:
23975 nargs = 2;
23976 comparison_p = true;
23977 break;
23979 case MULTI_ARG_2_SF_TF:
23980 case MULTI_ARG_2_DF_TF:
23981 case MULTI_ARG_2_DI_TF:
23982 case MULTI_ARG_2_SI_TF:
23983 case MULTI_ARG_2_HI_TF:
23984 case MULTI_ARG_2_QI_TF:
23985 nargs = 2;
23986 tf_p = true;
23987 break;
23989 case MULTI_ARG_UNKNOWN:
23990 default:
23991 gcc_unreachable ();
23994 if (optimize || !target
23995 || GET_MODE (target) != tmode
23996 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23997 target = gen_reg_rtx (tmode);
23999 gcc_assert (nargs <= 4);
24001 for (i = 0; i < nargs; i++)
24003 tree arg = CALL_EXPR_ARG (exp, i);
24004 rtx op = expand_normal (arg);
24005 int adjust = (comparison_p) ? 1 : 0;
24006 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
24008 if (last_arg_constant && i == nargs-1)
24010 if (GET_CODE (op) != CONST_INT)
24012 error ("last argument must be an immediate");
24013 return gen_reg_rtx (tmode);
24016 else
24018 if (VECTOR_MODE_P (mode))
24019 op = safe_vector_operand (op, mode);
24021 /* If we aren't optimizing, only allow one memory operand to be
24022 generated. */
24023 if (memory_operand (op, mode))
24024 num_memory++;
24026 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
24028 if (optimize
24029 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
24030 || num_memory > 1)
24031 op = force_reg (mode, op);
24034 args[i].op = op;
24035 args[i].mode = mode;
24038 switch (nargs)
24040 case 1:
24041 pat = GEN_FCN (icode) (target, args[0].op);
24042 break;
24044 case 2:
24045 if (tf_p)
24046 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
24047 GEN_INT ((int)sub_code));
24048 else if (! comparison_p)
24049 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24050 else
24052 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
24053 args[0].op,
24054 args[1].op);
24056 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
24058 break;
24060 case 3:
24061 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24062 break;
24064 default:
24065 gcc_unreachable ();
24068 if (! pat)
24069 return 0;
24071 emit_insn (pat);
24072 return target;
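/* Shape sketch (derived from the code above): for a two-operand
   comparison form such as MULTI_ARG_2_SI_CMP with sub_code EQ, the
   comparison rtx itself is materialized first,

     cmp_op = gen_rtx_fmt_ee (EQ, GET_MODE (target), op0, op1);

   and passed to the pattern alongside the raw operands, so the insn
   sees both the predicate and its inputs.  */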
24075 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
24076 insns with vec_merge. */
24078 static rtx
24079 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
24080 rtx target)
24082 rtx pat;
24083 tree arg0 = CALL_EXPR_ARG (exp, 0);
24084 rtx op1, op0 = expand_normal (arg0);
24085 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24086 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24088 if (optimize || !target
24089 || GET_MODE (target) != tmode
24090 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24091 target = gen_reg_rtx (tmode);
24093 if (VECTOR_MODE_P (mode0))
24094 op0 = safe_vector_operand (op0, mode0);
24096 if ((optimize && !register_operand (op0, mode0))
24097 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24098 op0 = copy_to_mode_reg (mode0, op0);
24100 op1 = op0;
24101 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
24102 op1 = copy_to_mode_reg (mode0, op1);
24104 pat = GEN_FCN (icode) (target, op0, op1);
24105 if (! pat)
24106 return 0;
24107 emit_insn (pat);
24108 return target;
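/* Usage sketch (assumed example): scalar unops such as
   __builtin_ia32_sqrtss are expanded here.  OP1 is a copy of OP0, so
   the vec_merge pattern computes element 0 and passes the remaining
   elements of the source vector through unchanged:

     v = __builtin_ia32_sqrtss (v);   sqrt in lane 0, lanes 1-3 kept.  */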
24111 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
24113 static rtx
24114 ix86_expand_sse_compare (const struct builtin_description *d,
24115 tree exp, rtx target, bool swap)
24117 rtx pat;
24118 tree arg0 = CALL_EXPR_ARG (exp, 0);
24119 tree arg1 = CALL_EXPR_ARG (exp, 1);
24120 rtx op0 = expand_normal (arg0);
24121 rtx op1 = expand_normal (arg1);
24122 rtx op2;
24123 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
24124 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
24125 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
24126 enum rtx_code comparison = d->comparison;
24128 if (VECTOR_MODE_P (mode0))
24129 op0 = safe_vector_operand (op0, mode0);
24130 if (VECTOR_MODE_P (mode1))
24131 op1 = safe_vector_operand (op1, mode1);
24133 /* Swap operands if we have a comparison that isn't available in
24134 hardware. */
24135 if (swap)
24137 rtx tmp = gen_reg_rtx (mode1);
24138 emit_move_insn (tmp, op1);
24139 op1 = op0;
24140 op0 = tmp;
24143 if (optimize || !target
24144 || GET_MODE (target) != tmode
24145 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
24146 target = gen_reg_rtx (tmode);
24148 if ((optimize && !register_operand (op0, mode0))
24149 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
24150 op0 = copy_to_mode_reg (mode0, op0);
24151 if ((optimize && !register_operand (op1, mode1))
24152 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
24153 op1 = copy_to_mode_reg (mode1, op1);
24155 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
24156 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
24157 if (! pat)
24158 return 0;
24159 emit_insn (pat);
24160 return target;
24163 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
24165 static rtx
24166 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
24167 rtx target)
24169 rtx pat;
24170 tree arg0 = CALL_EXPR_ARG (exp, 0);
24171 tree arg1 = CALL_EXPR_ARG (exp, 1);
24172 rtx op0 = expand_normal (arg0);
24173 rtx op1 = expand_normal (arg1);
24174 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24175 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24176 enum rtx_code comparison = d->comparison;
24178 if (VECTOR_MODE_P (mode0))
24179 op0 = safe_vector_operand (op0, mode0);
24180 if (VECTOR_MODE_P (mode1))
24181 op1 = safe_vector_operand (op1, mode1);
24183 /* Swap operands if we have a comparison that isn't available in
24184 hardware. */
24185 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
24187 rtx tmp = op1;
24188 op1 = op0;
24189 op0 = tmp;
24192 target = gen_reg_rtx (SImode);
24193 emit_move_insn (target, const0_rtx);
24194 target = gen_rtx_SUBREG (QImode, target, 0);
24196 if ((optimize && !register_operand (op0, mode0))
24197 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24198 op0 = copy_to_mode_reg (mode0, op0);
24199 if ((optimize && !register_operand (op1, mode1))
24200 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24201 op1 = copy_to_mode_reg (mode1, op1);
24203 pat = GEN_FCN (d->icode) (op0, op1);
24204 if (! pat)
24205 return 0;
24206 emit_insn (pat);
24207 emit_insn (gen_rtx_SET (VOIDmode,
24208 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24209 gen_rtx_fmt_ee (comparison, QImode,
24210 SET_DEST (pat),
24211 const0_rtx)));
24213 return SUBREG_REG (target);
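/* Usage sketch (assumed example): a comi builtin such as
   __builtin_ia32_comieq (behind _mm_comieq_ss) lands here.  The
   comparison insn only sets the flags, so the code above zeroes an
   SImode register and writes the flag test into its QImode low part
   via STRICT_LOW_PART; conceptually:

     target = 0;
     low_byte (target) = comparison_holds (flags) ? 1 : 0;  */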
24216 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
24218 static rtx
24219 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24220 rtx target)
24222 rtx pat;
24223 tree arg0 = CALL_EXPR_ARG (exp, 0);
24224 tree arg1 = CALL_EXPR_ARG (exp, 1);
24225 rtx op0 = expand_normal (arg0);
24226 rtx op1 = expand_normal (arg1);
24227 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24228 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24229 enum rtx_code comparison = d->comparison;
24231 if (VECTOR_MODE_P (mode0))
24232 op0 = safe_vector_operand (op0, mode0);
24233 if (VECTOR_MODE_P (mode1))
24234 op1 = safe_vector_operand (op1, mode1);
24236 target = gen_reg_rtx (SImode);
24237 emit_move_insn (target, const0_rtx);
24238 target = gen_rtx_SUBREG (QImode, target, 0);
24240 if ((optimize && !register_operand (op0, mode0))
24241 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24242 op0 = copy_to_mode_reg (mode0, op0);
24243 if ((optimize && !register_operand (op1, mode1))
24244 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24245 op1 = copy_to_mode_reg (mode1, op1);
24247 pat = GEN_FCN (d->icode) (op0, op1);
24248 if (! pat)
24249 return 0;
24250 emit_insn (pat);
24251 emit_insn (gen_rtx_SET (VOIDmode,
24252 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24253 gen_rtx_fmt_ee (comparison, QImode,
24254 SET_DEST (pat),
24255 const0_rtx)));
24257 return SUBREG_REG (target);
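/* Usage sketch (assumed example): __builtin_ia32_ptestz128 (behind
   _mm_testz_si128) is routed here with d->comparison == EQ, so

     int z = _mm_testz_si128 (a, b);

   yields 1 exactly when the ptest insn sets ZF, i.e. (a & b) == 0.  */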
24260 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
24262 static rtx
24263 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24264 tree exp, rtx target)
24266 rtx pat;
24267 tree arg0 = CALL_EXPR_ARG (exp, 0);
24268 tree arg1 = CALL_EXPR_ARG (exp, 1);
24269 tree arg2 = CALL_EXPR_ARG (exp, 2);
24270 tree arg3 = CALL_EXPR_ARG (exp, 3);
24271 tree arg4 = CALL_EXPR_ARG (exp, 4);
24272 rtx scratch0, scratch1;
24273 rtx op0 = expand_normal (arg0);
24274 rtx op1 = expand_normal (arg1);
24275 rtx op2 = expand_normal (arg2);
24276 rtx op3 = expand_normal (arg3);
24277 rtx op4 = expand_normal (arg4);
24278 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24280 tmode0 = insn_data[d->icode].operand[0].mode;
24281 tmode1 = insn_data[d->icode].operand[1].mode;
24282 modev2 = insn_data[d->icode].operand[2].mode;
24283 modei3 = insn_data[d->icode].operand[3].mode;
24284 modev4 = insn_data[d->icode].operand[4].mode;
24285 modei5 = insn_data[d->icode].operand[5].mode;
24286 modeimm = insn_data[d->icode].operand[6].mode;
24288 if (VECTOR_MODE_P (modev2))
24289 op0 = safe_vector_operand (op0, modev2);
24290 if (VECTOR_MODE_P (modev4))
24291 op2 = safe_vector_operand (op2, modev4);
24293 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24294 op0 = copy_to_mode_reg (modev2, op0);
24295 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
24296 op1 = copy_to_mode_reg (modei3, op1);
24297 if ((optimize && !register_operand (op2, modev4))
24298 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
24299 op2 = copy_to_mode_reg (modev4, op2);
24300 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
24301 op3 = copy_to_mode_reg (modei5, op3);
24303 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
24305 error ("the fifth argument must be an 8-bit immediate");
24306 return const0_rtx;
24309 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24311 if (optimize || !target
24312 || GET_MODE (target) != tmode0
24313 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24314 target = gen_reg_rtx (tmode0);
24316 scratch1 = gen_reg_rtx (tmode1);
24318 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
24320 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24322 if (optimize || !target
24323 || GET_MODE (target) != tmode1
24324 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24325 target = gen_reg_rtx (tmode1);
24327 scratch0 = gen_reg_rtx (tmode0);
24329 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
24331 else
24333 gcc_assert (d->flag);
24335 scratch0 = gen_reg_rtx (tmode0);
24336 scratch1 = gen_reg_rtx (tmode1);
24338 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
24341 if (! pat)
24342 return 0;
24344 emit_insn (pat);
24346 if (d->flag)
24348 target = gen_reg_rtx (SImode);
24349 emit_move_insn (target, const0_rtx);
24350 target = gen_rtx_SUBREG (QImode, target, 0);
24352 emit_insn
24353 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24354 gen_rtx_fmt_ee (EQ, QImode,
24355 gen_rtx_REG ((enum machine_mode) d->flag,
24356 FLAGS_REG),
24357 const0_rtx)));
24358 return SUBREG_REG (target);
24360 else
24361 return target;
24365 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
24367 static rtx
24368 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24369 tree exp, rtx target)
24371 rtx pat;
24372 tree arg0 = CALL_EXPR_ARG (exp, 0);
24373 tree arg1 = CALL_EXPR_ARG (exp, 1);
24374 tree arg2 = CALL_EXPR_ARG (exp, 2);
24375 rtx scratch0, scratch1;
24376 rtx op0 = expand_normal (arg0);
24377 rtx op1 = expand_normal (arg1);
24378 rtx op2 = expand_normal (arg2);
24379 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24381 tmode0 = insn_data[d->icode].operand[0].mode;
24382 tmode1 = insn_data[d->icode].operand[1].mode;
24383 modev2 = insn_data[d->icode].operand[2].mode;
24384 modev3 = insn_data[d->icode].operand[3].mode;
24385 modeimm = insn_data[d->icode].operand[4].mode;
24387 if (VECTOR_MODE_P (modev2))
24388 op0 = safe_vector_operand (op0, modev2);
24389 if (VECTOR_MODE_P (modev3))
24390 op1 = safe_vector_operand (op1, modev3);
24392 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24393 op0 = copy_to_mode_reg (modev2, op0);
24394 if ((optimize && !register_operand (op1, modev3))
24395 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
24396 op1 = copy_to_mode_reg (modev3, op1);
24398 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
24400 error ("the third argument must be an 8-bit immediate");
24401 return const0_rtx;
24404 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24406 if (optimize || !target
24407 || GET_MODE (target) != tmode0
24408 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24409 target = gen_reg_rtx (tmode0);
24411 scratch1 = gen_reg_rtx (tmode1);
24413 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
24415 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24417 if (optimize || !target
24418 || GET_MODE (target) != tmode1
24419 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24420 target = gen_reg_rtx (tmode1);
24422 scratch0 = gen_reg_rtx (tmode0);
24424 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
24426 else
24428 gcc_assert (d->flag);
24430 scratch0 = gen_reg_rtx (tmode0);
24431 scratch1 = gen_reg_rtx (tmode1);
24433 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
24436 if (! pat)
24437 return 0;
24439 emit_insn (pat);
24441 if (d->flag)
24443 target = gen_reg_rtx (SImode);
24444 emit_move_insn (target, const0_rtx);
24445 target = gen_rtx_SUBREG (QImode, target, 0);
24447 emit_insn
24448 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24449 gen_rtx_fmt_ee (EQ, QImode,
24450 gen_rtx_REG ((enum machine_mode) d->flag,
24451 FLAGS_REG),
24452 const0_rtx)));
24453 return SUBREG_REG (target);
24455 else
24456 return target;
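/* Usage sketch (assumed example): _mm_cmpistri (a, b, 0x0c) arrives
   as __builtin_ia32_pcmpistri128 and takes the first branch; the
   _mm_cmpistra/c/o/s/z flavours take the final branch, where d->flag
   names the flags-register mode whose EQ test the code above
   materializes as the QImode result.  */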
24459 /* Subroutine of ix86_expand_builtin to take care of insns with
24460 variable number of operands. */
24462 static rtx
24463 ix86_expand_args_builtin (const struct builtin_description *d,
24464 tree exp, rtx target)
24466 rtx pat, real_target;
24467 unsigned int i, nargs;
24468 unsigned int nargs_constant = 0;
24469 int num_memory = 0;
24470 struct
24472 rtx op;
24473 enum machine_mode mode;
24474 } args[4];
24475 bool last_arg_count = false;
24476 enum insn_code icode = d->icode;
24477 const struct insn_data *insn_p = &insn_data[icode];
24478 enum machine_mode tmode = insn_p->operand[0].mode;
24479 enum machine_mode rmode = VOIDmode;
24480 bool swap = false;
24481 enum rtx_code comparison = d->comparison;
24483 switch ((enum ix86_builtin_type) d->flag)
24485 case INT_FTYPE_V8SF_V8SF_PTEST:
24486 case INT_FTYPE_V4DI_V4DI_PTEST:
24487 case INT_FTYPE_V4DF_V4DF_PTEST:
24488 case INT_FTYPE_V4SF_V4SF_PTEST:
24489 case INT_FTYPE_V2DI_V2DI_PTEST:
24490 case INT_FTYPE_V2DF_V2DF_PTEST:
24491 return ix86_expand_sse_ptest (d, exp, target);
24492 case FLOAT128_FTYPE_FLOAT128:
24493 case FLOAT_FTYPE_FLOAT:
24494 case INT64_FTYPE_V4SF:
24495 case INT64_FTYPE_V2DF:
24496 case INT_FTYPE_V16QI:
24497 case INT_FTYPE_V8QI:
24498 case INT_FTYPE_V8SF:
24499 case INT_FTYPE_V4DF:
24500 case INT_FTYPE_V4SF:
24501 case INT_FTYPE_V2DF:
24502 case V16QI_FTYPE_V16QI:
24503 case V8SI_FTYPE_V8SF:
24504 case V8SI_FTYPE_V4SI:
24505 case V8HI_FTYPE_V8HI:
24506 case V8HI_FTYPE_V16QI:
24507 case V8QI_FTYPE_V8QI:
24508 case V8SF_FTYPE_V8SF:
24509 case V8SF_FTYPE_V8SI:
24510 case V8SF_FTYPE_V4SF:
24511 case V4SI_FTYPE_V4SI:
24512 case V4SI_FTYPE_V16QI:
24513 case V4SI_FTYPE_V4SF:
24514 case V4SI_FTYPE_V8SI:
24515 case V4SI_FTYPE_V8HI:
24516 case V4SI_FTYPE_V4DF:
24517 case V4SI_FTYPE_V2DF:
24518 case V4HI_FTYPE_V4HI:
24519 case V4DF_FTYPE_V4DF:
24520 case V4DF_FTYPE_V4SI:
24521 case V4DF_FTYPE_V4SF:
24522 case V4DF_FTYPE_V2DF:
24523 case V4SF_FTYPE_V4SF:
24524 case V4SF_FTYPE_V4SI:
24525 case V4SF_FTYPE_V8SF:
24526 case V4SF_FTYPE_V4DF:
24527 case V4SF_FTYPE_V2DF:
24528 case V2DI_FTYPE_V2DI:
24529 case V2DI_FTYPE_V16QI:
24530 case V2DI_FTYPE_V8HI:
24531 case V2DI_FTYPE_V4SI:
24532 case V2DF_FTYPE_V2DF:
24533 case V2DF_FTYPE_V4SI:
24534 case V2DF_FTYPE_V4DF:
24535 case V2DF_FTYPE_V4SF:
24536 case V2DF_FTYPE_V2SI:
24537 case V2SI_FTYPE_V2SI:
24538 case V2SI_FTYPE_V4SF:
24539 case V2SI_FTYPE_V2SF:
24540 case V2SI_FTYPE_V2DF:
24541 case V2SF_FTYPE_V2SF:
24542 case V2SF_FTYPE_V2SI:
24543 nargs = 1;
24544 break;
24545 case V4SF_FTYPE_V4SF_VEC_MERGE:
24546 case V2DF_FTYPE_V2DF_VEC_MERGE:
24547 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
24548 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24549 case V16QI_FTYPE_V16QI_V16QI:
24550 case V16QI_FTYPE_V8HI_V8HI:
24551 case V8QI_FTYPE_V8QI_V8QI:
24552 case V8QI_FTYPE_V4HI_V4HI:
24553 case V8HI_FTYPE_V8HI_V8HI:
24554 case V8HI_FTYPE_V16QI_V16QI:
24555 case V8HI_FTYPE_V4SI_V4SI:
24556 case V8SF_FTYPE_V8SF_V8SF:
24557 case V8SF_FTYPE_V8SF_V8SI:
24558 case V4SI_FTYPE_V4SI_V4SI:
24559 case V4SI_FTYPE_V8HI_V8HI:
24560 case V4SI_FTYPE_V4SF_V4SF:
24561 case V4SI_FTYPE_V2DF_V2DF:
24562 case V4HI_FTYPE_V4HI_V4HI:
24563 case V4HI_FTYPE_V8QI_V8QI:
24564 case V4HI_FTYPE_V2SI_V2SI:
24565 case V4DF_FTYPE_V4DF_V4DF:
24566 case V4DF_FTYPE_V4DF_V4DI:
24567 case V4SF_FTYPE_V4SF_V4SF:
24568 case V4SF_FTYPE_V4SF_V4SI:
24569 case V4SF_FTYPE_V4SF_V2SI:
24570 case V4SF_FTYPE_V4SF_V2DF:
24571 case V4SF_FTYPE_V4SF_DI:
24572 case V4SF_FTYPE_V4SF_SI:
24573 case V2DI_FTYPE_V2DI_V2DI:
24574 case V2DI_FTYPE_V16QI_V16QI:
24575 case V2DI_FTYPE_V4SI_V4SI:
24576 case V2DI_FTYPE_V2DI_V16QI:
24577 case V2DI_FTYPE_V2DF_V2DF:
24578 case V2SI_FTYPE_V2SI_V2SI:
24579 case V2SI_FTYPE_V4HI_V4HI:
24580 case V2SI_FTYPE_V2SF_V2SF:
24581 case V2DF_FTYPE_V2DF_V2DF:
24582 case V2DF_FTYPE_V2DF_V4SF:
24583 case V2DF_FTYPE_V2DF_V2DI:
24584 case V2DF_FTYPE_V2DF_DI:
24585 case V2DF_FTYPE_V2DF_SI:
24586 case V2SF_FTYPE_V2SF_V2SF:
24587 case V1DI_FTYPE_V1DI_V1DI:
24588 case V1DI_FTYPE_V8QI_V8QI:
24589 case V1DI_FTYPE_V2SI_V2SI:
24590 if (comparison == UNKNOWN)
24591 return ix86_expand_binop_builtin (icode, exp, target);
24592 nargs = 2;
24593 break;
24594 case V4SF_FTYPE_V4SF_V4SF_SWAP:
24595 case V2DF_FTYPE_V2DF_V2DF_SWAP:
24596 gcc_assert (comparison != UNKNOWN);
24597 nargs = 2;
24598 swap = true;
24599 break;
24600 case V8HI_FTYPE_V8HI_V8HI_COUNT:
24601 case V8HI_FTYPE_V8HI_SI_COUNT:
24602 case V4SI_FTYPE_V4SI_V4SI_COUNT:
24603 case V4SI_FTYPE_V4SI_SI_COUNT:
24604 case V4HI_FTYPE_V4HI_V4HI_COUNT:
24605 case V4HI_FTYPE_V4HI_SI_COUNT:
24606 case V2DI_FTYPE_V2DI_V2DI_COUNT:
24607 case V2DI_FTYPE_V2DI_SI_COUNT:
24608 case V2SI_FTYPE_V2SI_V2SI_COUNT:
24609 case V2SI_FTYPE_V2SI_SI_COUNT:
24610 case V1DI_FTYPE_V1DI_V1DI_COUNT:
24611 case V1DI_FTYPE_V1DI_SI_COUNT:
24612 nargs = 2;
24613 last_arg_count = true;
24614 break;
24615 case UINT64_FTYPE_UINT64_UINT64:
24616 case UINT_FTYPE_UINT_UINT:
24617 case UINT_FTYPE_UINT_USHORT:
24618 case UINT_FTYPE_UINT_UCHAR:
24619 nargs = 2;
24620 break;
24621 case V2DI2TI_FTYPE_V2DI_INT:
24622 nargs = 2;
24623 rmode = V2DImode;
24624 nargs_constant = 1;
24625 break;
24626 case V8HI_FTYPE_V8HI_INT:
24627 case V8SF_FTYPE_V8SF_INT:
24628 case V4SI_FTYPE_V4SI_INT:
24629 case V4SI_FTYPE_V8SI_INT:
24630 case V4HI_FTYPE_V4HI_INT:
24631 case V4DF_FTYPE_V4DF_INT:
24632 case V4SF_FTYPE_V4SF_INT:
24633 case V4SF_FTYPE_V8SF_INT:
24634 case V2DI_FTYPE_V2DI_INT:
24635 case V2DF_FTYPE_V2DF_INT:
24636 case V2DF_FTYPE_V4DF_INT:
24637 nargs = 2;
24638 nargs_constant = 1;
24639 break;
24640 case V16QI_FTYPE_V16QI_V16QI_V16QI:
24641 case V8SF_FTYPE_V8SF_V8SF_V8SF:
24642 case V4DF_FTYPE_V4DF_V4DF_V4DF:
24643 case V4SF_FTYPE_V4SF_V4SF_V4SF:
24644 case V2DF_FTYPE_V2DF_V2DF_V2DF:
24645 nargs = 3;
24646 break;
24647 case V16QI_FTYPE_V16QI_V16QI_INT:
24648 case V8HI_FTYPE_V8HI_V8HI_INT:
24649 case V8SI_FTYPE_V8SI_V8SI_INT:
24650 case V8SI_FTYPE_V8SI_V4SI_INT:
24651 case V8SF_FTYPE_V8SF_V8SF_INT:
24652 case V8SF_FTYPE_V8SF_V4SF_INT:
24653 case V4SI_FTYPE_V4SI_V4SI_INT:
24654 case V4DF_FTYPE_V4DF_V4DF_INT:
24655 case V4DF_FTYPE_V4DF_V2DF_INT:
24656 case V4SF_FTYPE_V4SF_V4SF_INT:
24657 case V2DI_FTYPE_V2DI_V2DI_INT:
24658 case V2DF_FTYPE_V2DF_V2DF_INT:
24659 nargs = 3;
24660 nargs_constant = 1;
24661 break;
24662 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
24663 nargs = 3;
24664 rmode = V2DImode;
24665 nargs_constant = 1;
24666 break;
24667 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
24668 nargs = 3;
24669 rmode = DImode;
24670 nargs_constant = 1;
24671 break;
24672 case V2DI_FTYPE_V2DI_UINT_UINT:
24673 nargs = 3;
24674 nargs_constant = 2;
24675 break;
24676 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24677 nargs = 4;
24678 nargs_constant = 2;
24679 break;
24680 default:
24681 gcc_unreachable ();
24684 gcc_assert (nargs <= ARRAY_SIZE (args));
24686 if (comparison != UNKNOWN)
24688 gcc_assert (nargs == 2);
24689 return ix86_expand_sse_compare (d, exp, target, swap);
24692 if (rmode == VOIDmode || rmode == tmode)
24694 if (optimize
24695 || target == 0
24696 || GET_MODE (target) != tmode
24697 || ! (*insn_p->operand[0].predicate) (target, tmode))
24698 target = gen_reg_rtx (tmode);
24699 real_target = target;
24701 else
24703 target = gen_reg_rtx (rmode);
24704 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
24707 for (i = 0; i < nargs; i++)
24709 tree arg = CALL_EXPR_ARG (exp, i);
24710 rtx op = expand_normal (arg);
24711 enum machine_mode mode = insn_p->operand[i + 1].mode;
24712 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
24714 if (last_arg_count && (i + 1) == nargs)
24716 /* SIMD shift insns take either an 8-bit immediate or a
24717 register as the count. But builtin functions take int as
24718 the count. If the count doesn't match, we put it in a register. */
24719 if (!match)
24721 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
24722 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
24723 op = copy_to_reg (op);
24726 else if ((nargs - i) <= nargs_constant)
24728 if (!match)
24729 switch (icode)
24731 case CODE_FOR_sse4_1_roundpd:
24732 case CODE_FOR_sse4_1_roundps:
24733 case CODE_FOR_sse4_1_roundsd:
24734 case CODE_FOR_sse4_1_roundss:
24735 case CODE_FOR_sse4_1_blendps:
24736 case CODE_FOR_avx_blendpd256:
24737 case CODE_FOR_avx_vpermilv4df:
24738 case CODE_FOR_avx_roundpd256:
24739 case CODE_FOR_avx_roundps256:
24740 error ("the last argument must be a 4-bit immediate");
24741 return const0_rtx;
24743 case CODE_FOR_sse4_1_blendpd:
24744 case CODE_FOR_avx_vpermilv2df:
24745 error ("the last argument must be a 2-bit immediate");
24746 return const0_rtx;
24748 case CODE_FOR_avx_vextractf128v4df:
24749 case CODE_FOR_avx_vextractf128v8sf:
24750 case CODE_FOR_avx_vextractf128v8si:
24751 case CODE_FOR_avx_vinsertf128v4df:
24752 case CODE_FOR_avx_vinsertf128v8sf:
24753 case CODE_FOR_avx_vinsertf128v8si:
24754 error ("the last argument must be a 1-bit immediate");
24755 return const0_rtx;
24757 case CODE_FOR_avx_cmpsdv2df3:
24758 case CODE_FOR_avx_cmpssv4sf3:
24759 case CODE_FOR_avx_cmppdv2df3:
24760 case CODE_FOR_avx_cmppsv4sf3:
24761 case CODE_FOR_avx_cmppdv4df3:
24762 case CODE_FOR_avx_cmppsv8sf3:
24763 error ("the last argument must be a 5-bit immediate");
24764 return const0_rtx;
24766 default:
24767 switch (nargs_constant)
24769 case 2:
24770 if ((nargs - i) == nargs_constant)
24772 error ("the next to last argument must be an 8-bit immediate");
24773 break;
24775 case 1:
24776 error ("the last argument must be an 8-bit immediate");
24777 break;
24778 default:
24779 gcc_unreachable ();
24781 return const0_rtx;
24784 else
24786 if (VECTOR_MODE_P (mode))
24787 op = safe_vector_operand (op, mode);
24789 /* If we aren't optimizing, only allow one memory operand to
24790 be generated. */
24791 if (memory_operand (op, mode))
24792 num_memory++;
24794 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
24796 if (optimize || !match || num_memory > 1)
24797 op = copy_to_mode_reg (mode, op);
24799 else
24801 op = copy_to_reg (op);
24802 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
24806 args[i].op = op;
24807 args[i].mode = mode;
24810 switch (nargs)
24812 case 1:
24813 pat = GEN_FCN (icode) (real_target, args[0].op);
24814 break;
24815 case 2:
24816 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
24817 break;
24818 case 3:
24819 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24820 args[2].op);
24821 break;
24822 case 4:
24823 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24824 args[2].op, args[3].op);
24825 break;
24826 default:
24827 gcc_unreachable ();
24830 if (! pat)
24831 return 0;
24833 emit_insn (pat);
24834 return target;
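/* Worked example (assumed): __builtin_ia32_roundsd is registered with
   CODE_FOR_sse4_1_roundsd, so a non-constant final argument hits the
   "4-bit immediate" branch above, while

     r = _mm_round_sd (a, b, _MM_FROUND_TO_NEAREST_INT);

   passes its constant rounding mode straight through to the insn.  */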
24837 /* Subroutine of ix86_expand_builtin to take care of special insns
24838 with variable number of operands. */
24840 static rtx
24841 ix86_expand_special_args_builtin (const struct builtin_description *d,
24842 tree exp, rtx target)
24844 tree arg;
24845 rtx pat, op;
24846 unsigned int i, nargs, arg_adjust, memory;
24847 struct
24849 rtx op;
24850 enum machine_mode mode;
24851 } args[2];
24852 enum insn_code icode = d->icode;
24853 bool last_arg_constant = false;
24854 const struct insn_data *insn_p = &insn_data[icode];
24855 enum machine_mode tmode = insn_p->operand[0].mode;
24856 enum { load, store } klass;
24858 switch ((enum ix86_special_builtin_type) d->flag)
24860 case VOID_FTYPE_VOID:
24861 emit_insn (GEN_FCN (icode) (target));
24862 return 0;
24863 case V2DI_FTYPE_PV2DI:
24864 case V32QI_FTYPE_PCCHAR:
24865 case V16QI_FTYPE_PCCHAR:
24866 case V8SF_FTYPE_PCV4SF:
24867 case V8SF_FTYPE_PCFLOAT:
24868 case V4SF_FTYPE_PCFLOAT:
24869 case V4DF_FTYPE_PCV2DF:
24870 case V4DF_FTYPE_PCDOUBLE:
24871 case V2DF_FTYPE_PCDOUBLE:
24872 nargs = 1;
24873 klass = load;
24874 memory = 0;
24875 break;
24876 case VOID_FTYPE_PV2SF_V4SF:
24877 case VOID_FTYPE_PV4DI_V4DI:
24878 case VOID_FTYPE_PV2DI_V2DI:
24879 case VOID_FTYPE_PCHAR_V32QI:
24880 case VOID_FTYPE_PCHAR_V16QI:
24881 case VOID_FTYPE_PFLOAT_V8SF:
24882 case VOID_FTYPE_PFLOAT_V4SF:
24883 case VOID_FTYPE_PDOUBLE_V4DF:
24884 case VOID_FTYPE_PDOUBLE_V2DF:
24885 case VOID_FTYPE_PDI_DI:
24886 case VOID_FTYPE_PINT_INT:
24887 nargs = 1;
24888 klass = store;
24889 /* Reserve memory operand for target. */
24890 memory = ARRAY_SIZE (args);
24891 break;
24892 case V4SF_FTYPE_V4SF_PCV2SF:
24893 case V2DF_FTYPE_V2DF_PCDOUBLE:
24894 nargs = 2;
24895 klass = load;
24896 memory = 1;
24897 break;
24898 case V8SF_FTYPE_PCV8SF_V8SF:
24899 case V4DF_FTYPE_PCV4DF_V4DF:
24900 case V4SF_FTYPE_PCV4SF_V4SF:
24901 case V2DF_FTYPE_PCV2DF_V2DF:
24902 nargs = 2;
24903 klass = load;
24904 memory = 0;
24905 break;
24906 case VOID_FTYPE_PV8SF_V8SF_V8SF:
24907 case VOID_FTYPE_PV4DF_V4DF_V4DF:
24908 case VOID_FTYPE_PV4SF_V4SF_V4SF:
24909 case VOID_FTYPE_PV2DF_V2DF_V2DF:
24910 nargs = 2;
24911 klass = store;
24912 /* Reserve memory operand for target. */
24913 memory = ARRAY_SIZE (args);
24914 break;
24915 default:
24916 gcc_unreachable ();
24919 gcc_assert (nargs <= ARRAY_SIZE (args));
24921 if (klass == store)
24923 arg = CALL_EXPR_ARG (exp, 0);
24924 op = expand_normal (arg);
24925 gcc_assert (target == 0);
24926 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
24927 arg_adjust = 1;
24929 else
24931 arg_adjust = 0;
24932 if (optimize
24933 || target == 0
24934 || GET_MODE (target) != tmode
24935 || ! (*insn_p->operand[0].predicate) (target, tmode))
24936 target = gen_reg_rtx (tmode);
24939 for (i = 0; i < nargs; i++)
24941 enum machine_mode mode = insn_p->operand[i + 1].mode;
24942 bool match;
24944 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
24945 op = expand_normal (arg);
24946 match = (*insn_p->operand[i + 1].predicate) (op, mode);
24948 if (last_arg_constant && (i + 1) == nargs)
24950 if (!match)
24951 switch (icode)
24953 default:
24954 error ("the last argument must be an 8-bit immediate");
24955 return const0_rtx;
24958 else
24960 if (i == memory)
24962 /* This must be the memory operand. */
24963 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
24964 gcc_assert (GET_MODE (op) == mode
24965 || GET_MODE (op) == VOIDmode);
24967 else
24969 /* This must be a register. */
24970 if (VECTOR_MODE_P (mode))
24971 op = safe_vector_operand (op, mode);
24973 gcc_assert (GET_MODE (op) == mode
24974 || GET_MODE (op) == VOIDmode);
24975 op = copy_to_mode_reg (mode, op);
24979 args[i].op = op;
24980 args[i].mode = mode;
24983 switch (nargs)
24985 case 1:
24986 pat = GEN_FCN (icode) (target, args[0].op);
24987 break;
24988 case 2:
24989 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24990 break;
24991 default:
24992 gcc_unreachable ();
24995 if (! pat)
24996 return 0;
24997 emit_insn (pat);
24998 return klass == store ? 0 : target;
25001 /* Return the integer constant in ARG. Constrain it to be in the range
25002 of the subparts of VEC_TYPE; issue an error if not. */
25004 static int
25005 get_element_number (tree vec_type, tree arg)
25007 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
25009 if (!host_integerp (arg, 1)
25010 || (elt = tree_low_cst (arg, 1), elt > max))
25012 error ("selector must be an integer constant in the range 0..%wi", max);
25013 return 0;
25016 return elt;
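/* Range sketch (derived from the code above): for a V8HI vector
   argument TYPE_VECTOR_SUBPARTS is 8, so max is 7; a call such as
   __builtin_ia32_vec_ext_v8hi (v, 9) triggers the error above and
   falls back to element 0.  */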
25019 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25020 ix86_expand_vector_init. We DO have language-level syntax for this, in
25021 the form of (type){ init-list }. Except that since we can't place emms
25022 instructions from inside the compiler, we can't allow the use of MMX
25023 registers unless the user explicitly asks for it. So we do *not* define
25024 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25025 we have builtins invoked by mmintrin.h that give us license to emit
25026 these sorts of instructions. */
25028 static rtx
25029 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25031 enum machine_mode tmode = TYPE_MODE (type);
25032 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25033 int i, n_elt = GET_MODE_NUNITS (tmode);
25034 rtvec v = rtvec_alloc (n_elt);
25036 gcc_assert (VECTOR_MODE_P (tmode));
25037 gcc_assert (call_expr_nargs (exp) == n_elt);
25039 for (i = 0; i < n_elt; ++i)
25041 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25042 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
25045 if (!target || !register_operand (target, tmode))
25046 target = gen_reg_rtx (tmode);
25048 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
25049 return target;
25052 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25053 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25054 had a language-level syntax for referencing vector elements. */
25056 static rtx
25057 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25059 enum machine_mode tmode, mode0;
25060 tree arg0, arg1;
25061 int elt;
25062 rtx op0;
25064 arg0 = CALL_EXPR_ARG (exp, 0);
25065 arg1 = CALL_EXPR_ARG (exp, 1);
25067 op0 = expand_normal (arg0);
25068 elt = get_element_number (TREE_TYPE (arg0), arg1);
25070 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25071 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25072 gcc_assert (VECTOR_MODE_P (mode0));
25074 op0 = force_reg (mode0, op0);
25076 if (optimize || !target || !register_operand (target, tmode))
25077 target = gen_reg_rtx (tmode);
25079 ix86_expand_vector_extract (true, target, op0, elt);
25081 return target;
25084 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25085 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25086 a language-level syntax for referencing vector elements. */
25088 static rtx
25089 ix86_expand_vec_set_builtin (tree exp)
25091 enum machine_mode tmode, mode1;
25092 tree arg0, arg1, arg2;
25093 int elt;
25094 rtx op0, op1, target;
25096 arg0 = CALL_EXPR_ARG (exp, 0);
25097 arg1 = CALL_EXPR_ARG (exp, 1);
25098 arg2 = CALL_EXPR_ARG (exp, 2);
25100 tmode = TYPE_MODE (TREE_TYPE (arg0));
25101 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25102 gcc_assert (VECTOR_MODE_P (tmode));
25104 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25105 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
25106 elt = get_element_number (TREE_TYPE (arg0), arg2);
25108 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25109 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25111 op0 = force_reg (tmode, op0);
25112 op1 = force_reg (mode1, op1);
25114 /* OP0 is the source of these builtin functions and shouldn't be
25115 modified. Create a copy, use it and return it as target. */
25116 target = gen_reg_rtx (tmode);
25117 emit_move_insn (target, op0);
25118 ix86_expand_vector_set (true, target, op1, elt);
25120 return target;
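/* Usage sketch (assumed example): _mm_insert_epi16 (v, x, 3) reaches
   IX86_BUILTIN_VEC_SET_V8HI; per the comment above, v itself is left
   unmodified and the returned copy has element 3 replaced by x.  */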
25123 /* Expand an expression EXP that calls a built-in function,
25124 with result going to TARGET if that's convenient
25125 (and in mode MODE if that's convenient).
25126 SUBTARGET may be used as the target for computing one of EXP's operands.
25127 IGNORE is nonzero if the value is to be ignored. */
25129 static rtx
25130 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25131 enum machine_mode mode ATTRIBUTE_UNUSED,
25132 int ignore ATTRIBUTE_UNUSED)
25134 const struct builtin_description *d;
25135 size_t i;
25136 enum insn_code icode;
25137 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25138 tree arg0, arg1, arg2;
25139 rtx op0, op1, op2, pat;
25140 enum machine_mode mode0, mode1, mode2;
25141 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25143 /* Determine whether the builtin function is available under the current ISA.
25144 Originally the builtin was not created if it wasn't applicable to the
25145 current ISA based on the command line switches. With function specific
25146 options, we need to check in the context of the function making the call
25147 whether it is supported. */
25148 if (ix86_builtins_isa[fcode].isa
25149 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
25151 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25152 NULL, NULL, false);
25154 if (!opts)
25155 error ("%qE needs unknown isa option", fndecl);
25156 else
25158 gcc_assert (opts != NULL);
25159 error ("%qE needs isa option %s", fndecl, opts);
25160 free (opts);
25162 return const0_rtx;
25165 switch (fcode)
25167 case IX86_BUILTIN_MASKMOVQ:
25168 case IX86_BUILTIN_MASKMOVDQU:
25169 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25170 ? CODE_FOR_mmx_maskmovq
25171 : CODE_FOR_sse2_maskmovdqu);
25172 /* Note the arg order is different from the operand order. */
25173 arg1 = CALL_EXPR_ARG (exp, 0);
25174 arg2 = CALL_EXPR_ARG (exp, 1);
25175 arg0 = CALL_EXPR_ARG (exp, 2);
25176 op0 = expand_normal (arg0);
25177 op1 = expand_normal (arg1);
25178 op2 = expand_normal (arg2);
25179 mode0 = insn_data[icode].operand[0].mode;
25180 mode1 = insn_data[icode].operand[1].mode;
25181 mode2 = insn_data[icode].operand[2].mode;
25183 op0 = force_reg (Pmode, op0);
25184 op0 = gen_rtx_MEM (mode1, op0);
25186 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
25187 op0 = copy_to_mode_reg (mode0, op0);
25188 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
25189 op1 = copy_to_mode_reg (mode1, op1);
25190 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
25191 op2 = copy_to_mode_reg (mode2, op2);
25192 pat = GEN_FCN (icode) (op0, op1, op2);
25193 if (! pat)
25194 return 0;
25195 emit_insn (pat);
25196 return 0;
25198 case IX86_BUILTIN_LDMXCSR:
25199 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25200 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25201 emit_move_insn (target, op0);
25202 emit_insn (gen_sse_ldmxcsr (target));
25203 return 0;
25205 case IX86_BUILTIN_STMXCSR:
25206 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25207 emit_insn (gen_sse_stmxcsr (target));
25208 return copy_to_mode_reg (SImode, target);
25210 case IX86_BUILTIN_CLFLUSH:
25211 arg0 = CALL_EXPR_ARG (exp, 0);
25212 op0 = expand_normal (arg0);
25213 icode = CODE_FOR_sse2_clflush;
25214 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
25215 op0 = copy_to_mode_reg (Pmode, op0);
25217 emit_insn (gen_sse2_clflush (op0));
25218 return 0;
25220 case IX86_BUILTIN_MONITOR:
25221 arg0 = CALL_EXPR_ARG (exp, 0);
25222 arg1 = CALL_EXPR_ARG (exp, 1);
25223 arg2 = CALL_EXPR_ARG (exp, 2);
25224 op0 = expand_normal (arg0);
25225 op1 = expand_normal (arg1);
25226 op2 = expand_normal (arg2);
25227 if (!REG_P (op0))
25228 op0 = copy_to_mode_reg (Pmode, op0);
25229 if (!REG_P (op1))
25230 op1 = copy_to_mode_reg (SImode, op1);
25231 if (!REG_P (op2))
25232 op2 = copy_to_mode_reg (SImode, op2);
25233 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
25234 return 0;
25236 case IX86_BUILTIN_MWAIT:
25237 arg0 = CALL_EXPR_ARG (exp, 0);
25238 arg1 = CALL_EXPR_ARG (exp, 1);
25239 op0 = expand_normal (arg0);
25240 op1 = expand_normal (arg1);
25241 if (!REG_P (op0))
25242 op0 = copy_to_mode_reg (SImode, op0);
25243 if (!REG_P (op1))
25244 op1 = copy_to_mode_reg (SImode, op1);
25245 emit_insn (gen_sse3_mwait (op0, op1));
25246 return 0;
25248 case IX86_BUILTIN_VEC_INIT_V2SI:
25249 case IX86_BUILTIN_VEC_INIT_V4HI:
25250 case IX86_BUILTIN_VEC_INIT_V8QI:
25251 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25253 case IX86_BUILTIN_VEC_EXT_V2DF:
25254 case IX86_BUILTIN_VEC_EXT_V2DI:
25255 case IX86_BUILTIN_VEC_EXT_V4SF:
25256 case IX86_BUILTIN_VEC_EXT_V4SI:
25257 case IX86_BUILTIN_VEC_EXT_V8HI:
25258 case IX86_BUILTIN_VEC_EXT_V2SI:
25259 case IX86_BUILTIN_VEC_EXT_V4HI:
25260 case IX86_BUILTIN_VEC_EXT_V16QI:
25261 return ix86_expand_vec_ext_builtin (exp, target);
25263 case IX86_BUILTIN_VEC_SET_V2DI:
25264 case IX86_BUILTIN_VEC_SET_V4SF:
25265 case IX86_BUILTIN_VEC_SET_V4SI:
25266 case IX86_BUILTIN_VEC_SET_V8HI:
25267 case IX86_BUILTIN_VEC_SET_V4HI:
25268 case IX86_BUILTIN_VEC_SET_V16QI:
25269 return ix86_expand_vec_set_builtin (exp);
25271 case IX86_BUILTIN_INFQ:
25272 case IX86_BUILTIN_HUGE_VALQ:
25274 REAL_VALUE_TYPE inf;
25275 rtx tmp;
25277 real_inf (&inf);
25278 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25280 tmp = validize_mem (force_const_mem (mode, tmp));
25282 if (target == 0)
25283 target = gen_reg_rtx (mode);
25285 emit_move_insn (target, tmp);
25286 return target;
25289 default:
25290 break;
25293 for (i = 0, d = bdesc_special_args;
25294 i < ARRAY_SIZE (bdesc_special_args);
25295 i++, d++)
25296 if (d->code == fcode)
25297 return ix86_expand_special_args_builtin (d, exp, target);
25299 for (i = 0, d = bdesc_args;
25300 i < ARRAY_SIZE (bdesc_args);
25301 i++, d++)
25302 if (d->code == fcode)
25303 switch (fcode)
25305 case IX86_BUILTIN_FABSQ:
25306 case IX86_BUILTIN_COPYSIGNQ:
25307 if (!TARGET_SSE2)
25308 /* Emit a normal call if SSE2 isn't available. */
25309 return expand_call (exp, target, ignore);
25310 default:
25311 return ix86_expand_args_builtin (d, exp, target);
25314 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25315 if (d->code == fcode)
25316 return ix86_expand_sse_comi (d, exp, target);
25318 for (i = 0, d = bdesc_pcmpestr;
25319 i < ARRAY_SIZE (bdesc_pcmpestr);
25320 i++, d++)
25321 if (d->code == fcode)
25322 return ix86_expand_sse_pcmpestr (d, exp, target);
25324 for (i = 0, d = bdesc_pcmpistr;
25325 i < ARRAY_SIZE (bdesc_pcmpistr);
25326 i++, d++)
25327 if (d->code == fcode)
25328 return ix86_expand_sse_pcmpistr (d, exp, target);
25330 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25331 if (d->code == fcode)
25332 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25333 (enum multi_arg_type)d->flag,
25334 d->comparison);
25336 gcc_unreachable ();
25339 /* Returns a function decl for a vectorized version of the builtin function
25340 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25341 if it is not available. */
25343 static tree
25344 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
25345 tree type_in)
25347 enum machine_mode in_mode, out_mode;
25348 int in_n, out_n;
25350 if (TREE_CODE (type_out) != VECTOR_TYPE
25351 || TREE_CODE (type_in) != VECTOR_TYPE)
25352 return NULL_TREE;
25354 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25355 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25356 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25357 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25359 switch (fn)
25361 case BUILT_IN_SQRT:
25362 if (out_mode == DFmode && out_n == 2
25363 && in_mode == DFmode && in_n == 2)
25364 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25365 break;
25367 case BUILT_IN_SQRTF:
25368 if (out_mode == SFmode && out_n == 4
25369 && in_mode == SFmode && in_n == 4)
25370 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25371 break;
25373 case BUILT_IN_LRINT:
25374 if (out_mode == SImode && out_n == 4
25375 && in_mode == DFmode && in_n == 2)
25376 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25377 break;
25379 case BUILT_IN_LRINTF:
25380 if (out_mode == SImode && out_n == 4
25381 && in_mode == SFmode && in_n == 4)
25382 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25383 break;
25385 default:
25386 break;
25389 /* Dispatch to a handler for a vectorization library. */
25390 if (ix86_veclib_handler)
25391 return (*ix86_veclib_handler) ((enum built_in_function) fn, type_out,
25392 type_in);
25394 return NULL_TREE;
25397 /* Handler for an SVML-style interface to
25398 a library with vectorized intrinsics. */
25400 static tree
25401 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25403 char name[20];
25404 tree fntype, new_fndecl, args;
25405 unsigned arity;
25406 const char *bname;
25407 enum machine_mode el_mode, in_mode;
25408 int n, in_n;
25410 /* The SVML is suitable for unsafe math only. */
25411 if (!flag_unsafe_math_optimizations)
25412 return NULL_TREE;
25414 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25415 n = TYPE_VECTOR_SUBPARTS (type_out);
25416 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25417 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25418 if (el_mode != in_mode
25419 || n != in_n)
25420 return NULL_TREE;
25422 switch (fn)
25424 case BUILT_IN_EXP:
25425 case BUILT_IN_LOG:
25426 case BUILT_IN_LOG10:
25427 case BUILT_IN_POW:
25428 case BUILT_IN_TANH:
25429 case BUILT_IN_TAN:
25430 case BUILT_IN_ATAN:
25431 case BUILT_IN_ATAN2:
25432 case BUILT_IN_ATANH:
25433 case BUILT_IN_CBRT:
25434 case BUILT_IN_SINH:
25435 case BUILT_IN_SIN:
25436 case BUILT_IN_ASINH:
25437 case BUILT_IN_ASIN:
25438 case BUILT_IN_COSH:
25439 case BUILT_IN_COS:
25440 case BUILT_IN_ACOSH:
25441 case BUILT_IN_ACOS:
25442 if (el_mode != DFmode || n != 2)
25443 return NULL_TREE;
25444 break;
25446 case BUILT_IN_EXPF:
25447 case BUILT_IN_LOGF:
25448 case BUILT_IN_LOG10F:
25449 case BUILT_IN_POWF:
25450 case BUILT_IN_TANHF:
25451 case BUILT_IN_TANF:
25452 case BUILT_IN_ATANF:
25453 case BUILT_IN_ATAN2F:
25454 case BUILT_IN_ATANHF:
25455 case BUILT_IN_CBRTF:
25456 case BUILT_IN_SINHF:
25457 case BUILT_IN_SINF:
25458 case BUILT_IN_ASINHF:
25459 case BUILT_IN_ASINF:
25460 case BUILT_IN_COSHF:
25461 case BUILT_IN_COSF:
25462 case BUILT_IN_ACOSHF:
25463 case BUILT_IN_ACOSF:
25464 if (el_mode != SFmode || n != 4)
25465 return NULL_TREE;
25466 break;
25468 default:
25469 return NULL_TREE;
25472 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25474 if (fn == BUILT_IN_LOGF)
25475 strcpy (name, "vmlsLn4");
25476 else if (fn == BUILT_IN_LOG)
25477 strcpy (name, "vmldLn2");
25478 else if (n == 4)
25480 sprintf (name, "vmls%s", bname+10);
25481 name[strlen (name)-1] = '4';
25483 else
25484 sprintf (name, "vmld%s2", bname+10);
25486 /* Convert to uppercase. */
25487 name[4] &= ~0x20;
25489 arity = 0;
25490 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25491 args = TREE_CHAIN (args))
25492 arity++;
25494 if (arity == 1)
25495 fntype = build_function_type_list (type_out, type_in, NULL);
25496 else
25497 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25499 /* Build a function declaration for the vectorized function. */
25500 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25501 TREE_PUBLIC (new_fndecl) = 1;
25502 DECL_EXTERNAL (new_fndecl) = 1;
25503 DECL_IS_NOVOPS (new_fndecl) = 1;
25504 TREE_READONLY (new_fndecl) = 1;
25506 return new_fndecl;
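/* Worked naming example (derived from the code above): for
   BUILT_IN_SINF with n == 4, bname is "__builtin_sinf" and bname+10
   is "sinf"; sprintf produces "vmlssinf", the trailing 'f' is
   overwritten with '4' giving "vmlssin4", and clearing bit 0x20 of
   name[4] uppercases it to the final "vmlsSin4".  */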
25509 /* Handler for an ACML-style interface to
25510 a library with vectorized intrinsics. */
25512 static tree
25513 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
25515 char name[20] = "__vr.._";
25516 tree fntype, new_fndecl, args;
25517 unsigned arity;
25518 const char *bname;
25519 enum machine_mode el_mode, in_mode;
25520 int n, in_n;
25522 /* The ACML is 64-bit only and suitable for unsafe math only, as
25523 it does not correctly support parts of IEEE (such as denormals)
25524 with the required precision. */
25525 if (!TARGET_64BIT
25526 || !flag_unsafe_math_optimizations)
25527 return NULL_TREE;
25529 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25530 n = TYPE_VECTOR_SUBPARTS (type_out);
25531 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25532 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25533 if (el_mode != in_mode
25534 || n != in_n)
25535 return NULL_TREE;
25537 switch (fn)
25539 case BUILT_IN_SIN:
25540 case BUILT_IN_COS:
25541 case BUILT_IN_EXP:
25542 case BUILT_IN_LOG:
25543 case BUILT_IN_LOG2:
25544 case BUILT_IN_LOG10:
25545 name[4] = 'd';
25546 name[5] = '2';
25547 if (el_mode != DFmode
25548 || n != 2)
25549 return NULL_TREE;
25550 break;
25552 case BUILT_IN_SINF:
25553 case BUILT_IN_COSF:
25554 case BUILT_IN_EXPF:
25555 case BUILT_IN_POWF:
25556 case BUILT_IN_LOGF:
25557 case BUILT_IN_LOG2F:
25558 case BUILT_IN_LOG10F:
25559 name[4] = 's';
25560 name[5] = '4';
25561 if (el_mode != SFmode
25562 || n != 4)
25563 return NULL_TREE;
25564 break;
25566 default:
25567 return NULL_TREE;
25570 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25571 sprintf (name + 7, "%s", bname+10);
25573 arity = 0;
25574 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25575 args = TREE_CHAIN (args))
25576 arity++;
25578 if (arity == 1)
25579 fntype = build_function_type_list (type_out, type_in, NULL);
25580 else
25581 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25583 /* Build a function declaration for the vectorized function. */
25584 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25585 TREE_PUBLIC (new_fndecl) = 1;
25586 DECL_EXTERNAL (new_fndecl) = 1;
25587 DECL_IS_NOVOPS (new_fndecl) = 1;
25588 TREE_READONLY (new_fndecl) = 1;
25590 return new_fndecl;
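/* Worked naming example (derived from the code above): for
   BUILT_IN_SIN, name starts as "__vr.._", the switch patches it to
   "__vrd2_", and appending bname+10 ("sin" from "__builtin_sin")
   yields "__vrd2_sin", the ACML double-precision two-lane entry
   point.  */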
25594 /* Returns a decl of a function that implements conversion of an integer vector
25595 into a floating-point vector, or vice-versa. TYPE is the type of the integer
25596 side of the conversion.
25597 Return NULL_TREE if it is not available. */
25599 static tree
25600 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
25602 if (TREE_CODE (type) != VECTOR_TYPE)
25603 return NULL_TREE;
25605 switch (code)
25607 case FLOAT_EXPR:
25608 switch (TYPE_MODE (type))
25610 case V4SImode:
25611 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
25612 default:
25613 return NULL_TREE;
25616 case FIX_TRUNC_EXPR:
25617 switch (TYPE_MODE (type))
25619 case V4SImode:
25620 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
25621 default:
25622 return NULL_TREE;
25624 default:
25625 return NULL_TREE;
25630 /* Returns a decl of a target-specific builtin that implements the
25631 reciprocal of the function, or NULL_TREE if not available. */
25633 static tree
25634 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25635 bool sqrt ATTRIBUTE_UNUSED)
25637 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
25638 && flag_finite_math_only && !flag_trapping_math
25639 && flag_unsafe_math_optimizations))
25640 return NULL_TREE;
25642 if (md_fn)
25643 /* Machine dependent builtins. */
25644 switch (fn)
25646 /* Vectorized version of sqrt to rsqrt conversion. */
25647 case IX86_BUILTIN_SQRTPS_NR:
25648 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25650 default:
25651 return NULL_TREE;
25653 else
25654 /* Normal builtins. */
25655 switch (fn)
25657 /* Sqrt to rsqrt conversion. */
25658 case BUILT_IN_SQRTF:
25659 return ix86_builtins[IX86_BUILTIN_RSQRTF];
25661 default:
25662 return NULL_TREE;
25666 /* Store OPERAND to the memory after reload is completed. This means
25667 that we can't easily use assign_stack_local. */
25668 rtx
25669 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25671 rtx result;
25673 gcc_assert (reload_completed);
25674 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
25676 result = gen_rtx_MEM (mode,
25677 gen_rtx_PLUS (Pmode,
25678 stack_pointer_rtx,
25679 GEN_INT (-RED_ZONE_SIZE)));
25680 emit_move_insn (result, operand);
25682 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
25684 switch (mode)
25686 case HImode:
25687 case SImode:
25688 operand = gen_lowpart (DImode, operand);
25689 /* FALLTHRU */
25690 case DImode:
25691 emit_insn (
25692 gen_rtx_SET (VOIDmode,
25693 gen_rtx_MEM (DImode,
25694 gen_rtx_PRE_DEC (DImode,
25695 stack_pointer_rtx)),
25696 operand));
25697 break;
25698 default:
25699 gcc_unreachable ();
25701 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25703 else
25705 switch (mode)
25707 case DImode:
25709 rtx operands[2];
25710 split_di (&operand, 1, operands, operands + 1);
25711 emit_insn (
25712 gen_rtx_SET (VOIDmode,
25713 gen_rtx_MEM (SImode,
25714 gen_rtx_PRE_DEC (Pmode,
25715 stack_pointer_rtx)),
25716 operands[1]));
25717 emit_insn (
25718 gen_rtx_SET (VOIDmode,
25719 gen_rtx_MEM (SImode,
25720 gen_rtx_PRE_DEC (Pmode,
25721 stack_pointer_rtx)),
25722 operands[0]));
25724 break;
25725 case HImode:
25726 /* Store HImodes as SImodes. */
25727 operand = gen_lowpart (SImode, operand);
25728 /* FALLTHRU */
25729 case SImode:
25730 emit_insn (
25731 gen_rtx_SET (VOIDmode,
25732 gen_rtx_MEM (GET_MODE (operand),
25733 gen_rtx_PRE_DEC (SImode,
25734 stack_pointer_rtx)),
25735 operand));
25736 break;
25737 default:
25738 gcc_unreachable ();
25740 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25742 return result;
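/* Layout sketch (assumed; RED_ZONE_SIZE is taken to be 128 here):
   with a 64-bit red zone the operand is stored at

     (mem:DI (plus:DI (reg:DI sp) (const_int -128)))

   without touching the stack pointer; otherwise it is pushed through
   (pre_dec sp) stores and RESULT addresses the new stack top.  */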
25745 /* Free the operand from memory. */
25746 void
25747 ix86_free_from_memory (enum machine_mode mode)
25749 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
25751 int size;
25753 if (mode == DImode || TARGET_64BIT)
25754 size = 8;
25755 else
25756 size = 4;
25757 /* Use LEA to deallocate stack space. In peephole2 it will be converted
25758 to a pop or add instruction if registers are available. */
25759 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
25760 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
25761 GEN_INT (size))));
25765 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
25766 QImode must go into class Q_REGS.
25767 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
25768 movdf to do mem-to-mem moves through integer regs. */
25769 enum reg_class
25770 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
25772 enum machine_mode mode = GET_MODE (x);
25774 /* We're only allowed to return a subclass of CLASS. Many of the
25775 following checks fail for NO_REGS, so eliminate that early. */
25776 if (regclass == NO_REGS)
25777 return NO_REGS;
25779 /* All classes can load zeros. */
25780 if (x == CONST0_RTX (mode))
25781 return regclass;
25783 /* Force constants into memory if we are loading a (nonzero) constant into
25784 an MMX or SSE register. This is because there are no MMX/SSE instructions
25785 to load from a constant. */
25786 if (CONSTANT_P (x)
25787 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
25788 return NO_REGS;
25790 /* Prefer SSE regs only, if we can use them for math. */
25791 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
25792 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
25794 /* Floating-point constants need more complex checks. */
25795 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
25797 /* General regs can load everything. */
25798 if (reg_class_subset_p (regclass, GENERAL_REGS))
25799 return regclass;
25801 /* Floats can load 0 and 1 plus some others. Note that we eliminated
25802 zero above. We only want to wind up preferring 80387 registers if
25803 we plan on doing computation with them. */
25804 if (TARGET_80387
25805 && standard_80387_constant_p (x))
25807 /* Limit class to non-sse. */
25808 if (regclass == FLOAT_SSE_REGS)
25809 return FLOAT_REGS;
25810 if (regclass == FP_TOP_SSE_REGS)
25811 return FP_TOP_REG;
25812 if (regclass == FP_SECOND_SSE_REGS)
25813 return FP_SECOND_REG;
25814 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
25815 return regclass;
25818 return NO_REGS;
25821 /* Generally when we see PLUS here, it's the function invariant
25822 (plus soft-fp const_int). Which can only be computed into general
25823 regs. */
25824 if (GET_CODE (x) == PLUS)
25825 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
25827 /* QImode constants are easy to load, but non-constant QImode data
25828 must go into Q_REGS. */
25829 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
25831 if (reg_class_subset_p (regclass, Q_REGS))
25832 return regclass;
25833 if (reg_class_subset_p (Q_REGS, regclass))
25834 return Q_REGS;
25835 return NO_REGS;
25838 return regclass;
25841 /* Discourage putting floating-point values in SSE registers unless
25842 SSE math is being used, and likewise for the 387 registers. */
25843 enum reg_class
25844 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
25846 enum machine_mode mode = GET_MODE (x);
25848 /* Restrict the output reload class to the register bank that we are doing
25849 math on. If we would prefer not to return a subset of CLASS, reject this
25850 alternative: if reload cannot do this, it will still use its choice. */
25851 mode = GET_MODE (x);
25852 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
25853 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
25855 if (X87_FLOAT_MODE_P (mode))
25857 if (regclass == FP_TOP_SSE_REGS)
25858 return FP_TOP_REG;
25859 else if (regclass == FP_SECOND_SSE_REGS)
25860 return FP_SECOND_REG;
25861 else
25862 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
25865 return regclass;
25868 static enum reg_class
25869 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
25870 enum machine_mode mode,
25871 secondary_reload_info *sri ATTRIBUTE_UNUSED)
25873 /* QImode spills from non-QI registers require an
25874 intermediate register on 32-bit targets. */
25875 if (!in_p && mode == QImode && !TARGET_64BIT
25876 && (rclass == GENERAL_REGS
25877 || rclass == LEGACY_REGS
25878 || rclass == INDEX_REGS))
25880 int regno;
25882 if (REG_P (x))
25883 regno = REGNO (x);
25884 else
25885 regno = -1;
25887 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
25888 regno = true_regnum (x);
25890 /* Return Q_REGS if the operand is in memory. */
25891 if (regno == -1)
25892 return Q_REGS;
25895 return NO_REGS;
25898 /* If we are copying between general and FP registers, we need a memory
25899 location. The same is true for SSE and MMX registers.
25901 To optimize register_move_cost performance, we allow an inline variant.
25903 The macro can't work reliably when one of the CLASSES is a class containing
25904 registers from multiple units (SSE, MMX, integer). We avoid this by never
25905 combining those units in a single alternative in the machine description.
25906 Ensure that this constraint holds to avoid unexpected surprises.
25908 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
25909 enforce these sanity checks. */
25911 static inline int
25912 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25913 enum machine_mode mode, int strict)
25915 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
25916 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
25917 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
25918 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
25919 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
25920 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
25922 gcc_assert (!strict);
25923 return true;
25926 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
25927 return true;
25929 /* ??? This is a lie. We do have moves between mmx and general regs,
25930 and between mmx and sse2. But by saying we need secondary memory we
25931 discourage the register allocator from using the mmx registers unless needed. */
25932 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
25933 return true;
25935 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25937 /* SSE1 doesn't have any direct moves from other classes. */
25938 if (!TARGET_SSE2)
25939 return true;
25941 /* If the target says that inter-unit moves are more expensive
25942 than moving through memory, then don't generate them. */
25943 if (!TARGET_INTER_UNIT_MOVES)
25944 return true;
25946 /* Between SSE and general, we have moves no larger than word size. */
25947 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
25948 return true;
25951 return false;
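/* Illustrative example (a sketch): an SFmode copy between FLOAT_REGS
   and SSE_REGS always answers true here, since there is no direct
   x87<->SSE move instruction; the copy is staged through a stack
   temporary instead.  */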
25955 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25956 enum machine_mode mode, int strict)
25958 return inline_secondary_memory_needed (class1, class2, mode, strict);
25961 /* Return true if the registers in CLASS cannot represent the change from
25962 modes FROM to TO. */
25964 bool
25965 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
25966 enum reg_class regclass)
25968 if (from == to)
25969 return false;
25971 /* x87 registers can't do subreg at all, as all values are reformatted
25972 to extended precision. */
25973 if (MAYBE_FLOAT_CLASS_P (regclass))
25974 return true;
25976 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
25978 /* Vector registers do not support QI or HImode loads. If we don't
25979 disallow a change to these modes, reload will assume it's ok to
25980 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
25981 the vec_dupv4hi pattern. */
25982 if (GET_MODE_SIZE (from) < 4)
25983 return true;
25985 /* Vector registers do not support subreg with nonzero offsets, which
25986 are otherwise valid for integer registers. Since we can't see
25987 whether we have a nonzero offset from here, prohibit all
25988 nonparadoxical subregs changing size. */
25989 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
25990 return true;
25993 return false;
25996 /* Return the cost of moving data of mode M between a
25997 register and memory. A value of 2 is the default; this cost is
25998 relative to those in `REGISTER_MOVE_COST'.
26000 This function is used extensively by register_move_cost, which is used to
26001 build tables at startup. Make it inline in this case.
26002 When IN is 2, return the maximum of the in and out move costs.
26004 If moving between registers and memory is more expensive than
26005 between two registers, you should define this macro to express the
26006 relative cost.
26008 Also model the increased cost of moving QImode registers in non-Q_REGS
26009 classes. */
26011 static inline int
26012 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
26013 int in)
26015 int cost;
26016 if (FLOAT_CLASS_P (regclass))
26018 int index;
26019 switch (mode)
26021 case SFmode:
26022 index = 0;
26023 break;
26024 case DFmode:
26025 index = 1;
26026 break;
26027 case XFmode:
26028 index = 2;
26029 break;
26030 default:
26031 return 100;
26033 if (in == 2)
26034 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26035 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
26037 if (SSE_CLASS_P (regclass))
26039 int index;
26040 switch (GET_MODE_SIZE (mode))
26042 case 4:
26043 index = 0;
26044 break;
26045 case 8:
26046 index = 1;
26047 break;
26048 case 16:
26049 index = 2;
26050 break;
26051 default:
26052 return 100;
26054 if (in == 2)
26055 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26056 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
26058 if (MMX_CLASS_P (regclass))
26060 int index;
26061 switch (GET_MODE_SIZE (mode))
26063 case 4:
26064 index = 0;
26065 break;
26066 case 8:
26067 index = 1;
26068 break;
26069 default:
26070 return 100;
26072 if (in == 2)
26073 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26074 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
26076 switch (GET_MODE_SIZE (mode))
26078 case 1:
26079 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26081 if (!in)
26082 return ix86_cost->int_store[0];
26083 if (TARGET_PARTIAL_REG_DEPENDENCY
26084 && optimize_function_for_speed_p (cfun))
26085 cost = ix86_cost->movzbl_load;
26086 else
26087 cost = ix86_cost->int_load[0];
26088 if (in == 2)
26089 return MAX (cost, ix86_cost->int_store[0]);
26090 return cost;
26092 else
26094 if (in == 2)
26095 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26096 if (in)
26097 return ix86_cost->movzbl_load;
26098 else
26099 return ix86_cost->int_store[0] + 4;
26101 break;
26102 case 2:
26103 if (in == 2)
26104 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26105 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26106 default:
26107 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
26108 if (mode == TFmode)
26109 mode = XFmode;
26110 if (in == 2)
26111 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26112 else if (in)
26113 cost = ix86_cost->int_load[2];
26114 else
26115 cost = ix86_cost->int_store[2];
26116 return (cost * (((int) GET_MODE_SIZE (mode)
26117 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
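/* Worked example (illustrative): for DImode in GENERAL_REGS on a
   32-bit target, GET_MODE_SIZE is 8 and UNITS_PER_WORD is 4, so the
   load cost above is int_load[2] * ((8 + 4 - 1) / 4), i.e. the cost
   of two 32-bit moves.  */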
26122 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
26124 return inline_memory_move_cost (mode, regclass, in);
26128 /* Return the cost of moving data from a register in class CLASS1 to
26129 one in class CLASS2.
26131 It is not required that the cost always equal 2 when FROM is the same as TO;
26132 on some machines it is expensive to move between registers if they are not
26133 general registers. */
26136 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
26137 enum reg_class class2)
26139 /* In case we require secondary memory, compute the cost of the store
26140 followed by the load. In order to avoid bad register allocation choices,
26141 we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26143 if (inline_secondary_memory_needed (class1, class2, mode, 0))
26145 int cost = 1;
26147 cost += inline_memory_move_cost (mode, class1, 2);
26148 cost += inline_memory_move_cost (mode, class2, 2);
26150 /* In case of copying from a general purpose register we may emit multiple
26151 stores followed by a single load, causing a memory size mismatch stall.
26152 Count this as an arbitrarily high cost of 20. */
26153 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
26154 cost += 20;
26156 /* In the case of FP/MMX moves, the registers actually overlap, and we
26157 have to switch modes in order to treat them differently. */
26158 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
26159 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
26160 cost += 20;
26162 return cost;
26165 /* Moves between SSE/MMX and integer unit are expensive. */
26166 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
26167 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26169 /* ??? By keeping the returned value relatively high, we limit the number
26170 of moves between integer and MMX/SSE registers for all targets.
26171 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
26172 where integer modes in MMX/SSE registers are not tieable
26173 because of missing QImode and HImode moves to, from, or between
26174 MMX/SSE registers. */
26175 return MAX (8, ix86_cost->mmxsse_to_integer);
26177 if (MAYBE_FLOAT_CLASS_P (class1))
26178 return ix86_cost->fp_move;
26179 if (MAYBE_SSE_CLASS_P (class1))
26180 return ix86_cost->sse_move;
26181 if (MAYBE_MMX_CLASS_P (class1))
26182 return ix86_cost->mmx_move;
26183 return 2;
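/* Worked example (illustrative): an SFmode copy from SSE_REGS to
   FLOAT_REGS needs secondary memory, so the cost computed above is
   1 + MAX (sse_load[0], sse_store[0]) + MAX (fp_load[0], fp_store[0]),
   keeping it at least as high as the symmetric MEMORY_MOVE_COST.  */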
26186 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
26188 bool
26189 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26191 /* Flags, and only flags, can hold CCmode values. */
26192 if (CC_REGNO_P (regno))
26193 return GET_MODE_CLASS (mode) == MODE_CC;
26194 if (GET_MODE_CLASS (mode) == MODE_CC
26195 || GET_MODE_CLASS (mode) == MODE_RANDOM
26196 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26197 return 0;
26198 if (FP_REGNO_P (regno))
26199 return VALID_FP_MODE_P (mode);
26200 if (SSE_REGNO_P (regno))
26202 /* We implement the move patterns for all vector modes into and
26203 out of SSE registers, even when no operation instructions
26204 are available. OImode move is available only when AVX is
26205 enabled. */
26206 return ((TARGET_AVX && mode == OImode)
26207 || VALID_AVX256_REG_MODE (mode)
26208 || VALID_SSE_REG_MODE (mode)
26209 || VALID_SSE2_REG_MODE (mode)
26210 || VALID_MMX_REG_MODE (mode)
26211 || VALID_MMX_REG_MODE_3DNOW (mode));
26213 if (MMX_REGNO_P (regno))
26215 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26216 so if the register is available at all, then we can move data of
26217 the given mode into or out of it. */
26218 return (VALID_MMX_REG_MODE (mode)
26219 || VALID_MMX_REG_MODE_3DNOW (mode));
26222 if (mode == QImode)
26224 /* Take care with QImode values: they can be in non-QI regs,
26225 but then they do cause partial register stalls. */
26226 if (regno <= BX_REG || TARGET_64BIT)
26227 return 1;
26228 if (!TARGET_PARTIAL_REG_STALL)
26229 return 1;
26230 return reload_in_progress || reload_completed;
26232 /* We handle both integer and floats in the general purpose registers. */
26233 else if (VALID_INT_MODE_P (mode))
26234 return 1;
26235 else if (VALID_FP_MODE_P (mode))
26236 return 1;
26237 else if (VALID_DFP_MODE_P (mode))
26238 return 1;
26239 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26240 on to use that value in smaller contexts, this can easily force a
26241 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26242 supporting DImode, allow it. */
26243 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26244 return 1;
26246 return 0;
26249 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26250 tieable integer mode. */
26252 static bool
26253 ix86_tieable_integer_mode_p (enum machine_mode mode)
26255 switch (mode)
26257 case HImode:
26258 case SImode:
26259 return true;
26261 case QImode:
26262 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
26264 case DImode:
26265 return TARGET_64BIT;
26267 default:
26268 return false;
26272 /* Return true if MODE1 is accessible in a register that can hold MODE2
26273 without copying. That is, all register classes that can hold MODE2
26274 can also hold MODE1. */
26276 bool
26277 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26279 if (mode1 == mode2)
26280 return true;
26282 if (ix86_tieable_integer_mode_p (mode1)
26283 && ix86_tieable_integer_mode_p (mode2))
26284 return true;
26286 /* MODE2 being XFmode implies fp stack or general regs, which means we
26287 can tie any smaller floating point modes to it. Note that we do not
26288 tie this with TFmode. */
26289 if (mode2 == XFmode)
26290 return mode1 == SFmode || mode1 == DFmode;
26292 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26293 that we can tie it with SFmode. */
26294 if (mode2 == DFmode)
26295 return mode1 == SFmode;
26297 /* If MODE2 is only appropriate for an SSE register, then tie with
26298 any other mode acceptable to SSE registers. */
26299 if (GET_MODE_SIZE (mode2) == 16
26300 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26301 return (GET_MODE_SIZE (mode1) == 16
26302 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26304 /* If MODE2 is appropriate for an MMX register, then tie
26305 with any other mode acceptable to MMX registers. */
26306 if (GET_MODE_SIZE (mode2) == 8
26307 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26308 return (GET_MODE_SIZE (mode1) == 8
26309 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26311 return false;
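/* Illustrative examples (a sketch): SFmode ties with XFmode, since
   every register class that can hold XFmode (x87 stack, general regs)
   can also hold SFmode, while V4SFmode ties only with other 16-byte
   SSE modes such as V2DFmode.  */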
26314 /* Compute a (partial) cost for rtx X. Return true if the complete
26315 cost has been computed, and false if subexpressions should be
26316 scanned. In either case, *TOTAL contains the cost result. */
26318 static bool
26319 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26321 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26322 enum machine_mode mode = GET_MODE (x);
26323 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
26325 switch (code)
26327 case CONST_INT:
26328 case CONST:
26329 case LABEL_REF:
26330 case SYMBOL_REF:
26331 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26332 *total = 3;
26333 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26334 *total = 2;
26335 else if (flag_pic && SYMBOLIC_CONST (x)
26336 && (!TARGET_64BIT
26337 || (GET_CODE (x) != LABEL_REF
26338 && (GET_CODE (x) != SYMBOL_REF
26339 || !SYMBOL_REF_LOCAL_P (x)))))
26340 *total = 1;
26341 else
26342 *total = 0;
26343 return true;
26345 case CONST_DOUBLE:
26346 if (mode == VOIDmode)
26347 *total = 0;
26348 else
26349 switch (standard_80387_constant_p (x))
26351 case 1: /* 0.0 */
26352 *total = 1;
26353 break;
26354 default: /* Other constants */
26355 *total = 2;
26356 break;
26357 case 0:
26358 case -1:
26359 /* Start with (MEM (SYMBOL_REF)), since that's where
26360 it'll probably end up. Add a penalty for size. */
26361 *total = (COSTS_N_INSNS (1)
26362 + (flag_pic != 0 && !TARGET_64BIT)
26363 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26364 break;
26366 return true;
26368 case ZERO_EXTEND:
26369 /* The zero extension is often completely free on x86_64, so make
26370 it as cheap as possible. */
26371 if (TARGET_64BIT && mode == DImode
26372 && GET_MODE (XEXP (x, 0)) == SImode)
26373 *total = 1;
26374 else if (TARGET_ZERO_EXTEND_WITH_AND)
26375 *total = cost->add;
26376 else
26377 *total = cost->movzx;
26378 return false;
26380 case SIGN_EXTEND:
26381 *total = cost->movsx;
26382 return false;
26384 case ASHIFT:
26385 if (CONST_INT_P (XEXP (x, 1))
26386 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26388 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26389 if (value == 1)
26391 *total = cost->add;
26392 return false;
26394 if ((value == 2 || value == 3)
26395 && cost->lea <= cost->shift_const)
26397 *total = cost->lea;
26398 return false;
26401 /* FALLTHRU */
26403 case ROTATE:
26404 case ASHIFTRT:
26405 case LSHIFTRT:
26406 case ROTATERT:
26407 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26409 if (CONST_INT_P (XEXP (x, 1)))
26411 if (INTVAL (XEXP (x, 1)) > 32)
26412 *total = cost->shift_const + COSTS_N_INSNS (2);
26413 else
26414 *total = cost->shift_const * 2;
26416 else
26418 if (GET_CODE (XEXP (x, 1)) == AND)
26419 *total = cost->shift_var * 2;
26420 else
26421 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26424 else
26426 if (CONST_INT_P (XEXP (x, 1)))
26427 *total = cost->shift_const;
26428 else
26429 *total = cost->shift_var;
26431 return false;
26433 case MULT:
26434 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26436 /* ??? SSE scalar cost should be used here. */
26437 *total = cost->fmul;
26438 return false;
26440 else if (X87_FLOAT_MODE_P (mode))
26442 *total = cost->fmul;
26443 return false;
26445 else if (FLOAT_MODE_P (mode))
26447 /* ??? SSE vector cost should be used here. */
26448 *total = cost->fmul;
26449 return false;
26451 else
26453 rtx op0 = XEXP (x, 0);
26454 rtx op1 = XEXP (x, 1);
26455 int nbits;
26456 if (CONST_INT_P (XEXP (x, 1)))
26458 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26459 for (nbits = 0; value != 0; value &= value - 1)
26460 nbits++;
26462 else
26463 /* This is arbitrary. */
26464 nbits = 7;
26466 /* Compute costs correctly for widening multiplication. */
26467 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26468 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26469 == GET_MODE_SIZE (mode))
26471 int is_mulwiden = 0;
26472 enum machine_mode inner_mode = GET_MODE (op0);
26474 if (GET_CODE (op0) == GET_CODE (op1))
26475 is_mulwiden = 1, op1 = XEXP (op1, 0);
26476 else if (CONST_INT_P (op1))
26478 if (GET_CODE (op0) == SIGN_EXTEND)
26479 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26480 == INTVAL (op1);
26481 else
26482 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26485 if (is_mulwiden)
26486 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26489 *total = (cost->mult_init[MODE_INDEX (mode)]
26490 + nbits * cost->mult_bit
26491 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
26493 return true;
26496 case DIV:
26497 case UDIV:
26498 case MOD:
26499 case UMOD:
26500 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26501 /* ??? SSE cost should be used here. */
26502 *total = cost->fdiv;
26503 else if (X87_FLOAT_MODE_P (mode))
26504 *total = cost->fdiv;
26505 else if (FLOAT_MODE_P (mode))
26506 /* ??? SSE vector cost should be used here. */
26507 *total = cost->fdiv;
26508 else
26509 *total = cost->divide[MODE_INDEX (mode)];
26510 return false;
26512 case PLUS:
26513 if (GET_MODE_CLASS (mode) == MODE_INT
26514 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26516 if (GET_CODE (XEXP (x, 0)) == PLUS
26517 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26518 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26519 && CONSTANT_P (XEXP (x, 1)))
26521 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26522 if (val == 2 || val == 4 || val == 8)
26524 *total = cost->lea;
26525 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26526 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26527 outer_code, speed);
26528 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26529 return true;
26532 else if (GET_CODE (XEXP (x, 0)) == MULT
26533 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26535 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26536 if (val == 2 || val == 4 || val == 8)
26538 *total = cost->lea;
26539 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26540 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26541 return true;
26544 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26546 *total = cost->lea;
26547 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26548 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26549 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26550 return true;
26553 /* FALLTHRU */
26555 case MINUS:
26556 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26558 /* ??? SSE cost should be used here. */
26559 *total = cost->fadd;
26560 return false;
26562 else if (X87_FLOAT_MODE_P (mode))
26564 *total = cost->fadd;
26565 return false;
26567 else if (FLOAT_MODE_P (mode))
26569 /* ??? SSE vector cost should be used here. */
26570 *total = cost->fadd;
26571 return false;
26573 /* FALLTHRU */
26575 case AND:
26576 case IOR:
26577 case XOR:
26578 if (!TARGET_64BIT && mode == DImode)
26580 *total = (cost->add * 2
26581 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26582 << (GET_MODE (XEXP (x, 0)) != DImode))
26583 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26584 << (GET_MODE (XEXP (x, 1)) != DImode)));
26585 return true;
26587 /* FALLTHRU */
26589 case NEG:
26590 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26592 /* ??? SSE cost should be used here. */
26593 *total = cost->fchs;
26594 return false;
26596 else if (X87_FLOAT_MODE_P (mode))
26598 *total = cost->fchs;
26599 return false;
26601 else if (FLOAT_MODE_P (mode))
26603 /* ??? SSE vector cost should be used here. */
26604 *total = cost->fchs;
26605 return false;
26607 /* FALLTHRU */
26609 case NOT:
26610 if (!TARGET_64BIT && mode == DImode)
26611 *total = cost->add * 2;
26612 else
26613 *total = cost->add;
26614 return false;
26616 case COMPARE:
26617 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26618 && XEXP (XEXP (x, 0), 1) == const1_rtx
26619 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26620 && XEXP (x, 1) == const0_rtx)
26622 /* This kind of construct is implemented using test[bwl].
26623 Treat it as if we had an AND. */
26624 *total = (cost->add
26625 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26626 + rtx_cost (const1_rtx, outer_code, speed));
26627 return true;
26629 return false;
26631 case FLOAT_EXTEND:
26632 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
26633 *total = 0;
26634 return false;
26636 case ABS:
26637 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26638 /* ??? SSE cost should be used here. */
26639 *total = cost->fabs;
26640 else if (X87_FLOAT_MODE_P (mode))
26641 *total = cost->fabs;
26642 else if (FLOAT_MODE_P (mode))
26643 /* ??? SSE vector cost should be used here. */
26644 *total = cost->fabs;
26645 return false;
26647 case SQRT:
26648 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26649 /* ??? SSE cost should be used here. */
26650 *total = cost->fsqrt;
26651 else if (X87_FLOAT_MODE_P (mode))
26652 *total = cost->fsqrt;
26653 else if (FLOAT_MODE_P (mode))
26654 /* ??? SSE vector cost should be used here. */
26655 *total = cost->fsqrt;
26656 return false;
26658 case UNSPEC:
26659 if (XINT (x, 1) == UNSPEC_TP)
26660 *total = 0;
26661 return false;
26663 default:
26664 return false;
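/* Illustrative example (a sketch): for (ashift:SI r 3) the constant
   shift is priced as cost->lea above whenever lea is no more expensive
   than shift_const, steering the optimizers toward a scale-by-8 lea on
   CPUs where that form is cheaper.  */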
26668 #if TARGET_MACHO
26670 static int current_machopic_label_num;
26672 /* Given a symbol name and its associated stub, write out the
26673 definition of the stub. */
26675 void
26676 machopic_output_stub (FILE *file, const char *symb, const char *stub)
26678 unsigned int length;
26679 char *binder_name, *symbol_name, lazy_ptr_name[32];
26680 int label = ++current_machopic_label_num;
26682 /* For 64-bit we shouldn't get here. */
26683 gcc_assert (!TARGET_64BIT);
26685 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
26686 symb = (*targetm.strip_name_encoding) (symb);
26688 length = strlen (stub);
26689 binder_name = XALLOCAVEC (char, length + 32);
26690 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
26692 length = strlen (symb);
26693 symbol_name = XALLOCAVEC (char, length + 32);
26694 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
26696 sprintf (lazy_ptr_name, "L%d$lz", label);
26698 if (MACHOPIC_PURE)
26699 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
26700 else
26701 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
26703 fprintf (file, "%s:\n", stub);
26704 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26706 if (MACHOPIC_PURE)
26708 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
26709 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
26710 fprintf (file, "\tjmp\t*%%edx\n");
26712 else
26713 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
26715 fprintf (file, "%s:\n", binder_name);
26717 if (MACHOPIC_PURE)
26719 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
26720 fprintf (file, "\tpushl\t%%eax\n");
26722 else
26723 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
26725 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
26727 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
26728 fprintf (file, "%s:\n", lazy_ptr_name);
26729 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26730 fprintf (file, "\t.long %s\n", binder_name);
26733 void
26734 darwin_x86_file_end (void)
26736 darwin_file_end ();
26737 ix86_file_end ();
26739 #endif /* TARGET_MACHO */
26741 /* Order the registers for the register allocator. */
26743 void
26744 x86_order_regs_for_local_alloc (void)
26746 int pos = 0;
26747 int i;
26749 /* First allocate the local general purpose registers. */
26750 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26751 if (GENERAL_REGNO_P (i) && call_used_regs[i])
26752 reg_alloc_order [pos++] = i;
26754 /* Global general purpose registers. */
26755 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26756 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
26757 reg_alloc_order [pos++] = i;
26759 /* x87 registers come first in case we are doing FP math
26760 using them. */
26761 if (!TARGET_SSE_MATH)
26762 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26763 reg_alloc_order [pos++] = i;
26765 /* SSE registers. */
26766 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
26767 reg_alloc_order [pos++] = i;
26768 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
26769 reg_alloc_order [pos++] = i;
26771 /* x87 registers. */
26772 if (TARGET_SSE_MATH)
26773 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26774 reg_alloc_order [pos++] = i;
26776 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
26777 reg_alloc_order [pos++] = i;
26779 /* Initialize the rest of the array, as we do not allocate some registers
26780 at all. */
26781 while (pos < FIRST_PSEUDO_REGISTER)
26782 reg_alloc_order [pos++] = 0;
26785 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
26786 struct attribute_spec.handler. */
26787 static tree
26788 ix86_handle_abi_attribute (tree *node, tree name,
26789 tree args ATTRIBUTE_UNUSED,
26790 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26792 if (TREE_CODE (*node) != FUNCTION_TYPE
26793 && TREE_CODE (*node) != METHOD_TYPE
26794 && TREE_CODE (*node) != FIELD_DECL
26795 && TREE_CODE (*node) != TYPE_DECL)
26797 warning (OPT_Wattributes, "%qs attribute only applies to functions",
26798 IDENTIFIER_POINTER (name));
26799 *no_add_attrs = true;
26800 return NULL_TREE;
26802 if (!TARGET_64BIT)
26804 warning (OPT_Wattributes, "%qs attribute only available for 64-bit",
26805 IDENTIFIER_POINTER (name));
26806 *no_add_attrs = true;
26807 return NULL_TREE;
26810 /* The ms_abi and sysv_abi attributes are mutually exclusive. */
26811 if (is_attribute_p ("ms_abi", name))
26813 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
26815 error ("ms_abi and sysv_abi attributes are not compatible");
26818 return NULL_TREE;
26820 else if (is_attribute_p ("sysv_abi", name))
26822 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
26824 error ("ms_abi and sysv_abi attributes are not compatible");
26827 return NULL_TREE;
26830 return NULL_TREE;
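/* Usage example (illustrative): the attribute selects the calling
   convention of a single x86-64 function, e.g.

       int __attribute__ ((ms_abi)) callback (int a, int b);

   Putting both ms_abi and sysv_abi on one type is rejected above.  */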
26833 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
26834 struct attribute_spec.handler. */
26835 static tree
26836 ix86_handle_struct_attribute (tree *node, tree name,
26837 tree args ATTRIBUTE_UNUSED,
26838 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26840 tree *type = NULL;
26841 if (DECL_P (*node))
26843 if (TREE_CODE (*node) == TYPE_DECL)
26844 type = &TREE_TYPE (*node);
26846 else
26847 type = node;
26849 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
26850 || TREE_CODE (*type) == UNION_TYPE)))
26852 warning (OPT_Wattributes, "%qs attribute ignored",
26853 IDENTIFIER_POINTER (name));
26854 *no_add_attrs = true;
26857 else if ((is_attribute_p ("ms_struct", name)
26858 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
26859 || ((is_attribute_p ("gcc_struct", name)
26860 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
26862 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
26863 IDENTIFIER_POINTER (name));
26864 *no_add_attrs = true;
26867 return NULL_TREE;
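/* Usage example (illustrative): ms_struct requests the MS bit-field
   layout for a single type, mirroring -mms-bitfields:

       struct __attribute__ ((ms_struct)) flags
       {
         unsigned a : 3;
         unsigned b : 13;
       };

   The struct tag "flags" is just a placeholder for this sketch.  */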
26870 static bool
26871 ix86_ms_bitfield_layout_p (const_tree record_type)
26873 return (TARGET_MS_BITFIELD_LAYOUT &&
26874 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
26875 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
26878 /* Returns an expression indicating where the this parameter is
26879 located on entry to the FUNCTION. */
26881 static rtx
26882 x86_this_parameter (tree function)
26884 tree type = TREE_TYPE (function);
26885 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
26886 int nregs;
26888 if (TARGET_64BIT)
26890 const int *parm_regs;
26892 if (ix86_function_type_abi (type) == MS_ABI)
26893 parm_regs = x86_64_ms_abi_int_parameter_registers;
26894 else
26895 parm_regs = x86_64_int_parameter_registers;
26896 return gen_rtx_REG (DImode, parm_regs[aggr]);
26899 nregs = ix86_function_regparm (type, function);
26901 if (nregs > 0 && !stdarg_p (type))
26903 int regno;
26905 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
26906 regno = aggr ? DX_REG : CX_REG;
26907 else
26909 regno = AX_REG;
26910 if (aggr)
26912 regno = DX_REG;
26913 if (nregs == 1)
26914 return gen_rtx_MEM (SImode,
26915 plus_constant (stack_pointer_rtx, 4));
26918 return gen_rtx_REG (SImode, regno);
26921 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
26924 /* Determine whether x86_output_mi_thunk can succeed. */
26926 static bool
26927 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
26928 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
26929 HOST_WIDE_INT vcall_offset, const_tree function)
26931 /* 64-bit can handle anything. */
26932 if (TARGET_64BIT)
26933 return true;
26935 /* For 32-bit, everything's fine if we have one free register. */
26936 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
26937 return true;
26939 /* Need a free register for vcall_offset. */
26940 if (vcall_offset)
26941 return false;
26943 /* Need a free register for GOT references. */
26944 if (flag_pic && !(*targetm.binds_local_p) (function))
26945 return false;
26947 /* Otherwise ok. */
26948 return true;
26951 /* Output the assembler code for a thunk function. THUNK_DECL is the
26952 declaration for the thunk function itself, FUNCTION is the decl for
26953 the target function. DELTA is an immediate constant offset to be
26954 added to THIS. If VCALL_OFFSET is nonzero, the word at
26955 *(*this + vcall_offset) should be added to THIS. */
26957 static void
26958 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
26959 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
26960 HOST_WIDE_INT vcall_offset, tree function)
26962 rtx xops[3];
26963 rtx this_param = x86_this_parameter (function);
26964 rtx this_reg, tmp;
26966 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
26967 pull it in now and let DELTA benefit. */
26968 if (REG_P (this_param))
26969 this_reg = this_param;
26970 else if (vcall_offset)
26972 /* Put the this parameter into %eax. */
26973 xops[0] = this_param;
26974 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
26975 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26977 else
26978 this_reg = NULL_RTX;
26980 /* Adjust the this parameter by a fixed constant. */
26981 if (delta)
26983 xops[0] = GEN_INT (delta);
26984 xops[1] = this_reg ? this_reg : this_param;
26985 if (TARGET_64BIT)
26987 if (!x86_64_general_operand (xops[0], DImode))
26989 tmp = gen_rtx_REG (DImode, R10_REG);
26990 xops[1] = tmp;
26991 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
26992 xops[0] = tmp;
26993 xops[1] = this_param;
26995 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
26997 else
26998 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27001 /* Adjust the this parameter by a value stored in the vtable. */
27002 if (vcall_offset)
27004 if (TARGET_64BIT)
27005 tmp = gen_rtx_REG (DImode, R10_REG);
27006 else
27008 int tmp_regno = CX_REG;
27009 if (lookup_attribute ("fastcall",
27010 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27011 tmp_regno = AX_REG;
27012 tmp = gen_rtx_REG (SImode, tmp_regno);
27015 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27016 xops[1] = tmp;
27017 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27019 /* Adjust the this parameter. */
27020 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27021 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27023 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27024 xops[0] = GEN_INT (vcall_offset);
27025 xops[1] = tmp2;
27026 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27027 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27029 xops[1] = this_reg;
27030 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27033 /* If necessary, drop THIS back to its stack slot. */
27034 if (this_reg && this_reg != this_param)
27036 xops[0] = this_reg;
27037 xops[1] = this_param;
27038 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27041 xops[0] = XEXP (DECL_RTL (function), 0);
27042 if (TARGET_64BIT)
27044 if (!flag_pic || (*targetm.binds_local_p) (function))
27045 output_asm_insn ("jmp\t%P0", xops);
27046 /* All thunks should be in the same object as their target,
27047 and thus binds_local_p should be true. */
27048 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27049 gcc_unreachable ();
27050 else
27052 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27053 tmp = gen_rtx_CONST (Pmode, tmp);
27054 tmp = gen_rtx_MEM (QImode, tmp);
27055 xops[0] = tmp;
27056 output_asm_insn ("jmp\t%A0", xops);
27059 else
27061 if (!flag_pic || (*targetm.binds_local_p) (function))
27062 output_asm_insn ("jmp\t%P0", xops);
27063 else
27064 #if TARGET_MACHO
27065 if (TARGET_MACHO)
27067 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27068 tmp = (gen_rtx_SYMBOL_REF
27069 (Pmode,
27070 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27071 tmp = gen_rtx_MEM (QImode, tmp);
27072 xops[0] = tmp;
27073 output_asm_insn ("jmp\t%0", xops);
27075 else
27076 #endif /* TARGET_MACHO */
27078 tmp = gen_rtx_REG (SImode, CX_REG);
27079 output_set_got (tmp, NULL_RTX);
27081 xops[1] = tmp;
27082 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27083 output_asm_insn ("jmp\t{*}%1", xops);
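/* Illustrative example (a sketch): on x86-64 with a small DELTA and no
   VCALL_OFFSET, the code above degenerates to

       addq $8, %rdi
       jmp  target

   since THIS arrives in %rdi and the jump binds locally; "target" and
   the delta of 8 are hypothetical.  */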
27088 static void
27089 x86_file_start (void)
27091 default_file_start ();
27092 #if TARGET_MACHO
27093 darwin_file_start ();
27094 #endif
27095 if (X86_FILE_START_VERSION_DIRECTIVE)
27096 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27097 if (X86_FILE_START_FLTUSED)
27098 fputs ("\t.global\t__fltused\n", asm_out_file);
27099 if (ix86_asm_dialect == ASM_INTEL)
27100 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
27104 x86_field_alignment (tree field, int computed)
27106 enum machine_mode mode;
27107 tree type = TREE_TYPE (field);
27109 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
27110 return computed;
27111 mode = TYPE_MODE (strip_array_types (type));
27112 if (mode == DFmode || mode == DCmode
27113 || GET_MODE_CLASS (mode) == MODE_INT
27114 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
27115 return MIN (32, computed);
27116 return computed;
27119 /* Output assembler code to FILE to increment profiler label # LABELNO
27120 for profiling a function entry. */
27121 void
27122 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
27124 if (TARGET_64BIT)
27126 #ifndef NO_PROFILE_COUNTERS
27127 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
27128 #endif
27130 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
27131 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
27132 else
27133 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
27135 else if (flag_pic)
27137 #ifndef NO_PROFILE_COUNTERS
27138 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
27139 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
27140 #endif
27141 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
27143 else
27145 #ifndef NO_PROFILE_COUNTERS
27146 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
27147 PROFILE_COUNT_REGISTER);
27148 #endif
27149 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
27153 /* We don't have exact information about the insn sizes, but we may assume
27154 quite safely that we are informed about all 1 byte insns and memory
27155 address sizes. This is enough to eliminate unnecessary padding in
27156 99% of cases. */
27158 static int
27159 min_insn_size (rtx insn)
27161 int l = 0;
27163 if (!INSN_P (insn) || !active_insn_p (insn))
27164 return 0;
27166 /* Discard alignments we've emitted, and jump instructions. */
27167 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
27168 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
27169 return 0;
27170 if (JUMP_P (insn)
27171 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
27172 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
27173 return 0;
27175 /* Important case: calls are always 5 bytes.
27176 It is common to have many calls in a row. */
27177 if (CALL_P (insn)
27178 && symbolic_reference_mentioned_p (PATTERN (insn))
27179 && !SIBLING_CALL_P (insn))
27180 return 5;
27181 if (get_attr_length (insn) <= 1)
27182 return 1;
27184 /* For normal instructions we may rely on the sizes of addresses
27185 and the presence of a symbol to require 4 bytes of encoding.
27186 This is not the case for jumps, where references are PC-relative. */
27187 if (!JUMP_P (insn))
27189 l = get_attr_length_address (insn);
27190 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27191 l = 4;
27193 if (l)
27194 return 1+l;
27195 else
27196 return 2;
27199 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a
27200 16-byte window. */
27202 static void
27203 ix86_avoid_jump_misspredicts (void)
27205 rtx insn, start = get_insns ();
27206 int nbytes = 0, njumps = 0;
27207 int isjump = 0;
27209 /* Look for all minimal intervals of instructions containing 4 jumps.
27210 The intervals are bounded by START and INSN. NBYTES is the total
27211 size of instructions in the interval including INSN and not including
27212 START. When NBYTES is smaller than 16 bytes, it is possible
27213 that the end of START and INSN ends up in the same 16-byte page.
27215 The smallest offset in the page at which INSN can start is the case where
27216 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
27217 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN). */
27219 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
27222 nbytes += min_insn_size (insn);
27223 if (dump_file)
27224 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
27225 INSN_UID (insn), min_insn_size (insn));
27226 if ((JUMP_P (insn)
27227 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27228 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
27229 || CALL_P (insn))
27230 njumps++;
27231 else
27232 continue;
27234 while (njumps > 3)
27236 start = NEXT_INSN (start);
27237 if ((JUMP_P (start)
27238 && GET_CODE (PATTERN (start)) != ADDR_VEC
27239 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27240 || CALL_P (start))
27241 njumps--, isjump = 1;
27242 else
27243 isjump = 0;
27244 nbytes -= min_insn_size (start);
27246 gcc_assert (njumps >= 0);
27247 if (dump_file)
27248 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27249 INSN_UID (start), INSN_UID (insn), nbytes);
27251 if (njumps == 3 && isjump && nbytes < 16)
27253 int padsize = 15 - nbytes + min_insn_size (insn);
27255 if (dump_file)
27256 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27257 INSN_UID (insn), padsize);
27258 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
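/* Worked example (illustrative): if the window holding the three
   previous jumps plus INSN spans nbytes == 12 and INSN itself is 2
   bytes, then padsize == 15 - 12 + 2 == 5, enough padding to push the
   fourth jump out of the 16-byte window.  */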
27263 /* AMD Athlon works faster
27264 when RET is not the destination of a conditional jump or directly preceded
27265 by another jump instruction. We avoid the penalty by inserting a NOP just
27266 before the RET instructions in such cases. */
27267 static void
27268 ix86_pad_returns (void)
27270 edge e;
27271 edge_iterator ei;
27273 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27275 basic_block bb = e->src;
27276 rtx ret = BB_END (bb);
27277 rtx prev;
27278 bool replace = false;
27280 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27281 || optimize_bb_for_size_p (bb))
27282 continue;
27283 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27284 if (active_insn_p (prev) || LABEL_P (prev))
27285 break;
27286 if (prev && LABEL_P (prev))
27288 edge e;
27289 edge_iterator ei;
27291 FOR_EACH_EDGE (e, ei, bb->preds)
27292 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27293 && !(e->flags & EDGE_FALLTHRU))
27294 replace = true;
27296 if (!replace)
27298 prev = prev_active_insn (ret);
27299 if (prev
27300 && ((JUMP_P (prev) && any_condjump_p (prev))
27301 || CALL_P (prev)))
27302 replace = true;
27303 /* Empty functions get a branch mispredict even when the jump destination
27304 is not visible to us. */
27305 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
27306 replace = true;
27308 if (replace)
27310 emit_insn_before (gen_return_internal_long (), ret);
27311 delete_insn (ret);
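/* Illustrative note (a sketch): return_internal_long expands to a
   two-byte "rep ret" sequence of the kind recommended for K8, so the
   rewritten return no longer shares a predictor slot with the
   preceding jump.  */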
27316 /* Implement machine specific optimizations. We implement padding of returns
27317 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
27318 static void
27319 ix86_reorg (void)
27321 if (TARGET_PAD_RETURNS && optimize
27322 && optimize_function_for_speed_p (cfun))
27323 ix86_pad_returns ();
27324 if (TARGET_FOUR_JUMP_LIMIT && optimize
27325 && optimize_function_for_speed_p (cfun))
27326 ix86_avoid_jump_misspredicts ();
27329 /* Return nonzero when a QImode register that must be represented via a REX
27330 prefix is used. */
27331 bool
27332 x86_extended_QIreg_mentioned_p (rtx insn)
27334 int i;
27335 extract_insn_cached (insn);
27336 for (i = 0; i < recog_data.n_operands; i++)
27337 if (REG_P (recog_data.operand[i])
27338 && REGNO (recog_data.operand[i]) > BX_REG)
27339 return true;
27340 return false;
27343 /* Return nonzero when P points to a register encoded via a REX prefix.
27344 Called via for_each_rtx. */
27345 static int
27346 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27348 unsigned int regno;
27349 if (!REG_P (*p))
27350 return 0;
27351 regno = REGNO (*p);
27352 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27355 /* Return true when INSN mentions a register that must be encoded using a
27356 REX prefix. */
27357 bool
27358 x86_extended_reg_mentioned_p (rtx insn)
27360 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27361 extended_reg_mentioned_1, NULL);
27364 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27365 optabs would emit if we didn't have TFmode patterns. */
27367 void
27368 x86_emit_floatuns (rtx operands[2])
27370 rtx neglab, donelab, i0, i1, f0, in, out;
27371 enum machine_mode mode, inmode;
27373 inmode = GET_MODE (operands[1]);
27374 gcc_assert (inmode == SImode || inmode == DImode);
27376 out = operands[0];
27377 in = force_reg (inmode, operands[1]);
27378 mode = GET_MODE (out);
27379 neglab = gen_label_rtx ();
27380 donelab = gen_label_rtx ();
27381 f0 = gen_reg_rtx (mode);
27383 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27385 expand_float (out, in, 0);
27387 emit_jump_insn (gen_jump (donelab));
27388 emit_barrier ();
27390 emit_label (neglab);
27392 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27393 1, OPTAB_DIRECT);
27394 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27395 1, OPTAB_DIRECT);
27396 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27398 expand_float (f0, i0, 0);
27400 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27402 emit_label (donelab);
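/* C equivalent of the expansion above (illustrative sketch): negative
   inputs are halved with the low bit folded back in, converted as a
   signed value, and then doubled, which preserves correct rounding:

       double u64_to_f (unsigned long long x)
       {
         if ((long long) x >= 0)
           return (double) (long long) x;
         unsigned long long h = (x >> 1) | (x & 1);
         double f = (double) (long long) h;
         return f + f;
       }

   The function name u64_to_f is hypothetical.  */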
27405 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27406 with all elements equal to VAR. Return true if successful. */
27408 static bool
27409 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27410 rtx target, rtx val)
27412 enum machine_mode hmode, smode, wsmode, wvmode;
27413 rtx x;
27415 switch (mode)
27417 case V2SImode:
27418 case V2SFmode:
27419 if (!mmx_ok)
27420 return false;
27421 /* FALLTHRU */
27423 case V2DFmode:
27424 case V2DImode:
27425 case V4SFmode:
27426 case V4SImode:
27427 val = force_reg (GET_MODE_INNER (mode), val);
27428 x = gen_rtx_VEC_DUPLICATE (mode, val);
27429 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27430 return true;
27432 case V4HImode:
27433 if (!mmx_ok)
27434 return false;
27435 if (TARGET_SSE || TARGET_3DNOW_A)
27437 val = gen_lowpart (SImode, val);
27438 x = gen_rtx_TRUNCATE (HImode, val);
27439 x = gen_rtx_VEC_DUPLICATE (mode, x);
27440 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27441 return true;
27443 else
27445 smode = HImode;
27446 wsmode = SImode;
27447 wvmode = V2SImode;
27448 goto widen;
27451 case V8QImode:
27452 if (!mmx_ok)
27453 return false;
27454 smode = QImode;
27455 wsmode = HImode;
27456 wvmode = V4HImode;
27457 goto widen;
27458 case V8HImode:
27459 if (TARGET_SSE2)
27461 rtx tmp1, tmp2;
27462 /* Extend HImode to SImode using a paradoxical SUBREG. */
27463 tmp1 = gen_reg_rtx (SImode);
27464 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27465 /* Insert the SImode value as low element of V4SImode vector. */
27466 tmp2 = gen_reg_rtx (V4SImode);
27467 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27468 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27469 CONST0_RTX (V4SImode),
27470 const1_rtx);
27471 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27472 /* Cast the V4SImode vector back to a V8HImode vector. */
27473 tmp1 = gen_reg_rtx (V8HImode);
27474 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
27475 /* Duplicate the low short through the whole low SImode word. */
27476 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
27477 /* Cast the V8HImode vector back to a V4SImode vector. */
27478 tmp2 = gen_reg_rtx (V4SImode);
27479 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27480 /* Replicate the low element of the V4SImode vector. */
27481 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27482 /* Cast the V4SImode vector back to V8HImode, and store in target. */
27483 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
27484 return true;
27486 smode = HImode;
27487 wsmode = SImode;
27488 wvmode = V4SImode;
27489 goto widen;
27490 case V16QImode:
27491 if (TARGET_SSE2)
27493 rtx tmp1, tmp2;
27494 /* Extend QImode to SImode using a paradoxical SUBREG. */
27495 tmp1 = gen_reg_rtx (SImode);
27496 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27497 /* Insert the SImode value as low element of V4SImode vector. */
27498 tmp2 = gen_reg_rtx (V4SImode);
27499 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27500 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27501 CONST0_RTX (V4SImode),
27502 const1_rtx);
27503 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27504 /* Cast the V4SImode vector back to a V16QImode vector. */
27505 tmp1 = gen_reg_rtx (V16QImode);
27506 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
27507 /* Duplicate the low byte through the whole low SImode word. */
27508 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27509 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27510 /* Cast the V16QImode vector back to a V4SImode vector. */
27511 tmp2 = gen_reg_rtx (V4SImode);
27512 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27513 /* Replicate the low element of the V4SImode vector. */
27514 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27515 /* Cast the V4SImode vector back to V16QImode, and store in target. */
27516 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
27517 return true;
27519 smode = QImode;
27520 wsmode = HImode;
27521 wvmode = V8HImode;
27522 goto widen;
27523 widen:
27524 /* Replicate the value once into the next wider mode and recurse. */
27525 val = convert_modes (wsmode, smode, val, true);
27526 x = expand_simple_binop (wsmode, ASHIFT, val,
27527 GEN_INT (GET_MODE_BITSIZE (smode)),
27528 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27529 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
27531 x = gen_reg_rtx (wvmode);
27532 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
27533 gcc_unreachable ();
27534 emit_move_insn (target, gen_lowpart (mode, x));
27535 return true;
27537 case V4DFmode:
27538 hmode = V2DFmode;
27539 goto half;
27540 case V4DImode:
27541 hmode = V2DImode;
27542 goto half;
27543 case V8SFmode:
27544 hmode = V4SFmode;
27545 goto half;
27546 case V8SImode:
27547 hmode = V4SImode;
27548 goto half;
27549 case V16HImode:
27550 hmode = V8HImode;
27551 goto half;
27552 case V32QImode:
27553 hmode = V16QImode;
27554 goto half;
27555 half:
27557 rtx tmp = gen_reg_rtx (hmode);
27558 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
27559 emit_insn (gen_rtx_SET (VOIDmode, target,
27560 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
27562 return true;
27564 default:
27565 return false;
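/* Illustrative example (a sketch) of the "widen" strategy above:
   duplicating a QImode value V across V8QImode first replicates it
   inside an HImode word, w = (V << 8) | V, then recursively broadcasts
   w over V4HImode and reinterprets the result as V8QImode.  */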
27569 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27570 whose ONE_VAR element is VAR, and whose other elements are zero. Return true
27571 if successful. */
27573 static bool
27574 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
27575 rtx target, rtx var, int one_var)
27577 enum machine_mode vsimode;
27578 rtx new_target;
27579 rtx x, tmp;
27580 bool use_vector_set = false;
27582 switch (mode)
27584 case V2DImode:
27585 /* For SSE4.1, we normally use vector set. But if the second
27586 element is zero and inter-unit moves are OK, we use movq
27587 instead. */
27588 use_vector_set = (TARGET_64BIT
27589 && TARGET_SSE4_1
27590 && !(TARGET_INTER_UNIT_MOVES
27591 && one_var == 0));
27592 break;
27593 case V16QImode:
27594 case V4SImode:
27595 case V4SFmode:
27596 use_vector_set = TARGET_SSE4_1;
27597 break;
27598 case V8HImode:
27599 use_vector_set = TARGET_SSE2;
27600 break;
27601 case V4HImode:
27602 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27603 break;
27604 case V32QImode:
27605 case V16HImode:
27606 case V8SImode:
27607 case V8SFmode:
27608 case V4DFmode:
27609 use_vector_set = TARGET_AVX;
27610 break;
27611 case V4DImode:
27612 /* Use ix86_expand_vector_set in 64bit mode only. */
27613 use_vector_set = TARGET_AVX && TARGET_64BIT;
27614 break;
27615 default:
27616 break;
27619 if (use_vector_set)
27621 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27622 var = force_reg (GET_MODE_INNER (mode), var);
27623 ix86_expand_vector_set (mmx_ok, target, var, one_var);
27624 return true;
27627 switch (mode)
27629 case V2SFmode:
27630 case V2SImode:
27631 if (!mmx_ok)
27632 return false;
27633 /* FALLTHRU */
27635 case V2DFmode:
27636 case V2DImode:
27637 if (one_var != 0)
27638 return false;
27639 var = force_reg (GET_MODE_INNER (mode), var);
27640 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
27641 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27642 return true;
27644 case V4SFmode:
27645 case V4SImode:
27646 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
27647 new_target = gen_reg_rtx (mode);
27648 else
27649 new_target = target;
27650 var = force_reg (GET_MODE_INNER (mode), var);
27651 x = gen_rtx_VEC_DUPLICATE (mode, var);
27652 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
27653 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
27654 if (one_var != 0)
27656 /* We need to shuffle the value to the correct position, so
27657 create a new pseudo to store the intermediate result. */
27659 /* With SSE2, we can use the integer shuffle insns. */
27660 if (mode != V4SFmode && TARGET_SSE2)
27662 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
27663 GEN_INT (1),
27664 GEN_INT (one_var == 1 ? 0 : 1),
27665 GEN_INT (one_var == 2 ? 0 : 1),
27666 GEN_INT (one_var == 3 ? 0 : 1)));
27667 if (target != new_target)
27668 emit_move_insn (target, new_target);
27669 return true;
27672 /* Otherwise convert the intermediate result to V4SFmode and
27673 use the SSE1 shuffle instructions. */
27674 if (mode != V4SFmode)
27676 tmp = gen_reg_rtx (V4SFmode);
27677 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
27679 else
27680 tmp = new_target;
27682 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
27683 GEN_INT (1),
27684 GEN_INT (one_var == 1 ? 0 : 1),
27685 GEN_INT (one_var == 2 ? 0+4 : 1+4),
27686 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
27688 if (mode != V4SFmode)
27689 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
27690 else if (tmp != target)
27691 emit_move_insn (target, tmp);
27693 else if (target != new_target)
27694 emit_move_insn (target, new_target);
27695 return true;
27697 case V8HImode:
27698 case V16QImode:
27699 vsimode = V4SImode;
27700 goto widen;
27701 case V4HImode:
27702 case V8QImode:
27703 if (!mmx_ok)
27704 return false;
27705 vsimode = V2SImode;
27706 goto widen;
27707 widen:
27708 if (one_var != 0)
27709 return false;
27711 /* Zero extend the variable element to SImode and recurse. */
27712 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
27714 x = gen_reg_rtx (vsimode);
27715 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
27716 var, one_var))
27717 gcc_unreachable ();
27719 emit_move_insn (target, gen_lowpart (mode, x));
27720 return true;
27722 default:
27723 return false;
27727 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27728 consisting of the values in VALS. It is known that all elements
27729 except ONE_VAR are constants. Return true if successful. */
27731 static bool
27732 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
27733 rtx target, rtx vals, int one_var)
27735 rtx var = XVECEXP (vals, 0, one_var);
27736 enum machine_mode wmode;
27737 rtx const_vec, x;
27739 const_vec = copy_rtx (vals);
27740 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
27741 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
27743 switch (mode)
27745 case V2DFmode:
27746 case V2DImode:
27747 case V2SFmode:
27748 case V2SImode:
27749 /* For the two element vectors, it's just as easy to use
27750 the general case. */
27751 return false;
27753 case V4DImode:
27754 /* Use ix86_expand_vector_set in 64bit mode only. */
27755 if (!TARGET_64BIT)
27756 return false;
27757 case V4DFmode:
27758 case V8SFmode:
27759 case V8SImode:
27760 case V16HImode:
27761 case V32QImode:
27762 case V4SFmode:
27763 case V4SImode:
27764 case V8HImode:
27765 case V4HImode:
27766 break;
27768 case V16QImode:
27769 if (TARGET_SSE4_1)
27770 break;
27771 wmode = V8HImode;
27772 goto widen;
27773 case V8QImode:
27774 wmode = V4HImode;
27775 goto widen;
27776 widen:
27777 /* There's no way to set one QImode entry easily. Combine
27778 the variable value with its adjacent constant value, and
27779 promote to an HImode set. */
27780 x = XVECEXP (vals, 0, one_var ^ 1);
27781 if (one_var & 1)
27783 var = convert_modes (HImode, QImode, var, true);
27784 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
27785 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27786 x = GEN_INT (INTVAL (x) & 0xff);
27788 else
27790 var = convert_modes (HImode, QImode, var, true);
27791 x = gen_int_mode (INTVAL (x) << 8, HImode);
27793 if (x != const0_rtx)
27794 var = expand_simple_binop (HImode, IOR, var, x, var,
27795 1, OPTAB_LIB_WIDEN);
27797 x = gen_reg_rtx (wmode);
27798 emit_move_insn (x, gen_lowpart (wmode, const_vec));
27799 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
27801 emit_move_insn (target, gen_lowpart (mode, x));
27802 return true;
27804 default:
27805 return false;
27808 emit_move_insn (target, const_vec);
27809 ix86_expand_vector_set (mmx_ok, target, var, one_var);
27810 return true;
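/* Illustrative example (a sketch) of the QImode widening above: for a
   V8QImode vector whose only variable element sits at an odd index,
   the byte is fused with its constant neighbor into one HImode
   element, (var << 8) | (neighbor & 0xff), and the set is retried on
   V4HImode at index one_var >> 1.  */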
27813 /* A subroutine of ix86_expand_vector_init_general. Use vector
27814 concatenate to handle the most general case: all values variable,
27815 and none identical. */
27817 static void
27818 ix86_expand_vector_init_concat (enum machine_mode mode,
27819 rtx target, rtx *ops, int n)
27821 enum machine_mode cmode, hmode = VOIDmode;
27822 rtx first[8], second[4];
27823 rtvec v;
27824 int i, j;
27826 switch (n)
27828 case 2:
27829 switch (mode)
27831 case V8SImode:
27832 cmode = V4SImode;
27833 break;
27834 case V8SFmode:
27835 cmode = V4SFmode;
27836 break;
27837 case V4DImode:
27838 cmode = V2DImode;
27839 break;
27840 case V4DFmode:
27841 cmode = V2DFmode;
27842 break;
27843 case V4SImode:
27844 cmode = V2SImode;
27845 break;
27846 case V4SFmode:
27847 cmode = V2SFmode;
27848 break;
27849 case V2DImode:
27850 cmode = DImode;
27851 break;
27852 case V2SImode:
27853 cmode = SImode;
27854 break;
27855 case V2DFmode:
27856 cmode = DFmode;
27857 break;
27858 case V2SFmode:
27859 cmode = SFmode;
27860 break;
27861 default:
27862 gcc_unreachable ();
27865 if (!register_operand (ops[1], cmode))
27866 ops[1] = force_reg (cmode, ops[1]);
27867 if (!register_operand (ops[0], cmode))
27868 ops[0] = force_reg (cmode, ops[0]);
27869 emit_insn (gen_rtx_SET (VOIDmode, target,
27870 gen_rtx_VEC_CONCAT (mode, ops[0],
27871 ops[1])));
27872 break;
27874 case 4:
27875 switch (mode)
27877 case V4DImode:
27878 cmode = V2DImode;
27879 break;
27880 case V4DFmode:
27881 cmode = V2DFmode;
27882 break;
27883 case V4SImode:
27884 cmode = V2SImode;
27885 break;
27886 case V4SFmode:
27887 cmode = V2SFmode;
27888 break;
27889 default:
27890 gcc_unreachable ();
27892 goto half;
27894 case 8:
27895 switch (mode)
27897 case V8SImode:
27898 cmode = V2SImode;
27899 hmode = V4SImode;
27900 break;
27901 case V8SFmode:
27902 cmode = V2SFmode;
27903 hmode = V4SFmode;
27904 break;
27905 default:
27906 gcc_unreachable ();
27908 goto half;
27910 half:
27911 /* FIXME: We process inputs backward to help RA. PR 36222. */
27912 i = n - 1;
27913 j = (n >> 1) - 1;
27914 for (; i > 0; i -= 2, j--)
27916 first[j] = gen_reg_rtx (cmode);
27917 v = gen_rtvec (2, ops[i - 1], ops[i]);
27918 ix86_expand_vector_init (false, first[j],
27919 gen_rtx_PARALLEL (cmode, v));
27922 n >>= 1;
27923 if (n > 2)
27925 gcc_assert (hmode != VOIDmode);
27926 for (i = j = 0; i < n; i += 2, j++)
27928 second[j] = gen_reg_rtx (hmode);
27929 ix86_expand_vector_init_concat (hmode, second [j],
27930 &first [i], 2);
27932 n >>= 1;
27933 ix86_expand_vector_init_concat (mode, target, second, n);
27935 else
27936 ix86_expand_vector_init_concat (mode, target, first, n);
27937 break;
27939 default:
27940 gcc_unreachable ();
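/* Illustrative note (an assumed picture of the recursion above, not
   compiler code): for N == 4 elements a,b,c,d the loop builds the
   halves backward and then concatenates:

     first[1] = { c, d }                        built first, see PR 36222
     first[0] = { a, b }
     target   = VEC_CONCAT (first[0], first[1])

   For N == 8 one more level through HMODE (second[]) is added.  */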
27944 /* A subroutine of ix86_expand_vector_init_general. Use vector
27945 interleave to handle the most general case: all values variable,
27946 and none identical. */
27948 static void
27949 ix86_expand_vector_init_interleave (enum machine_mode mode,
27950 rtx target, rtx *ops, int n)
27952 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
27953 int i, j;
27954 rtx op0, op1;
27955 rtx (*gen_load_even) (rtx, rtx, rtx);
27956 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
27957 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
27959 switch (mode)
27961 case V8HImode:
27962 gen_load_even = gen_vec_setv8hi;
27963 gen_interleave_first_low = gen_vec_interleave_lowv4si;
27964 gen_interleave_second_low = gen_vec_interleave_lowv2di;
27965 inner_mode = HImode;
27966 first_imode = V4SImode;
27967 second_imode = V2DImode;
27968 third_imode = VOIDmode;
27969 break;
27970 case V16QImode:
27971 gen_load_even = gen_vec_setv16qi;
27972 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
27973 gen_interleave_second_low = gen_vec_interleave_lowv4si;
27974 inner_mode = QImode;
27975 first_imode = V8HImode;
27976 second_imode = V4SImode;
27977 third_imode = V2DImode;
27978 break;
27979 default:
27980 gcc_unreachable ();
27983 for (i = 0; i < n; i++)
27985 /* Extend the odd element to SImode using a paradoxical SUBREG. */

27986 op0 = gen_reg_rtx (SImode);
27987 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
27989 /* Insert the SImode value as low element of V4SImode vector. */
27990 op1 = gen_reg_rtx (V4SImode);
27991 op0 = gen_rtx_VEC_MERGE (V4SImode,
27992 gen_rtx_VEC_DUPLICATE (V4SImode,
27993 op0),
27994 CONST0_RTX (V4SImode),
27995 const1_rtx);
27996 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
27998 /* Cast the V4SImode vector back to a vector in the original mode. */
27999 op0 = gen_reg_rtx (mode);
28000 emit_move_insn (op0, gen_lowpart (mode, op1));
28002 /* Load even elements into the second position. */
28003 emit_insn ((*gen_load_even) (op0,
28004 force_reg (inner_mode,
28005 ops [i + i + 1]),
28006 const1_rtx));
28008 /* Cast vector to FIRST_IMODE vector. */
28009 ops[i] = gen_reg_rtx (first_imode);
28010 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
28013 /* Interleave low FIRST_IMODE vectors. */
28014 for (i = j = 0; i < n; i += 2, j++)
28016 op0 = gen_reg_rtx (first_imode);
28017 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
28019 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
28020 ops[j] = gen_reg_rtx (second_imode);
28021 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
28024 /* Interleave low SECOND_IMODE vectors. */
28025 switch (second_imode)
28027 case V4SImode:
28028 for (i = j = 0; i < n / 2; i += 2, j++)
28030 op0 = gen_reg_rtx (second_imode);
28031 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
28032 ops[i + 1]));
28034 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
28035 vector. */
28036 ops[j] = gen_reg_rtx (third_imode);
28037 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
28039 second_imode = V2DImode;
28040 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28041 /* FALLTHRU */
28043 case V2DImode:
28044 op0 = gen_reg_rtx (second_imode);
28045 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
28046 ops[1]));
28048 /* Cast the SECOND_IMODE vector back to a vector in the original
28049 mode. */
28050 emit_insn (gen_rtx_SET (VOIDmode, target,
28051 gen_lowpart (mode, op0)));
28052 break;
28054 default:
28055 gcc_unreachable ();
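/* Illustrative note (an assumed data flow, not compiler code): for
   V8HImode elements e0..e7 the loop above first forms four vectors
   p0..p3 whose low 32 bits hold the pairs (e0,e1) .. (e6,e7), then

     t0 = interleave_low_v4si (p0, p1)      lanes (e0,e1) (e2,e3) ..
     t1 = interleave_low_v4si (p2, p3)
     target = interleave_low_v2di (t0, t1)  all eight elements

   assembles the full vector from the low lanes.  */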
28059 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
28060 all values variable, and none identical. */
28062 static void
28063 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
28064 rtx target, rtx vals)
28066 rtx ops[32], op0, op1;
28067 enum machine_mode half_mode = VOIDmode;
28068 int n, i;
28070 switch (mode)
28072 case V2SFmode:
28073 case V2SImode:
28074 if (!mmx_ok && !TARGET_SSE)
28075 break;
28076 /* FALLTHRU */
28078 case V8SFmode:
28079 case V8SImode:
28080 case V4DFmode:
28081 case V4DImode:
28082 case V4SFmode:
28083 case V4SImode:
28084 case V2DFmode:
28085 case V2DImode:
28086 n = GET_MODE_NUNITS (mode);
28087 for (i = 0; i < n; i++)
28088 ops[i] = XVECEXP (vals, 0, i);
28089 ix86_expand_vector_init_concat (mode, target, ops, n);
28090 return;
28092 case V32QImode:
28093 half_mode = V16QImode;
28094 goto half;
28096 case V16HImode:
28097 half_mode = V8HImode;
28098 goto half;
28100 half:
28101 n = GET_MODE_NUNITS (mode);
28102 for (i = 0; i < n; i++)
28103 ops[i] = XVECEXP (vals, 0, i);
28104 op0 = gen_reg_rtx (half_mode);
28105 op1 = gen_reg_rtx (half_mode);
28106 ix86_expand_vector_init_interleave (half_mode, op0, ops,
28107 n >> 2);
28108 ix86_expand_vector_init_interleave (half_mode, op1,
28109 &ops [n >> 1], n >> 2);
28110 emit_insn (gen_rtx_SET (VOIDmode, target,
28111 gen_rtx_VEC_CONCAT (mode, op0, op1)));
28112 return;
28114 case V16QImode:
28115 if (!TARGET_SSE4_1)
28116 break;
28117 /* FALLTHRU */
28119 case V8HImode:
28120 if (!TARGET_SSE2)
28121 break;
28123 /* Don't use ix86_expand_vector_init_interleave if we can't
28124 move from GPR to SSE register directly. */
28125 if (!TARGET_INTER_UNIT_MOVES)
28126 break;
28128 n = GET_MODE_NUNITS (mode);
28129 for (i = 0; i < n; i++)
28130 ops[i] = XVECEXP (vals, 0, i);
28131 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
28132 return;
28134 case V4HImode:
28135 case V8QImode:
28136 break;
28138 default:
28139 gcc_unreachable ();
28143 int i, j, n_elts, n_words, n_elt_per_word;
28144 enum machine_mode inner_mode;
28145 rtx words[4], shift;
28147 inner_mode = GET_MODE_INNER (mode);
28148 n_elts = GET_MODE_NUNITS (mode);
28149 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
28150 n_elt_per_word = n_elts / n_words;
28151 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
28153 for (i = 0; i < n_words; ++i)
28155 rtx word = NULL_RTX;
28157 for (j = 0; j < n_elt_per_word; ++j)
28159 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
28160 elt = convert_modes (word_mode, inner_mode, elt, true);
28162 if (j == 0)
28163 word = elt;
28164 else
28166 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
28167 word, 1, OPTAB_LIB_WIDEN);
28168 word = expand_simple_binop (word_mode, IOR, word, elt,
28169 word, 1, OPTAB_LIB_WIDEN);
28173 words[i] = word;
28176 if (n_words == 1)
28177 emit_move_insn (target, gen_lowpart (mode, words[0]));
28178 else if (n_words == 2)
28180 rtx tmp = gen_reg_rtx (mode);
28181 emit_clobber (tmp);
28182 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
28183 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
28184 emit_move_insn (target, tmp);
28186 else if (n_words == 4)
28188 rtx tmp = gen_reg_rtx (V4SImode);
28189 gcc_assert (word_mode == SImode);
28190 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28191 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28192 emit_move_insn (target, gen_lowpart (mode, tmp));
28194 else
28195 gcc_unreachable ();
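/* Illustrative sketch (an assumed scalar analogue of the word-building
   loop above, not compiler code): four HImode elements h0..h3 are
   packed little-endian into one 64-bit word:

     unsigned long long word = h3;
     word = (word << 16) | h2;
     word = (word << 16) | h1;
     word = (word << 16) | h0;

   so the lowest-indexed element lands in the least significant
   bits.  */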
28199 /* Initialize vector TARGET via VALS. Suppress the use of MMX
28200 instructions unless MMX_OK is true. */
28202 void
28203 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28205 enum machine_mode mode = GET_MODE (target);
28206 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28207 int n_elts = GET_MODE_NUNITS (mode);
28208 int n_var = 0, one_var = -1;
28209 bool all_same = true, all_const_zero = true;
28210 int i;
28211 rtx x;
28213 for (i = 0; i < n_elts; ++i)
28215 x = XVECEXP (vals, 0, i);
28216 if (!(CONST_INT_P (x)
28217 || GET_CODE (x) == CONST_DOUBLE
28218 || GET_CODE (x) == CONST_FIXED))
28219 n_var++, one_var = i;
28220 else if (x != CONST0_RTX (inner_mode))
28221 all_const_zero = false;
28222 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28223 all_same = false;
28226 /* Constants are best loaded from the constant pool. */
28227 if (n_var == 0)
28229 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28230 return;
28233 /* If all values are identical, broadcast the value. */
28234 if (all_same
28235 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28236 XVECEXP (vals, 0, 0)))
28237 return;
28239 /* Values where only one field is non-constant are best loaded from
28240 the pool and overwritten via move later. */
28241 if (n_var == 1)
28243 if (all_const_zero
28244 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28245 XVECEXP (vals, 0, one_var),
28246 one_var))
28247 return;
28249 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
28250 return;
28253 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
28256 void
28257 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28259 enum machine_mode mode = GET_MODE (target);
28260 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28261 enum machine_mode half_mode;
28262 bool use_vec_merge = false;
28263 rtx tmp;
28264 static rtx (*gen_extract[6][2]) (rtx, rtx)
28266 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28267 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28268 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28269 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28270 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28271 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28273 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28275 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28276 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28277 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28278 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28279 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28280 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
28282 int i, j, n;
28284 switch (mode)
28286 case V2SFmode:
28287 case V2SImode:
28288 if (mmx_ok)
28290 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28291 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28292 if (elt == 0)
28293 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28294 else
28295 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28296 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28297 return;
28299 break;
28301 case V2DImode:
28302 use_vec_merge = TARGET_SSE4_1;
28303 if (use_vec_merge)
28304 break;
28306 case V2DFmode:
28308 rtx op0, op1;
28310 /* For the two element vectors, we implement a VEC_CONCAT with
28311 the extraction of the other element. */
28313 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28314 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28316 if (elt == 0)
28317 op0 = val, op1 = tmp;
28318 else
28319 op0 = tmp, op1 = val;
28321 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28322 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28324 return;
28326 case V4SFmode:
28327 use_vec_merge = TARGET_SSE4_1;
28328 if (use_vec_merge)
28329 break;
28331 switch (elt)
28333 case 0:
28334 use_vec_merge = true;
28335 break;
28337 case 1:
28338 /* tmp = target = A B C D */
28339 tmp = copy_to_reg (target);
28340 /* target = A A B B */
28341 emit_insn (gen_sse_unpcklps (target, target, target));
28342 /* target = X A B B */
28343 ix86_expand_vector_set (false, target, val, 0);
28344 /* target = A X C D */
28345 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28346 GEN_INT (1), GEN_INT (0),
28347 GEN_INT (2+4), GEN_INT (3+4)));
28348 return;
28350 case 2:
28351 /* tmp = target = A B C D */
28352 tmp = copy_to_reg (target);
28353 /* tmp = X B C D */
28354 ix86_expand_vector_set (false, tmp, val, 0);
28355 /* target = A B X D */
28356 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28357 GEN_INT (0), GEN_INT (1),
28358 GEN_INT (0+4), GEN_INT (3+4)));
28359 return;
28361 case 3:
28362 /* tmp = target = A B C D */
28363 tmp = copy_to_reg (target);
28364 /* tmp = X B C D */
28365 ix86_expand_vector_set (false, tmp, val, 0);
28366 /* target = A B C X */
28367 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28368 GEN_INT (0), GEN_INT (1),
28369 GEN_INT (2+4), GEN_INT (0+4)));
28370 return;
28372 default:
28373 gcc_unreachable ();
28375 break;
28377 case V4SImode:
28378 use_vec_merge = TARGET_SSE4_1;
28379 if (use_vec_merge)
28380 break;
28382 /* Element 0 handled by vec_merge below. */
28383 if (elt == 0)
28385 use_vec_merge = true;
28386 break;
28389 if (TARGET_SSE2)
28391 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28392 store into element 0, then shuffle them back. */
28394 rtx order[4];
28396 order[0] = GEN_INT (elt);
28397 order[1] = const1_rtx;
28398 order[2] = const2_rtx;
28399 order[3] = GEN_INT (3);
28400 order[elt] = const0_rtx;
28402 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28403 order[1], order[2], order[3]));
28405 ix86_expand_vector_set (false, target, val, 0);
28407 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28408 order[1], order[2], order[3]));
28410 else
28412 /* For SSE1, we have to reuse the V4SF code. */
28413 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28414 gen_lowpart (SFmode, val), elt);
28416 return;
28418 case V8HImode:
28419 use_vec_merge = TARGET_SSE2;
28420 break;
28421 case V4HImode:
28422 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28423 break;
28425 case V16QImode:
28426 use_vec_merge = TARGET_SSE4_1;
28427 break;
28429 case V8QImode:
28430 break;
28432 case V32QImode:
28433 half_mode = V16QImode;
28434 j = 0;
28435 n = 16;
28436 goto half;
28438 case V16HImode:
28439 half_mode = V8HImode;
28440 j = 1;
28441 n = 8;
28442 goto half;
28444 case V8SImode:
28445 half_mode = V4SImode;
28446 j = 2;
28447 n = 4;
28448 goto half;
28450 case V4DImode:
28451 half_mode = V2DImode;
28452 j = 3;
28453 n = 2;
28454 goto half;
28456 case V8SFmode:
28457 half_mode = V4SFmode;
28458 j = 4;
28459 n = 4;
28460 goto half;
28462 case V4DFmode:
28463 half_mode = V2DFmode;
28464 j = 5;
28465 n = 2;
28466 goto half;
28468 half:
28469 /* Compute offset. */
28470 i = elt / n;
28471 elt %= n;
28473 gcc_assert (i <= 1);
28475 /* Extract the half. */
28476 tmp = gen_reg_rtx (half_mode);
28477 emit_insn ((*gen_extract[j][i]) (tmp, target));
28479 /* Put val in tmp at elt. */
28480 ix86_expand_vector_set (false, tmp, val, elt);
28482 /* Put it back. */
28483 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
28484 return;
28486 default:
28487 break;
28490 if (use_vec_merge)
28492 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28493 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28494 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28496 else
28498 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28500 emit_move_insn (mem, target);
28502 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28503 emit_move_insn (tmp, val);
28505 emit_move_insn (target, mem);
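/* Illustrative sketch (an assumed scalar analogue of the memory
   fallback above, not compiler code): when no vec_merge pattern
   applies, the element is replaced through a stack temporary:

     float tmp[4];                           V4SFmode example
     memcpy (tmp, &target_vec, sizeof tmp);
     tmp[elt] = val;
     memcpy (&target_vec, tmp, sizeof tmp);
*/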
28509 void
28510 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28512 enum machine_mode mode = GET_MODE (vec);
28513 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28514 bool use_vec_extr = false;
28515 rtx tmp;
28517 switch (mode)
28519 case V2SImode:
28520 case V2SFmode:
28521 if (!mmx_ok)
28522 break;
28523 /* FALLTHRU */
28525 case V2DFmode:
28526 case V2DImode:
28527 use_vec_extr = true;
28528 break;
28530 case V4SFmode:
28531 use_vec_extr = TARGET_SSE4_1;
28532 if (use_vec_extr)
28533 break;
28535 switch (elt)
28537 case 0:
28538 tmp = vec;
28539 break;
28541 case 1:
28542 case 3:
28543 tmp = gen_reg_rtx (mode);
28544 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
28545 GEN_INT (elt), GEN_INT (elt),
28546 GEN_INT (elt+4), GEN_INT (elt+4)));
28547 break;
28549 case 2:
28550 tmp = gen_reg_rtx (mode);
28551 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
28552 break;
28554 default:
28555 gcc_unreachable ();
28557 vec = tmp;
28558 use_vec_extr = true;
28559 elt = 0;
28560 break;
28562 case V4SImode:
28563 use_vec_extr = TARGET_SSE4_1;
28564 if (use_vec_extr)
28565 break;
28567 if (TARGET_SSE2)
28569 switch (elt)
28571 case 0:
28572 tmp = vec;
28573 break;
28575 case 1:
28576 case 3:
28577 tmp = gen_reg_rtx (mode);
28578 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
28579 GEN_INT (elt), GEN_INT (elt),
28580 GEN_INT (elt), GEN_INT (elt)));
28581 break;
28583 case 2:
28584 tmp = gen_reg_rtx (mode);
28585 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
28586 break;
28588 default:
28589 gcc_unreachable ();
28591 vec = tmp;
28592 use_vec_extr = true;
28593 elt = 0;
28595 else
28597 /* For SSE1, we have to reuse the V4SF code. */
28598 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28599 gen_lowpart (V4SFmode, vec), elt);
28600 return;
28602 break;
28604 case V8HImode:
28605 use_vec_extr = TARGET_SSE2;
28606 break;
28607 case V4HImode:
28608 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28609 break;
28611 case V16QImode:
28612 use_vec_extr = TARGET_SSE4_1;
28613 break;
28615 case V8QImode:
28616 /* ??? Could extract the appropriate HImode element and shift. */
28617 default:
28618 break;
28621 if (use_vec_extr)
28623 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28624 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28626 /* Let the rtl optimizers know about the zero extension performed. */
28627 if (inner_mode == QImode || inner_mode == HImode)
28629 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
28630 target = gen_lowpart (SImode, target);
28633 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28635 else
28637 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28639 emit_move_insn (mem, vec);
28641 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28642 emit_move_insn (target, tmp);
28646 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
28647 pattern to reduce; DEST is the destination; IN is the input vector. */
28649 void
28650 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
28652 rtx tmp1, tmp2, tmp3;
28654 tmp1 = gen_reg_rtx (V4SFmode);
28655 tmp2 = gen_reg_rtx (V4SFmode);
28656 tmp3 = gen_reg_rtx (V4SFmode);
28658 emit_insn (gen_sse_movhlps (tmp1, in, in));
28659 emit_insn (fn (tmp2, tmp1, in));
28661 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
28662 GEN_INT (1), GEN_INT (1),
28663 GEN_INT (1+4), GEN_INT (1+4)));
28664 emit_insn (fn (dest, tmp2, tmp3));
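/* Illustrative note (an assumed data flow, not compiler code): for an
   associative FN (e.g. addition) over lanes v0..v3 the sequence is

     tmp1 = { v2, v3, v2, v3 }                     movhlps
     tmp2 = fn (tmp1, in)                          fn(v2,v0) in lane 0
     tmp3 = broadcast of tmp2 lane 1               shufps
     dest lane 0 = fn (fn (v2, v0), fn (v3, v1))   the full reduction
*/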
28667 /* Target hook for scalar_mode_supported_p. */
28668 static bool
28669 ix86_scalar_mode_supported_p (enum machine_mode mode)
28671 if (DECIMAL_FLOAT_MODE_P (mode))
28672 return true;
28673 else if (mode == TFmode)
28674 return true;
28675 else
28676 return default_scalar_mode_supported_p (mode);
28679 /* Implements target hook vector_mode_supported_p. */
28680 static bool
28681 ix86_vector_mode_supported_p (enum machine_mode mode)
28683 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
28684 return true;
28685 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
28686 return true;
28687 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
28688 return true;
28689 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
28690 return true;
28691 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
28692 return true;
28693 return false;
28696 /* Target hook for c_mode_for_suffix. */
28697 static enum machine_mode
28698 ix86_c_mode_for_suffix (char suffix)
28700 if (suffix == 'q')
28701 return TFmode;
28702 if (suffix == 'w')
28703 return XFmode;
28705 return VOIDmode;
28708 /* Worker function for TARGET_MD_ASM_CLOBBERS.
28710 We do this in the new i386 backend to maintain source compatibility
28711 with the old cc0-based compiler. */
28713 static tree
28714 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
28715 tree inputs ATTRIBUTE_UNUSED,
28716 tree clobbers)
28718 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
28719 clobbers);
28720 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
28721 clobbers);
28722 return clobbers;
28725 /* Implements target vector targetm.asm.encode_section_info. This
28726 is not used by NetWare. */
28728 static void ATTRIBUTE_UNUSED
28729 ix86_encode_section_info (tree decl, rtx rtl, int first)
28731 default_encode_section_info (decl, rtl, first);
28733 if (TREE_CODE (decl) == VAR_DECL
28734 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
28735 && ix86_in_large_data_p (decl))
28736 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
28739 /* Worker function for REVERSE_CONDITION. */
28741 enum rtx_code
28742 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
28744 return (mode != CCFPmode && mode != CCFPUmode
28745 ? reverse_condition (code)
28746 : reverse_condition_maybe_unordered (code));
28749 /* Output code to perform an x87 FP register move, from OPERANDS[1]
28750 to OPERANDS[0]. */
28752 const char *
28753 output_387_reg_move (rtx insn, rtx *operands)
28755 if (REG_P (operands[0]))
28757 if (REG_P (operands[1])
28758 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28760 if (REGNO (operands[0]) == FIRST_STACK_REG)
28761 return output_387_ffreep (operands, 0);
28762 return "fstp\t%y0";
28764 if (STACK_TOP_P (operands[0]))
28765 return "fld%z1\t%y1";
28766 return "fst\t%y0";
28768 else if (MEM_P (operands[0]))
28770 gcc_assert (REG_P (operands[1]));
28771 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28772 return "fstp%z0\t%y0";
28773 else
28775 /* There is no non-popping store to memory for XFmode.
28776 So if we need one, follow the store with a load. */
28777 if (GET_MODE (operands[0]) == XFmode)
28778 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
28779 else
28780 return "fst%z0\t%y0";
28783 else
28784 gcc_unreachable ();
28787 /* Output code to perform a conditional jump to LABEL, if C2 flag in
28788 FP status register is set. */
28790 void
28791 ix86_emit_fp_unordered_jump (rtx label)
28793 rtx reg = gen_reg_rtx (HImode);
28794 rtx temp;
28796 emit_insn (gen_x86_fnstsw_1 (reg));
28798 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
28800 emit_insn (gen_x86_sahf_1 (reg));
28802 temp = gen_rtx_REG (CCmode, FLAGS_REG);
28803 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
28805 else
28807 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
28809 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28810 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
28813 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
28814 gen_rtx_LABEL_REF (VOIDmode, label),
28815 pc_rtx);
28816 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
28818 emit_jump_insn (temp);
28819 predict_jump (REG_BR_PROB_BASE * 10 / 100);
28822 /* Output code to perform a log1p XFmode calculation. */
28824 void ix86_emit_i387_log1p (rtx op0, rtx op1)
28826 rtx label1 = gen_label_rtx ();
28827 rtx label2 = gen_label_rtx ();
28829 rtx tmp = gen_reg_rtx (XFmode);
28830 rtx tmp2 = gen_reg_rtx (XFmode);
28832 emit_insn (gen_absxf2 (tmp, op1));
28833 emit_insn (gen_cmpxf (tmp,
28834 CONST_DOUBLE_FROM_REAL_VALUE (
28835 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
28836 XFmode)));
28837 emit_jump_insn (gen_bge (label1));
28839 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28840 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
28841 emit_jump (label2);
28843 emit_label (label1);
28844 emit_move_insn (tmp, CONST1_RTX (XFmode));
28845 emit_insn (gen_addxf3 (tmp, op1, tmp));
28846 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28847 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
28849 emit_label (label2);
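/* Illustrative sketch (an assumed C analogue of the i387 sequence
   above, not compiler code; fyl2xp1 and fyl2x stand for the matching
   x87 insns and fldln2 for the ln(2) constant, none of them real
   functions):

     if (fabsl (x) < 0.29289321881345247561810596348408353L)
       op0 = fyl2xp1 (fldln2, x);          ln(2) * log2 (1 + x)
     else
       op0 = fyl2x (fldln2, 1.0L + x);     ln(2) * log2 (1 + x)

   fyl2xp1 is accurate only for |x| < 1 - sqrt(2)/2, which is exactly
   the threshold the comparison above enforces.  */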
28852 /* Output code to perform a Newton-Raphson approximation of a single precision
28853 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
28855 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
28857 rtx x0, x1, e0, e1, two;
28859 x0 = gen_reg_rtx (mode);
28860 e0 = gen_reg_rtx (mode);
28861 e1 = gen_reg_rtx (mode);
28862 x1 = gen_reg_rtx (mode);
28864 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
28866 if (VECTOR_MODE_P (mode))
28867 two = ix86_build_const_vector (SFmode, true, two);
28869 two = force_reg (mode, two);
28871 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
28873 /* x0 = rcp(b) estimate */
28874 emit_insn (gen_rtx_SET (VOIDmode, x0,
28875 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
28876 UNSPEC_RCP)));
28877 /* e0 = x0 * b */
28878 emit_insn (gen_rtx_SET (VOIDmode, e0,
28879 gen_rtx_MULT (mode, x0, b)));
28880 /* e1 = 2. - e0 */
28881 emit_insn (gen_rtx_SET (VOIDmode, e1,
28882 gen_rtx_MINUS (mode, two, e0)));
28883 /* x1 = x0 * e1 */
28884 emit_insn (gen_rtx_SET (VOIDmode, x1,
28885 gen_rtx_MULT (mode, x0, e1)));
28886 /* res = a * x1 */
28887 emit_insn (gen_rtx_SET (VOIDmode, res,
28888 gen_rtx_MULT (mode, a, x1)));
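/* Illustrative sketch (an assumed scalar analogue, not compiler code;
   x0 stands for the ~12-bit accurate rcpss estimate of 1/b): one
   Newton-Raphson step refines the estimate to nearly single
   precision:

     float swdiv_sketch (float a, float b, float x0)
     {
       float e0 = x0 * b;
       float e1 = 2.0f - e0;
       float x1 = x0 * e1;       x1 ~= 1/b
       return a * x1;
     }
*/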
28891 /* Output code to perform a Newton-Raphson approximation of a
28892 single precision floating point [reciprocal] square root. */
28894 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
28895 bool recip)
28897 rtx x0, e0, e1, e2, e3, mthree, mhalf;
28898 REAL_VALUE_TYPE r;
28900 x0 = gen_reg_rtx (mode);
28901 e0 = gen_reg_rtx (mode);
28902 e1 = gen_reg_rtx (mode);
28903 e2 = gen_reg_rtx (mode);
28904 e3 = gen_reg_rtx (mode);
28906 real_from_integer (&r, VOIDmode, -3, -1, 0);
28907 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28909 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
28910 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28912 if (VECTOR_MODE_P (mode))
28914 mthree = ix86_build_const_vector (SFmode, true, mthree);
28915 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
28918 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
28919 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
28921 /* x0 = rsqrt(a) estimate */
28922 emit_insn (gen_rtx_SET (VOIDmode, x0,
28923 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
28924 UNSPEC_RSQRT)));
28926 /* If a == 0.0, filter out infinity to prevent NaN for sqrt (0.0). */
28927 if (!recip)
28929 rtx zero, mask;
28931 zero = gen_reg_rtx (mode);
28932 mask = gen_reg_rtx (mode);
28934 zero = force_reg (mode, CONST0_RTX (mode));
28935 emit_insn (gen_rtx_SET (VOIDmode, mask,
28936 gen_rtx_NE (mode, zero, a)));
28938 emit_insn (gen_rtx_SET (VOIDmode, x0,
28939 gen_rtx_AND (mode, x0, mask)));
28942 /* e0 = x0 * a */
28943 emit_insn (gen_rtx_SET (VOIDmode, e0,
28944 gen_rtx_MULT (mode, x0, a)));
28945 /* e1 = e0 * x0 */
28946 emit_insn (gen_rtx_SET (VOIDmode, e1,
28947 gen_rtx_MULT (mode, e0, x0)));
28949 /* e2 = e1 - 3. */
28950 mthree = force_reg (mode, mthree);
28951 emit_insn (gen_rtx_SET (VOIDmode, e2,
28952 gen_rtx_PLUS (mode, e1, mthree)));
28954 mhalf = force_reg (mode, mhalf);
28955 if (recip)
28956 /* e3 = -.5 * x0 */
28957 emit_insn (gen_rtx_SET (VOIDmode, e3,
28958 gen_rtx_MULT (mode, x0, mhalf)));
28959 else
28960 /* e3 = -.5 * e0 */
28961 emit_insn (gen_rtx_SET (VOIDmode, e3,
28962 gen_rtx_MULT (mode, e0, mhalf)));
28963 /* ret = e2 * e3 */
28964 emit_insn (gen_rtx_SET (VOIDmode, res,
28965 gen_rtx_MULT (mode, e2, e3)));
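/* Illustrative sketch (an assumed scalar analogue, not compiler code;
   x0 stands for the rsqrtss estimate of 1/sqrt(a)):

     e0 = x0 * a;
     e1 = e0 * x0;                     a * x0 * x0, close to 1.0
     e2 = e1 - 3.0f;
     e3 = recip ? -0.5f * x0 : -0.5f * e0;
     res = e2 * e3;

   For recip this is -0.5 * x0 * (a*x0*x0 - 3), one Newton-Raphson step
   for 1/sqrt(a); multiplying by e0 = a*x0 instead yields sqrt(a).  */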
28968 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
28970 static void ATTRIBUTE_UNUSED
28971 i386_solaris_elf_named_section (const char *name, unsigned int flags,
28972 tree decl)
28974 /* With Binutils 2.15, the "@unwind" marker must be specified on
28975 every occurrence of the ".eh_frame" section, not just the first
28976 one. */
28977 if (TARGET_64BIT
28978 && strcmp (name, ".eh_frame") == 0)
28980 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
28981 flags & SECTION_WRITE ? "aw" : "a");
28982 return;
28984 default_elf_asm_named_section (name, flags, decl);
28987 /* Return the mangling of TYPE if it is an extended fundamental type. */
28989 static const char *
28990 ix86_mangle_type (const_tree type)
28992 type = TYPE_MAIN_VARIANT (type);
28994 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
28995 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
28996 return NULL;
28998 switch (TYPE_MODE (type))
29000 case TFmode:
29001 /* __float128 is "g". */
29002 return "g";
29003 case XFmode:
29004 /* "long double" or __float80 is "e". */
29005 return "e";
29006 default:
29007 return NULL;
29011 /* For 32-bit code we can save PIC register setup by using
29012 __stack_chk_fail_local hidden function instead of calling
29013 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
29014 register, so it is better to call __stack_chk_fail directly. */
29016 static tree
29017 ix86_stack_protect_fail (void)
29019 return TARGET_64BIT
29020 ? default_external_stack_protect_fail ()
29021 : default_hidden_stack_protect_fail ();
29024 /* Select a format to encode pointers in exception handling data. CODE
29025 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
29026 true if the symbol may be affected by dynamic relocations.
29028 ??? All x86 object file formats are capable of representing this.
29029 After all, the relocation needed is the same as for the call insn.
29030 Whether or not a particular assembler allows us to enter such, I
29031 guess we'll have to see. */
29032 int
29033 asm_preferred_eh_data_format (int code, int global)
29035 if (flag_pic)
29037 int type = DW_EH_PE_sdata8;
29038 if (!TARGET_64BIT
29039 || ix86_cmodel == CM_SMALL_PIC
29040 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
29041 type = DW_EH_PE_sdata4;
29042 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
29044 if (ix86_cmodel == CM_SMALL
29045 || (ix86_cmodel == CM_MEDIUM && code))
29046 return DW_EH_PE_udata4;
29047 return DW_EH_PE_absptr;
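/* Illustrative note (an assumed example, not from the sources): with
   -fpic on a 32-bit target, a global data pointer is encoded as
   DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4, i.e. the byte
   0x9b commonly seen in .eh_frame augmentation data.  */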
29050 /* Expand copysign from SIGN to the positive value ABS_VALUE
29051 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
29052 the sign-bit. */
29053 static void
29054 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
29056 enum machine_mode mode = GET_MODE (sign);
29057 rtx sgn = gen_reg_rtx (mode);
29058 if (mask == NULL_RTX)
29060 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
29061 if (!VECTOR_MODE_P (mode))
29063 /* We need to generate a scalar mode mask in this case. */
29064 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29065 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29066 mask = gen_reg_rtx (mode);
29067 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29070 else
29071 mask = gen_rtx_NOT (mode, mask);
29072 emit_insn (gen_rtx_SET (VOIDmode, sgn,
29073 gen_rtx_AND (mode, mask, sign)));
29074 emit_insn (gen_rtx_SET (VOIDmode, result,
29075 gen_rtx_IOR (mode, abs_value, sgn)));
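/* Illustrative sketch (an assumed scalar analogue, not compiler code):

     result = abs_value | (sign & SIGNBIT);

   where SIGNBIT is a mask with only the sign bit set (0x80000000 for
   SFmode), so RESULT gets ABS_VALUE's magnitude and SIGN's sign.  */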
29078 /* Expand fabs (OP0) and return a new rtx that holds the result. The
29079 mask for masking out the sign-bit is stored in *SMASK, if that is
29080 non-null. */
29081 static rtx
29082 ix86_expand_sse_fabs (rtx op0, rtx *smask)
29084 enum machine_mode mode = GET_MODE (op0);
29085 rtx xa, mask;
29087 xa = gen_reg_rtx (mode);
29088 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
29089 if (!VECTOR_MODE_P (mode))
29091 /* We need to generate a scalar mode mask in this case. */
29092 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29093 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29094 mask = gen_reg_rtx (mode);
29095 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29097 emit_insn (gen_rtx_SET (VOIDmode, xa,
29098 gen_rtx_AND (mode, op0, mask)));
29100 if (smask)
29101 *smask = mask;
29103 return xa;
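/* Illustrative sketch (an assumed scalar analogue, not compiler code):

     xa = op0 & ~SIGNBIT;      0x7fffffff for SFmode

   i.e. fabs is a bitwise AND that clears the sign bit; the inverted
   mask is handed back through *SMASK so callers can later restore the
   sign.  */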
29106 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
29107 swapping the operands if SWAP_OPERANDS is true. The expanded
29108 code is a forward jump to a newly created label in case the
29109 comparison is true. The generated label rtx is returned. */
29110 static rtx
29111 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
29112 bool swap_operands)
29114 rtx label, tmp;
29116 if (swap_operands)
29118 tmp = op0;
29119 op0 = op1;
29120 op1 = tmp;
29123 label = gen_label_rtx ();
29124 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
29125 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29126 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
29127 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
29128 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
29129 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
29130 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
29131 JUMP_LABEL (tmp) = label;
29133 return label;
29136 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
29137 using comparison code CODE. Operands are swapped for the comparison if
29138 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
29139 static rtx
29140 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
29141 bool swap_operands)
29143 enum machine_mode mode = GET_MODE (op0);
29144 rtx mask = gen_reg_rtx (mode);
29146 if (swap_operands)
29148 rtx tmp = op0;
29149 op0 = op1;
29150 op1 = tmp;
29153 if (mode == DFmode)
29154 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
29155 gen_rtx_fmt_ee (code, mode, op0, op1)));
29156 else
29157 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
29158 gen_rtx_fmt_ee (code, mode, op0, op1)));
29160 return mask;
29163 /* Generate and return a rtx of mode MODE for 2**n where n is the number
29164 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
29165 static rtx
29166 ix86_gen_TWO52 (enum machine_mode mode)
29168 REAL_VALUE_TYPE TWO52r;
29169 rtx TWO52;
29171 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
29172 TWO52 = const_double_from_real_value (TWO52r, mode);
29173 TWO52 = force_reg (mode, TWO52);
29175 return TWO52;
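/* Illustrative note (an assumed rationale, not from the sources):
   DFmode has 52 fraction bits, so for any |x| < 2**52 the sum
   x + 2**52 leaves no room for fraction bits and the FPU rounds them
   away in the current rounding mode; subtracting 2**52 then yields x
   rounded to an integer.  E.g. 3.7 + 2**52 rounds to 2**52 + 4,
   giving 4.0 after the subtraction.  SFmode uses 2**23 for the same
   reason.  */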
29178 /* Expand SSE sequence for computing lround from OP1 storing
29179 into OP0. */
29180 void
29181 ix86_expand_lround (rtx op0, rtx op1)
29183 /* C code for the stuff we're doing below:
29184 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
29185 return (long)tmp;
29187 enum machine_mode mode = GET_MODE (op1);
29188 const struct real_format *fmt;
29189 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29190 rtx adj;
29192 /* load nextafter (0.5, 0.0) */
29193 fmt = REAL_MODE_FORMAT (mode);
29194 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29195 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29197 /* adj = copysign (0.5, op1) */
29198 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29199 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29201 /* adj = op1 + adj */
29202 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29204 /* op0 = (imode)adj */
29205 expand_fix (op0, adj, 0);
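/* Illustrative note (an assumed rationale, not from the sources): the
   addend is nextafter (0.5, 0.0) rather than 0.5 because for the
   largest float below 0.5 the sum x + 0.5f rounds up (ties-to-even)
   to 1.0f, so a plain 0.5 addend would make lround return 1 instead
   of 0; with the slightly smaller addend the sum stays below 1.0 and
   truncates correctly.  */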
29208 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
29209 into OPERAND0. */
29210 void
29211 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29213 /* C code for the stuff we're doing below (for do_floor):
29214 xi = (long)op1;
29215 xi -= (double)xi > op1 ? 1 : 0;
29216 return xi;
29218 enum machine_mode fmode = GET_MODE (op1);
29219 enum machine_mode imode = GET_MODE (op0);
29220 rtx ireg, freg, label, tmp;
29222 /* reg = (long)op1 */
29223 ireg = gen_reg_rtx (imode);
29224 expand_fix (ireg, op1, 0);
29226 /* freg = (double)reg */
29227 freg = gen_reg_rtx (fmode);
29228 expand_float (freg, ireg, 0);
29230 /* ireg = (freg > op1) ? ireg - 1 : ireg */
29231 label = ix86_expand_sse_compare_and_jump (UNLE,
29232 freg, op1, !do_floor);
29233 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29234 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29235 emit_move_insn (ireg, tmp);
29237 emit_label (label);
29238 LABEL_NUSES (label) = 1;
29240 emit_move_insn (op0, ireg);
29243 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29244 result in OPERAND0. */
29245 void
29246 ix86_expand_rint (rtx operand0, rtx operand1)
29248 /* C code for the stuff we're doing below:
29249 xa = fabs (operand1);
29250 if (!isless (xa, 2**52))
29251 return operand1;
29252 xa = xa + 2**52 - 2**52;
29253 return copysign (xa, operand1);
29255 enum machine_mode mode = GET_MODE (operand0);
29256 rtx res, xa, label, TWO52, mask;
29258 res = gen_reg_rtx (mode);
29259 emit_move_insn (res, operand1);
29261 /* xa = abs (operand1) */
29262 xa = ix86_expand_sse_fabs (res, &mask);
29264 /* if (!isless (xa, TWO52)) goto label; */
29265 TWO52 = ix86_gen_TWO52 (mode);
29266 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29268 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29269 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29271 ix86_sse_copysign_to_positive (res, xa, res, mask);
29273 emit_label (label);
29274 LABEL_NUSES (label) = 1;
29276 emit_move_insn (operand0, res);
29279 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29280 into OPERAND0. */
29281 void
29282 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29284 /* C code for the stuff we expand below.
29285 double xa = fabs (x), x2;
29286 if (!isless (xa, TWO52))
29287 return x;
29288 xa = xa + TWO52 - TWO52;
29289 x2 = copysign (xa, x);
29290 Compensate. Floor:
29291 if (x2 > x)
29292 x2 -= 1;
29293 Compensate. Ceil:
29294 if (x2 < x)
29295 x2 -= -1;
29296 return x2;
29298 enum machine_mode mode = GET_MODE (operand0);
29299 rtx xa, TWO52, tmp, label, one, res, mask;
29301 TWO52 = ix86_gen_TWO52 (mode);
29303 /* Temporary for holding the result, initialized to the input
29304 operand to ease control flow. */
29305 res = gen_reg_rtx (mode);
29306 emit_move_insn (res, operand1);
29308 /* xa = abs (operand1) */
29309 xa = ix86_expand_sse_fabs (res, &mask);
29311 /* if (!isless (xa, TWO52)) goto label; */
29312 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29314 /* xa = xa + TWO52 - TWO52; */
29315 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29316 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29318 /* xa = copysign (xa, operand1) */
29319 ix86_sse_copysign_to_positive (xa, xa, res, mask);
29321 /* generate 1.0 or -1.0 */
29322 one = force_reg (mode,
29323 const_double_from_real_value (do_floor
29324 ? dconst1 : dconstm1, mode));
29326 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29327 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29328 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29329 gen_rtx_AND (mode, one, tmp)));
29330 /* We always need to subtract here to preserve signed zero. */
29331 tmp = expand_simple_binop (mode, MINUS,
29332 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29333 emit_move_insn (res, tmp);
29335 emit_label (label);
29336 LABEL_NUSES (label) = 1;
29338 emit_move_insn (operand0, res);
29341 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29342 into OPERAND0. */
29343 void
29344 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29346 /* C code for the stuff we expand below.
29347 double xa = fabs (x), x2;
29348 if (!isless (xa, TWO52))
29349 return x;
29350 x2 = (double)(long)x;
29351 Compensate. Floor:
29352 if (x2 > x)
29353 x2 -= 1;
29354 Compensate. Ceil:
29355 if (x2 < x)
29356 x2 += 1;
29357 if (HONOR_SIGNED_ZEROS (mode))
29358 return copysign (x2, x);
29359 return x2;
29361 enum machine_mode mode = GET_MODE (operand0);
29362 rtx xa, xi, TWO52, tmp, label, one, res, mask;
29364 TWO52 = ix86_gen_TWO52 (mode);
29366 /* Temporary for holding the result, initialized to the input
29367 operand to ease control flow. */
29368 res = gen_reg_rtx (mode);
29369 emit_move_insn (res, operand1);
29371 /* xa = abs (operand1) */
29372 xa = ix86_expand_sse_fabs (res, &mask);
29374 /* if (!isless (xa, TWO52)) goto label; */
29375 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29377 /* xa = (double)(long)x */
29378 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29379 expand_fix (xi, res, 0);
29380 expand_float (xa, xi, 0);
29382 /* generate 1.0 */
29383 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29385 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29386 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29387 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29388 gen_rtx_AND (mode, one, tmp)));
29389 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29390 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29391 emit_move_insn (res, tmp);
29393 if (HONOR_SIGNED_ZEROS (mode))
29394 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29396 emit_label (label);
29397 LABEL_NUSES (label) = 1;
29399 emit_move_insn (operand0, res);
29402 /* Expand SSE sequence for computing round from OPERAND1 storing
29403 into OPERAND0. Sequence that works without relying on DImode truncation
29404 via cvttsd2siq that is only available on 64bit targets. */
29405 void
29406 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29408 /* C code for the stuff we expand below.
29409 double xa = fabs (x), xa2, x2;
29410 if (!isless (xa, TWO52))
29411 return x;
29412 Using the absolute value and copying back sign makes
29413 -0.0 -> -0.0 correct.
29414 xa2 = xa + TWO52 - TWO52;
29415 Compensate.
29416 dxa = xa2 - xa;
29417 if (dxa <= -0.5)
29418 xa2 += 1;
29419 else if (dxa > 0.5)
29420 xa2 -= 1;
29421 x2 = copysign (xa2, x);
29422 return x2;
29424 enum machine_mode mode = GET_MODE (operand0);
29425 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29427 TWO52 = ix86_gen_TWO52 (mode);
29429 /* Temporary for holding the result, initialized to the input
29430 operand to ease control flow. */
29431 res = gen_reg_rtx (mode);
29432 emit_move_insn (res, operand1);
29434 /* xa = abs (operand1) */
29435 xa = ix86_expand_sse_fabs (res, &mask);
29437 /* if (!isless (xa, TWO52)) goto label; */
29438 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29440 /* xa2 = xa + TWO52 - TWO52; */
29441 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29442 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29444 /* dxa = xa2 - xa; */
29445 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29447 /* generate 0.5, 1.0 and -0.5 */
29448 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29449 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29450 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
29451 0, OPTAB_DIRECT);
29453 /* Compensate. */
29454 tmp = gen_reg_rtx (mode);
29455 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29456 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29457 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29458 gen_rtx_AND (mode, one, tmp)));
29459 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29460 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29461 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29462 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29463 gen_rtx_AND (mode, one, tmp)));
29464 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29466 /* res = copysign (xa2, operand1) */
29467 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29469 emit_label (label);
29470 LABEL_NUSES (label) = 1;
29472 emit_move_insn (operand0, res);
29475 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29476 into OPERAND0. */
29477 void
29478 ix86_expand_trunc (rtx operand0, rtx operand1)
29480 /* C code for SSE variant we expand below.
29481 double xa = fabs (x), x2;
29482 if (!isless (xa, TWO52))
29483 return x;
29484 x2 = (double)(long)x;
29485 if (HONOR_SIGNED_ZEROS (mode))
29486 return copysign (x2, x);
29487 return x2;
29489 enum machine_mode mode = GET_MODE (operand0);
29490 rtx xa, xi, TWO52, label, res, mask;
29492 TWO52 = ix86_gen_TWO52 (mode);
29494 /* Temporary for holding the result, initialized to the input
29495 operand to ease control flow. */
29496 res = gen_reg_rtx (mode);
29497 emit_move_insn (res, operand1);
29499 /* xa = abs (operand1) */
29500 xa = ix86_expand_sse_fabs (res, &mask);
29502 /* if (!isless (xa, TWO52)) goto label; */
29503 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29505 /* x = (double)(long)x */
29506 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29507 expand_fix (xi, res, 0);
29508 expand_float (res, xi, 0);
29510 if (HONOR_SIGNED_ZEROS (mode))
29511 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29513 emit_label (label);
29514 LABEL_NUSES (label) = 1;
29516 emit_move_insn (operand0, res);
29519 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29520 into OPERAND0. */
29521 void
29522 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29524 enum machine_mode mode = GET_MODE (operand0);
29525 rtx xa, mask, TWO52, label, one, res, smask, tmp;
29527 /* C code for SSE variant we expand below.
29528 double xa = fabs (x), x2;
29529 if (!isless (xa, TWO52))
29530 return x;
29531 xa2 = xa + TWO52 - TWO52;
29532 Compensate:
29533 if (xa2 > xa)
29534 xa2 -= 1.0;
29535 x2 = copysign (xa2, x);
29536 return x2;
29539 TWO52 = ix86_gen_TWO52 (mode);
29541 /* Temporary for holding the result, initialized to the input
29542 operand to ease control flow. */
29543 res = gen_reg_rtx (mode);
29544 emit_move_insn (res, operand1);
29546 /* xa = abs (operand1) */
29547 xa = ix86_expand_sse_fabs (res, &smask);
29549 /* if (!isless (xa, TWO52)) goto label; */
29550 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29552 /* res = xa + TWO52 - TWO52; */
29553 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29554 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
29555 emit_move_insn (res, tmp);
29557 /* generate 1.0 */
29558 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29560 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
29561 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
29562 emit_insn (gen_rtx_SET (VOIDmode, mask,
29563 gen_rtx_AND (mode, mask, one)));
29564 tmp = expand_simple_binop (mode, MINUS,
29565 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
29566 emit_move_insn (res, tmp);
29568 /* res = copysign (res, operand1) */
29569 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
29571 emit_label (label);
29572 LABEL_NUSES (label) = 1;
29574 emit_move_insn (operand0, res);
29577 /* Expand SSE sequence for computing round from OPERAND1 storing
29578 into OPERAND0. */
29579 void
29580 ix86_expand_round (rtx operand0, rtx operand1)
29582 /* C code for the stuff we're doing below:
29583 double xa = fabs (x);
29584 if (!isless (xa, TWO52))
29585 return x;
29586 xa = (double)(long)(xa + nextafter (0.5, 0.0));
29587 return copysign (xa, x);
29589 enum machine_mode mode = GET_MODE (operand0);
29590 rtx res, TWO52, xa, label, xi, half, mask;
29591 const struct real_format *fmt;
29592 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29594 /* Temporary for holding the result, initialized to the input
29595 operand to ease control flow. */
29596 res = gen_reg_rtx (mode);
29597 emit_move_insn (res, operand1);
29599 TWO52 = ix86_gen_TWO52 (mode);
29600 xa = ix86_expand_sse_fabs (res, &mask);
29601 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29603 /* load nextafter (0.5, 0.0) */
29604 fmt = REAL_MODE_FORMAT (mode);
29605 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29606 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29608 /* xa = xa + 0.5 */
29609 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29610 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
29612 /* xa = (double)(int64_t)xa */
29613 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29614 expand_fix (xi, xa, 0);
29615 expand_float (xa, xi, 0);
29617 /* res = copysign (xa, operand1) */
29618 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29620 emit_label (label);
29621 LABEL_NUSES (label) = 1;
29623 emit_move_insn (operand0, res);
29627 /* Validate whether an SSE5 instruction is valid.
29628 OPERANDS is the array of operands.
29629 NUM is the number of operands.
29630 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
29631 NUM_MEMORY is the maximum number of memory operands to accept.
29632 When COMMUTATIVE is set, operands 1 and 2 can be swapped. */
29634 bool
29635 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
29636 bool uses_oc0, int num_memory, bool commutative)
29638 int mem_mask;
29639 int mem_count;
29640 int i;
29642 /* Count the number of memory arguments */
29643 mem_mask = 0;
29644 mem_count = 0;
29645 for (i = 0; i < num; i++)
29647 enum machine_mode mode = GET_MODE (operands[i]);
29648 if (register_operand (operands[i], mode))
29651 else if (memory_operand (operands[i], mode))
29653 mem_mask |= (1 << i);
29654 mem_count++;
29657 else
29659 rtx pattern = PATTERN (insn);
29661 /* allow 0 for pcmov */
29662 if (GET_CODE (pattern) != SET
29663 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
29664 || i < 2
29665 || operands[i] != CONST0_RTX (mode))
29666 return false;
29670 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
29671 a memory operation. */
29672 if (num_memory < 0)
29674 num_memory = -num_memory;
29675 if ((mem_mask & (1 << (num-1))) != 0)
29677 mem_mask &= ~(1 << (num-1));
29678 mem_count--;
29682 /* If there were no memory operations, allow the insn */
29683 if (mem_mask == 0)
29684 return true;
29686 /* Do not allow the destination register to be a memory operand. */
29687 else if (mem_mask & (1 << 0))
29688 return false;
29690 /* If there are too many memory operations, disallow the instruction. While
29691 the hardware only allows 1 memory reference, before register allocation
29692 for some insns we sometimes allow two memory operations, in order to allow
29693 code like the following to be optimized:
29695 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
29697 or similar cases that are vectorized into using the fmaddss
29698 instruction. */
29699 else if (mem_count > num_memory)
29700 return false;
29702 /* Don't allow more than one memory operation if not optimizing. */
29703 else if (mem_count > 1 && !optimize)
29704 return false;
29706 else if (num == 4 && mem_count == 1)
29708 /* formats (destination is the first argument), example fmaddss:
29709 xmm1, xmm1, xmm2, xmm3/mem
29710 xmm1, xmm1, xmm2/mem, xmm3
29711 xmm1, xmm2, xmm3/mem, xmm1
29712 xmm1, xmm2/mem, xmm3, xmm1 */
29713 if (uses_oc0)
29714 return ((mem_mask == (1 << 1))
29715 || (mem_mask == (1 << 2))
29716 || (mem_mask == (1 << 3)));
29718 /* format, example pmacsdd:
29719 xmm1, xmm2, xmm3/mem, xmm1 */
29720 if (commutative)
29721 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
29722 else
29723 return (mem_mask == (1 << 2));
29726 else if (num == 4 && num_memory == 2)
29728 /* If there are two memory operations, we can load one of the memory ops
29729 into the destination register. This is for optimizing the
29730 multiply/add ops, for which the combiner has optimized both the multiply
29731 and the add insns to have a memory operation. We have to be careful
29732 that the destination doesn't overlap with the inputs. */
29733 rtx op0 = operands[0];
29735 if (reg_mentioned_p (op0, operands[1])
29736 || reg_mentioned_p (op0, operands[2])
29737 || reg_mentioned_p (op0, operands[3]))
29738 return false;
29740 /* formats (destination is the first argument), example fmaddss:
29741 xmm1, xmm1, xmm2, xmm3/mem
29742 xmm1, xmm1, xmm2/mem, xmm3
29743 xmm1, xmm2, xmm3/mem, xmm1
29744 xmm1, xmm2/mem, xmm3, xmm1
29746 For the oc0 case, we will load either operands[1] or operands[3] into
29747 operands[0], so any combination of 2 memory operands is ok. */
29748 if (uses_oc0)
29749 return true;
29751 /* format, example pmacsdd:
29752 xmm1, xmm2, xmm3/mem, xmm1
29754 For the integer multiply/add instructions be more restrictive and
29755 require operands[2] and operands[3] to be the memory operands. */
29756 if (commutative)
29757 return (mem_mask == ((1 << 1) | (1 << 3)) || mem_mask == ((1 << 2) | (1 << 3)));
29758 else
29759 return (mem_mask == ((1 << 2) | (1 << 3)));
29762 else if (num == 3 && num_memory == 1)
29764 /* formats, example protb:
29765 xmm1, xmm2, xmm3/mem
29766 xmm1, xmm2/mem, xmm3 */
29767 if (uses_oc0)
29768 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
29770 /* format, example comeq:
29771 xmm1, xmm2, xmm3/mem */
29772 else
29773 return (mem_mask == (1 << 2));
29776 else
29777 gcc_unreachable ();
29779 return false;
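/* Illustrative note (an assumed reading of the checks above, not from
   the sources): MEM_MASK has bit I set when operand I is a memory
   reference.  E.g. for a non-commutative 4-operand insn such as
   fmaddss with operands xmm1, xmm2, mem, xmm1 we get
   mem_mask == (1 << 2) and mem_count == 1, which the NUM == 4 path
   accepts.  */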
29783 /* Fix up an SSE5 instruction that has 2 memory input references into a form the
29784 hardware will allow by using the destination register to load one of the
29785 memory operations. Presently this is used by the multiply/add routines to
29786 allow 2 memory references. */
29788 void
29789 ix86_expand_sse5_multiple_memory (rtx operands[],
29790 int num,
29791 enum machine_mode mode)
29793 rtx op0 = operands[0];
29794 if (num != 4
29795 || memory_operand (op0, mode)
29796 || reg_mentioned_p (op0, operands[1])
29797 || reg_mentioned_p (op0, operands[2])
29798 || reg_mentioned_p (op0, operands[3]))
29799 gcc_unreachable ();
29801 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
29802 the destination register. */
29803 if (memory_operand (operands[1], mode))
29805 emit_move_insn (op0, operands[1]);
29806 operands[1] = op0;
29808 else if (memory_operand (operands[3], mode))
29810 emit_move_insn (op0, operands[3]);
29811 operands[3] = op0;
29813 else
29814 gcc_unreachable ();
29816 return;
29820 /* Table of valid machine attributes. */
29821 static const struct attribute_spec ix86_attribute_table[] =
29823 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
29824 /* Stdcall attribute says callee is responsible for popping arguments
29825 if they are not variable. */
29826 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29827 /* Fastcall attribute says callee is responsible for popping arguments
29828 if they are not variable. */
29829 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29830 /* Cdecl attribute says the callee is a normal C declaration */
29831 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29832 /* Regparm attribute specifies how many integer arguments are to be
29833 passed in registers. */
29834 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
29835 /* Sseregparm attribute says we are using x86_64 calling conventions
29836 for FP arguments. */
29837 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29838 /* force_align_arg_pointer says this function realigns the stack at entry. */
29839 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
29840 false, true, true, ix86_handle_cconv_attribute },
29841 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29842 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
29843 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
29844 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
29845 #endif
29846 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29847 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29848 #ifdef SUBTARGET_ATTRIBUTE_TABLE
29849 SUBTARGET_ATTRIBUTE_TABLE,
29850 #endif
29851 /* ms_abi and sysv_abi calling convention function attributes. */
29852 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29853 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29854 /* End element. */
29855 { NULL, 0, 0, false, false, false, NULL }
29858 /* Implement targetm.vectorize.builtin_vectorization_cost. */
29859 static int
29860 x86_builtin_vectorization_cost (bool runtime_test)
29862 /* If the branch of the runtime test is taken, i.e. the vectorized
29863 version is skipped, this incurs a misprediction cost (because the
29864 vectorized version is expected to be the fall-through). So we subtract
29865 the latency of a mispredicted branch from the costs that are incurred
29866 when the vectorized version is executed.
29868 TODO: The values in individual target tables have to be tuned or new
29869 fields may be needed. For eg. on K8, the default branch path is the
29870 not-taken path. If the taken path is predicted correctly, the minimum
29871 penalty of going down the taken-path is 1 cycle. If the taken-path is
29872 not predicted correctly, then the minimum penalty is 10 cycles. */
29874 if (runtime_test)
29875 {
29876 return (-(ix86_cost->cond_taken_branch_cost));
29877 }
29878 else
29879 return 0;
29880 }
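For intuition, a sketch of the control-flow shape the comment above describes; the guard and loop names are hypothetical, only the expected-fall-through layout matters:

/* The vectorizer versions the loop behind a runtime test.  The
   vectorized copy is the fall-through, so reaching the scalar copy
   means the branch was taken - and typically mispredicted - which is
   the cost this hook reports as a negative adjustment.  */
extern int runtime_guard_fails;      /* hypothetical */
extern void scalar_loop (void);      /* hypothetical */
extern void vectorized_loop (void);  /* hypothetical */

void versioned (void)
{
  if (runtime_guard_fails)   /* taken path: pay cond_taken_branch_cost */
    scalar_loop ();
  else
    vectorized_loop ();      /* fall-through: the expected path */
}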
29882 /* This function returns the calling-ABI-specific va_list type node.
29883 It returns the va_list type appropriate for FNDECL. */
29885 tree
29886 ix86_fn_abi_va_list (tree fndecl)
29887 {
29888 if (!TARGET_64BIT)
29889 return va_list_type_node;
29890 gcc_assert (fndecl != NULL_TREE);
29892 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
29893 return ms_va_list_type_node;
29894 else
29895 return sysv_va_list_type_node;
29896 }
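A user-level sketch of the distinction (64-bit only; the ms_abi and sysv_abi attributes are registered in the table above):

/* Inside msf, va_list is the MS variant; inside sysvf, the SysV one.  */
void __attribute__((ms_abi)) msf (int n, ...);
void __attribute__((sysv_abi)) sysvf (int n, ...);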
29898 /* Returns the canonical va_list type specified by TYPE. If there
29899 is no valid TYPE provided, it returns NULL_TREE. */
29901 tree
29902 ix86_canonical_va_list_type (tree type)
29903 {
29904 tree wtype, htype;
29906 /* Resolve references and pointers to va_list type. */
29907 if (INDIRECT_REF_P (type))
29908 type = TREE_TYPE (type);
29909 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
29910 type = TREE_TYPE (type);
29912 if (TARGET_64BIT)
29913 {
29914 wtype = va_list_type_node;
29915 gcc_assert (wtype != NULL_TREE);
29916 htype = type;
29917 if (TREE_CODE (wtype) == ARRAY_TYPE)
29918 {
29919 /* If va_list is an array type, the argument may have decayed
29920 to a pointer type, e.g. by being passed to another function.
29921 In that case, unwrap both types so that we can compare the
29922 underlying records. */
29923 if (TREE_CODE (htype) == ARRAY_TYPE
29924 || POINTER_TYPE_P (htype))
29925 {
29926 wtype = TREE_TYPE (wtype);
29927 htype = TREE_TYPE (htype);
29928 }
29929 }
29930 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29931 return va_list_type_node;
29932 wtype = sysv_va_list_type_node;
29933 gcc_assert (wtype != NULL_TREE);
29934 htype = type;
29935 if (TREE_CODE (wtype) == ARRAY_TYPE)
29936 {
29937 /* If va_list is an array type, the argument may have decayed
29938 to a pointer type, e.g. by being passed to another function.
29939 In that case, unwrap both types so that we can compare the
29940 underlying records. */
29941 if (TREE_CODE (htype) == ARRAY_TYPE
29942 || POINTER_TYPE_P (htype))
29943 {
29944 wtype = TREE_TYPE (wtype);
29945 htype = TREE_TYPE (htype);
29946 }
29947 }
29948 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29949 return sysv_va_list_type_node;
29950 wtype = ms_va_list_type_node;
29951 gcc_assert (wtype != NULL_TREE);
29952 htype = type;
29953 if (TREE_CODE (wtype) == ARRAY_TYPE)
29954 {
29955 /* If va_list is an array type, the argument may have decayed
29956 to a pointer type, e.g. by being passed to another function.
29957 In that case, unwrap both types so that we can compare the
29958 underlying records. */
29959 if (TREE_CODE (htype) == ARRAY_TYPE
29960 || POINTER_TYPE_P (htype))
29961 {
29962 wtype = TREE_TYPE (wtype);
29963 htype = TREE_TYPE (htype);
29964 }
29965 }
29966 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29967 return ms_va_list_type_node;
29968 return NULL_TREE;
29969 }
29970 return std_canonical_va_list_type (type);
29971 }
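The ARRAY_TYPE unwrapping above exists because of C array decay; a minimal sketch in plain stdarg C (nothing i386-specific assumed):

#include <stdarg.h>

/* The SysV x86-64 va_list is an array of one struct, so this parameter
   decays to a pointer type...  */
static void consume (va_list ap) { (void) ap; }

void produce (int n, ...)
{
  va_list ap;       /* ...while this local still has array type. */
  va_start (ap, n);
  consume (ap);     /* both forms must canonicalize to va_list.  */
  va_end (ap);
}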
29973 /* Iterate through the target-specific builtin types for va_list.
29974 IDX denotes the iterator, *PTREE is set to the result type of
29975 the va_list builtin, and *PNAME to its internal type.
29976 Returns zero if there is no element for this index, otherwise
29977 IDX should be increased upon the next call.
29978 Note: the base builtin's name, __builtin_va_list, is not iterated.
29979 Used from c_common_nodes_and_builtins. */
29981 int
29982 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
29983 {
29984 if (!TARGET_64BIT)
29985 return 0;
29986 switch (idx) {
29987 case 0:
29988 *ptree = ms_va_list_type_node;
29989 *pname = "__builtin_ms_va_list";
29990 break;
29991 case 1:
29992 *ptree = sysv_va_list_type_node;
29993 *pname = "__builtin_sysv_va_list";
29994 break;
29995 default:
29996 return 0;
29997 }
29998 return 1;
29999 }
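A sketch of what the two cases above make visible to users on x86-64 (the type names are taken from the switch; the declarations themselves are illustrative):

__builtin_ms_va_list ms_ap;       /* idx 0 */
__builtin_sysv_va_list sysv_ap;   /* idx 1 */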
30001 /* Initialize the GCC target structure. */
30002 #undef TARGET_RETURN_IN_MEMORY
30003 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
30005 #undef TARGET_ATTRIBUTE_TABLE
30006 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
30007 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30008 # undef TARGET_MERGE_DECL_ATTRIBUTES
30009 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
30010 #endif
30012 #undef TARGET_COMP_TYPE_ATTRIBUTES
30013 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
30015 #undef TARGET_INIT_BUILTINS
30016 #define TARGET_INIT_BUILTINS ix86_init_builtins
30017 #undef TARGET_EXPAND_BUILTIN
30018 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
30020 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
30021 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
30022 ix86_builtin_vectorized_function
30024 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
30025 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
30027 #undef TARGET_BUILTIN_RECIPROCAL
30028 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
30030 #undef TARGET_ASM_FUNCTION_EPILOGUE
30031 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
30033 #undef TARGET_ENCODE_SECTION_INFO
30034 #ifndef SUBTARGET_ENCODE_SECTION_INFO
30035 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
30036 #else
30037 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
30038 #endif
30040 #undef TARGET_ASM_OPEN_PAREN
30041 #define TARGET_ASM_OPEN_PAREN ""
30042 #undef TARGET_ASM_CLOSE_PAREN
30043 #define TARGET_ASM_CLOSE_PAREN ""
30045 #undef TARGET_ASM_ALIGNED_HI_OP
30046 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
30047 #undef TARGET_ASM_ALIGNED_SI_OP
30048 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
30049 #ifdef ASM_QUAD
30050 #undef TARGET_ASM_ALIGNED_DI_OP
30051 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
30052 #endif
30054 #undef TARGET_ASM_UNALIGNED_HI_OP
30055 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
30056 #undef TARGET_ASM_UNALIGNED_SI_OP
30057 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
30058 #undef TARGET_ASM_UNALIGNED_DI_OP
30059 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
30061 #undef TARGET_SCHED_ADJUST_COST
30062 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
30063 #undef TARGET_SCHED_ISSUE_RATE
30064 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
30065 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
30066 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
30067 ia32_multipass_dfa_lookahead
30069 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
30070 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
30072 #ifdef HAVE_AS_TLS
30073 #undef TARGET_HAVE_TLS
30074 #define TARGET_HAVE_TLS true
30075 #endif
30076 #undef TARGET_CANNOT_FORCE_CONST_MEM
30077 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
30078 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
30079 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
30081 #undef TARGET_DELEGITIMIZE_ADDRESS
30082 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
30084 #undef TARGET_MS_BITFIELD_LAYOUT_P
30085 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
30087 #if TARGET_MACHO
30088 #undef TARGET_BINDS_LOCAL_P
30089 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
30090 #endif
30091 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30092 #undef TARGET_BINDS_LOCAL_P
30093 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
30094 #endif
30096 #undef TARGET_ASM_OUTPUT_MI_THUNK
30097 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
30098 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
30099 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
30101 #undef TARGET_ASM_FILE_START
30102 #define TARGET_ASM_FILE_START x86_file_start
30104 #undef TARGET_DEFAULT_TARGET_FLAGS
30105 #define TARGET_DEFAULT_TARGET_FLAGS \
30106 (TARGET_DEFAULT \
30107 | TARGET_SUBTARGET_DEFAULT \
30108 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
30110 #undef TARGET_HANDLE_OPTION
30111 #define TARGET_HANDLE_OPTION ix86_handle_option
30113 #undef TARGET_RTX_COSTS
30114 #define TARGET_RTX_COSTS ix86_rtx_costs
30115 #undef TARGET_ADDRESS_COST
30116 #define TARGET_ADDRESS_COST ix86_address_cost
30118 #undef TARGET_FIXED_CONDITION_CODE_REGS
30119 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
30120 #undef TARGET_CC_MODES_COMPATIBLE
30121 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
30123 #undef TARGET_MACHINE_DEPENDENT_REORG
30124 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
30126 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
30127 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
30129 #undef TARGET_BUILD_BUILTIN_VA_LIST
30130 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
30132 #undef TARGET_FN_ABI_VA_LIST
30133 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
30135 #undef TARGET_CANONICAL_VA_LIST_TYPE
30136 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
30138 #undef TARGET_EXPAND_BUILTIN_VA_START
30139 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
30141 #undef TARGET_MD_ASM_CLOBBERS
30142 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
30144 #undef TARGET_PROMOTE_PROTOTYPES
30145 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
30146 #undef TARGET_STRUCT_VALUE_RTX
30147 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
30148 #undef TARGET_SETUP_INCOMING_VARARGS
30149 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
30150 #undef TARGET_MUST_PASS_IN_STACK
30151 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
30152 #undef TARGET_PASS_BY_REFERENCE
30153 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
30154 #undef TARGET_INTERNAL_ARG_POINTER
30155 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
30156 #undef TARGET_UPDATE_STACK_BOUNDARY
30157 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
30158 #undef TARGET_GET_DRAP_RTX
30159 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
30160 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
30161 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
30162 #undef TARGET_STRICT_ARGUMENT_NAMING
30163 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
30165 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
30166 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
30168 #undef TARGET_SCALAR_MODE_SUPPORTED_P
30169 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
30171 #undef TARGET_VECTOR_MODE_SUPPORTED_P
30172 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
30174 #undef TARGET_C_MODE_FOR_SUFFIX
30175 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
30177 #ifdef HAVE_AS_TLS
30178 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
30179 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
30180 #endif
30182 #ifdef SUBTARGET_INSERT_ATTRIBUTES
30183 #undef TARGET_INSERT_ATTRIBUTES
30184 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
30185 #endif
30187 #undef TARGET_MANGLE_TYPE
30188 #define TARGET_MANGLE_TYPE ix86_mangle_type
30190 #undef TARGET_STACK_PROTECT_FAIL
30191 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
30193 #undef TARGET_FUNCTION_VALUE
30194 #define TARGET_FUNCTION_VALUE ix86_function_value
30196 #undef TARGET_SECONDARY_RELOAD
30197 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
30199 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
30200 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
30202 #undef TARGET_SET_CURRENT_FUNCTION
30203 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
30205 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
30206 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
30208 #undef TARGET_OPTION_SAVE
30209 #define TARGET_OPTION_SAVE ix86_function_specific_save
30211 #undef TARGET_OPTION_RESTORE
30212 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
30214 #undef TARGET_OPTION_PRINT
30215 #define TARGET_OPTION_PRINT ix86_function_specific_print
30217 #undef TARGET_OPTION_CAN_INLINE_P
30218 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
30220 #undef TARGET_EXPAND_TO_RTL_HOOK
30221 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
30223 struct gcc_target targetm = TARGET_INITIALIZER;
30225 #include "gt-i386.h"
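Finally, an assumed-caller sketch of how target-independent code consumes this structure. The vectorize hook name is confirmed by the comment above x86_builtin_vectorization_cost; the other field is inferred from its #define name and should be treated as an assumption:

/* Hypothetical middle-end fragment.  */
int branch_cost = targetm.vectorize.builtin_vectorization_cost (true);
tree vl = targetm.fn_abi_va_list (fndecl);   /* fndecl assumed in scope */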