/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "gimple.h"
#include "elf/dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
static int x86_builtin_vectorization_cost (bool);
static rtx legitimize_dllimport_symbol (rtx, bool);
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)	\
  ((mode) == QImode ? 0		\
   : (mode) == HImode ? 1	\
   : (mode) == SImode ? 2	\
   : (mode) == DImode ? 3	\
   : 4)
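/* Illustrative note (annotation, not part of the original file): the cost
   tables below hold one entry per MODE_INDEX slot, so a lookup such as
   ix86_cost->mult_init[MODE_INDEX (mode)] picks the QI/HI/SI/DI/"other"
   multiply startup cost for MODE; field names follow the processor_costs
   layout declared in i386.h.  */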
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
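/* Worked example (annotation, not part of the original file): with
   COSTS_N_INSNS (N) defined as (N) * 4, COSTS_N_BYTES (2) == 4
   == COSTS_N_INSNS (1), so a 2-byte add costs exactly one "insn unit"
   when tuning for size, and a 3-byte lea costs 1.5x an add purely by
   encoding length.  */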
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
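/* Annotation (an assumption based on how the stringop tables are consumed
   later in the backend): each cost table carries two stringop_algs entries
   apiece for memcpy and memset, indexed by TARGET_64BIT, so tables for
   32-bit-only chips plug DUMMY_STRINGOP_ALGS into the never-used 64-bit
   slot.  */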
const
struct processor_costs ix86_size_cost = {  /* costs for tuning for size */
  COSTS_N_BYTES (2),   /* cost of an add instruction */
  COSTS_N_BYTES (3),   /* cost of a lea instruction */
  COSTS_N_BYTES (2),   /* variable shift costs */
  COSTS_N_BYTES (3),   /* constant shift costs */
  {COSTS_N_BYTES (3),  /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),  /* HI */
   COSTS_N_BYTES (3),  /* SI */
   COSTS_N_BYTES (3),  /* DI */
   COSTS_N_BYTES (5)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),  /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),  /* HI */
   COSTS_N_BYTES (3),  /* SI */
   COSTS_N_BYTES (3),  /* DI */
   COSTS_N_BYTES (5)}, /* other */
  COSTS_N_BYTES (3),   /* cost of movsx */
  COSTS_N_BYTES (3),   /* cost of movzx */
  0,                   /* "large" insn */
  2,                   /* MOVE_RATIO */
  2,                   /* cost for loading QImode using movzbl */
  {2, 2, 2},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 2, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {2, 2, 2},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {2, 2, 2},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  3,                   /* cost of moving MMX register */
  {3, 3},              /* cost of loading MMX registers
                          in SImode and DImode */
  {3, 3},              /* cost of storing MMX registers
                          in SImode and DImode */
  3,                   /* cost of moving SSE register */
  {3, 3, 3},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {3, 3, 3},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
  0,                   /* size of l1 cache */
  0,                   /* size of l2 cache */
  0,                   /* size of prefetch block */
  0,                   /* number of parallel prefetches */
  2,                   /* Branch cost */
  COSTS_N_BYTES (2),   /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),   /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),   /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),   /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),   /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),   /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  1,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  1,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {  /* 386 specific costs */
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (3),   /* variable shift costs */
  COSTS_N_INSNS (2),   /* constant shift costs */
  {COSTS_N_INSNS (6),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),  /* HI */
   COSTS_N_INSNS (6),  /* SI */
   COSTS_N_INSNS (6),  /* DI */
   COSTS_N_INSNS (6)}, /* other */
  COSTS_N_INSNS (1),   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23), /* HI */
   COSTS_N_INSNS (23), /* SI */
   COSTS_N_INSNS (23), /* DI */
   COSTS_N_INSNS (23)}, /* other */
  COSTS_N_INSNS (3),   /* cost of movsx */
  COSTS_N_INSNS (2),   /* cost of movzx */
  15,                  /* "large" insn */
  3,                   /* MOVE_RATIO */
  4,                   /* cost for loading QImode using movzbl */
  {2, 4, 2},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 4, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {8, 8, 8},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {8, 8, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {4, 8},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 8},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 8, 16},          /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 8, 16},          /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
  0,                   /* size of l1 cache */
  0,                   /* size of l2 cache */
  0,                   /* size of prefetch block */
  0,                   /* number of parallel prefetches */
  1,                   /* Branch cost */
  COSTS_N_INSNS (23),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122), /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs i486_cost = {  /* 486 specific costs */
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (3),   /* variable shift costs */
  COSTS_N_INSNS (2),   /* constant shift costs */
  {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
   COSTS_N_INSNS (12), /* HI */
   COSTS_N_INSNS (12), /* SI */
   COSTS_N_INSNS (12), /* DI */
   COSTS_N_INSNS (12)}, /* other */
  1,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40), /* HI */
   COSTS_N_INSNS (40), /* SI */
   COSTS_N_INSNS (40), /* DI */
   COSTS_N_INSNS (40)}, /* other */
  COSTS_N_INSNS (3),   /* cost of movsx */
  COSTS_N_INSNS (2),   /* cost of movzx */
  15,                  /* "large" insn */
  3,                   /* MOVE_RATIO */
  4,                   /* cost for loading QImode using movzbl */
  {2, 4, 2},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 4, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {8, 8, 8},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {8, 8, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {4, 8},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 8},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 8, 16},          /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 8, 16},          /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
  4,                   /* size of l1 cache.  486 has 8kB cache
                          shared for code and data, so 4kB is
                          not really precise.  */
  4,                   /* size of l2 cache */
  0,                   /* size of prefetch block */
  0,                   /* number of parallel prefetches */
  1,                   /* Branch cost */
  COSTS_N_INSNS (8),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),  /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (4),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
   COSTS_N_INSNS (11), /* HI */
   COSTS_N_INSNS (11), /* SI */
   COSTS_N_INSNS (11), /* DI */
   COSTS_N_INSNS (11)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25), /* HI */
   COSTS_N_INSNS (25), /* SI */
   COSTS_N_INSNS (25), /* DI */
   COSTS_N_INSNS (25)}, /* other */
  COSTS_N_INSNS (3),   /* cost of movsx */
  COSTS_N_INSNS (2),   /* cost of movzx */
  8,                   /* "large" insn */
  6,                   /* MOVE_RATIO */
  6,                   /* cost for loading QImode using movzbl */
  {2, 4, 2},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 4, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {2, 2, 6},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 4, 6},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  8,                   /* cost of moving MMX register */
  {8, 8},              /* cost of loading MMX registers
                          in SImode and DImode */
  {8, 8},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 8, 16},          /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 8, 16},          /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
  8,                   /* size of l1 cache.  */
  8,                   /* size of l2 cache */
  0,                   /* size of prefetch block */
  0,                   /* number of parallel prefetches */
  2,                   /* Branch cost */
  COSTS_N_INSNS (3),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),  /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (4),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (4),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (4)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17), /* HI */
   COSTS_N_INSNS (17), /* SI */
   COSTS_N_INSNS (17), /* DI */
   COSTS_N_INSNS (17)}, /* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  6,                   /* MOVE_RATIO */
  2,                   /* cost for loading QImode using movzbl */
  {4, 4, 4},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 2, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {2, 2, 6},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 4, 6},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {2, 2},              /* cost of loading MMX registers
                          in SImode and DImode */
  {2, 2},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {2, 2, 8},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {2, 2, 8},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
  8,                   /* size of l1 cache.  */
  256,                 /* size of l2 cache */
  32,                  /* size of prefetch block */
  6,                   /* number of parallel prefetches */
  2,                   /* Branch cost */
  COSTS_N_INSNS (3),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),  /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (2),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (7),  /* SI */
   COSTS_N_INSNS (7),  /* DI */
   COSTS_N_INSNS (7)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23), /* HI */
   COSTS_N_INSNS (39), /* SI */
   COSTS_N_INSNS (39), /* DI */
   COSTS_N_INSNS (39)}, /* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  4,                   /* MOVE_RATIO */
  1,                   /* cost for loading QImode using movzbl */
  {1, 1, 1},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {1, 1, 1},           /* cost of storing integer registers */
  1,                   /* cost of reg,reg fld/fst */
  {1, 1, 1},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 6, 6},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  1,                   /* cost of moving MMX register */
  {1, 1},              /* cost of loading MMX registers
                          in SImode and DImode */
  {1, 1},              /* cost of storing MMX registers
                          in SImode and DImode */
  1,                   /* cost of moving SSE register */
  {1, 1, 1},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {1, 1, 1},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  1,                   /* MMX or SSE register to integer */
  64,                  /* size of l1 cache.  */
  128,                 /* size of l2 cache.  */
  32,                  /* size of prefetch block */
  1,                   /* number of parallel prefetches */
  1,                   /* Branch cost */
  COSTS_N_INSNS (6),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),  /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (2),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (3),  /* DI */
   COSTS_N_INSNS (3)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18), /* HI */
   COSTS_N_INSNS (18), /* SI */
   COSTS_N_INSNS (18), /* DI */
   COSTS_N_INSNS (18)}, /* other */
  COSTS_N_INSNS (2),   /* cost of movsx */
  COSTS_N_INSNS (2),   /* cost of movzx */
  8,                   /* "large" insn */
  4,                   /* MOVE_RATIO */
  3,                   /* cost for loading QImode using movzbl */
  {4, 5, 4},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 3, 2},           /* cost of storing integer registers */
  4,                   /* cost of reg,reg fld/fst */
  {6, 6, 6},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 4, 4},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {2, 2},              /* cost of loading MMX registers
                          in SImode and DImode */
  {2, 2},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {2, 2, 8},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {2, 2, 8},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  6,                   /* MMX or SSE register to integer */
  32,                  /* size of l1 cache.  */
  32,                  /* size of l2 cache.  Some models
                          have integrated l2 cache, but
                          optimizing for k6 is not important
                          enough to worry about that.  */
  32,                  /* size of prefetch block */
  1,                   /* number of parallel prefetches */
  1,                   /* Branch cost */
  COSTS_N_INSNS (2),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),  /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (2),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (5),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),  /* HI */
   COSTS_N_INSNS (5),  /* SI */
   COSTS_N_INSNS (5),  /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  9,                   /* MOVE_RATIO */
  4,                   /* cost for loading QImode using movzbl */
  {3, 4, 3},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {3, 4, 3},           /* cost of storing integer registers */
  4,                   /* cost of reg,reg fld/fst */
  {4, 4, 12},          /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {6, 6, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {4, 4},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 4},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 4, 6},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 4, 5},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  5,                   /* MMX or SSE register to integer */
  64,                  /* size of l1 cache.  */
  256,                 /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  6,                   /* number of parallel prefetches */
  5,                   /* Branch cost */
  COSTS_N_INSNS (4),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) compared to K8.  Alignment becomes important after 8 bytes for
     memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (2),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  9,                   /* MOVE_RATIO */
  4,                   /* cost for loading QImode using movzbl */
  {3, 4, 3},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {3, 4, 3},           /* cost of storing integer registers */
  4,                   /* cost of reg,reg fld/fst */
  {4, 4, 12},          /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {6, 6, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {3, 3},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 4},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 3, 6},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 4, 5},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  5,                   /* MMX or SSE register to integer */
  64,                  /* size of l1 cache.  */
  512,                 /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                 /* number of parallel prefetches */
  3,                   /* Branch cost */
  COSTS_N_INSNS (4),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */
  /* K8 has an optimized REP instruction for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                   /* scalar_stmt_cost.  */
  2,                   /* scalar load_cost.  */
  2,                   /* scalar_store_cost.  */
  5,                   /* vec_stmt_cost.  */
  0,                   /* vec_to_scalar_cost.  */
  2,                   /* scalar_to_vec_cost.  */
  2,                   /* vec_align_load_cost.  */
  3,                   /* vec_unalign_load_cost.  */
  3,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  2,                   /* cond_not_taken_branch_cost.  */
};
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (2),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35), /* HI */
   COSTS_N_INSNS (51), /* SI */
   COSTS_N_INSNS (83), /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  9,                   /* MOVE_RATIO */
  4,                   /* cost for loading QImode using movzbl */
  {3, 4, 3},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {3, 4, 3},           /* cost of storing integer registers */
  4,                   /* cost of reg,reg fld/fst */
  {4, 4, 12},          /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {6, 6, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {3, 3},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 4},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 4, 3},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 4, 5},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
                       /* On K8:
                            MOVD reg64, xmmreg  Double  FSTORE 4
                            MOVD reg32, xmmreg  Double  FSTORE 4
                          On AMDFAM10:
                            MOVD reg64, xmmreg  Double  FADD 3
                                                        1/1  1/1
                            MOVD reg32, xmmreg  Double  FADD 3
                                                        1/1  1/1 */
  64,                  /* size of l1 cache.  */
  512,                 /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                 /* number of parallel prefetches */
  2,                   /* Branch cost */
  COSTS_N_INSNS (4),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */

  /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                   /* scalar_stmt_cost.  */
  2,                   /* scalar load_cost.  */
  2,                   /* scalar_store_cost.  */
  6,                   /* vec_stmt_cost.  */
  0,                   /* vec_to_scalar_cost.  */
  2,                   /* scalar_to_vec_cost.  */
  2,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  2,                   /* vec_store_cost.  */
  2,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (3),   /* cost of a lea instruction */
  COSTS_N_INSNS (4),   /* variable shift costs */
  COSTS_N_INSNS (4),   /* constant shift costs */
  {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
   COSTS_N_INSNS (15), /* HI */
   COSTS_N_INSNS (15), /* SI */
   COSTS_N_INSNS (15), /* DI */
   COSTS_N_INSNS (15)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56), /* HI */
   COSTS_N_INSNS (56), /* SI */
   COSTS_N_INSNS (56), /* DI */
   COSTS_N_INSNS (56)}, /* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  16,                  /* "large" insn */
  6,                   /* MOVE_RATIO */
  2,                   /* cost for loading QImode using movzbl */
  {4, 5, 4},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 3, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {2, 2, 6},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 4, 6},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {2, 2},              /* cost of loading MMX registers
                          in SImode and DImode */
  {2, 2},              /* cost of storing MMX registers
                          in SImode and DImode */
  12,                  /* cost of moving SSE register */
  {12, 12, 12},        /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {2, 2, 8},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  10,                  /* MMX or SSE register to integer */
  8,                   /* size of l1 cache.  */
  256,                 /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  6,                   /* number of parallel prefetches */
  2,                   /* Branch cost */
  COSTS_N_INSNS (5),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),  /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
   COSTS_N_INSNS (10), /* HI */
   COSTS_N_INSNS (10), /* SI */
   COSTS_N_INSNS (10), /* DI */
   COSTS_N_INSNS (10)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66), /* HI */
   COSTS_N_INSNS (66), /* SI */
   COSTS_N_INSNS (66), /* DI */
   COSTS_N_INSNS (66)}, /* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  16,                  /* "large" insn */
  17,                  /* MOVE_RATIO */
  4,                   /* cost for loading QImode using movzbl */
  {4, 4, 4},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {4, 4, 4},           /* cost of storing integer registers */
  3,                   /* cost of reg,reg fld/fst */
  {12, 12, 12},        /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 4, 4},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  6,                   /* cost of moving MMX register */
  {12, 12},            /* cost of loading MMX registers
                          in SImode and DImode */
  {12, 12},            /* cost of storing MMX registers
                          in SImode and DImode */
  6,                   /* cost of moving SSE register */
  {12, 12, 12},        /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {12, 12, 12},        /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  8,                   /* MMX or SSE register to integer */
  8,                   /* size of l1 cache.  */
  1024,                /* size of l2 cache.  */
  128,                 /* size of prefetch block */
  8,                   /* number of parallel prefetches */
  1,                   /* Branch cost */
  COSTS_N_INSNS (6),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),  /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (3),  /* DI */
   COSTS_N_INSNS (3)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (22), /* HI */
   COSTS_N_INSNS (22), /* SI */
   COSTS_N_INSNS (22), /* DI */
   COSTS_N_INSNS (22)}, /* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  16,                  /* MOVE_RATIO */
  2,                   /* cost for loading QImode using movzbl */
  {6, 6, 6},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {4, 4, 4},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {6, 6, 6},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 4, 4},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {6, 6},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 4},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {6, 6, 6},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 4, 4},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  2,                   /* MMX or SSE register to integer */
  32,                  /* size of l1 cache.  */
  2048,                /* size of l2 cache.  */
  128,                 /* size of prefetch block */
  8,                   /* number of parallel prefetches */
  3,                   /* Branch cost */
  COSTS_N_INSNS (3),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),  /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (2)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  17,                  /* MOVE_RATIO */
  2,                   /* cost for loading QImode using movzbl */
  {4, 4, 4},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {4, 4, 4},           /* cost of storing integer registers */
  4,                   /* cost of reg,reg fld/fst */
  {12, 12, 12},        /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {6, 6, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {8, 8},              /* cost of loading MMX registers
                          in SImode and DImode */
  {8, 8},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {8, 8, 8},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {8, 8, 8},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  5,                   /* MMX or SSE register to integer */
  32,                  /* size of l1 cache.  */
  256,                 /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  6,                   /* number of parallel prefetches */
  3,                   /* Branch cost */
  COSTS_N_INSNS (8),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),  /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results in
     the use of unnecessary temporary registers, causing regressions on
     several SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (2)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  17,                  /* MOVE_RATIO */
  4,                   /* cost for loading QImode using movzbl */
  {4, 4, 4},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {4, 4, 4},           /* cost of storing integer registers */
  4,                   /* cost of reg,reg fld/fst */
  {12, 12, 12},        /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {6, 6, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {8, 8},              /* cost of loading MMX registers
                          in SImode and DImode */
  {8, 8},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {8, 8, 8},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {8, 8, 8},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  5,                   /* MMX or SSE register to integer */
  32,                  /* size of l1 cache.  */
  512,                 /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  6,                   /* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
  3,                   /* Branch cost */
  COSTS_N_INSNS (8),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),  /* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona
   and K8.  */
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (2)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  17,                  /* MOVE_RATIO */
  4,                   /* cost for loading QImode using movzbl */
  {4, 4, 4},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {4, 4, 4},           /* cost of storing integer registers */
  4,                   /* cost of reg,reg fld/fst */
  {12, 12, 12},        /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {6, 6, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {8, 8},              /* cost of loading MMX registers
                          in SImode and DImode */
  {8, 8},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {8, 8, 8},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {8, 8, 8},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  5,                   /* MMX or SSE register to integer */
  32,                  /* size of l1 cache.  */
  256,                 /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  6,                   /* number of parallel prefetches */
  3,                   /* Branch cost */
  COSTS_N_INSNS (8),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),  /* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
const struct processor_costs *ix86_cost = &pentium_cost;
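/* Annotation (an assumption about code later in this file): this default is
   only a placeholder; override_options is expected to repoint ix86_cost at
   the table matching the -mtune selection, along the lines of
   ix86_cost = processor_target_table[ix86_tune].cost;  */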
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be a common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];
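/* Sketch (annotation, not part of the original file) of how the
   per-processor masks below are folded into the boolean
   ix86_tune_features array; the loop is modeled on override_options:

     unsigned int ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i]
         = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
*/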
/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling it for Generic64 seems like a good code size
     tradeoff.  We can't enable it for 32bit generic because it does not
     work well with PPro base chips.  */
  m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,

  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */
  m_486 | m_PENT,

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
  | m_CORE2 | m_GENERIC,

  /* X86_TUNE_DEEP_BRANCH_PREDICTION */
  m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation results.  But after P4 was made, no performance benefit
     was observed with branch hints.  They also increase the code size.
     As a result, icc never generates branch hints.  */
  0,
  /* X86_TUNE_DOUBLE_WITH_ADD */
  ~m_386,

  /* X86_TUNE_USE_SAHF */
  m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
  | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,

  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on the Generic32 compilation setting as well.  However,
     in the current implementation partial register stalls are not eliminated
     very well - they can be introduced via subregs synthesized by combine
     and can happen in caller/callee saving sequences.  Because this option
     pays back little on PPro based chips and is in conflict with the partial
     reg dependencies used by Athlon/P4 based chips, it is better to leave
     it off for generic32 for now.  */
  m_PPRO,

  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
  m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_HIMODE_FIOP */
  m_386 | m_486 | m_K6_GEODE,

  /* X86_TUNE_USE_SIMODE_FIOP */
  ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_MOV0 */
  m_K6,

  /* X86_TUNE_USE_CLTD */
  ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */
  m_PENT4,

  /* X86_TUNE_SPLIT_LONG_MOVES */
  m_PPRO,

  /* X86_TUNE_READ_MODIFY_WRITE */
  ~m_PENT,

  /* X86_TUNE_READ_MODIFY */
  ~(m_PENT | m_PPRO),

  /* X86_TUNE_PROMOTE_QIMODE */
  m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
  | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,

  /* X86_TUNE_FAST_PREFIX */
  ~(m_PENT | m_486 | m_386),

  /* X86_TUNE_SINGLE_STRINGOP */
  m_386 | m_PENT4 | m_NOCONA,

  /* X86_TUNE_QIMODE_MATH */
  ~0,
  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls were more effective.  */
  ~m_PPRO,

  /* X86_TUNE_PROMOTE_QI_REGS */
  0,
1390 /* X86_TUNE_PROMOTE_HI_REGS */
1391 m_PPRO,
1393 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1394 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1395 | m_CORE2 | m_GENERIC,
1397 /* X86_TUNE_ADD_ESP_8 */
1398 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1399 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1401 /* X86_TUNE_SUB_ESP_4 */
1402 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1403 | m_GENERIC,
1405 /* X86_TUNE_SUB_ESP_8 */
1406 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1407 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1409 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1410 for DFmode copies */
1411 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1412 | m_GENERIC | m_GEODE),
1414 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1415 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1417 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1418 conflict here in between PPro/Pentium4 based chips that thread 128bit
1419 SSE registers as single units versus K8 based chips that divide SSE
1420 registers to two 64bit halves. This knob promotes all store destinations
1421 to be 128bit to allow register renaming on 128bit SSE units, but usually
1422 results in one extra microop on 64bit SSE units. Experimental results
1423 shows that disabling this option on P4 brings over 20% SPECfp regression,
1424 while enabling it on K8 brings roughly 2.4% regression that can be partly
1425 masked by careful scheduling of moves. */
1426 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1427 | m_AMDFAM10,
1429 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1430 m_AMDFAM10,
1432 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where types and dependencies
1433 are resolved on SSE register parts instead of whole registers, so we may
1434 maintain just the lower part of scalar values in the proper format,
1435 leaving the upper part undefined. */
1436 m_ATHLON_K8,
1438 /* X86_TUNE_SSE_TYPELESS_STORES */
1439 m_AMD_MULTIPLE,
1441 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1442 m_PPRO | m_PENT4 | m_NOCONA,
1444 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1445 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1447 /* X86_TUNE_PROLOGUE_USING_MOVE */
1448 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1450 /* X86_TUNE_EPILOGUE_USING_MOVE */
1451 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1453 /* X86_TUNE_SHIFT1 */
1454 ~m_486,
1456 /* X86_TUNE_USE_FFREEP */
1457 m_AMD_MULTIPLE,
1459 /* X86_TUNE_INTER_UNIT_MOVES */
1460 ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
1462 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1463 ~(m_AMDFAM10),
1465 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1466 than 4 branch instructions in the 16 byte window. */
1467 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1468 | m_GENERIC,
1470 /* X86_TUNE_SCHEDULE */
1471 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1472 | m_GENERIC,
1474 /* X86_TUNE_USE_BT */
1475 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1477 /* X86_TUNE_USE_INCDEC */
1478 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1480 /* X86_TUNE_PAD_RETURNS */
1481 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1483 /* X86_TUNE_EXT_80387_CONSTANTS */
1484 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1485 | m_CORE2 | m_GENERIC,
1487 /* X86_TUNE_SHORTEN_X87_SSE */
1488 ~m_K8,
1490 /* X86_TUNE_AVOID_VECTOR_DECODE */
1491 m_K8 | m_GENERIC64,
1493 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
1494 HImode and SImode multiplies, but the 386 and 486 do HImode multiplies faster. */
1495 ~(m_386 | m_486),
1497 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1498 vector path on AMD machines. */
1499 m_K8 | m_GENERIC64 | m_AMDFAM10,
1501 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1502 machines. */
1503 m_K8 | m_GENERIC64 | m_AMDFAM10,
1505 /* X86_TUNE_MOVE_M1_VIA_OR: On Pentiums, it is faster to load -1 via OR
1506 than via MOV. */
1507 m_PENT,
1509 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1510 but one byte longer. */
1511 m_PENT,
1513 /* X86_TUNE_NOT_VECTORMODE: On the AMD K6, NOT is vector decoded with a
1514 memory operand that cannot be represented using a modRM byte. The XOR
1515 replacement is long decoded, so this split helps here as well. */
1516 m_K6,
1518 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1519 from FP to FP. */
1520 m_AMDFAM10 | m_GENERIC,
1522 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1523 from integer to FP. */
1524 m_AMDFAM10,
1526 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1527 with a subsequent conditional jump instruction into a single
1528 compare-and-branch uop. */
1529 m_CORE2,
1531 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1532 will impact LEA instruction selection. */
1533 m_ATOM,
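/* Editor's note (illustrative sketch, not part of the original file):
   each initializer above is a bitmask over processor types.  At option
   processing time the mask for the selected -mtune CPU is folded into a
   per-knob boolean, as done in override_options further below:

     ix86_tune_mask = 1u << ix86_tune;
     ix86_tune_features[i]
       = !!(initial_ix86_tune_features[i] & ix86_tune_mask);

   i386.h is then expected to wrap the array in TARGET_* convenience
   macros, roughly

     #define TARGET_USE_INCDEC ix86_tune_features[X86_TUNE_USE_INCDEC]

   so the rest of the backend can simply test "if (TARGET_USE_INCDEC)".  */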
1536 /* Feature tests against the various architecture variations. */
1537 unsigned char ix86_arch_features[X86_ARCH_LAST];
1539 /* Feature tests against the various architecture variations, used to create
1540 ix86_arch_features based on the processor mask. */
1541 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1542 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1543 ~(m_386 | m_486 | m_PENT | m_K6),
1545 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1546 ~m_386,
1548 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1549 ~(m_386 | m_486),
1551 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1552 ~m_386,
1554 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1555 ~m_386,
1558 static const unsigned int x86_accumulate_outgoing_args
1559 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1560 | m_GENERIC;
1562 static const unsigned int x86_arch_always_fancy_math_387
1563 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1564 | m_NOCONA | m_CORE2 | m_GENERIC;
1566 static enum stringop_alg stringop_alg = no_stringop;
1568 /* In case the average insn count for a single function invocation is
1569 lower than this constant, emit fast (but longer) prologue and
1570 epilogue code. */
1571 #define FAST_PROLOGUE_INSN_COUNT 20
1573 /* Names for 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
1574 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1575 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1576 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1578 /* Array of the smallest class containing reg number REGNO, indexed by
1579 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1581 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1583 /* ax, dx, cx, bx */
1584 AREG, DREG, CREG, BREG,
1585 /* si, di, bp, sp */
1586 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1587 /* FP registers */
1588 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1589 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1590 /* arg pointer */
1591 NON_Q_REGS,
1592 /* flags, fpsr, fpcr, frame */
1593 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1594 /* SSE registers */
1595 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1596 SSE_REGS, SSE_REGS,
1597 /* MMX registers */
1598 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1599 MMX_REGS, MMX_REGS,
1600 /* REX registers */
1601 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1602 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1603 /* SSE REX registers */
1604 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1605 SSE_REGS, SSE_REGS,
1608 /* The "default" register map used in 32bit mode. */
1610 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1612 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1613 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1614 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1615 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1616 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1617 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1618 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1621 /* The "default" register map used in 64bit mode. */
1623 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1625 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1626 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1627 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1628 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1629 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1630 8,9,10,11,12,13,14,15, /* extended integer registers */
1631 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1634 /* Define the register numbers to be used in Dwarf debugging information.
1635 The SVR4 reference port C compiler uses the following register numbers
1636 in its Dwarf output code:
1637 0 for %eax (gcc regno = 0)
1638 1 for %ecx (gcc regno = 2)
1639 2 for %edx (gcc regno = 1)
1640 3 for %ebx (gcc regno = 3)
1641 4 for %esp (gcc regno = 7)
1642 5 for %ebp (gcc regno = 6)
1643 6 for %esi (gcc regno = 4)
1644 7 for %edi (gcc regno = 5)
1645 The following three DWARF register numbers are never generated by
1646 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1647 believes these numbers have these meanings.
1648 8 for %eip (no gcc equivalent)
1649 9 for %eflags (gcc regno = 17)
1650 10 for %trapno (no gcc equivalent)
1651 It is not at all clear how we should number the FP stack registers
1652 for the x86 architecture. If the version of SDB on x86/svr4 were
1653 a bit less brain dead with respect to floating-point then we would
1654 have a precedent to follow with respect to DWARF register numbers
1655 for x86 FP registers, but the SDB on x86/svr4 is so completely
1656 broken with respect to FP registers that it is hardly worth thinking
1657 of it as something to strive for compatibility with.
1658 The version of x86/svr4 SDB I have at the moment does (partially)
1659 seem to believe that DWARF register number 11 is associated with
1660 the x86 register %st(0), but that's about all. Higher DWARF
1661 register numbers don't seem to be associated with anything in
1662 particular, and even for DWARF regno 11, SDB only seems to under-
1663 stand that it should say that a variable lives in %st(0) (when
1664 asked via an `=' command) if we said it was in DWARF regno 11,
1665 but SDB still prints garbage when asked for the value of the
1666 variable in question (via a `/' command).
1667 (Also note that the labels SDB prints for various FP stack regs
1668 when doing an `x' command are all wrong.)
1669 Note that these problems generally don't affect the native SVR4
1670 C compiler because it doesn't allow the use of -O with -g and
1671 because when it is *not* optimizing, it allocates a memory
1672 location for each floating-point variable, and the memory
1673 location is what gets described in the DWARF AT_location
1674 attribute for the variable in question.
1675 Regardless of the severe mental illness of the x86/svr4 SDB, we
1676 do something sensible here and we use the following DWARF
1677 register numbers. Note that these are all stack-top-relative
1678 numbers.
1679 11 for %st(0) (gcc regno = 8)
1680 12 for %st(1) (gcc regno = 9)
1681 13 for %st(2) (gcc regno = 10)
1682 14 for %st(3) (gcc regno = 11)
1683 15 for %st(4) (gcc regno = 12)
1684 16 for %st(5) (gcc regno = 13)
1685 17 for %st(6) (gcc regno = 14)
1686 18 for %st(7) (gcc regno = 15)
1688 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1690 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1691 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1692 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1693 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1694 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1695 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1696 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
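/* Worked example (editor's illustration): translating a gcc hard register
   number to an SVR4 DWARF number is a plain table lookup:

     svr4_dbx_register_map[0] == 0    - %eax
     svr4_dbx_register_map[4] == 6    - %esi, matching "6 for %esi
                                        (gcc regno = 4)" in the comment above
     svr4_dbx_register_map[8] == 11   - %st(0), stack-top-relative

   The DBX_REGISTER_NUMBER macro in i386.h is presumably what selects
   between this map and the two "default" maps above, depending on the
   target configuration.  */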
1699 /* Test and compare insns in i386.md store the information needed to
1700 generate branch and scc insns here. */
1702 rtx ix86_compare_op0 = NULL_RTX;
1703 rtx ix86_compare_op1 = NULL_RTX;
1705 /* Define parameter passing and return registers. */
1707 static int const x86_64_int_parameter_registers[6] =
1709 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1712 static int const x86_64_ms_abi_int_parameter_registers[4] =
1714 CX_REG, DX_REG, R8_REG, R9_REG
1717 static int const x86_64_int_return_registers[4] =
1719 AX_REG, DX_REG, DI_REG, SI_REG
1722 /* Define the structure for the machine field in struct function. */
1724 struct GTY(()) stack_local_entry {
1725 unsigned short mode;
1726 unsigned short n;
1727 rtx rtl;
1728 struct stack_local_entry *next;
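/* Editor's sketch (illustrative): stack_local_entry is a per-function
   linked list of scratch stack slots keyed by (mode, n), letting a slot
   be reused instead of reallocated.  A lookup over the list would be
   roughly:

     struct stack_local_entry *s;
     for (s = cfun->machine->stack_locals; s; s = s->next)
       if (s->mode == mode && s->n == n)
         return copy_rtx (s->rtl);

   The list head is assumed here to live in the machine field of struct
   function mentioned above.  */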
1731 /* Structure describing stack frame layout.
1732 Stack grows downward:
1734 [arguments]
1735 <- ARG_POINTER
1736 saved pc
1738 saved frame pointer if frame_pointer_needed
1739 <- HARD_FRAME_POINTER
1740 [saved regs]
1742 [padding0]
1744 [saved SSE regs]
1746 [padding1] \
1748 [va_arg registers] (
1749 > to_allocate <- FRAME_POINTER
1750 [frame] (
1752 [padding2] /
1754 struct ix86_frame
1756 int padding0;
1757 int nsseregs;
1758 int nregs;
1759 int padding1;
1760 int va_arg_size;
1761 HOST_WIDE_INT frame;
1762 int padding2;
1763 int outgoing_arguments_size;
1764 int red_zone_size;
1766 HOST_WIDE_INT to_allocate;
1767 /* The offsets relative to ARG_POINTER. */
1768 HOST_WIDE_INT frame_pointer_offset;
1769 HOST_WIDE_INT hard_frame_pointer_offset;
1770 HOST_WIDE_INT stack_pointer_offset;
1772 /* When save_regs_using_mov is set, emit prologue using
1773 move instead of push instructions. */
1774 bool save_regs_using_mov;
1777 /* Code model option. */
1778 enum cmodel ix86_cmodel;
1779 /* Asm dialect. */
1780 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1781 /* TLS dialects. */
1782 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1784 /* Which unit we are generating floating point math for. */
1785 enum fpmath_unit ix86_fpmath;
1787 /* Which cpu are we scheduling for. */
1788 enum attr_cpu ix86_schedule;
1790 /* Which cpu are we optimizing for. */
1791 enum processor_type ix86_tune;
1793 /* Which instruction set architecture to use. */
1794 enum processor_type ix86_arch;
1796 /* True if the SSE prefetch instruction is not a NOP. */
1797 int x86_prefetch_sse;
1799 /* ix86_regparm_string as a number */
1800 static int ix86_regparm;
1802 /* -mstackrealign option */
1803 extern int ix86_force_align_arg_pointer;
1804 static const char ix86_force_align_arg_pointer_string[]
1805 = "force_align_arg_pointer";
1807 static rtx (*ix86_gen_leave) (void);
1808 static rtx (*ix86_gen_pop1) (rtx);
1809 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1810 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1811 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1812 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1813 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1814 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1816 /* Preferred alignment for stack boundary in bits. */
1817 unsigned int ix86_preferred_stack_boundary;
1819 /* Alignment for incoming stack boundary in bits specified at
1820 command line. */
1821 static unsigned int ix86_user_incoming_stack_boundary;
1823 /* Default alignment for incoming stack boundary in bits. */
1824 static unsigned int ix86_default_incoming_stack_boundary;
1826 /* Alignment for incoming stack boundary in bits. */
1827 unsigned int ix86_incoming_stack_boundary;
1829 /* The abi used by target. */
1830 enum calling_abi ix86_abi;
1832 /* Values 1-5: see jump.c */
1833 int ix86_branch_cost;
1835 /* Calling abi specific va_list type nodes. */
1836 static GTY(()) tree sysv_va_list_type_node;
1837 static GTY(()) tree ms_va_list_type_node;
1839 /* Variables which are this size or smaller are put in the data/bss
1840 or ldata/lbss sections. */
1842 int ix86_section_threshold = 65536;
1844 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1845 char internal_label_prefix[16];
1846 int internal_label_prefix_len;
1848 /* Fence to use after loop using movnt. */
1849 tree x86_mfence;
1851 /* Register class used for passing a given 64-bit part of the argument.
1852 These represent classes as documented by the psABI, with the exception
1853 of the SSESF and SSEDF classes, which are basically the SSE class; gcc
1854 just uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
1856 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1857 whenever possible (the upper half then contains padding). */
1858 enum x86_64_reg_class
1860 X86_64_NO_CLASS,
1861 X86_64_INTEGER_CLASS,
1862 X86_64_INTEGERSI_CLASS,
1863 X86_64_SSE_CLASS,
1864 X86_64_SSESF_CLASS,
1865 X86_64_SSEDF_CLASS,
1866 X86_64_SSEUP_CLASS,
1867 X86_64_X87_CLASS,
1868 X86_64_X87UP_CLASS,
1869 X86_64_COMPLEX_X87_CLASS,
1870 X86_64_MEMORY_CLASS
1873 #define MAX_CLASSES 4
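/* Worked example (editor's illustration of the psABI classification that
   these values model): a 16-byte aggregate such as

     struct s { double d; int i; };

   splits into two eightbytes classified as

     { X86_64_SSEDF_CLASS, X86_64_INTEGERSI_CLASS }

   - the first eightbyte holds only the double and travels in an SSE
   register (moved in DFmode, per the comment above), while the second
   holds the int plus four bytes of padding and travels in an integer
   register (moved in the cheaper SImode).  */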
1875 /* Table of constants used by fldpi, fldln2, etc.... */
1876 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1877 static bool ext_80387_constants_init = 0;
1880 static struct machine_function * ix86_init_machine_status (void);
1881 static rtx ix86_function_value (const_tree, const_tree, bool);
1882 static int ix86_function_regparm (const_tree, const_tree);
1883 static void ix86_compute_frame_layout (struct ix86_frame *);
1884 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1885 rtx, rtx, int);
1886 static void ix86_add_new_builtins (int);
1888 enum ix86_function_specific_strings
1890 IX86_FUNCTION_SPECIFIC_ARCH,
1891 IX86_FUNCTION_SPECIFIC_TUNE,
1892 IX86_FUNCTION_SPECIFIC_FPMATH,
1893 IX86_FUNCTION_SPECIFIC_MAX
1896 static char *ix86_target_string (int, int, const char *, const char *,
1897 const char *, bool);
1898 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1899 static void ix86_function_specific_save (struct cl_target_option *);
1900 static void ix86_function_specific_restore (struct cl_target_option *);
1901 static void ix86_function_specific_print (FILE *, int,
1902 struct cl_target_option *);
1903 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1904 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1905 static bool ix86_can_inline_p (tree, tree);
1906 static void ix86_set_current_function (tree);
1908 static enum calling_abi ix86_function_abi (const_tree);
1911 /* The svr4 ABI for the i386 says that records and unions are returned
1912 in memory. */
1913 #ifndef DEFAULT_PCC_STRUCT_RETURN
1914 #define DEFAULT_PCC_STRUCT_RETURN 1
1915 #endif
1917 /* Whether -mtune= or -march= were specified */
1918 static int ix86_tune_defaulted;
1919 static int ix86_arch_specified;
1921 /* Bit flags that specify the ISA we are compiling for. */
1922 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1924 /* A mask of ix86_isa_flags that includes bit X if X
1925 was set or cleared on the command line. */
1926 static int ix86_isa_flags_explicit;
1928 /* Define a set of ISAs which are available when a given ISA is
1929 enabled. MMX and SSE ISAs are handled separately. */
1931 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1932 #define OPTION_MASK_ISA_3DNOW_SET \
1933 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1935 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1936 #define OPTION_MASK_ISA_SSE2_SET \
1937 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1938 #define OPTION_MASK_ISA_SSE3_SET \
1939 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1940 #define OPTION_MASK_ISA_SSSE3_SET \
1941 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1942 #define OPTION_MASK_ISA_SSE4_1_SET \
1943 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1944 #define OPTION_MASK_ISA_SSE4_2_SET \
1945 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1946 #define OPTION_MASK_ISA_AVX_SET \
1947 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1948 #define OPTION_MASK_ISA_FMA_SET \
1949 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1951 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1952 as -msse4.2. */
1953 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1955 #define OPTION_MASK_ISA_SSE4A_SET \
1956 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1957 #define OPTION_MASK_ISA_SSE5_SET \
1958 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1960 /* AES and PCLMUL need SSE2 because they use xmm registers. */
1961 #define OPTION_MASK_ISA_AES_SET \
1962 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1963 #define OPTION_MASK_ISA_PCLMUL_SET \
1964 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1966 #define OPTION_MASK_ISA_ABM_SET \
1967 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1969 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1970 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1971 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1972 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
1973 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
1975 /* Define a set of ISAs which aren't available when a given ISA is
1976 disabled. MMX and SSE ISAs are handled separately. */
1978 #define OPTION_MASK_ISA_MMX_UNSET \
1979 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1980 #define OPTION_MASK_ISA_3DNOW_UNSET \
1981 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1982 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1984 #define OPTION_MASK_ISA_SSE_UNSET \
1985 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1986 #define OPTION_MASK_ISA_SSE2_UNSET \
1987 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1988 #define OPTION_MASK_ISA_SSE3_UNSET \
1989 (OPTION_MASK_ISA_SSE3 \
1990 | OPTION_MASK_ISA_SSSE3_UNSET \
1991 | OPTION_MASK_ISA_SSE4A_UNSET )
1992 #define OPTION_MASK_ISA_SSSE3_UNSET \
1993 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1994 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1995 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1996 #define OPTION_MASK_ISA_SSE4_2_UNSET \
1997 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
1998 #define OPTION_MASK_ISA_AVX_UNSET \
1999 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
2000 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2002 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
2003 as -mno-sse4.1. */
2004 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2006 #define OPTION_MASK_ISA_SSE4A_UNSET \
2007 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
2008 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
2009 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2010 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2011 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2012 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2013 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2014 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2015 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2016 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
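/* Worked example (editor's illustration): the _SET/_UNSET pairs encode
   the ISA dependency chain in both directions - enabling an ISA pulls in
   everything it requires, and disabling one cuts off everything layered
   on top of it:

     ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
       ... turns on SSSE3 and, transitively, SSE3, SSE2 and SSE.

     ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
       ... turns off SSE2 and, transitively, SSE3, SSSE3, SSE4.x, SSE4A,
       SSE5, AVX and FMA.

   These are exactly the operations the OPT_m* cases below perform.  */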
2018 /* Vectorization library interface and handlers. */
2019 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
2020 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2021 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2023 /* Processor target table, indexed by processor number */
2024 struct ptt
2026 const struct processor_costs *cost; /* Processor costs */
2027 const int align_loop; /* Default alignments. */
2028 const int align_loop_max_skip;
2029 const int align_jump;
2030 const int align_jump_max_skip;
2031 const int align_func;
2034 static const struct ptt processor_target_table[PROCESSOR_max] =
2036 {&i386_cost, 4, 3, 4, 3, 4},
2037 {&i486_cost, 16, 15, 16, 15, 16},
2038 {&pentium_cost, 16, 7, 16, 7, 16},
2039 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2040 {&geode_cost, 0, 0, 0, 0, 0},
2041 {&k6_cost, 32, 7, 32, 7, 32},
2042 {&athlon_cost, 16, 7, 16, 7, 16},
2043 {&pentium4_cost, 0, 0, 0, 0, 0},
2044 {&k8_cost, 16, 7, 16, 7, 16},
2045 {&nocona_cost, 0, 0, 0, 0, 0},
2046 {&core2_cost, 16, 10, 16, 10, 16},
2047 {&generic32_cost, 16, 7, 16, 7, 16},
2048 {&generic64_cost, 16, 10, 16, 10, 16},
2049 {&amdfam10_cost, 32, 24, 32, 7, 32},
2050 {&atom_cost, 16, 7, 16, 7, 16}
2053 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2055 "generic",
2056 "i386",
2057 "i486",
2058 "pentium",
2059 "pentium-mmx",
2060 "pentiumpro",
2061 "pentium2",
2062 "pentium3",
2063 "pentium4",
2064 "pentium-m",
2065 "prescott",
2066 "nocona",
2067 "core2",
2068 "atom",
2069 "geode",
2070 "k6",
2071 "k6-2",
2072 "k6-3",
2073 "athlon",
2074 "athlon-4",
2075 "k8",
2076 "amdfam10"
2079 /* Implement TARGET_HANDLE_OPTION. */
2081 static bool
2082 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2084 switch (code)
2086 case OPT_mmmx:
2087 if (value)
2089 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2090 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2092 else
2094 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2095 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2097 return true;
2099 case OPT_m3dnow:
2100 if (value)
2102 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2103 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2105 else
2107 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2108 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2110 return true;
2112 case OPT_m3dnowa:
2113 return false;
2115 case OPT_msse:
2116 if (value)
2118 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2119 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2121 else
2123 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2124 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2126 return true;
2128 case OPT_msse2:
2129 if (value)
2131 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2132 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2134 else
2136 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2137 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2139 return true;
2141 case OPT_msse3:
2142 if (value)
2144 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2145 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2147 else
2149 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2150 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2152 return true;
2154 case OPT_mssse3:
2155 if (value)
2157 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2158 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2160 else
2162 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2163 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2165 return true;
2167 case OPT_msse4_1:
2168 if (value)
2170 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2171 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2173 else
2175 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2176 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2178 return true;
2180 case OPT_msse4_2:
2181 if (value)
2183 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2184 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2186 else
2188 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2189 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2191 return true;
2193 case OPT_mavx:
2194 if (value)
2196 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2197 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2199 else
2201 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2202 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2204 return true;
2206 case OPT_mfma:
2207 if (value)
2209 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2210 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2212 else
2214 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2215 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2217 return true;
2219 case OPT_msse4:
2220 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2221 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2222 return true;
2224 case OPT_mno_sse4:
2225 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2226 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2227 return true;
2229 case OPT_msse4a:
2230 if (value)
2232 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2233 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2235 else
2237 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2238 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2240 return true;
2242 case OPT_msse5:
2243 if (value)
2245 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2246 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2248 else
2250 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2251 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
2253 return true;
2255 case OPT_mabm:
2256 if (value)
2258 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2259 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2261 else
2263 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2264 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2266 return true;
2268 case OPT_mpopcnt:
2269 if (value)
2271 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2272 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2274 else
2276 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2277 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2279 return true;
2281 case OPT_msahf:
2282 if (value)
2284 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2285 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2287 else
2289 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2290 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2292 return true;
2294 case OPT_mcx16:
2295 if (value)
2297 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2298 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2300 else
2302 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2303 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2305 return true;
2307 case OPT_mmovbe:
2308 if (value)
2310 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2311 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2313 else
2315 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2316 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2318 return true;
2320 case OPT_mcrc32:
2321 if (value)
2323 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2324 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2326 else
2328 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2329 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2331 return true;
2333 case OPT_maes:
2334 if (value)
2336 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2337 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2339 else
2341 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2342 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2344 return true;
2346 case OPT_mpclmul:
2347 if (value)
2349 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2350 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2352 else
2354 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2355 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2357 return true;
2359 default:
2360 return true;
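/* Worked example (editor's illustration): options are processed left to
   right, so "gcc -msse4 -mno-ssse3" first ors in OPTION_MASK_ISA_SSE4_SET
   (SSE through SSE4.2), then clears OPTION_MASK_ISA_SSSE3_UNSET, which
   removes SSSE3 and with it SSE4.1/SSE4.2 again - leaving SSE, SSE2 and
   SSE3 enabled.  Each step also records its mask in
   ix86_isa_flags_explicit, so override_options below will not silently
   re-enable what the user switched off.  */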
2364 /* Return a string that documents the current -m options. The caller is
2365 responsible for freeing the string. */
2367 static char *
2368 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2369 const char *fpmath, bool add_nl_p)
2371 struct ix86_target_opts
2373 const char *option; /* option string */
2374 int mask; /* isa mask options */
2377 /* This table is ordered so that options like -msse5 or -msse4.2, which
2378 imply the preceding options, match first. */
2379 static struct ix86_target_opts isa_opts[] =
2381 { "-m64", OPTION_MASK_ISA_64BIT },
2382 { "-msse5", OPTION_MASK_ISA_SSE5 },
2383 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2384 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2385 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2386 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2387 { "-msse3", OPTION_MASK_ISA_SSE3 },
2388 { "-msse2", OPTION_MASK_ISA_SSE2 },
2389 { "-msse", OPTION_MASK_ISA_SSE },
2390 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2391 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2392 { "-mmmx", OPTION_MASK_ISA_MMX },
2393 { "-mabm", OPTION_MASK_ISA_ABM },
2394 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2395 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2396 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2397 { "-maes", OPTION_MASK_ISA_AES },
2398 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2401 /* Flag options. */
2402 static struct ix86_target_opts flag_opts[] =
2404 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2405 { "-m80387", MASK_80387 },
2406 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2407 { "-malign-double", MASK_ALIGN_DOUBLE },
2408 { "-mcld", MASK_CLD },
2409 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2410 { "-mieee-fp", MASK_IEEE_FP },
2411 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2412 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2413 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2414 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2415 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2416 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2417 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2418 { "-mno-red-zone", MASK_NO_RED_ZONE },
2419 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2420 { "-mrecip", MASK_RECIP },
2421 { "-mrtd", MASK_RTD },
2422 { "-msseregparm", MASK_SSEREGPARM },
2423 { "-mstack-arg-probe", MASK_STACK_PROBE },
2424 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2427 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2429 char isa_other[40];
2430 char target_other[40];
2431 unsigned num = 0;
2432 unsigned i, j;
2433 char *ret;
2434 char *ptr;
2435 size_t len;
2436 size_t line_len;
2437 size_t sep_len;
2439 memset (opts, '\0', sizeof (opts));
2441 /* Add -march= option. */
2442 if (arch)
2444 opts[num][0] = "-march=";
2445 opts[num++][1] = arch;
2448 /* Add -mtune= option. */
2449 if (tune)
2451 opts[num][0] = "-mtune=";
2452 opts[num++][1] = tune;
2455 /* Pick out the options in isa options. */
2456 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2458 if ((isa & isa_opts[i].mask) != 0)
2460 opts[num++][0] = isa_opts[i].option;
2461 isa &= ~ isa_opts[i].mask;
2465 if (isa && add_nl_p)
2467 opts[num++][0] = isa_other;
2468 sprintf (isa_other, "(other isa: 0x%x)", isa);
2471 /* Add flag options. */
2472 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2474 if ((flags & flag_opts[i].mask) != 0)
2476 opts[num++][0] = flag_opts[i].option;
2477 flags &= ~ flag_opts[i].mask;
2481 if (flags && add_nl_p)
2483 opts[num++][0] = target_other;
2484 sprintf (target_other, "(other flags: 0x%x)", flags);
2487 /* Add -fpmath= option. */
2488 if (fpmath)
2490 opts[num][0] = "-mfpmath=";
2491 opts[num++][1] = fpmath;
2494 /* Any options? */
2495 if (num == 0)
2496 return NULL;
2498 gcc_assert (num < ARRAY_SIZE (opts));
2500 /* Size the string. */
2501 len = 0;
2502 sep_len = (add_nl_p) ? 3 : 1;
2503 for (i = 0; i < num; i++)
2505 len += sep_len;
2506 for (j = 0; j < 2; j++)
2507 if (opts[i][j])
2508 len += strlen (opts[i][j]);
2511 /* Build the string. */
2512 ret = ptr = (char *) xmalloc (len);
2513 line_len = 0;
2515 for (i = 0; i < num; i++)
2517 size_t len2[2];
2519 for (j = 0; j < 2; j++)
2520 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2522 if (i != 0)
2524 *ptr++ = ' ';
2525 line_len++;
2527 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2529 *ptr++ = '\\';
2530 *ptr++ = '\n';
2531 line_len = 0;
2535 for (j = 0; j < 2; j++)
2536 if (opts[i][j])
2538 memcpy (ptr, opts[i][j], len2[j]);
2539 ptr += len2[j];
2540 line_len += len2[j];
2544 *ptr = '\0';
2545 gcc_assert (ret + len >= ptr);
2547 return ret;
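/* Example output (editor's illustration): for a 32-bit compile with
   -march=core2 the function above returns roughly

     "-march=core2 -mtune=core2 -mssse3 -msse3 -msse2 -msse -mmmx"

   plus, when add_nl_p is true, an "(other isa: ...)" entry for any set
   bits that have no row in isa_opts, and " \" line breaks once a line
   passes 70 columns.  */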
2550 /* Function that is callable from the debugger to print the current
2551 options. */
2552 void
2553 ix86_debug_options (void)
2555 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2556 ix86_arch_string, ix86_tune_string,
2557 ix86_fpmath_string, true);
2559 if (opts)
2561 fprintf (stderr, "%s\n\n", opts);
2562 free (opts);
2564 else
2565 fprintf (stderr, "<no options>\n\n");
2567 return;
2570 /* Sometimes certain combinations of command options do not make
2571 sense on a particular target machine. You can define a macro
2572 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2573 defined, is executed once just after all the command options have
2574 been parsed.
2576 Don't use this macro to turn on various extra optimizations for
2577 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2579 void
2580 override_options (bool main_args_p)
2582 int i;
2583 unsigned int ix86_arch_mask, ix86_tune_mask;
2584 const char *prefix;
2585 const char *suffix;
2586 const char *sw;
2588 /* Comes from final.c -- no real reason to change it. */
2589 #define MAX_CODE_ALIGN 16
2591 enum pta_flags
2593 PTA_SSE = 1 << 0,
2594 PTA_SSE2 = 1 << 1,
2595 PTA_SSE3 = 1 << 2,
2596 PTA_MMX = 1 << 3,
2597 PTA_PREFETCH_SSE = 1 << 4,
2598 PTA_3DNOW = 1 << 5,
2599 PTA_3DNOW_A = 1 << 6,
2600 PTA_64BIT = 1 << 7,
2601 PTA_SSSE3 = 1 << 8,
2602 PTA_CX16 = 1 << 9,
2603 PTA_POPCNT = 1 << 10,
2604 PTA_ABM = 1 << 11,
2605 PTA_SSE4A = 1 << 12,
2606 PTA_NO_SAHF = 1 << 13,
2607 PTA_SSE4_1 = 1 << 14,
2608 PTA_SSE4_2 = 1 << 15,
2609 PTA_SSE5 = 1 << 16,
2610 PTA_AES = 1 << 17,
2611 PTA_PCLMUL = 1 << 18,
2612 PTA_AVX = 1 << 19,
2613 PTA_FMA = 1 << 20,
2614 PTA_MOVBE = 1 << 21
2617 static struct pta
2619 const char *const name; /* processor name or nickname. */
2620 const enum processor_type processor;
2621 const enum attr_cpu schedule;
2622 const unsigned /*enum pta_flags*/ flags;
2624 const processor_alias_table[] =
2626 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2627 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2628 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2629 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2630 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2631 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2632 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2633 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2634 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2635 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2636 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2637 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2638 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2639 PTA_MMX | PTA_SSE},
2640 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2641 PTA_MMX | PTA_SSE},
2642 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2643 PTA_MMX | PTA_SSE | PTA_SSE2},
2644 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2645 PTA_MMX |PTA_SSE | PTA_SSE2},
2646 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2647 PTA_MMX | PTA_SSE | PTA_SSE2},
2648 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2649 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2650 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2651 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2652 | PTA_CX16 | PTA_NO_SAHF},
2653 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2654 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2655 | PTA_SSSE3 | PTA_CX16},
2656 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2657 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2658 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2659 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2660 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2661 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2662 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2663 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2664 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2665 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2666 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2667 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2668 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2669 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2670 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2671 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2672 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2673 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2674 {"x86-64", PROCESSOR_K8, CPU_K8,
2675 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2676 {"k8", PROCESSOR_K8, CPU_K8,
2677 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2678 | PTA_SSE2 | PTA_NO_SAHF},
2679 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2680 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2681 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2682 {"opteron", PROCESSOR_K8, CPU_K8,
2683 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2684 | PTA_SSE2 | PTA_NO_SAHF},
2685 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2686 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2687 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2688 {"athlon64", PROCESSOR_K8, CPU_K8,
2689 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2690 | PTA_SSE2 | PTA_NO_SAHF},
2691 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2692 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2693 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2694 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2695 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2696 | PTA_SSE2 | PTA_NO_SAHF},
2697 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2698 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2699 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2700 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2701 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2702 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2703 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2704 0 /* flags are only used for -march switch. */ },
2705 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2706 PTA_64BIT /* flags are only used for -march switch. */ },
2709 int const pta_size = ARRAY_SIZE (processor_alias_table);
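/* Worked example (editor's illustration): given -march=core2, the lookup
   loop in override_options below finds the "core2" row, sets ix86_arch
   to PROCESSOR_CORE2 and ix86_schedule to CPU_CORE2, and ors in (among
   others) the MMX, SSE, SSE2, SSE3, SSSE3 and CX16 ISA masks - but only
   the bits the user did not already set or clear explicitly.  */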
2711 /* Set up prefix/suffix so the error messages refer to either the
2712 command-line argument or the attribute(target). */
2713 if (main_args_p)
2715 prefix = "-m";
2716 suffix = "";
2717 sw = "switch";
2719 else
2721 prefix = "option(\"";
2722 suffix = "\")";
2723 sw = "attribute";
2726 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2727 SUBTARGET_OVERRIDE_OPTIONS;
2728 #endif
2730 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2731 SUBSUBTARGET_OVERRIDE_OPTIONS;
2732 #endif
2734 /* -fPIC is the default for x86_64. */
2735 if (TARGET_MACHO && TARGET_64BIT)
2736 flag_pic = 2;
2738 /* Set the default values for switches whose default depends on TARGET_64BIT
2739 in case they weren't overwritten by command line options. */
2740 if (TARGET_64BIT)
2742 /* Mach-O doesn't support omitting the frame pointer for now. */
2743 if (flag_omit_frame_pointer == 2)
2744 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2745 if (flag_asynchronous_unwind_tables == 2)
2746 flag_asynchronous_unwind_tables = 1;
2747 if (flag_pcc_struct_return == 2)
2748 flag_pcc_struct_return = 0;
2750 else
2752 if (flag_omit_frame_pointer == 2)
2753 flag_omit_frame_pointer = 0;
2754 if (flag_asynchronous_unwind_tables == 2)
2755 flag_asynchronous_unwind_tables = 0;
2756 if (flag_pcc_struct_return == 2)
2757 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2760 /* Need to check -mtune=generic first. */
2761 if (ix86_tune_string)
2763 if (!strcmp (ix86_tune_string, "generic")
2764 || !strcmp (ix86_tune_string, "i686")
2765 /* As special support for cross compilers we read -mtune=native
2766 as -mtune=generic. With native compilers we won't see the
2767 -mtune=native, as it was changed by the driver. */
2768 || !strcmp (ix86_tune_string, "native"))
2770 if (TARGET_64BIT)
2771 ix86_tune_string = "generic64";
2772 else
2773 ix86_tune_string = "generic32";
2775 /* If this call is for setting the option attribute, allow the
2776 generic32/generic64 that was previously set. */
2777 else if (!main_args_p
2778 && (!strcmp (ix86_tune_string, "generic32")
2779 || !strcmp (ix86_tune_string, "generic64")))
2781 else if (!strncmp (ix86_tune_string, "generic", 7))
2782 error ("bad value (%s) for %stune=%s %s",
2783 ix86_tune_string, prefix, suffix, sw);
2785 else
2787 if (ix86_arch_string)
2788 ix86_tune_string = ix86_arch_string;
2789 if (!ix86_tune_string)
2791 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2792 ix86_tune_defaulted = 1;
2795 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2796 need to use a sensible tune option. */
2797 if (!strcmp (ix86_tune_string, "generic")
2798 || !strcmp (ix86_tune_string, "x86-64")
2799 || !strcmp (ix86_tune_string, "i686"))
2801 if (TARGET_64BIT)
2802 ix86_tune_string = "generic64";
2803 else
2804 ix86_tune_string = "generic32";
2807 if (ix86_stringop_string)
2809 if (!strcmp (ix86_stringop_string, "rep_byte"))
2810 stringop_alg = rep_prefix_1_byte;
2811 else if (!strcmp (ix86_stringop_string, "libcall"))
2812 stringop_alg = libcall;
2813 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2814 stringop_alg = rep_prefix_4_byte;
2815 else if (!strcmp (ix86_stringop_string, "rep_8byte")
2816 && TARGET_64BIT)
2817 /* rep; movq isn't available in 32-bit code. */
2818 stringop_alg = rep_prefix_8_byte;
2819 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2820 stringop_alg = loop_1_byte;
2821 else if (!strcmp (ix86_stringop_string, "loop"))
2822 stringop_alg = loop;
2823 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2824 stringop_alg = unrolled_loop;
2825 else
2826 error ("bad value (%s) for %sstringop-strategy=%s %s",
2827 ix86_stringop_string, prefix, suffix, sw);
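/* Note (editor's): in 32-bit mode "-mstringop-strategy=rep_8byte" fails
   the TARGET_64BIT test above, falls through every remaining strcmp, and
   lands in the "bad value" error - consistent with rep; movq not
   existing outside 64-bit code.  */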
2829 if (!strcmp (ix86_tune_string, "x86-64"))
2830 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2831 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2832 prefix, suffix, prefix, suffix, prefix, suffix);
2834 if (!ix86_arch_string)
2835 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2836 else
2837 ix86_arch_specified = 1;
2839 if (!strcmp (ix86_arch_string, "generic"))
2840 error ("generic CPU can be used only for %stune=%s %s",
2841 prefix, suffix, sw);
2842 if (!strncmp (ix86_arch_string, "generic", 7))
2843 error ("bad value (%s) for %sarch=%s %s",
2844 ix86_arch_string, prefix, suffix, sw);
2846 /* Validate -mabi= value. */
2847 if (ix86_abi_string)
2849 if (strcmp (ix86_abi_string, "sysv") == 0)
2850 ix86_abi = SYSV_ABI;
2851 else if (strcmp (ix86_abi_string, "ms") == 0)
2852 ix86_abi = MS_ABI;
2853 else
2854 error ("unknown ABI (%s) for %sabi=%s %s",
2855 ix86_abi_string, prefix, suffix, sw);
2857 else
2858 ix86_abi = DEFAULT_ABI;
2860 if (ix86_cmodel_string != 0)
2862 if (!strcmp (ix86_cmodel_string, "small"))
2863 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2864 else if (!strcmp (ix86_cmodel_string, "medium"))
2865 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2866 else if (!strcmp (ix86_cmodel_string, "large"))
2867 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2868 else if (flag_pic)
2869 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2870 else if (!strcmp (ix86_cmodel_string, "32"))
2871 ix86_cmodel = CM_32;
2872 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2873 ix86_cmodel = CM_KERNEL;
2874 else
2875 error ("bad value (%s) for %scmodel=%s %s",
2876 ix86_cmodel_string, prefix, suffix, sw);
2878 else
2880 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2881 use of rip-relative addressing. This eliminates fixups that
2882 would otherwise be needed if this object is to be placed in a
2883 DLL, and is essentially just as efficient as direct addressing. */
2884 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2885 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2886 else if (TARGET_64BIT)
2887 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2888 else
2889 ix86_cmodel = CM_32;
2891 if (ix86_asm_string != 0)
2893 if (! TARGET_MACHO
2894 && !strcmp (ix86_asm_string, "intel"))
2895 ix86_asm_dialect = ASM_INTEL;
2896 else if (!strcmp (ix86_asm_string, "att"))
2897 ix86_asm_dialect = ASM_ATT;
2898 else
2899 error ("bad value (%s) for %sasm=%s %s",
2900 ix86_asm_string, prefix, suffix, sw);
2902 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2903 error ("code model %qs not supported in the %s bit mode",
2904 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2905 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2906 sorry ("%i-bit mode not compiled in",
2907 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2909 for (i = 0; i < pta_size; i++)
2910 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2912 ix86_schedule = processor_alias_table[i].schedule;
2913 ix86_arch = processor_alias_table[i].processor;
2914 /* Default cpu tuning to the architecture. */
2915 ix86_tune = ix86_arch;
2917 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2918 error ("CPU you selected does not support x86-64 "
2919 "instruction set");
2921 if (processor_alias_table[i].flags & PTA_MMX
2922 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2923 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2924 if (processor_alias_table[i].flags & PTA_3DNOW
2925 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2926 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2927 if (processor_alias_table[i].flags & PTA_3DNOW_A
2928 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2929 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2930 if (processor_alias_table[i].flags & PTA_SSE
2931 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2932 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2933 if (processor_alias_table[i].flags & PTA_SSE2
2934 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2935 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2936 if (processor_alias_table[i].flags & PTA_SSE3
2937 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2938 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2939 if (processor_alias_table[i].flags & PTA_SSSE3
2940 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2941 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2942 if (processor_alias_table[i].flags & PTA_SSE4_1
2943 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2944 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2945 if (processor_alias_table[i].flags & PTA_SSE4_2
2946 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2947 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2948 if (processor_alias_table[i].flags & PTA_AVX
2949 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2950 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2951 if (processor_alias_table[i].flags & PTA_FMA
2952 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2953 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2954 if (processor_alias_table[i].flags & PTA_SSE4A
2955 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2956 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2957 if (processor_alias_table[i].flags & PTA_SSE5
2958 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2959 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2960 if (processor_alias_table[i].flags & PTA_ABM
2961 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2962 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2963 if (processor_alias_table[i].flags & PTA_CX16
2964 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2965 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2966 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2967 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2968 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2969 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2970 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2971 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2972 if (processor_alias_table[i].flags & PTA_MOVBE
2973 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
2974 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
2975 if (processor_alias_table[i].flags & PTA_AES
2976 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2977 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2978 if (processor_alias_table[i].flags & PTA_PCLMUL
2979 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2980 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2981 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2982 x86_prefetch_sse = true;
2984 break;
2987 if (i == pta_size)
2988 error ("bad value (%s) for %sarch=%s %s",
2989 ix86_arch_string, prefix, suffix, sw);
2991 ix86_arch_mask = 1u << ix86_arch;
2992 for (i = 0; i < X86_ARCH_LAST; ++i)
2993 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2995 for (i = 0; i < pta_size; i++)
2996 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2998 ix86_schedule = processor_alias_table[i].schedule;
2999 ix86_tune = processor_alias_table[i].processor;
3000 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3002 if (ix86_tune_defaulted)
3004 ix86_tune_string = "x86-64";
3005 for (i = 0; i < pta_size; i++)
3006 if (! strcmp (ix86_tune_string,
3007 processor_alias_table[i].name))
3008 break;
3009 ix86_schedule = processor_alias_table[i].schedule;
3010 ix86_tune = processor_alias_table[i].processor;
3012 else
3013 error ("CPU you selected does not support x86-64 "
3014 "instruction set");
3016 /* Intel CPUs have always interpreted SSE prefetch instructions as
3017 NOPs, so we can enable SSE prefetch instructions even when
3018 -mtune (rather than -march) points us to a processor that has them.
3019 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3020 higher processors. */
3021 if (TARGET_CMOVE
3022 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3023 x86_prefetch_sse = true;
3024 break;
3026 if (i == pta_size)
3027 error ("bad value (%s) for %stune=%s %s",
3028 ix86_tune_string, prefix, suffix, sw);
3030 ix86_tune_mask = 1u << ix86_tune;
3031 for (i = 0; i < X86_TUNE_LAST; ++i)
3032 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3034 if (optimize_size)
3035 ix86_cost = &ix86_size_cost;
3036 else
3037 ix86_cost = processor_target_table[ix86_tune].cost;
3039 /* Arrange to set up i386_stack_locals for all functions. */
3040 init_machine_status = ix86_init_machine_status;
3042 /* Validate -mregparm= value. */
3043 if (ix86_regparm_string)
3045 if (TARGET_64BIT)
3046 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3047 i = atoi (ix86_regparm_string);
3048 if (i < 0 || i > REGPARM_MAX)
3049 error ("%sregparm=%d%s is not between 0 and %d",
3050 prefix, i, suffix, REGPARM_MAX);
3051 else
3052 ix86_regparm = i;
3054 if (TARGET_64BIT)
3055 ix86_regparm = REGPARM_MAX;
3057 /* If the user has provided any of the -malign-* options,
3058 warn and use that value only if -falign-* is not set.
3059 Remove this code in GCC 3.2 or later. */
3060 if (ix86_align_loops_string)
3062 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3063 prefix, suffix, suffix);
3064 if (align_loops == 0)
3066 i = atoi (ix86_align_loops_string);
3067 if (i < 0 || i > MAX_CODE_ALIGN)
3068 error ("%salign-loops=%d%s is not between 0 and %d",
3069 prefix, i, suffix, MAX_CODE_ALIGN);
3070 else
3071 align_loops = 1 << i;
3075 if (ix86_align_jumps_string)
3077 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3078 prefix, suffix, suffix);
3079 if (align_jumps == 0)
3081 i = atoi (ix86_align_jumps_string);
3082 if (i < 0 || i > MAX_CODE_ALIGN)
3083 error ("%salign-loops=%d%s is not between 0 and %d",
3084 prefix, i, suffix, MAX_CODE_ALIGN);
3085 else
3086 align_jumps = 1 << i;
3090 if (ix86_align_funcs_string)
3092 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3093 prefix, suffix, suffix);
3094 if (align_functions == 0)
3096 i = atoi (ix86_align_funcs_string);
3097 if (i < 0 || i > MAX_CODE_ALIGN)
3098 error ("%salign-loops=%d%s is not between 0 and %d",
3099 prefix, i, suffix, MAX_CODE_ALIGN);
3100 else
3101 align_functions = 1 << i;
3105 /* Default align_* from the processor table. */
3106 if (align_loops == 0)
3108 align_loops = processor_target_table[ix86_tune].align_loop;
3109 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3111 if (align_jumps == 0)
3113 align_jumps = processor_target_table[ix86_tune].align_jump;
3114 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3116 if (align_functions == 0)
3118 align_functions = processor_target_table[ix86_tune].align_func;
3121 /* Validate -mbranch-cost= value, or provide default. */
3122 ix86_branch_cost = ix86_cost->branch_cost;
3123 if (ix86_branch_cost_string)
3125 i = atoi (ix86_branch_cost_string);
3126 if (i < 0 || i > 5)
3127 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3128 else
3129 ix86_branch_cost = i;
3131 if (ix86_section_threshold_string)
3133 i = atoi (ix86_section_threshold_string);
3134 if (i < 0)
3135 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3136 else
3137 ix86_section_threshold = i;
3140 if (ix86_tls_dialect_string)
3142 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3143 ix86_tls_dialect = TLS_DIALECT_GNU;
3144 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3145 ix86_tls_dialect = TLS_DIALECT_GNU2;
3146 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3147 ix86_tls_dialect = TLS_DIALECT_SUN;
3148 else
3149 error ("bad value (%s) for %stls-dialect=%s %s",
3150 ix86_tls_dialect_string, prefix, suffix, sw);
3153 if (ix87_precision_string)
3155 i = atoi (ix87_precision_string);
3156 if (i != 32 && i != 64 && i != 80)
3157 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3160 if (TARGET_64BIT)
3162 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3164 /* Enable by default the SSE and MMX builtins. Do allow the user to
3165 explicitly disable any of these. In particular, disabling SSE and
3166 MMX for kernel code is extremely useful. */
3167 if (!ix86_arch_specified)
3168 ix86_isa_flags
3169 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3170 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3172 if (TARGET_RTD)
3173 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3175 else
3177 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3179 if (!ix86_arch_specified)
3180 ix86_isa_flags
3181 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3183 /* The i386 ABI does not specify a red zone. It still makes sense to use
3184 one when the programmer takes care to keep the stack from being destroyed. */
3185 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3186 target_flags |= MASK_NO_RED_ZONE;
3189 /* Keep nonleaf frame pointers. */
3190 if (flag_omit_frame_pointer)
3191 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3192 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3193 flag_omit_frame_pointer = 1;
3195 /* If we're doing fast math, we don't care about comparison order
3196 wrt NaNs. This lets us use a shorter comparison sequence. */
3197 if (flag_finite_math_only)
3198 target_flags &= ~MASK_IEEE_FP;
3200 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3201 since the insns won't need emulation. */
3202 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3203 target_flags &= ~MASK_NO_FANCY_MATH_387;
3205 /* Likewise, if the target doesn't have a 387, or we've specified
3206 software floating point, don't use 387 inline intrinsics. */
3207 if (!TARGET_80387)
3208 target_flags |= MASK_NO_FANCY_MATH_387;
3210 /* Turn on MMX builtins for -msse. */
3211 if (TARGET_SSE)
3213 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3214 x86_prefetch_sse = true;
3217 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3218 if (TARGET_SSE4_2 || TARGET_ABM)
3219 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3221 /* Validate -mpreferred-stack-boundary= value or default it to
3222 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3223 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3224 if (ix86_preferred_stack_boundary_string)
3226 i = atoi (ix86_preferred_stack_boundary_string);
3227 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3228 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3229 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3230 else
3231 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
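/* For example, -mpreferred-stack-boundary=4 stores
   (1 << 4) * BITS_PER_UNIT = 128 bits here, i.e. 16-byte stack alignment;
   the 64-bit lower bound of 4 keeps the 16-byte alignment the x86-64 ABI
   is generally understood to require. */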
3234 /* Set the default value for -mstackrealign. */
3235 if (ix86_force_align_arg_pointer == -1)
3236 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3238 /* Validate -mincoming-stack-boundary= value or default it to
3239 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3240 if (ix86_force_align_arg_pointer)
3241 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3242 else
3243 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3244 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3245 if (ix86_incoming_stack_boundary_string)
3247 i = atoi (ix86_incoming_stack_boundary_string);
3248 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3249 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3250 i, TARGET_64BIT ? 4 : 2);
3251 else
3253 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3254 ix86_incoming_stack_boundary
3255 = ix86_user_incoming_stack_boundary;
3259 /* Accept -msseregparm only if at least SSE support is enabled. */
3260 if (TARGET_SSEREGPARM
3261 && ! TARGET_SSE)
3262 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3264 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3265 if (ix86_fpmath_string != 0)
3267 if (! strcmp (ix86_fpmath_string, "387"))
3268 ix86_fpmath = FPMATH_387;
3269 else if (! strcmp (ix86_fpmath_string, "sse"))
3271 if (!TARGET_SSE)
3273 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3274 ix86_fpmath = FPMATH_387;
3276 else
3277 ix86_fpmath = FPMATH_SSE;
3279 else if (! strcmp (ix86_fpmath_string, "387,sse")
3280 || ! strcmp (ix86_fpmath_string, "387+sse")
3281 || ! strcmp (ix86_fpmath_string, "sse,387")
3282 || ! strcmp (ix86_fpmath_string, "sse+387")
3283 || ! strcmp (ix86_fpmath_string, "both"))
3285 if (!TARGET_SSE)
3287 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3288 ix86_fpmath = FPMATH_387;
3290 else if (!TARGET_80387)
3292 warning (0, "387 instruction set disabled, using SSE arithmetic");
3293 ix86_fpmath = FPMATH_SSE;
3295 else
3296 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3298 else
3299 error ("bad value (%s) for %sfpmath=%s %s",
3300 ix86_fpmath_string, prefix, suffix, sw);
3303 /* If the i387 is disabled, then do not return values in it. */
3304 if (!TARGET_80387)
3305 target_flags &= ~MASK_FLOAT_RETURNS;
3307 /* Use an external vectorized library for vectorizing intrinsics. */
3308 if (ix86_veclibabi_string)
3310 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3311 ix86_veclib_handler = ix86_veclibabi_svml;
3312 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3313 ix86_veclib_handler = ix86_veclibabi_acml;
3314 else
3315 error ("unknown vectorization library ABI type (%s) for "
3316 "%sveclibabi=%s %s", ix86_veclibabi_string,
3317 prefix, suffix, sw);
3320 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3321 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3322 && !optimize_size)
3323 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3325 /* ??? Unwind info is not correct around the CFG unless either a frame
3326 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3327 unwind info generation to be aware of the CFG and propagating states
3328 around edges. */
3329 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3330 || flag_exceptions || flag_non_call_exceptions)
3331 && flag_omit_frame_pointer
3332 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3334 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3335 warning (0, "unwind tables currently require either a frame pointer "
3336 "or %saccumulate-outgoing-args%s for correctness",
3337 prefix, suffix);
3338 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3341 /* If stack probes are required, the space used for large function
3342 arguments on the stack must also be probed, so enable
3343 -maccumulate-outgoing-args so this happens in the prologue. */
3344 if (TARGET_STACK_PROBE
3345 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3347 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3348 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3349 "for correctness", prefix, suffix);
3350 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3353 /* For sane SSE instruction set generation we need the fcomi instruction.
3354 It is safe to enable all CMOVE instructions. */
3355 if (TARGET_SSE)
3356 TARGET_CMOVE = 1;
3358 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3360 char *p;
3361 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3362 p = strchr (internal_label_prefix, 'X');
3363 internal_label_prefix_len = p - internal_label_prefix;
3364 *p = '\0';
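/* On a typical ELF target, ASM_GENERATE_INTERNAL_LABEL can be expected to
   produce something like "*.LX0"; the code above then leaves "*.L" as the
   prefix, with internal_label_prefix_len == 3. */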
3367 /* When no scheduling description is available, disable the scheduler pass
3368 so it won't slow down compilation or make x87 code slower. */
3369 if (!TARGET_SCHEDULE)
3370 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3372 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3373 set_param_value ("simultaneous-prefetches",
3374 ix86_cost->simultaneous_prefetches);
3375 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3376 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3377 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3378 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3379 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3380 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3382 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3383 can be optimized to ap = __builtin_next_arg (0). */
3384 if (!TARGET_64BIT)
3385 targetm.expand_builtin_va_start = NULL;
3387 if (TARGET_64BIT)
3389 ix86_gen_leave = gen_leave_rex64;
3390 ix86_gen_pop1 = gen_popdi1;
3391 ix86_gen_add3 = gen_adddi3;
3392 ix86_gen_sub3 = gen_subdi3;
3393 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3394 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3395 ix86_gen_monitor = gen_sse3_monitor64;
3396 ix86_gen_andsp = gen_anddi3;
3398 else
3400 ix86_gen_leave = gen_leave;
3401 ix86_gen_pop1 = gen_popsi1;
3402 ix86_gen_add3 = gen_addsi3;
3403 ix86_gen_sub3 = gen_subsi3;
3404 ix86_gen_sub3_carry = gen_subsi3_carry;
3405 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3406 ix86_gen_monitor = gen_sse3_monitor;
3407 ix86_gen_andsp = gen_andsi3;
3410 #ifdef USE_IX86_CLD
3411 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3412 if (!TARGET_64BIT)
3413 target_flags |= MASK_CLD & ~target_flags_explicit;
3414 #endif
3416 /* Save the initial options in case the user uses function-specific options. */
3417 if (main_args_p)
3418 target_option_default_node = target_option_current_node
3419 = build_target_option_node ();
3422 /* Update register usage after having seen the compiler flags. */
3424 void
3425 ix86_conditional_register_usage (void)
3427 int i;
3428 unsigned int j;
3430 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3432 if (fixed_regs[i] > 1)
3433 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3434 if (call_used_regs[i] > 1)
3435 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
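/* That is, a table entry of 2 marks a register as fixed or call-used only
   for 32-bit targets and an entry of 3 only for 64-bit targets; the
   comparisons above collapse those encodings to plain 0/1 for the mode
   currently being compiled. */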
3438 /* The PIC register, if it exists, is fixed. */
3439 j = PIC_OFFSET_TABLE_REGNUM;
3440 if (j != INVALID_REGNUM)
3441 fixed_regs[j] = call_used_regs[j] = 1;
3443 /* The MS_ABI changes the set of call-used registers. */
3444 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3446 call_used_regs[SI_REG] = 0;
3447 call_used_regs[DI_REG] = 0;
3448 call_used_regs[XMM6_REG] = 0;
3449 call_used_regs[XMM7_REG] = 0;
3450 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3451 call_used_regs[i] = 0;
3454 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3455 other call-clobbered regs for 64-bit. */
3456 if (TARGET_64BIT)
3458 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3460 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3461 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3462 && call_used_regs[i])
3463 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3466 /* If MMX is disabled, squash the registers. */
3467 if (! TARGET_MMX)
3468 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3469 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3470 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3472 /* If SSE is disabled, squash the registers. */
3473 if (! TARGET_SSE)
3474 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3475 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3476 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3478 /* If the FPU is disabled, squash the registers. */
3479 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3480 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3481 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3482 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3484 /* If 32-bit, squash the 64-bit registers. */
3485 if (! TARGET_64BIT)
3487 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3488 reg_names[i] = "";
3489 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3490 reg_names[i] = "";
3495 /* Save the current options */
3497 static void
3498 ix86_function_specific_save (struct cl_target_option *ptr)
3500 ptr->arch = ix86_arch;
3501 ptr->schedule = ix86_schedule;
3502 ptr->tune = ix86_tune;
3503 ptr->fpmath = ix86_fpmath;
3504 ptr->branch_cost = ix86_branch_cost;
3505 ptr->tune_defaulted = ix86_tune_defaulted;
3506 ptr->arch_specified = ix86_arch_specified;
3507 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3508 ptr->target_flags_explicit = target_flags_explicit;
3510 /* The fields are char but the variables are not; make sure the
3511 values fit in the fields. */
3512 gcc_assert (ptr->arch == ix86_arch);
3513 gcc_assert (ptr->schedule == ix86_schedule);
3514 gcc_assert (ptr->tune == ix86_tune);
3515 gcc_assert (ptr->fpmath == ix86_fpmath);
3516 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3519 /* Restore the current options */
3521 static void
3522 ix86_function_specific_restore (struct cl_target_option *ptr)
3524 enum processor_type old_tune = ix86_tune;
3525 enum processor_type old_arch = ix86_arch;
3526 unsigned int ix86_arch_mask, ix86_tune_mask;
3527 int i;
3529 ix86_arch = (enum processor_type) ptr->arch;
3530 ix86_schedule = (enum attr_cpu) ptr->schedule;
3531 ix86_tune = (enum processor_type) ptr->tune;
3532 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3533 ix86_branch_cost = ptr->branch_cost;
3534 ix86_tune_defaulted = ptr->tune_defaulted;
3535 ix86_arch_specified = ptr->arch_specified;
3536 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3537 target_flags_explicit = ptr->target_flags_explicit;
3539 /* Recreate the arch feature tests if the arch changed */
3540 if (old_arch != ix86_arch)
3542 ix86_arch_mask = 1u << ix86_arch;
3543 for (i = 0; i < X86_ARCH_LAST; ++i)
3544 ix86_arch_features[i]
3545 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3548 /* Recreate the tune optimization tests */
3549 if (old_tune != ix86_tune)
3551 ix86_tune_mask = 1u << ix86_tune;
3552 for (i = 0; i < X86_TUNE_LAST; ++i)
3553 ix86_tune_features[i]
3554 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3558 /* Print the current options */
3560 static void
3561 ix86_function_specific_print (FILE *file, int indent,
3562 struct cl_target_option *ptr)
3564 char *target_string
3565 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3566 NULL, NULL, NULL, false);
3568 fprintf (file, "%*sarch = %d (%s)\n",
3569 indent, "",
3570 ptr->arch,
3571 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3572 ? cpu_names[ptr->arch]
3573 : "<unknown>"));
3575 fprintf (file, "%*stune = %d (%s)\n",
3576 indent, "",
3577 ptr->tune,
3578 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3579 ? cpu_names[ptr->tune]
3580 : "<unknown>"));
3582 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3583 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3584 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3585 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3587 if (target_string)
3589 fprintf (file, "%*s%s\n", indent, "", target_string);
3590 free (target_string);
3595 /* Inner function to process the attribute((target(...))): take an argument
3596 and set the current options from that argument. If we have a list,
3597 recursively go over the list. */
3599 static bool
3600 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3602 char *next_optstr;
3603 bool ret = true;
3605 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3606 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3607 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3608 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3610 enum ix86_opt_type
3612 ix86_opt_unknown,
3613 ix86_opt_yes,
3614 ix86_opt_no,
3615 ix86_opt_str,
3616 ix86_opt_isa
3619 static const struct
3621 const char *string;
3622 size_t len;
3623 enum ix86_opt_type type;
3624 int opt;
3625 int mask;
3626 } attrs[] = {
3627 /* isa options */
3628 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3629 IX86_ATTR_ISA ("abm", OPT_mabm),
3630 IX86_ATTR_ISA ("aes", OPT_maes),
3631 IX86_ATTR_ISA ("avx", OPT_mavx),
3632 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3633 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3634 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3635 IX86_ATTR_ISA ("sse", OPT_msse),
3636 IX86_ATTR_ISA ("sse2", OPT_msse2),
3637 IX86_ATTR_ISA ("sse3", OPT_msse3),
3638 IX86_ATTR_ISA ("sse4", OPT_msse4),
3639 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3640 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3641 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3642 IX86_ATTR_ISA ("sse5", OPT_msse5),
3643 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3645 /* string options */
3646 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3647 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3648 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3650 /* flag options */
3651 IX86_ATTR_YES ("cld",
3652 OPT_mcld,
3653 MASK_CLD),
3655 IX86_ATTR_NO ("fancy-math-387",
3656 OPT_mfancy_math_387,
3657 MASK_NO_FANCY_MATH_387),
3659 IX86_ATTR_NO ("fused-madd",
3660 OPT_mfused_madd,
3661 MASK_NO_FUSED_MADD),
3663 IX86_ATTR_YES ("ieee-fp",
3664 OPT_mieee_fp,
3665 MASK_IEEE_FP),
3667 IX86_ATTR_YES ("inline-all-stringops",
3668 OPT_minline_all_stringops,
3669 MASK_INLINE_ALL_STRINGOPS),
3671 IX86_ATTR_YES ("inline-stringops-dynamically",
3672 OPT_minline_stringops_dynamically,
3673 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3675 IX86_ATTR_NO ("align-stringops",
3676 OPT_mno_align_stringops,
3677 MASK_NO_ALIGN_STRINGOPS),
3679 IX86_ATTR_YES ("recip",
3680 OPT_mrecip,
3681 MASK_RECIP),
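/* For example, an attribute string such as
     __attribute__ ((target ("sse4.1,no-fused-madd,arch=core2")))
   (assuming "core2" is a valid -march value) is split on commas below:
   "sse4.1" hits an ix86_opt_isa entry and is passed to ix86_handle_option,
   "no-fused-madd" has its "no-" prefix stripped and, the entry being
   IX86_ATTR_NO, the double negation sets MASK_NO_FUSED_MADD, and
   "arch=core2" is an ix86_opt_str entry whose value is saved in
   p_strings. */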
3685 /* If this is a list, recurse to get the options. */
3686 if (TREE_CODE (args) == TREE_LIST)
3688 bool ret = true;
3690 for (; args; args = TREE_CHAIN (args))
3691 if (TREE_VALUE (args)
3692 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3693 ret = false;
3695 return ret;
3698 else if (TREE_CODE (args) != STRING_CST)
3699 gcc_unreachable ();
3701 /* Handle multiple arguments separated by commas. */
3702 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3704 while (next_optstr && *next_optstr != '\0')
3706 char *p = next_optstr;
3707 char *orig_p = p;
3708 char *comma = strchr (next_optstr, ',');
3709 const char *opt_string;
3710 size_t len, opt_len;
3711 int opt;
3712 bool opt_set_p;
3713 char ch;
3714 unsigned i;
3715 enum ix86_opt_type type = ix86_opt_unknown;
3716 int mask = 0;
3718 if (comma)
3720 *comma = '\0';
3721 len = comma - next_optstr;
3722 next_optstr = comma + 1;
3724 else
3726 len = strlen (p);
3727 next_optstr = NULL;
3730 /* Recognize no-xxx. */
3731 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3733 opt_set_p = false;
3734 p += 3;
3735 len -= 3;
3737 else
3738 opt_set_p = true;
3740 /* Find the option. */
3741 ch = *p;
3742 opt = N_OPTS;
3743 for (i = 0; i < ARRAY_SIZE (attrs); i++)
3745 type = attrs[i].type;
3746 opt_len = attrs[i].len;
3747 if (ch == attrs[i].string[0]
3748 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3749 && memcmp (p, attrs[i].string, opt_len) == 0)
3751 opt = attrs[i].opt;
3752 mask = attrs[i].mask;
3753 opt_string = attrs[i].string;
3754 break;
3758 /* Process the option. */
3759 if (opt == N_OPTS)
3761 error ("attribute(target(\"%s\")) is unknown", orig_p);
3762 ret = false;
3765 else if (type == ix86_opt_isa)
3766 ix86_handle_option (opt, p, opt_set_p);
3768 else if (type == ix86_opt_yes || type == ix86_opt_no)
3770 if (type == ix86_opt_no)
3771 opt_set_p = !opt_set_p;
3773 if (opt_set_p)
3774 target_flags |= mask;
3775 else
3776 target_flags &= ~mask;
3779 else if (type == ix86_opt_str)
3781 if (p_strings[opt])
3783 error ("option(\"%s\") was already specified", opt_string);
3784 ret = false;
3786 else
3787 p_strings[opt] = xstrdup (p + opt_len);
3790 else
3791 gcc_unreachable ();
3794 return ret;
3797 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
3799 tree
3800 ix86_valid_target_attribute_tree (tree args)
3802 const char *orig_arch_string = ix86_arch_string;
3803 const char *orig_tune_string = ix86_tune_string;
3804 const char *orig_fpmath_string = ix86_fpmath_string;
3805 int orig_tune_defaulted = ix86_tune_defaulted;
3806 int orig_arch_specified = ix86_arch_specified;
3807 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3808 tree t = NULL_TREE;
3809 int i;
3810 struct cl_target_option *def
3811 = TREE_TARGET_OPTION (target_option_default_node);
3813 /* Process each of the options on the chain. */
3814 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3815 return NULL_TREE;
3817 /* If the changed options are different from the default, rerun override_options,
3818 and then save the options away. The string options are attribute options,
3819 and will be undone when we copy the save structure. */
3820 if (ix86_isa_flags != def->ix86_isa_flags
3821 || target_flags != def->target_flags
3822 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3823 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3824 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3826 /* If we are using the default tune= or arch=, undo the string assigned,
3827 and use the default. */
3828 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3829 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3830 else if (!orig_arch_specified)
3831 ix86_arch_string = NULL;
3833 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3834 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3835 else if (orig_tune_defaulted)
3836 ix86_tune_string = NULL;
3838 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
3839 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3840 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3841 else if (!TARGET_64BIT && TARGET_SSE)
3842 ix86_fpmath_string = "sse,387";
3844 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
3845 override_options (false);
3847 /* Add any builtin functions with the new isa if any. */
3848 ix86_add_new_builtins (ix86_isa_flags);
3850 /* Save the current options unless we are validating options for
3851 #pragma. */
3852 t = build_target_option_node ();
3854 ix86_arch_string = orig_arch_string;
3855 ix86_tune_string = orig_tune_string;
3856 ix86_fpmath_string = orig_fpmath_string;
3858 /* Free up memory allocated to hold the strings */
3859 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3860 if (option_strings[i])
3861 free (option_strings[i]);
3864 return t;
3867 /* Hook to validate attribute((target("string"))). */
3869 static bool
3870 ix86_valid_target_attribute_p (tree fndecl,
3871 tree ARG_UNUSED (name),
3872 tree args,
3873 int ARG_UNUSED (flags))
3875 struct cl_target_option cur_target;
3876 bool ret = true;
3877 tree old_optimize = build_optimization_node ();
3878 tree new_target, new_optimize;
3879 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3881 /* If the function changed the optimization levels as well as setting target
3882 options, start with the optimizations specified. */
3883 if (func_optimize && func_optimize != old_optimize)
3884 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3886 /* The target attributes may also change some optimization flags, so update
3887 the optimization options if necessary. */
3888 cl_target_option_save (&cur_target);
3889 new_target = ix86_valid_target_attribute_tree (args);
3890 new_optimize = build_optimization_node ();
3892 if (!new_target)
3893 ret = false;
3895 else if (fndecl)
3897 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3899 if (old_optimize != new_optimize)
3900 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
3903 cl_target_option_restore (&cur_target);
3905 if (old_optimize != new_optimize)
3906 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3908 return ret;
3912 /* Hook to determine if one function can safely inline another. */
3914 static bool
3915 ix86_can_inline_p (tree caller, tree callee)
3917 bool ret = false;
3918 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3919 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3921 /* If callee has no option attributes, then it is ok to inline. */
3922 if (!callee_tree)
3923 ret = true;
3925 /* If caller has no option attributes, but callee does then it is not ok to
3926 inline. */
3927 else if (!caller_tree)
3928 ret = false;
3930 else
3932 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3933 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3935 /* The callee's isa options should be a subset of the caller's, i.e. an SSE5
3936 function can inline an SSE2 function but an SSE2 function can't inline
3937 an SSE5 function. */
3938 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3939 != callee_opts->ix86_isa_flags)
3940 ret = false;
3942 /* See if we have the same non-isa options. */
3943 else if (caller_opts->target_flags != callee_opts->target_flags)
3944 ret = false;
3946 /* See if arch, tune, etc. are the same. */
3947 else if (caller_opts->arch != callee_opts->arch)
3948 ret = false;
3950 else if (caller_opts->tune != callee_opts->tune)
3951 ret = false;
3953 else if (caller_opts->fpmath != callee_opts->fpmath)
3954 ret = false;
3956 else if (caller_opts->branch_cost != callee_opts->branch_cost)
3957 ret = false;
3959 else
3960 ret = true;
3963 return ret;
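/* For example, assuming -msse4.2 also enables SSE2 (as the option handling
   is expected to do), a callee declared with target("sse2") can be inlined
   into a caller declared with target("sse4.2"), because the callee's isa
   flags form a subset of the caller's; with the attributes swapped the
   subset test fails and the inline is rejected. */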
3967 /* Remember the last target of ix86_set_current_function. */
3968 static GTY(()) tree ix86_previous_fndecl;
3970 /* Establish appropriate back-end context for processing the function
3971 FNDECL. The argument might be NULL to indicate processing at top
3972 level, outside of any function scope. */
3973 static void
3974 ix86_set_current_function (tree fndecl)
3976 /* Only change the context if the function changes. This hook is called
3977 several times in the course of compiling a function, and we don't want to
3978 slow things down too much or call target_reinit when it isn't safe. */
3979 if (fndecl && fndecl != ix86_previous_fndecl)
3981 tree old_tree = (ix86_previous_fndecl
3982 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3983 : NULL_TREE);
3985 tree new_tree = (fndecl
3986 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3987 : NULL_TREE);
3989 ix86_previous_fndecl = fndecl;
3990 if (old_tree == new_tree)
3993 else if (new_tree)
3995 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
3996 target_reinit ();
3999 else if (old_tree)
4001 struct cl_target_option *def
4002 = TREE_TARGET_OPTION (target_option_current_node);
4004 cl_target_option_restore (def);
4005 target_reinit ();
4011 /* Return true if this goes in large data/bss. */
4013 static bool
4014 ix86_in_large_data_p (tree exp)
4016 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4017 return false;
4019 /* Functions are never large data. */
4020 if (TREE_CODE (exp) == FUNCTION_DECL)
4021 return false;
4023 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4025 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4026 if (strcmp (section, ".ldata") == 0
4027 || strcmp (section, ".lbss") == 0)
4028 return true;
4029 return false;
4031 else
4033 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4035 /* If this is an incomplete type with size 0, then we can't put it
4036 in data because it might be too big when completed. */
4037 if (!size || size > ix86_section_threshold)
4038 return true;
4041 return false;
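/* For example, with -mcmodel=medium a variable whose size exceeds
   ix86_section_threshold (the -mlarge-data-threshold value validated
   earlier) is treated as large data and is placed in .ldata/.lbss instead
   of .data/.bss by the section hooks below. */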
4044 /* Switch to the appropriate section for output of DECL.
4045 DECL is either a `VAR_DECL' node or a constant of some sort.
4046 RELOC indicates whether forming the initial value of DECL requires
4047 link-time relocations. */
4049 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4050 ATTRIBUTE_UNUSED;
4052 static section *
4053 x86_64_elf_select_section (tree decl, int reloc,
4054 unsigned HOST_WIDE_INT align)
4056 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4057 && ix86_in_large_data_p (decl))
4059 const char *sname = NULL;
4060 unsigned int flags = SECTION_WRITE;
4061 switch (categorize_decl_for_section (decl, reloc))
4063 case SECCAT_DATA:
4064 sname = ".ldata";
4065 break;
4066 case SECCAT_DATA_REL:
4067 sname = ".ldata.rel";
4068 break;
4069 case SECCAT_DATA_REL_LOCAL:
4070 sname = ".ldata.rel.local";
4071 break;
4072 case SECCAT_DATA_REL_RO:
4073 sname = ".ldata.rel.ro";
4074 break;
4075 case SECCAT_DATA_REL_RO_LOCAL:
4076 sname = ".ldata.rel.ro.local";
4077 break;
4078 case SECCAT_BSS:
4079 sname = ".lbss";
4080 flags |= SECTION_BSS;
4081 break;
4082 case SECCAT_RODATA:
4083 case SECCAT_RODATA_MERGE_STR:
4084 case SECCAT_RODATA_MERGE_STR_INIT:
4085 case SECCAT_RODATA_MERGE_CONST:
4086 sname = ".lrodata";
4087 flags = 0;
4088 break;
4089 case SECCAT_SRODATA:
4090 case SECCAT_SDATA:
4091 case SECCAT_SBSS:
4092 gcc_unreachable ();
4093 case SECCAT_TEXT:
4094 case SECCAT_TDATA:
4095 case SECCAT_TBSS:
4096 /* We don't split these for the medium model. Place them into
4097 default sections and hope for the best. */
4098 break;
4099 case SECCAT_EMUTLS_VAR:
4100 case SECCAT_EMUTLS_TMPL:
4101 gcc_unreachable ();
4103 if (sname)
4105 /* We might get called with string constants, but get_named_section
4106 doesn't like them as they are not DECLs. Also, we need to set
4107 flags in that case. */
4108 if (!DECL_P (decl))
4109 return get_section (sname, flags, NULL);
4110 return get_named_section (decl, sname, reloc);
4113 return default_elf_select_section (decl, reloc, align);
4116 /* Build up a unique section name, expressed as a
4117 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4118 RELOC indicates whether the initial value of EXP requires
4119 link-time relocations. */
4121 static void ATTRIBUTE_UNUSED
4122 x86_64_elf_unique_section (tree decl, int reloc)
4124 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4125 && ix86_in_large_data_p (decl))
4127 const char *prefix = NULL;
4128 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4129 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4131 switch (categorize_decl_for_section (decl, reloc))
4133 case SECCAT_DATA:
4134 case SECCAT_DATA_REL:
4135 case SECCAT_DATA_REL_LOCAL:
4136 case SECCAT_DATA_REL_RO:
4137 case SECCAT_DATA_REL_RO_LOCAL:
4138 prefix = one_only ? ".ld" : ".ldata";
4139 break;
4140 case SECCAT_BSS:
4141 prefix = one_only ? ".lb" : ".lbss";
4142 break;
4143 case SECCAT_RODATA:
4144 case SECCAT_RODATA_MERGE_STR:
4145 case SECCAT_RODATA_MERGE_STR_INIT:
4146 case SECCAT_RODATA_MERGE_CONST:
4147 prefix = one_only ? ".lr" : ".lrodata";
4148 break;
4149 case SECCAT_SRODATA:
4150 case SECCAT_SDATA:
4151 case SECCAT_SBSS:
4152 gcc_unreachable ();
4153 case SECCAT_TEXT:
4154 case SECCAT_TDATA:
4155 case SECCAT_TBSS:
4156 /* We don't split these for the medium model. Place them into
4157 default sections and hope for the best. */
4158 break;
4159 case SECCAT_EMUTLS_VAR:
4160 prefix = targetm.emutls.var_section;
4161 break;
4162 case SECCAT_EMUTLS_TMPL:
4163 prefix = targetm.emutls.tmpl_section;
4164 break;
4166 if (prefix)
4168 const char *name, *linkonce;
4169 char *string;
4171 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4172 name = targetm.strip_name_encoding (name);
4174 /* If we're using one_only, then there needs to be a .gnu.linkonce
4175 prefix to the section name. */
4176 linkonce = one_only ? ".gnu.linkonce" : "";
4178 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4180 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4181 return;
4184 default_unique_section (decl, reloc);
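/* For example, a large-data variable "big_table" (a hypothetical name used
   only for illustration) gets the section name ".ldata.big_table" from the
   ACONCAT above; if it is one-only on a target without COMDAT groups, the
   name becomes ".gnu.linkonce.ld.big_table". */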
4187 #ifdef COMMON_ASM_OP
4188 /* This says how to output assembler code to declare an
4189 uninitialized external linkage data object.
4191 For medium model x86-64 we need to use the .largecomm directive for
4192 large objects. */
4193 void
4194 x86_elf_aligned_common (FILE *file,
4195 const char *name, unsigned HOST_WIDE_INT size,
4196 int align)
4198 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4199 && size > (unsigned int)ix86_section_threshold)
4200 fprintf (file, ".largecomm\t");
4201 else
4202 fprintf (file, "%s", COMMON_ASM_OP);
4203 assemble_name (file, name);
4204 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
4205 size, align / BITS_PER_UNIT);
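/* For example, a 131072-byte common object "big_buf" (a hypothetical name)
   aligned to 256 bits in the medium model exceeds the section threshold and
   is emitted as ".largecomm big_buf,131072,32"; smaller objects keep the
   ordinary COMMON_ASM_OP form. */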
4207 #endif
4209 /* Utility function for targets to use in implementing
4210 ASM_OUTPUT_ALIGNED_BSS. */
4212 void
4213 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4214 const char *name, unsigned HOST_WIDE_INT size,
4215 int align)
4217 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4218 && size > (unsigned int)ix86_section_threshold)
4219 switch_to_section (get_named_section (decl, ".lbss", 0));
4220 else
4221 switch_to_section (bss_section);
4222 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4223 #ifdef ASM_DECLARE_OBJECT_NAME
4224 last_assemble_variable_decl = decl;
4225 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4226 #else
4227 /* The standard thing is just to output a label for the object. */
4228 ASM_OUTPUT_LABEL (file, name);
4229 #endif /* ASM_DECLARE_OBJECT_NAME */
4230 ASM_OUTPUT_SKIP (file, size ? size : 1);
4233 void
4234 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4236 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4237 make the problem with not enough registers even worse. */
4238 #ifdef INSN_SCHEDULING
4239 if (level > 1)
4240 flag_schedule_insns = 0;
4241 #endif
4243 if (TARGET_MACHO)
4244 /* The Darwin libraries never set errno, so we might as well
4245 avoid calling them when that's the only reason we would. */
4246 flag_errno_math = 0;
4248 /* The default values of these switches depend on TARGET_64BIT,
4249 which is not known at this moment. Mark these values with 2 and
4250 let the user override them. In case there is no command line option
4251 specifying them, we will set the defaults in override_options. */
4252 if (optimize >= 1)
4253 flag_omit_frame_pointer = 2;
4254 flag_pcc_struct_return = 2;
4255 flag_asynchronous_unwind_tables = 2;
4256 flag_vect_cost_model = 1;
4257 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4258 SUBTARGET_OPTIMIZATION_OPTIONS;
4259 #endif
4262 /* Decide whether we can make a sibling call to a function. DECL is the
4263 declaration of the function being targeted by the call and EXP is the
4264 CALL_EXPR representing the call. */
4266 static bool
4267 ix86_function_ok_for_sibcall (tree decl, tree exp)
4269 tree type, decl_or_type;
4270 rtx a, b;
4272 /* If we are generating position-independent code, we cannot sibcall
4273 optimize any indirect call, or a direct call to a global function,
4274 as the PLT requires %ebx be live. */
4275 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4276 return false;
4278 /* If we need to align the outgoing stack, then sibcalling would
4279 unalign the stack, which may break the called function. */
4280 if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
4281 return false;
4283 if (decl)
4285 decl_or_type = decl;
4286 type = TREE_TYPE (decl);
4288 else
4290 /* We're looking at the CALL_EXPR, we need the type of the function. */
4291 type = CALL_EXPR_FN (exp); /* pointer expression */
4292 type = TREE_TYPE (type); /* pointer type */
4293 type = TREE_TYPE (type); /* function type */
4294 decl_or_type = type;
4297 /* Check that the return value locations are the same. Like
4298 if we are returning floats on the 80387 register stack, we cannot
4299 make a sibcall from a function that doesn't return a float to a
4300 function that does or, conversely, from a function that does return
4301 a float to a function that doesn't; the necessary stack adjustment
4302 would not be executed. This is also the place we notice
4303 differences in the return value ABI. Note that it is ok for one
4304 of the functions to have void return type as long as the return
4305 value of the other is passed in a register. */
4306 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4307 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4308 cfun->decl, false);
4309 if (STACK_REG_P (a) || STACK_REG_P (b))
4311 if (!rtx_equal_p (a, b))
4312 return false;
4314 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4316 else if (!rtx_equal_p (a, b))
4317 return false;
4319 if (TARGET_64BIT)
4321 /* The SYSV ABI has more call-clobbered registers;
4322 disallow sibcalls from MS to SYSV. */
4323 if (cfun->machine->call_abi == MS_ABI
4324 && ix86_function_type_abi (type) == SYSV_ABI)
4325 return false;
4327 else
4329 /* If this call is indirect, we'll need to be able to use a
4330 call-clobbered register for the address of the target function.
4331 Make sure that all such registers are not used for passing
4332 parameters. Note that DLLIMPORT functions are indirect. */
4333 if (!decl
4334 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4336 if (ix86_function_regparm (type, NULL) >= 3)
4338 /* ??? Need to count the actual number of registers to be used,
4339 not the possible number of registers. Fix later. */
4340 return false;
4345 /* Otherwise okay. That also includes certain types of indirect calls. */
4346 return true;
4349 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4350 calling convention attributes;
4351 arguments as in struct attribute_spec.handler. */
4353 static tree
4354 ix86_handle_cconv_attribute (tree *node, tree name,
4355 tree args,
4356 int flags ATTRIBUTE_UNUSED,
4357 bool *no_add_attrs)
4359 if (TREE_CODE (*node) != FUNCTION_TYPE
4360 && TREE_CODE (*node) != METHOD_TYPE
4361 && TREE_CODE (*node) != FIELD_DECL
4362 && TREE_CODE (*node) != TYPE_DECL)
4364 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4365 name);
4366 *no_add_attrs = true;
4367 return NULL_TREE;
4370 /* Can combine regparm with all attributes but fastcall. */
4371 if (is_attribute_p ("regparm", name))
4373 tree cst;
4375 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4377 error ("fastcall and regparm attributes are not compatible");
4380 cst = TREE_VALUE (args);
4381 if (TREE_CODE (cst) != INTEGER_CST)
4383 warning (OPT_Wattributes,
4384 "%qE attribute requires an integer constant argument",
4385 name);
4386 *no_add_attrs = true;
4388 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4390 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4391 name, REGPARM_MAX);
4392 *no_add_attrs = true;
4395 return NULL_TREE;
4398 if (TARGET_64BIT)
4400 /* Do not warn when emulating the MS ABI. */
4401 if (TREE_CODE (*node) != FUNCTION_TYPE
4402 || ix86_function_type_abi (*node) != MS_ABI)
4403 warning (OPT_Wattributes, "%qE attribute ignored",
4404 name);
4405 *no_add_attrs = true;
4406 return NULL_TREE;
4409 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4410 if (is_attribute_p ("fastcall", name))
4412 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4414 error ("fastcall and cdecl attributes are not compatible");
4416 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4418 error ("fastcall and stdcall attributes are not compatible");
4420 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4422 error ("fastcall and regparm attributes are not compatible");
4426 /* Can combine stdcall with fastcall (redundant), regparm and
4427 sseregparm. */
4428 else if (is_attribute_p ("stdcall", name))
4430 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4432 error ("stdcall and cdecl attributes are not compatible");
4434 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4436 error ("stdcall and fastcall attributes are not compatible");
4440 /* Can combine cdecl with regparm and sseregparm. */
4441 else if (is_attribute_p ("cdecl", name))
4443 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4445 error ("stdcall and cdecl attributes are not compatible");
4447 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4449 error ("fastcall and cdecl attributes are not compatible");
4453 /* Can combine sseregparm with all attributes. */
4455 return NULL_TREE;
4458 /* Return 0 if the attributes for two types are incompatible, 1 if they
4459 are compatible, and 2 if they are nearly compatible (which causes a
4460 warning to be generated). */
4462 static int
4463 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4465 /* Check for mismatch of non-default calling convention. */
4466 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4468 if (TREE_CODE (type1) != FUNCTION_TYPE
4469 && TREE_CODE (type1) != METHOD_TYPE)
4470 return 1;
4472 /* Check for mismatched fastcall/regparm types. */
4473 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4474 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4475 || (ix86_function_regparm (type1, NULL)
4476 != ix86_function_regparm (type2, NULL)))
4477 return 0;
4479 /* Check for mismatched sseregparm types. */
4480 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4481 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4482 return 0;
4484 /* Check for mismatched return types (cdecl vs stdcall). */
4485 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4486 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4487 return 0;
4489 return 1;
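/* For example, assigning the address of a function declared
     void __attribute__ ((stdcall)) f (void);
   to a plain "void (*) (void)" pointer compares the two function types
   here; without -mrtd the stdcall lookup above differs between the types,
   so 0 (incompatible) is returned and the front end can warn. */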
4492 /* Return the regparm value for a function with the indicated TYPE and DECL.
4493 DECL may be NULL when calling function indirectly
4494 or considering a libcall. */
4496 static int
4497 ix86_function_regparm (const_tree type, const_tree decl)
4499 tree attr;
4500 int regparm;
4502 static bool error_issued;
4504 if (TARGET_64BIT)
4505 return (ix86_function_type_abi (type) == SYSV_ABI
4506 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4508 regparm = ix86_regparm;
4509 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4510 if (attr)
4512 regparm
4513 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4515 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4517 /* We can't use regparm(3) for nested functions because
4518 these pass the static chain pointer in the %ecx register. */
4519 if (!error_issued && regparm == 3
4520 && decl_function_context (decl)
4521 && !DECL_NO_STATIC_CHAIN (decl))
4523 error ("nested functions are limited to 2 register parameters");
4524 error_issued = true;
4525 return 0;
4529 return regparm;
4532 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4533 return 2;
4535 /* Use register calling convention for local functions when possible. */
4536 if (decl
4537 && TREE_CODE (decl) == FUNCTION_DECL
4538 && optimize
4539 && !profile_flag)
4541 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4542 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4543 if (i && i->local)
4545 int local_regparm, globals = 0, regno;
4546 struct function *f;
4548 /* Make sure no regparm register is taken by a
4549 fixed register variable. */
4550 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4551 if (fixed_regs[local_regparm])
4552 break;
4554 /* We can't use regparm(3) for nested functions as these use
4555 static chain pointer in third argument. */
4556 if (local_regparm == 3
4557 && decl_function_context (decl)
4558 && !DECL_NO_STATIC_CHAIN (decl))
4559 local_regparm = 2;
4561 /* If the function realigns its stack pointer, the prologue will
4562 clobber %ecx. If we've already generated code for the callee,
4563 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4564 scanning the attributes for the self-realigning property. */
4565 f = DECL_STRUCT_FUNCTION (decl);
4566 /* Since the current internal arg pointer won't conflict with
4567 parameter-passing regs, there is no need to change stack
4568 realignment or adjust the regparm number.
4570 Each fixed register usage increases register pressure,
4571 so fewer registers should be used for argument passing.
4572 This functionality can be overridden by an explicit
4573 regparm value. */
4574 for (regno = 0; regno <= DI_REG; regno++)
4575 if (fixed_regs[regno])
4576 globals++;
4578 local_regparm
4579 = globals < local_regparm ? local_regparm - globals : 0;
4581 if (local_regparm > regparm)
4582 regparm = local_regparm;
4586 return regparm;
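/* For example, a declaration such as
     int f (int a, int b, int c) __attribute__ ((regparm (3)));
   passes all three arguments in registers (conventionally %eax, %edx and
   %ecx); the local-function path above derives a similar regparm value
   automatically for static functions when optimizing. */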
4589 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4590 DFmode (2) arguments in SSE registers for a function with the
4591 indicated TYPE and DECL. DECL may be NULL when calling function
4592 indirectly or considering a libcall. Otherwise return 0. */
4594 static int
4595 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4597 gcc_assert (!TARGET_64BIT);
4599 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4600 by the sseregparm attribute. */
4601 if (TARGET_SSEREGPARM
4602 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4604 if (!TARGET_SSE)
4606 if (warn)
4608 if (decl)
4609 error ("Calling %qD with attribute sseregparm without "
4610 "SSE/SSE2 enabled", decl);
4611 else
4612 error ("Calling %qT with attribute sseregparm without "
4613 "SSE/SSE2 enabled", type);
4615 return 0;
4618 return 2;
4621 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4622 (and DFmode for SSE2) arguments in SSE registers. */
4623 if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
4625 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4626 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4627 if (i && i->local)
4628 return TARGET_SSE2 ? 2 : 1;
4631 return 0;
4634 /* Return true if EAX is live at the start of the function. Used by
4635 ix86_expand_prologue to determine if we need special help before
4636 calling allocate_stack_worker. */
4638 static bool
4639 ix86_eax_live_at_start_p (void)
4641 /* Cheat. Don't bother working forward from ix86_function_regparm
4642 to the function type to whether an actual argument is located in
4643 eax. Instead just look at cfg info, which is still close enough
4644 to correct at this point. This gives false positives for broken
4645 functions that might use uninitialized data that happens to be
4646 allocated in eax, but who cares? */
4647 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4650 /* Value is the number of bytes of arguments automatically
4651 popped when returning from a subroutine call.
4652 FUNDECL is the declaration node of the function (as a tree),
4653 FUNTYPE is the data type of the function (as a tree),
4654 or for a library call it is an identifier node for the subroutine name.
4655 SIZE is the number of bytes of arguments passed on the stack.
4657 On the 80386, the RTD insn may be used to pop them if the number
4658 of args is fixed, but if the number is variable then the caller
4659 must pop them all. RTD can't be used for library calls now
4660 because the library is compiled with the Unix compiler.
4661 Use of RTD is a selectable option, since it is incompatible with
4662 standard Unix calling sequences. If the option is not selected,
4663 the caller must always pop the args.
4665 The attribute stdcall is equivalent to RTD on a per module basis. */
4668 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4670 int rtd;
4672 /* None of the 64-bit ABIs pop arguments. */
4673 if (TARGET_64BIT)
4674 return 0;
4676 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4678 /* Cdecl functions override -mrtd, and never pop the stack. */
4679 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4681 /* Stdcall and fastcall functions will pop the stack if not
4682 variable args. */
4683 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4684 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
4685 rtd = 1;
4687 if (rtd && ! stdarg_p (funtype))
4688 return size;
4691 /* Lose any fake structure return argument if it is passed on the stack. */
4692 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4693 && !KEEP_AGGREGATE_RETURN_POINTER)
4695 int nregs = ix86_function_regparm (funtype, fundecl);
4696 if (nregs == 0)
4697 return GET_MODE_SIZE (Pmode);
4700 return 0;
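/* For example, for
     void __attribute__ ((stdcall)) f (int a, int b);
   the stack arguments occupy 8 bytes, so 8 is returned and the callee pops
   its own arguments ("ret $8"); a variadic or cdecl function yields 0 and
   the caller pops instead. */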
4703 /* Argument support functions. */
4705 /* Return true when register may be used to pass function parameters. */
4706 bool
4707 ix86_function_arg_regno_p (int regno)
4709 int i;
4710 const int *parm_regs;
4712 if (!TARGET_64BIT)
4714 if (TARGET_MACHO)
4715 return (regno < REGPARM_MAX
4716 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4717 else
4718 return (regno < REGPARM_MAX
4719 || (TARGET_MMX && MMX_REGNO_P (regno)
4720 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4721 || (TARGET_SSE && SSE_REGNO_P (regno)
4722 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4725 if (TARGET_MACHO)
4727 if (SSE_REGNO_P (regno) && TARGET_SSE)
4728 return true;
4730 else
4732 if (TARGET_SSE && SSE_REGNO_P (regno)
4733 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4734 return true;
4737 /* TODO: The function should depend on current function ABI but
4738 builtins.c would need updating then. Therefore we use the
4739 default ABI. */
4741 /* RAX is used as hidden argument to va_arg functions. */
4742 if (ix86_abi == SYSV_ABI && regno == AX_REG)
4743 return true;
4745 if (ix86_abi == MS_ABI)
4746 parm_regs = x86_64_ms_abi_int_parameter_registers;
4747 else
4748 parm_regs = x86_64_int_parameter_registers;
4749 for (i = 0; i < (ix86_abi == MS_ABI
4750 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
4751 if (regno == parm_regs[i])
4752 return true;
4753 return false;
4756 /* Return true if we do not know how to pass TYPE solely in registers. */
4758 static bool
4759 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4761 if (must_pass_in_stack_var_size_or_pad (mode, type))
4762 return true;
4764 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4765 The layout_type routine is crafty and tries to trick us into passing
4766 currently unsupported vector types on the stack by using TImode. */
4767 return (!TARGET_64BIT && mode == TImode
4768 && type && TREE_CODE (type) != VECTOR_TYPE);
4771 /* Return the size, in bytes, of the area reserved for arguments passed
4772 in registers for the function represented by FNDECL, depending on the
4773 ABI format used. */
4775 ix86_reg_parm_stack_space (const_tree fndecl)
4777 enum calling_abi call_abi = SYSV_ABI;
4778 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4779 call_abi = ix86_function_abi (fndecl);
4780 else
4781 call_abi = ix86_function_type_abi (fndecl);
4782 if (call_abi == MS_ABI)
4783 return 32;
4784 return 0;
4787 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
4788 call abi used. */
4789 enum calling_abi
4790 ix86_function_type_abi (const_tree fntype)
4792 if (TARGET_64BIT && fntype != NULL)
4794 enum calling_abi abi = ix86_abi;
4795 if (abi == SYSV_ABI)
4797 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
4798 abi = MS_ABI;
4800 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
4801 abi = SYSV_ABI;
4802 return abi;
4804 return ix86_abi;
4807 static enum calling_abi
4808 ix86_function_abi (const_tree fndecl)
4810 if (! fndecl)
4811 return ix86_abi;
4812 return ix86_function_type_abi (TREE_TYPE (fndecl));
4815 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
4816 call abi used. */
4817 enum calling_abi
4818 ix86_cfun_abi (void)
4820 if (! cfun || ! TARGET_64BIT)
4821 return ix86_abi;
4822 return cfun->machine->call_abi;
4825 /* regclass.c */
4826 extern void init_regs (void);
4828 /* Implementation of the call abi switching target hook. The call register
4829 sets specific to FNDECL are selected. See also CONDITIONAL_REGISTER_USAGE
4830 for more details. */
4831 void
4832 ix86_call_abi_override (const_tree fndecl)
4834 if (fndecl == NULL_TREE)
4835 cfun->machine->call_abi = ix86_abi;
4836 else
4837 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4840 /* The MS and SYSV ABIs have different sets of call-used registers. Avoid
4841 expensive re-initialization of init_regs each time we switch function
4842 context, since this is needed only during RTL expansion. */
4843 static void
4844 ix86_maybe_switch_abi (void)
4846 if (TARGET_64BIT &&
4847 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
4848 reinit_regs ();
4851 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4852 for a call to a function whose data type is FNTYPE.
4853 For a library call, FNTYPE is 0. */
4855 void
4856 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4857 tree fntype, /* tree ptr for function decl */
4858 rtx libname, /* SYMBOL_REF of library name or 0 */
4859 tree fndecl)
4861 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4862 memset (cum, 0, sizeof (*cum));
4864 if (fndecl)
4865 cum->call_abi = ix86_function_abi (fndecl);
4866 else
4867 cum->call_abi = ix86_function_type_abi (fntype);
4868 /* Set up the number of registers to use for passing arguments. */
4870 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4871 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
4872 "or subtarget optimization implying it");
4873 cum->nregs = ix86_regparm;
4874 if (TARGET_64BIT)
4876 if (cum->call_abi != ix86_abi)
4877 cum->nregs = (ix86_abi != SYSV_ABI
4878 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4880 if (TARGET_SSE)
4882 cum->sse_nregs = SSE_REGPARM_MAX;
4883 if (TARGET_64BIT)
4885 if (cum->call_abi != ix86_abi)
4886 cum->sse_nregs = (ix86_abi != SYSV_ABI
4887 ? X86_64_SSE_REGPARM_MAX
4888 : X86_64_MS_SSE_REGPARM_MAX);
4891 if (TARGET_MMX)
4892 cum->mmx_nregs = MMX_REGPARM_MAX;
4893 cum->warn_avx = true;
4894 cum->warn_sse = true;
4895 cum->warn_mmx = true;
4897 /* Because the type might mismatch between caller and callee, we need to
4898 use the actual type of the function for local calls.
4899 FIXME: cgraph_analyze can be told to actually record whether a function
4900 uses va_start, so for local functions maybe_vaarg can be made more
4901 aggressive, helping K&R code.
4902 FIXME: once the type system is fixed, we won't need this code anymore. */
4903 if (i && i->local)
4904 fntype = TREE_TYPE (fndecl);
4905 cum->maybe_vaarg = (fntype
4906 ? (!prototype_p (fntype) || stdarg_p (fntype))
4907 : !libname);
4909 if (!TARGET_64BIT)
4911 /* If there are variable arguments, then we won't pass anything
4912 in registers in 32-bit mode. */
4913 if (stdarg_p (fntype))
4915 cum->nregs = 0;
4916 cum->sse_nregs = 0;
4917 cum->mmx_nregs = 0;
4918 cum->warn_avx = 0;
4919 cum->warn_sse = 0;
4920 cum->warn_mmx = 0;
4921 return;
4924 /* Use ecx and edx registers if function has fastcall attribute,
4925 else look for regparm information. */
4926 if (fntype)
4928 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4930 cum->nregs = 2;
4931 cum->fastcall = 1;
4933 else
4934 cum->nregs = ix86_function_regparm (fntype, fndecl);
4937 /* Set up the number of SSE registers used for passing SFmode
4938 and DFmode arguments. Warn for mismatching ABI. */
4939 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4943 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4944 But in the case of vector types, it is some vector mode.
4946 When we have only some of our vector isa extensions enabled, then there
4947 are some modes for which vector_mode_supported_p is false. For these
4948 modes, the generic vector support in gcc will choose some non-vector mode
4949 in order to implement the type. By computing the natural mode, we'll
4950 select the proper ABI location for the operand and not depend on whatever
4951 the middle-end decides to do with these vector types.
4953 The middle-end can't deal with vector types larger than 16 bytes. In
4954 that case, we return the original mode and warn about the ABI change if
4955 CUM isn't NULL. */
4957 static enum machine_mode
4958 type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
4960 enum machine_mode mode = TYPE_MODE (type);
4962 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4964 HOST_WIDE_INT size = int_size_in_bytes (type);
4965 if ((size == 8 || size == 16 || size == 32)
4966 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4967 && TYPE_VECTOR_SUBPARTS (type) > 1)
4969 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
4971 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4972 mode = MIN_MODE_VECTOR_FLOAT;
4973 else
4974 mode = MIN_MODE_VECTOR_INT;
4976 /* Get the mode which has this inner mode and number of units. */
4977 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4978 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4979 && GET_MODE_INNER (mode) == innermode)
4981 if (size == 32 && !TARGET_AVX)
4983 static bool warnedavx;
4985 if (cum
4986 && !warnedavx
4987 && cum->warn_avx)
4989 warnedavx = true;
4990 warning (0, "AVX vector argument without AVX "
4991 "enabled changes the ABI");
4993 return TYPE_MODE (type);
4995 else
4996 return mode;
4999 gcc_unreachable ();
5003 return mode;
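/* For example, for a type such as
     typedef float v4sf __attribute__ ((vector_size (16)));
   the wider-mode walk above finds V4SFmode (four SFmode units), so a v4sf
   argument gets its natural vector mode for ABI purposes; a 32-byte vector
   compiled without -mavx instead warns and falls back to TYPE_MODE. */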
5006 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5007 this may not agree with the mode that the type system has chosen for the
5008 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5009 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5011 static rtx
5012 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5013 unsigned int regno)
5015 rtx tmp;
5017 if (orig_mode != BLKmode)
5018 tmp = gen_rtx_REG (orig_mode, regno);
5019 else
5021 tmp = gen_rtx_REG (mode, regno);
5022 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5023 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5026 return tmp;
5029 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
5030 of this code is to classify each 8bytes of incoming argument by the register
5031 class and assign registers accordingly. */
5033 /* Return the union class of CLASS1 and CLASS2.
5034 See the x86-64 PS ABI for details. */
5036 static enum x86_64_reg_class
5037 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5039 /* Rule #1: If both classes are equal, this is the resulting class. */
5040 if (class1 == class2)
5041 return class1;
5043 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5044 the other class. */
5045 if (class1 == X86_64_NO_CLASS)
5046 return class2;
5047 if (class2 == X86_64_NO_CLASS)
5048 return class1;
5050 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5051 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5052 return X86_64_MEMORY_CLASS;
5054 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5055 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5056 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5057 return X86_64_INTEGERSI_CLASS;
5058 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5059 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5060 return X86_64_INTEGER_CLASS;
5062 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5063 MEMORY is used. */
5064 if (class1 == X86_64_X87_CLASS
5065 || class1 == X86_64_X87UP_CLASS
5066 || class1 == X86_64_COMPLEX_X87_CLASS
5067 || class2 == X86_64_X87_CLASS
5068 || class2 == X86_64_X87UP_CLASS
5069 || class2 == X86_64_COMPLEX_X87_CLASS)
5070 return X86_64_MEMORY_CLASS;
5072 /* Rule #6: Otherwise class SSE is used. */
5073 return X86_64_SSE_CLASS;
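/* A worked example of the rules above, for a hypothetical

     union u { double d; long l; };

   Both fields occupy the same eightbyte; D classifies as X86_64_SSEDF_CLASS
   and L as X86_64_INTEGER_CLASS, so rule #4 merges them to
   X86_64_INTEGER_CLASS and the union is passed in a general register. */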
5076 /* Classify the argument of type TYPE and mode MODE.
5077 CLASSES will be filled with the register classes used to pass each word
5078 of the operand. The number of words is returned. If the parameter
5079 should be passed in memory, 0 is returned. As a special case for zero
5080 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5082 BIT_OFFSET is used internally for handling records and specifies the
5083 offset of the current piece in bits, modulo 256 to avoid overflow cases.
5085 See the x86-64 PS ABI for details.
5088 static int
5089 classify_argument (enum machine_mode mode, const_tree type,
5090 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5092 HOST_WIDE_INT bytes =
5093 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5094 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5096 /* Variable sized entities are always passed/returned in memory. */
5097 if (bytes < 0)
5098 return 0;
5100 if (mode != VOIDmode
5101 && targetm.calls.must_pass_in_stack (mode, type))
5102 return 0;
5104 if (type && AGGREGATE_TYPE_P (type))
5106 int i;
5107 tree field;
5108 enum x86_64_reg_class subclasses[MAX_CLASSES];
5110 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5111 if (bytes > 32)
5112 return 0;
5114 for (i = 0; i < words; i++)
5115 classes[i] = X86_64_NO_CLASS;
5117 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5118 signal the memory class, so handle them as a special case. */
5119 if (!words)
5121 classes[0] = X86_64_NO_CLASS;
5122 return 1;
5125 /* Classify each field of record and merge classes. */
5126 switch (TREE_CODE (type))
5128 case RECORD_TYPE:
5129 /* And now merge the fields of the structure. */
5130 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5132 if (TREE_CODE (field) == FIELD_DECL)
5134 int num;
5136 if (TREE_TYPE (field) == error_mark_node)
5137 continue;
5139 /* Bitfields are always classified as integer. Handle them
5140 early, since later code would consider them to be
5141 misaligned integers. */
5142 if (DECL_BIT_FIELD (field))
5144 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5145 i < ((int_bit_position (field) + (bit_offset % 64))
5146 + tree_low_cst (DECL_SIZE (field), 0)
5147 + 63) / 8 / 8; i++)
5148 classes[i] =
5149 merge_classes (X86_64_INTEGER_CLASS,
5150 classes[i]);
5152 else
5154 int pos;
5156 type = TREE_TYPE (field);
5158 /* Flexible array member is ignored. */
5159 if (TYPE_MODE (type) == BLKmode
5160 && TREE_CODE (type) == ARRAY_TYPE
5161 && TYPE_SIZE (type) == NULL_TREE
5162 && TYPE_DOMAIN (type) != NULL_TREE
5163 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5164 == NULL_TREE))
5166 static bool warned;
5168 if (!warned && warn_psabi)
5170 warned = true;
5171 inform (input_location,
5172 "The ABI of passing struct with"
5173 " a flexible array member has"
5174 " changed in GCC 4.4");
5176 continue;
5178 num = classify_argument (TYPE_MODE (type), type,
5179 subclasses,
5180 (int_bit_position (field)
5181 + bit_offset) % 256);
5182 if (!num)
5183 return 0;
5184 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5185 for (i = 0; i < num && (i + pos) < words; i++)
5186 classes[i + pos] =
5187 merge_classes (subclasses[i], classes[i + pos]);
5191 break;
5193 case ARRAY_TYPE:
5194 /* Arrays are handled as small records. */
5196 int num;
5197 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5198 TREE_TYPE (type), subclasses, bit_offset);
5199 if (!num)
5200 return 0;
5202 /* The partial classes are now full classes. */
5203 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5204 subclasses[0] = X86_64_SSE_CLASS;
5205 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5206 && !((bit_offset % 64) == 0 && bytes == 4))
5207 subclasses[0] = X86_64_INTEGER_CLASS;
5209 for (i = 0; i < words; i++)
5210 classes[i] = subclasses[i % num];
5212 break;
5214 case UNION_TYPE:
5215 case QUAL_UNION_TYPE:
5216 /* Unions are similar to RECORD_TYPE but offset is always 0. */
5218 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5220 if (TREE_CODE (field) == FIELD_DECL)
5222 int num;
5224 if (TREE_TYPE (field) == error_mark_node)
5225 continue;
5227 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5228 TREE_TYPE (field), subclasses,
5229 bit_offset);
5230 if (!num)
5231 return 0;
5232 for (i = 0; i < num; i++)
5233 classes[i] = merge_classes (subclasses[i], classes[i]);
5236 break;
5238 default:
5239 gcc_unreachable ();
5242 if (words > 2)
5244 /* When the size exceeds 16 bytes, if the first class isn't
5245 X86_64_SSE_CLASS or any of the remaining classes isn't
5246 X86_64_SSEUP_CLASS, everything should be passed in
5247 memory. */
5248 if (classes[0] != X86_64_SSE_CLASS)
5249 return 0;
5251 for (i = 1; i < words; i++)
5252 if (classes[i] != X86_64_SSEUP_CLASS)
5253 return 0;
5256 /* Final merger cleanup. */
5257 for (i = 0; i < words; i++)
5259 /* If one class is MEMORY, everything should be passed in
5260 memory. */
5261 if (classes[i] == X86_64_MEMORY_CLASS)
5262 return 0;
5264 /* X86_64_SSEUP_CLASS should always be preceded by
5265 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5266 if (classes[i] == X86_64_SSEUP_CLASS
5267 && classes[i - 1] != X86_64_SSE_CLASS
5268 && classes[i - 1] != X86_64_SSEUP_CLASS)
5270 /* The first one should never be X86_64_SSEUP_CLASS. */
5271 gcc_assert (i != 0);
5272 classes[i] = X86_64_SSE_CLASS;
5275 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5276 everything should be passed in memory. */
5277 if (classes[i] == X86_64_X87UP_CLASS
5278 && (classes[i - 1] != X86_64_X87_CLASS))
5280 static bool warned;
5282 /* The first one should never be X86_64_X87UP_CLASS. */
5283 gcc_assert (i != 0);
5284 if (!warned && warn_psabi)
5286 warned = true;
5287 inform (input_location,
5288 "The ABI of passing union with long double"
5289 " has changed in GCC 4.4");
5291 return 0;
5294 return words;
5297 /* Compute the alignment needed. We align all types to natural boundaries
5298 with the exception of XFmode, which is aligned to 64 bits. */
5299 if (mode != VOIDmode && mode != BLKmode)
5301 int mode_alignment = GET_MODE_BITSIZE (mode);
5303 if (mode == XFmode)
5304 mode_alignment = 128;
5305 else if (mode == XCmode)
5306 mode_alignment = 256;
5307 if (COMPLEX_MODE_P (mode))
5308 mode_alignment /= 2;
5309 /* Misaligned fields are always returned in memory. */
5310 if (bit_offset % mode_alignment)
5311 return 0;
5314 /* For V1xx modes, just use the base mode. */
5315 if (VECTOR_MODE_P (mode) && mode != V1DImode
5316 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5317 mode = GET_MODE_INNER (mode);
5319 /* Classification of atomic types. */
5320 switch (mode)
5322 case SDmode:
5323 case DDmode:
5324 classes[0] = X86_64_SSE_CLASS;
5325 return 1;
5326 case TDmode:
5327 classes[0] = X86_64_SSE_CLASS;
5328 classes[1] = X86_64_SSEUP_CLASS;
5329 return 2;
5330 case DImode:
5331 case SImode:
5332 case HImode:
5333 case QImode:
5334 case CSImode:
5335 case CHImode:
5336 case CQImode:
5338 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5340 if (size <= 32)
5342 classes[0] = X86_64_INTEGERSI_CLASS;
5343 return 1;
5345 else if (size <= 64)
5347 classes[0] = X86_64_INTEGER_CLASS;
5348 return 1;
5350 else if (size <= 64+32)
5352 classes[0] = X86_64_INTEGER_CLASS;
5353 classes[1] = X86_64_INTEGERSI_CLASS;
5354 return 2;
5356 else if (size <= 64+64)
5358 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5359 return 2;
5361 else
5362 gcc_unreachable ();
5364 case CDImode:
5365 case TImode:
5366 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5367 return 2;
5368 case COImode:
5369 case OImode:
5370 /* OImode shouldn't be used directly. */
5371 gcc_unreachable ();
5372 case CTImode:
5373 return 0;
5374 case SFmode:
5375 if (!(bit_offset % 64))
5376 classes[0] = X86_64_SSESF_CLASS;
5377 else
5378 classes[0] = X86_64_SSE_CLASS;
5379 return 1;
5380 case DFmode:
5381 classes[0] = X86_64_SSEDF_CLASS;
5382 return 1;
5383 case XFmode:
5384 classes[0] = X86_64_X87_CLASS;
5385 classes[1] = X86_64_X87UP_CLASS;
5386 return 2;
5387 case TFmode:
5388 classes[0] = X86_64_SSE_CLASS;
5389 classes[1] = X86_64_SSEUP_CLASS;
5390 return 2;
5391 case SCmode:
5392 classes[0] = X86_64_SSE_CLASS;
5393 if (!(bit_offset % 64))
5394 return 1;
5395 else
5397 static bool warned;
5399 if (!warned && warn_psabi)
5401 warned = true;
5402 inform (input_location,
5403 "The ABI of passing structure with complex float"
5404 " member has changed in GCC 4.4");
5406 classes[1] = X86_64_SSESF_CLASS;
5407 return 2;
5409 case DCmode:
5410 classes[0] = X86_64_SSEDF_CLASS;
5411 classes[1] = X86_64_SSEDF_CLASS;
5412 return 2;
5413 case XCmode:
5414 classes[0] = X86_64_COMPLEX_X87_CLASS;
5415 return 1;
5416 case TCmode:
5417 /* This mode is larger than 16 bytes. */
5418 return 0;
5419 case V8SFmode:
5420 case V8SImode:
5421 case V32QImode:
5422 case V16HImode:
5423 case V4DFmode:
5424 case V4DImode:
5425 classes[0] = X86_64_SSE_CLASS;
5426 classes[1] = X86_64_SSEUP_CLASS;
5427 classes[2] = X86_64_SSEUP_CLASS;
5428 classes[3] = X86_64_SSEUP_CLASS;
5429 return 4;
5430 case V4SFmode:
5431 case V4SImode:
5432 case V16QImode:
5433 case V8HImode:
5434 case V2DFmode:
5435 case V2DImode:
5436 classes[0] = X86_64_SSE_CLASS;
5437 classes[1] = X86_64_SSEUP_CLASS;
5438 return 2;
5439 case V1DImode:
5440 case V2SFmode:
5441 case V2SImode:
5442 case V4HImode:
5443 case V8QImode:
5444 classes[0] = X86_64_SSE_CLASS;
5445 return 1;
5446 case BLKmode:
5447 case VOIDmode:
5448 return 0;
5449 default:
5450 gcc_assert (VECTOR_MODE_P (mode));
5452 if (bytes > 16)
5453 return 0;
5455 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5457 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5458 classes[0] = X86_64_INTEGERSI_CLASS;
5459 else
5460 classes[0] = X86_64_INTEGER_CLASS;
5461 classes[1] = X86_64_INTEGER_CLASS;
5462 return 1 + (bytes > 8);
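/* Two illustrative classifications under the rules above (hypothetical
   types):

     struct s1 { double d; int i; };   16 bytes, two eightbytes;
                                       classes = { X86_64_SSEDF_CLASS,
                                                   X86_64_INTEGERSI_CLASS },
                                       return value 2.

     struct s2 { char c[17]; };        17 bytes, three words; the first
                                       class is not X86_64_SSE_CLASS, so
                                       the words > 2 check above returns 0
                                       and the struct is passed in memory. */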
5466 /* Examine the argument and return the number of registers required in each
5467 class. Return 0 iff the parameter should be passed in memory. */
5468 static int
5469 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5470 int *int_nregs, int *sse_nregs)
5472 enum x86_64_reg_class regclass[MAX_CLASSES];
5473 int n = classify_argument (mode, type, regclass, 0);
5475 *int_nregs = 0;
5476 *sse_nregs = 0;
5477 if (!n)
5478 return 0;
5479 for (n--; n >= 0; n--)
5480 switch (regclass[n])
5482 case X86_64_INTEGER_CLASS:
5483 case X86_64_INTEGERSI_CLASS:
5484 (*int_nregs)++;
5485 break;
5486 case X86_64_SSE_CLASS:
5487 case X86_64_SSESF_CLASS:
5488 case X86_64_SSEDF_CLASS:
5489 (*sse_nregs)++;
5490 break;
5491 case X86_64_NO_CLASS:
5492 case X86_64_SSEUP_CLASS:
5493 break;
5494 case X86_64_X87_CLASS:
5495 case X86_64_X87UP_CLASS:
5496 if (!in_return)
5497 return 0;
5498 break;
5499 case X86_64_COMPLEX_X87_CLASS:
5500 return in_return ? 2 : 0;
5501 case X86_64_MEMORY_CLASS:
5502 gcc_unreachable ();
5504 return 1;
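/* For example, for the hypothetical struct s1 { double d; int i; } above,
   this yields *int_nregs = 1 and *sse_nregs = 1 and returns nonzero,
   whereas for a type passed in memory it returns 0 with both counts
   zeroed. */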
5507 /* Construct container for the argument used by GCC interface. See
5508 FUNCTION_ARG for the detailed description. */
5510 static rtx
5511 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5512 const_tree type, int in_return, int nintregs, int nsseregs,
5513 const int *intreg, int sse_regno)
5515 /* The following variables hold the static issued_error state. */
5516 static bool issued_sse_arg_error;
5517 static bool issued_sse_ret_error;
5518 static bool issued_x87_ret_error;
5520 enum machine_mode tmpmode;
5521 int bytes =
5522 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5523 enum x86_64_reg_class regclass[MAX_CLASSES];
5524 int n;
5525 int i;
5526 int nexps = 0;
5527 int needed_sseregs, needed_intregs;
5528 rtx exp[MAX_CLASSES];
5529 rtx ret;
5531 n = classify_argument (mode, type, regclass, 0);
5532 if (!n)
5533 return NULL;
5534 if (!examine_argument (mode, type, in_return, &needed_intregs,
5535 &needed_sseregs))
5536 return NULL;
5537 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5538 return NULL;
5540 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5541 some less clueful developer tries to use floating-point anyway. */
5542 if (needed_sseregs && !TARGET_SSE)
5544 if (in_return)
5546 if (!issued_sse_ret_error)
5548 error ("SSE register return with SSE disabled");
5549 issued_sse_ret_error = true;
5552 else if (!issued_sse_arg_error)
5554 error ("SSE register argument with SSE disabled");
5555 issued_sse_arg_error = true;
5557 return NULL;
5560 /* Likewise, error if the ABI requires us to return values in the
5561 x87 registers and the user specified -mno-80387. */
5562 if (!TARGET_80387 && in_return)
5563 for (i = 0; i < n; i++)
5564 if (regclass[i] == X86_64_X87_CLASS
5565 || regclass[i] == X86_64_X87UP_CLASS
5566 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5568 if (!issued_x87_ret_error)
5570 error ("x87 register return with x87 disabled");
5571 issued_x87_ret_error = true;
5573 return NULL;
5576 /* First construct the simple cases. Avoid SCmode, since we want to use
5577 a single register to pass this type. */
5578 if (n == 1 && mode != SCmode)
5579 switch (regclass[0])
5581 case X86_64_INTEGER_CLASS:
5582 case X86_64_INTEGERSI_CLASS:
5583 return gen_rtx_REG (mode, intreg[0]);
5584 case X86_64_SSE_CLASS:
5585 case X86_64_SSESF_CLASS:
5586 case X86_64_SSEDF_CLASS:
5587 if (mode != BLKmode)
5588 return gen_reg_or_parallel (mode, orig_mode,
5589 SSE_REGNO (sse_regno));
5590 break;
5591 case X86_64_X87_CLASS:
5592 case X86_64_COMPLEX_X87_CLASS:
5593 return gen_rtx_REG (mode, FIRST_STACK_REG);
5594 case X86_64_NO_CLASS:
5595 /* Zero sized array, struct or class. */
5596 return NULL;
5597 default:
5598 gcc_unreachable ();
5600 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5601 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5602 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5603 if (n == 4
5604 && regclass[0] == X86_64_SSE_CLASS
5605 && regclass[1] == X86_64_SSEUP_CLASS
5606 && regclass[2] == X86_64_SSEUP_CLASS
5607 && regclass[3] == X86_64_SSEUP_CLASS
5608 && mode != BLKmode)
5609 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5611 if (n == 2
5612 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5613 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5614 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5615 && regclass[1] == X86_64_INTEGER_CLASS
5616 && (mode == CDImode || mode == TImode || mode == TFmode)
5617 && intreg[0] + 1 == intreg[1])
5618 return gen_rtx_REG (mode, intreg[0]);
5620 /* Otherwise figure out the entries of the PARALLEL. */
5621 for (i = 0; i < n; i++)
5623 int pos;
5625 switch (regclass[i])
5627 case X86_64_NO_CLASS:
5628 break;
5629 case X86_64_INTEGER_CLASS:
5630 case X86_64_INTEGERSI_CLASS:
5631 /* Merge TImodes on aligned occasions here too. */
5632 if (i * 8 + 8 > bytes)
5633 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5634 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5635 tmpmode = SImode;
5636 else
5637 tmpmode = DImode;
5638 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
5639 if (tmpmode == BLKmode)
5640 tmpmode = DImode;
5641 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5642 gen_rtx_REG (tmpmode, *intreg),
5643 GEN_INT (i*8));
5644 intreg++;
5645 break;
5646 case X86_64_SSESF_CLASS:
5647 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5648 gen_rtx_REG (SFmode,
5649 SSE_REGNO (sse_regno)),
5650 GEN_INT (i*8));
5651 sse_regno++;
5652 break;
5653 case X86_64_SSEDF_CLASS:
5654 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5655 gen_rtx_REG (DFmode,
5656 SSE_REGNO (sse_regno)),
5657 GEN_INT (i*8));
5658 sse_regno++;
5659 break;
5660 case X86_64_SSE_CLASS:
5661 pos = i;
5662 switch (n)
5664 case 1:
5665 tmpmode = DImode;
5666 break;
5667 case 2:
5668 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
5670 tmpmode = TImode;
5671 i++;
5673 else
5674 tmpmode = DImode;
5675 break;
5676 case 4:
5677 gcc_assert (i == 0
5678 && regclass[1] == X86_64_SSEUP_CLASS
5679 && regclass[2] == X86_64_SSEUP_CLASS
5680 && regclass[3] == X86_64_SSEUP_CLASS);
5681 tmpmode = OImode;
5682 i += 3;
5683 break;
5684 default:
5685 gcc_unreachable ();
5687 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5688 gen_rtx_REG (tmpmode,
5689 SSE_REGNO (sse_regno)),
5690 GEN_INT (pos*8));
5691 sse_regno++;
5692 break;
5693 default:
5694 gcc_unreachable ();
5698 /* Empty aligned struct, union or class. */
5699 if (nexps == 0)
5700 return NULL;
5702 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5703 for (i = 0; i < nexps; i++)
5704 XVECEXP (ret, 0, i) = exp [i];
5705 return ret;
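/* For the hypothetical struct s1 { double d; int i; }, the container
   built above is, schematically,

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:SI di)   (const_int 8))])

   assuming the first free registers are %xmm0 and %rdi: the double
   travels in the SSE register and the int in the low half of the
   integer register, at byte offset 8. */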
5708 /* Update the data in CUM to advance over an argument of mode MODE
5709 and data type TYPE. (TYPE is null for libcalls where that information
5710 may not be available.) */
5712 static void
5713 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5714 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5716 switch (mode)
5718 default:
5719 break;
5721 case BLKmode:
5722 if (bytes < 0)
5723 break;
5724 /* FALLTHRU */
5726 case DImode:
5727 case SImode:
5728 case HImode:
5729 case QImode:
5730 cum->words += words;
5731 cum->nregs -= words;
5732 cum->regno += words;
5734 if (cum->nregs <= 0)
5736 cum->nregs = 0;
5737 cum->regno = 0;
5739 break;
5741 case OImode:
5742 /* OImode shouldn't be used directly. */
5743 gcc_unreachable ();
5745 case DFmode:
5746 if (cum->float_in_sse < 2)
5747 break;
5748 case SFmode:
5749 if (cum->float_in_sse < 1)
5750 break;
5751 /* FALLTHRU */
5753 case V8SFmode:
5754 case V8SImode:
5755 case V32QImode:
5756 case V16HImode:
5757 case V4DFmode:
5758 case V4DImode:
5759 case TImode:
5760 case V16QImode:
5761 case V8HImode:
5762 case V4SImode:
5763 case V2DImode:
5764 case V4SFmode:
5765 case V2DFmode:
5766 if (!type || !AGGREGATE_TYPE_P (type))
5768 cum->sse_words += words;
5769 cum->sse_nregs -= 1;
5770 cum->sse_regno += 1;
5771 if (cum->sse_nregs <= 0)
5773 cum->sse_nregs = 0;
5774 cum->sse_regno = 0;
5777 break;
5779 case V8QImode:
5780 case V4HImode:
5781 case V2SImode:
5782 case V2SFmode:
5783 case V1DImode:
5784 if (!type || !AGGREGATE_TYPE_P (type))
5786 cum->mmx_words += words;
5787 cum->mmx_nregs -= 1;
5788 cum->mmx_regno += 1;
5789 if (cum->mmx_nregs <= 0)
5791 cum->mmx_nregs = 0;
5792 cum->mmx_regno = 0;
5795 break;
5799 static void
5800 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5801 tree type, HOST_WIDE_INT words, int named)
5803 int int_nregs, sse_nregs;
5805 /* Unnamed 256bit vector mode parameters are passed on the stack. */
5806 if (!named && VALID_AVX256_REG_MODE (mode))
5807 return;
5809 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5810 cum->words += words;
5811 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5813 cum->nregs -= int_nregs;
5814 cum->sse_nregs -= sse_nregs;
5815 cum->regno += int_nregs;
5816 cum->sse_regno += sse_nregs;
5818 else
5819 cum->words += words;
5822 static void
5823 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5824 HOST_WIDE_INT words)
5826 /* Otherwise, this should be passed indirectly. */
5827 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5829 cum->words += words;
5830 if (cum->nregs > 0)
5832 cum->nregs -= 1;
5833 cum->regno += 1;
5837 void
5838 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5839 tree type, int named)
5841 HOST_WIDE_INT bytes, words;
5843 if (mode == BLKmode)
5844 bytes = int_size_in_bytes (type);
5845 else
5846 bytes = GET_MODE_SIZE (mode);
5847 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5849 if (type)
5850 mode = type_natural_mode (type, NULL);
5852 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5853 function_arg_advance_ms_64 (cum, bytes, words);
5854 else if (TARGET_64BIT)
5855 function_arg_advance_64 (cum, mode, type, words, named);
5856 else
5857 function_arg_advance_32 (cum, mode, type, bytes, words);
5860 /* Define where to put the arguments to a function.
5861 Value is zero to push the argument on the stack,
5862 or a hard register in which to store the argument.
5864 MODE is the argument's machine mode.
5865 TYPE is the data type of the argument (as a tree).
5866 This is null for libcalls where that information may
5867 not be available.
5868 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5869 the preceding args and about the function being called.
5870 NAMED is nonzero if this argument is a named parameter
5871 (otherwise it is an extra parameter matching an ellipsis). */
5873 static rtx
5874 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5875 enum machine_mode orig_mode, tree type,
5876 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5878 static bool warnedsse, warnedmmx;
5880 /* Avoid the AL settings for the Unix64 ABI. */
5881 if (mode == VOIDmode)
5882 return constm1_rtx;
5884 switch (mode)
5886 default:
5887 break;
5889 case BLKmode:
5890 if (bytes < 0)
5891 break;
5892 /* FALLTHRU */
5893 case DImode:
5894 case SImode:
5895 case HImode:
5896 case QImode:
5897 if (words <= cum->nregs)
5899 int regno = cum->regno;
5901 /* Fastcall allocates the first two DWORD (SImode) or
5902 smaller arguments to ECX and EDX if the argument isn't an
5903 aggregate type. */
5904 if (cum->fastcall)
5906 if (mode == BLKmode
5907 || mode == DImode
5908 || (type && AGGREGATE_TYPE_P (type)))
5909 break;
5911 /* ECX, not EAX, is the first allocated register. */
5912 if (regno == AX_REG)
5913 regno = CX_REG;
5915 return gen_rtx_REG (mode, regno);
5917 break;
5919 case DFmode:
5920 if (cum->float_in_sse < 2)
5921 break;
5922 case SFmode:
5923 if (cum->float_in_sse < 1)
5924 break;
5925 /* FALLTHRU */
5926 case TImode:
5927 /* In 32bit, we pass TImode in xmm registers. */
5928 case V16QImode:
5929 case V8HImode:
5930 case V4SImode:
5931 case V2DImode:
5932 case V4SFmode:
5933 case V2DFmode:
5934 if (!type || !AGGREGATE_TYPE_P (type))
5936 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5938 warnedsse = true;
5939 warning (0, "SSE vector argument without SSE enabled "
5940 "changes the ABI");
5942 if (cum->sse_nregs)
5943 return gen_reg_or_parallel (mode, orig_mode,
5944 cum->sse_regno + FIRST_SSE_REG);
5946 break;
5948 case OImode:
5949 /* OImode shouldn't be used directly. */
5950 gcc_unreachable ();
5952 case V8SFmode:
5953 case V8SImode:
5954 case V32QImode:
5955 case V16HImode:
5956 case V4DFmode:
5957 case V4DImode:
5958 if (!type || !AGGREGATE_TYPE_P (type))
5960 if (cum->sse_nregs)
5961 return gen_reg_or_parallel (mode, orig_mode,
5962 cum->sse_regno + FIRST_SSE_REG);
5964 break;
5966 case V8QImode:
5967 case V4HImode:
5968 case V2SImode:
5969 case V2SFmode:
5970 case V1DImode:
5971 if (!type || !AGGREGATE_TYPE_P (type))
5973 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5975 warnedmmx = true;
5976 warning (0, "MMX vector argument without MMX enabled "
5977 "changes the ABI");
5979 if (cum->mmx_nregs)
5980 return gen_reg_or_parallel (mode, orig_mode,
5981 cum->mmx_regno + FIRST_MMX_REG);
5983 break;
5986 return NULL_RTX;
5989 static rtx
5990 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5991 enum machine_mode orig_mode, tree type, int named)
5993 /* Handle a hidden AL argument containing the number of registers
5994 for varargs x86-64 functions. */
5995 if (mode == VOIDmode)
5996 return GEN_INT (cum->maybe_vaarg
5997 ? (cum->sse_nregs < 0
5998 ? (cum->call_abi == ix86_abi
5999 ? SSE_REGPARM_MAX
6000 : (ix86_abi != SYSV_ABI
6001 ? X86_64_SSE_REGPARM_MAX
6002 : X86_64_MS_SSE_REGPARM_MAX))
6003 : cum->sse_regno)
6004 : -1);
6006 switch (mode)
6008 default:
6009 break;
6011 case V8SFmode:
6012 case V8SImode:
6013 case V32QImode:
6014 case V16HImode:
6015 case V4DFmode:
6016 case V4DImode:
6017 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6018 if (!named)
6019 return NULL;
6020 break;
6023 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6024 cum->sse_nregs,
6025 &x86_64_int_parameter_registers [cum->regno],
6026 cum->sse_regno);
6029 static rtx
6030 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6031 enum machine_mode orig_mode, int named,
6032 HOST_WIDE_INT bytes)
6034 unsigned int regno;
6036 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6037 We use the value -2 to specify that the current function call is MSABI. */
6038 if (mode == VOIDmode)
6039 return GEN_INT (-2);
6041 /* If we've run out of registers, it goes on the stack. */
6042 if (cum->nregs == 0)
6043 return NULL_RTX;
6045 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6047 /* Only floating point modes are passed in anything but integer regs. */
6048 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6050 if (named)
6051 regno = cum->regno + FIRST_SSE_REG;
6052 else
6054 rtx t1, t2;
6056 /* Unnamed floating parameters are passed in both the
6057 SSE and integer registers. */
6058 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6059 t2 = gen_rtx_REG (mode, regno);
6060 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6061 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6062 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6065 /* Handle aggregate types passed in a register. */
6066 if (orig_mode == BLKmode)
6068 if (bytes > 0 && bytes <= 8)
6069 mode = (bytes > 4 ? DImode : SImode);
6070 if (mode == BLKmode)
6071 mode = DImode;
6074 return gen_reg_or_parallel (mode, orig_mode, regno);
6078 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
6079 tree type, int named)
6081 enum machine_mode mode = omode;
6082 HOST_WIDE_INT bytes, words;
6084 if (mode == BLKmode)
6085 bytes = int_size_in_bytes (type);
6086 else
6087 bytes = GET_MODE_SIZE (mode);
6088 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6090 /* To simplify the code below, represent vector types with a vector mode
6091 even if MMX/SSE are not active. */
6092 if (type && TREE_CODE (type) == VECTOR_TYPE)
6093 mode = type_natural_mode (type, cum);
6095 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6096 return function_arg_ms_64 (cum, mode, omode, named, bytes);
6097 else if (TARGET_64BIT)
6098 return function_arg_64 (cum, mode, omode, type, named);
6099 else
6100 return function_arg_32 (cum, mode, omode, type, bytes, words);
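/* Putting the three variants together, a hypothetical SysV x86-64 call

     void f (int a, double b, __m128 c);

   places A in %edi, B in %xmm0 and C in %xmm1, while the same signature
   under the MS ABI uses %ecx and %xmm1 and passes C by reference with
   the pointer in %r8, since __m128 is passed by reference there. */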
6103 /* A C expression that indicates when an argument must be passed by
6104 reference. If nonzero for an argument, a copy of that argument is
6105 made in memory and a pointer to the argument is passed instead of
6106 the argument itself. The pointer is passed in whatever way is
6107 appropriate for passing a pointer to that type. */
6109 static bool
6110 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6111 enum machine_mode mode ATTRIBUTE_UNUSED,
6112 const_tree type, bool named ATTRIBUTE_UNUSED)
6114 /* See Windows x64 Software Convention. */
6115 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6117 int msize = (int) GET_MODE_SIZE (mode);
6118 if (type)
6120 /* Arrays are passed by reference. */
6121 if (TREE_CODE (type) == ARRAY_TYPE)
6122 return true;
6124 if (AGGREGATE_TYPE_P (type))
6126 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6127 are passed by reference. */
6128 msize = int_size_in_bytes (type);
6132 /* __m128 is passed by reference. */
6133 switch (msize) {
6134 case 1: case 2: case 4: case 8:
6135 break;
6136 default:
6137 return true;
6140 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6141 return 1;
6143 return 0;
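/* Under the Windows x64 convention this means, for instance (hypothetical
   declarations):

     struct s3 { char c[3]; };   3 bytes  -> passed by reference
     struct s8 { int a, b; };    8 bytes  -> passed by value in a register
     int arr[2];                 array    -> passed by reference
     __m128 v;                   16 bytes -> passed by reference  */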
6146 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
6147 passing ABI. */
6148 static bool
6149 contains_aligned_value_p (tree type)
6151 enum machine_mode mode = TYPE_MODE (type);
6152 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6153 || mode == TDmode
6154 || mode == TFmode
6155 || mode == TCmode)
6156 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6157 return true;
6158 if (TYPE_ALIGN (type) < 128)
6159 return false;
6161 if (AGGREGATE_TYPE_P (type))
6163 /* Walk the aggregates recursively. */
6164 switch (TREE_CODE (type))
6166 case RECORD_TYPE:
6167 case UNION_TYPE:
6168 case QUAL_UNION_TYPE:
6170 tree field;
6172 /* Walk all the structure fields. */
6173 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6175 if (TREE_CODE (field) == FIELD_DECL
6176 && contains_aligned_value_p (TREE_TYPE (field)))
6177 return true;
6179 break;
6182 case ARRAY_TYPE:
6183 /* Just in case some language passes arrays by value. */
6184 if (contains_aligned_value_p (TREE_TYPE (type)))
6185 return true;
6186 break;
6188 default:
6189 gcc_unreachable ();
6192 return false;
6195 /* Gives the alignment boundary, in bits, of an argument with the
6196 specified mode and type. */
6199 ix86_function_arg_boundary (enum machine_mode mode, tree type)
6201 int align;
6202 if (type)
6204 /* Since the canonical type is used for the call, convert TYPE to
6205 its canonical type if needed. */
6206 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
6207 type = TYPE_CANONICAL (type);
6208 align = TYPE_ALIGN (type);
6210 else
6211 align = GET_MODE_ALIGNMENT (mode);
6212 if (align < PARM_BOUNDARY)
6213 align = PARM_BOUNDARY;
6214 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6215 natural boundaries. */
6216 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6218 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6219 make an exception for SSE modes since these require 128bit
6220 alignment.
6222 The handling here differs from field_alignment. ICC aligns MMX
6223 arguments to 4 byte boundaries, while structure fields are aligned
6224 to 8 byte boundaries. */
6225 if (!type)
6227 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6228 align = PARM_BOUNDARY;
6230 else
6232 if (!contains_aligned_value_p (type))
6233 align = PARM_BOUNDARY;
6236 if (align > BIGGEST_ALIGNMENT)
6237 align = BIGGEST_ALIGNMENT;
6238 return align;
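/* For example, in 32-bit mode a plain int argument stays at the 32-bit
   PARM_BOUNDARY, while an __m128 argument (or a struct containing one)
   is aligned to a 128-bit boundary; in 64-bit mode the type's natural
   alignment is used directly, bounded below by PARM_BOUNDARY and above
   by BIGGEST_ALIGNMENT. */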
6241 /* Return true if N is a possible register number for a function value. */
6243 bool
6244 ix86_function_value_regno_p (int regno)
6246 switch (regno)
6248 case 0:
6249 return true;
6251 case FIRST_FLOAT_REG:
6252 /* TODO: The function should depend on current function ABI but
6253 builtins.c would need updating then. Therefore we use the
6254 default ABI. */
6255 if (TARGET_64BIT && ix86_abi == MS_ABI)
6256 return false;
6257 return TARGET_FLOAT_RETURNS_IN_80387;
6259 case FIRST_SSE_REG:
6260 return TARGET_SSE;
6262 case FIRST_MMX_REG:
6263 if (TARGET_MACHO || TARGET_64BIT)
6264 return false;
6265 return TARGET_MMX;
6268 return false;
6271 /* Define how to find the value returned by a function.
6272 VALTYPE is the data type of the value (as a tree).
6273 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6274 otherwise, FUNC is 0. */
6276 static rtx
6277 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6278 const_tree fntype, const_tree fn)
6280 unsigned int regno;
6282 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6283 we normally prevent this case when mmx is not available. However
6284 some ABIs may require the result to be returned like DImode. */
6285 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6286 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6288 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6289 we prevent this case when sse is not available. However some ABIs
6290 may require the result to be returned like integer TImode. */
6291 else if (mode == TImode
6292 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6293 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6295 /* 32-byte vector modes in %ymm0. */
6296 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6297 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6299 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6300 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6301 regno = FIRST_FLOAT_REG;
6302 else
6303 /* Most things go in %eax. */
6304 regno = AX_REG;
6306 /* Override FP return register with %xmm0 for local functions when
6307 SSE math is enabled or for functions with sseregparm attribute. */
6308 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6310 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6311 if ((sse_level >= 1 && mode == SFmode)
6312 || (sse_level == 2 && mode == DFmode))
6313 regno = FIRST_SSE_REG;
6316 /* OImode shouldn't be used directly. */
6317 gcc_assert (mode != OImode);
6319 return gen_rtx_REG (orig_mode, regno);
6322 static rtx
6323 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6324 const_tree valtype)
6326 rtx ret;
6328 /* Handle libcalls, which don't provide a type node. */
6329 if (valtype == NULL)
6331 switch (mode)
6333 case SFmode:
6334 case SCmode:
6335 case DFmode:
6336 case DCmode:
6337 case TFmode:
6338 case SDmode:
6339 case DDmode:
6340 case TDmode:
6341 return gen_rtx_REG (mode, FIRST_SSE_REG);
6342 case XFmode:
6343 case XCmode:
6344 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6345 case TCmode:
6346 return NULL;
6347 default:
6348 return gen_rtx_REG (mode, AX_REG);
6352 ret = construct_container (mode, orig_mode, valtype, 1,
6353 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6354 x86_64_int_return_registers, 0);
6356 /* For zero sized structures, construct_container returns NULL, but we
6357 need to keep the rest of the compiler happy by returning a meaningful value. */
6358 if (!ret)
6359 ret = gen_rtx_REG (orig_mode, AX_REG);
6361 return ret;
6364 static rtx
6365 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6367 unsigned int regno = AX_REG;
6369 if (TARGET_SSE)
6371 switch (GET_MODE_SIZE (mode))
6373 case 16:
6374 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6375 && !COMPLEX_MODE_P (mode))
6376 regno = FIRST_SSE_REG;
6377 break;
6378 case 8:
6379 case 4:
6380 if (mode == SFmode || mode == DFmode)
6381 regno = FIRST_SSE_REG;
6382 break;
6383 default:
6384 break;
6387 return gen_rtx_REG (orig_mode, regno);
6390 static rtx
6391 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6392 enum machine_mode orig_mode, enum machine_mode mode)
6394 const_tree fn, fntype;
6396 fn = NULL_TREE;
6397 if (fntype_or_decl && DECL_P (fntype_or_decl))
6398 fn = fntype_or_decl;
6399 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6401 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6402 return function_value_ms_64 (orig_mode, mode);
6403 else if (TARGET_64BIT)
6404 return function_value_64 (orig_mode, mode, valtype);
6405 else
6406 return function_value_32 (orig_mode, mode, fntype, fn);
6409 static rtx
6410 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6411 bool outgoing ATTRIBUTE_UNUSED)
6413 enum machine_mode mode, orig_mode;
6415 orig_mode = TYPE_MODE (valtype);
6416 mode = type_natural_mode (valtype, NULL);
6417 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6421 ix86_libcall_value (enum machine_mode mode)
6423 return ix86_function_value_1 (NULL, NULL, mode, mode);
6426 /* Return true iff type is returned in memory. */
6428 static int ATTRIBUTE_UNUSED
6429 return_in_memory_32 (const_tree type, enum machine_mode mode)
6431 HOST_WIDE_INT size;
6433 if (mode == BLKmode)
6434 return 1;
6436 size = int_size_in_bytes (type);
6438 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6439 return 0;
6441 if (VECTOR_MODE_P (mode) || mode == TImode)
6443 /* User-created vectors small enough to fit in EAX. */
6444 if (size < 8)
6445 return 0;
6447 /* MMX/3dNow values are returned in MM0,
6448 except when it doesn't exist. */
6449 if (size == 8)
6450 return (TARGET_MMX ? 0 : 1);
6452 /* SSE values are returned in XMM0, except when it doesn't exist. */
6453 if (size == 16)
6454 return (TARGET_SSE ? 0 : 1);
6456 /* AVX values are returned in YMM0, except when it doesn't exist. */
6457 if (size == 32)
6458 return TARGET_AVX ? 0 : 1;
6461 if (mode == XFmode)
6462 return 0;
6464 if (size > 12)
6465 return 1;
6467 /* OImode shouldn't be used directly. */
6468 gcc_assert (mode != OImode);
6470 return 0;
6473 static int ATTRIBUTE_UNUSED
6474 return_in_memory_64 (const_tree type, enum machine_mode mode)
6476 int needed_intregs, needed_sseregs;
6477 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6480 static int ATTRIBUTE_UNUSED
6481 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6483 HOST_WIDE_INT size = int_size_in_bytes (type);
6485 /* __m128 is returned in xmm0. */
6486 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6487 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6488 return 0;
6490 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
6491 return (size != 1 && size != 2 && size != 4 && size != 8);
6494 static bool
6495 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6497 #ifdef SUBTARGET_RETURN_IN_MEMORY
6498 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6499 #else
6500 const enum machine_mode mode = type_natural_mode (type, NULL);
6502 if (TARGET_64BIT)
6504 if (ix86_function_type_abi (fntype) == MS_ABI)
6505 return return_in_memory_ms_64 (type, mode);
6506 else
6507 return return_in_memory_64 (type, mode);
6509 else
6510 return return_in_memory_32 (type, mode);
6511 #endif
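/* Roughly, in 32-bit mode this means: a 16-byte vector is returned in
   %xmm0 when SSE is enabled and in memory otherwise, an XFmode long
   double is returned in %st(0), and BLKmode aggregates are always
   returned in memory. */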
6514 /* Return true iff TYPE is returned in memory. This version is used
6515 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6516 but differs notably in that when MMX is available, 8-byte vectors
6517 are returned in memory, rather than in MMX registers. */
6519 bool
6520 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6522 int size;
6523 enum machine_mode mode = type_natural_mode (type, NULL);
6525 if (TARGET_64BIT)
6526 return return_in_memory_64 (type, mode);
6528 if (mode == BLKmode)
6529 return 1;
6531 size = int_size_in_bytes (type);
6533 if (VECTOR_MODE_P (mode))
6535 /* Return in memory only if MMX registers *are* available. This
6536 seems backwards, but it is consistent with the existing
6537 Solaris x86 ABI. */
6538 if (size == 8)
6539 return TARGET_MMX;
6540 if (size == 16)
6541 return !TARGET_SSE;
6543 else if (mode == TImode)
6544 return !TARGET_SSE;
6545 else if (mode == XFmode)
6546 return 0;
6548 return size > 12;
6551 /* When returning SSE vector types, we have a choice of either
6552 (1) being abi incompatible with a -march switch, or
6553 (2) generating an error.
6554 Given no good solution, I think the safest thing is one warning.
6555 The user won't be able to use -Werror, but....
6557 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6558 called in response to actually generating a caller or callee that
6559 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6560 via aggregate_value_p for general type probing from tree-ssa. */
6562 static rtx
6563 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6565 static bool warnedsse, warnedmmx;
6567 if (!TARGET_64BIT && type)
6569 /* Look at the return type of the function, not the function type. */
6570 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6572 if (!TARGET_SSE && !warnedsse)
6574 if (mode == TImode
6575 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6577 warnedsse = true;
6578 warning (0, "SSE vector return without SSE enabled "
6579 "changes the ABI");
6583 if (!TARGET_MMX && !warnedmmx)
6585 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6587 warnedmmx = true;
6588 warning (0, "MMX vector return without MMX enabled "
6589 "changes the ABI");
6594 return NULL;
6598 /* Create the va_list data type. */
6600 /* Returns the calling convention specific va_list data type.
6601 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6603 static tree
6604 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6606 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6608 /* For i386 we use a plain pointer to the argument area. */
6609 if (!TARGET_64BIT || abi == MS_ABI)
6610 return build_pointer_type (char_type_node);
6612 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6613 type_decl = build_decl (BUILTINS_LOCATION,
6614 TYPE_DECL, get_identifier ("__va_list_tag"), record);
6616 f_gpr = build_decl (BUILTINS_LOCATION,
6617 FIELD_DECL, get_identifier ("gp_offset"),
6618 unsigned_type_node);
6619 f_fpr = build_decl (BUILTINS_LOCATION,
6620 FIELD_DECL, get_identifier ("fp_offset"),
6621 unsigned_type_node);
6622 f_ovf = build_decl (BUILTINS_LOCATION,
6623 FIELD_DECL, get_identifier ("overflow_arg_area"),
6624 ptr_type_node);
6625 f_sav = build_decl (BUILTINS_LOCATION,
6626 FIELD_DECL, get_identifier ("reg_save_area"),
6627 ptr_type_node);
6629 va_list_gpr_counter_field = f_gpr;
6630 va_list_fpr_counter_field = f_fpr;
6632 DECL_FIELD_CONTEXT (f_gpr) = record;
6633 DECL_FIELD_CONTEXT (f_fpr) = record;
6634 DECL_FIELD_CONTEXT (f_ovf) = record;
6635 DECL_FIELD_CONTEXT (f_sav) = record;
6637 TREE_CHAIN (record) = type_decl;
6638 TYPE_NAME (record) = type_decl;
6639 TYPE_FIELDS (record) = f_gpr;
6640 TREE_CHAIN (f_gpr) = f_fpr;
6641 TREE_CHAIN (f_fpr) = f_ovf;
6642 TREE_CHAIN (f_ovf) = f_sav;
6644 layout_type (record);
6646 /* The correct type is an array type of one element. */
6647 return build_array_type (record, build_index_type (size_zero_node));
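/* The record built above corresponds to the familiar SysV x86-64 va_list
   layout, i.e. roughly the C-level

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];  */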
6650 /* Set up the builtin va_list data type and, for 64-bit, the additional
6651 calling convention specific va_list data types. */
6653 static tree
6654 ix86_build_builtin_va_list (void)
6656 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
6658 /* Initialize ABI specific va_list builtin types. */
6659 if (TARGET_64BIT)
6661 tree t;
6662 if (ix86_abi == MS_ABI)
6664 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6665 if (TREE_CODE (t) != RECORD_TYPE)
6666 t = build_variant_type_copy (t);
6667 sysv_va_list_type_node = t;
6669 else
6671 t = ret;
6672 if (TREE_CODE (t) != RECORD_TYPE)
6673 t = build_variant_type_copy (t);
6674 sysv_va_list_type_node = t;
6676 if (ix86_abi != MS_ABI)
6678 t = ix86_build_builtin_va_list_abi (MS_ABI);
6679 if (TREE_CODE (t) != RECORD_TYPE)
6680 t = build_variant_type_copy (t);
6681 ms_va_list_type_node = t;
6683 else
6685 t = ret;
6686 if (TREE_CODE (t) != RECORD_TYPE)
6687 t = build_variant_type_copy (t);
6688 ms_va_list_type_node = t;
6692 return ret;
6695 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
6697 static void
6698 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6700 rtx save_area, mem;
6701 rtx label;
6702 rtx label_ref;
6703 rtx tmp_reg;
6704 rtx nsse_reg;
6705 alias_set_type set;
6706 int i;
6707 int regparm = ix86_regparm;
6709 if (cum->call_abi != ix86_abi)
6710 regparm = (ix86_abi != SYSV_ABI
6711 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
6713 /* GPR size of varargs save area. */
6714 if (cfun->va_list_gpr_size)
6715 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6716 else
6717 ix86_varargs_gpr_size = 0;
6719 /* FPR size of varargs save area. We don't need it if we don't pass
6720 anything in SSE registers. */
6721 if (cum->sse_nregs && cfun->va_list_fpr_size)
6722 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6723 else
6724 ix86_varargs_fpr_size = 0;
6726 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6727 return;
6729 save_area = frame_pointer_rtx;
6730 set = get_varargs_alias_set ();
6732 for (i = cum->regno;
6733 i < regparm
6734 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6735 i++)
6737 mem = gen_rtx_MEM (Pmode,
6738 plus_constant (save_area, i * UNITS_PER_WORD));
6739 MEM_NOTRAP_P (mem) = 1;
6740 set_mem_alias_set (mem, set);
6741 emit_move_insn (mem, gen_rtx_REG (Pmode,
6742 x86_64_int_parameter_registers[i]));
6745 if (ix86_varargs_fpr_size)
6747 /* Now emit code to save SSE registers. The AX parameter contains the
6748 number of SSE parameter registers used to call this function. We use
6749 the sse_prologue_save insn template, which produces a computed jump
6750 across the SSE saves. We need some preparation work to get this working. */
6752 label = gen_label_rtx ();
6753 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6755 /* Compute the address to jump to:
6756 label - eax*4 + nnamed_sse_arguments*4, or
6757 label - eax*5 + nnamed_sse_arguments*5 for AVX. */
6758 tmp_reg = gen_reg_rtx (Pmode);
6759 nsse_reg = gen_reg_rtx (Pmode);
6760 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6761 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6762 gen_rtx_MULT (Pmode, nsse_reg,
6763 GEN_INT (4))));
6765 /* vmovaps is one byte longer than movaps. */
6766 if (TARGET_AVX)
6767 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6768 gen_rtx_PLUS (Pmode, tmp_reg,
6769 nsse_reg)));
6771 if (cum->sse_regno)
6772 emit_move_insn
6773 (nsse_reg,
6774 gen_rtx_CONST (DImode,
6775 gen_rtx_PLUS (DImode,
6776 label_ref,
6777 GEN_INT (cum->sse_regno
6778 * (TARGET_AVX ? 5 : 4)))));
6779 else
6780 emit_move_insn (nsse_reg, label_ref);
6781 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6783 /* Compute the address of the memory block we save into. We always use a
6784 pointer pointing 127 bytes after the first byte to store - this is needed
6785 to keep instruction sizes limited to 4 bytes (5 bytes for AVX) with a one
6786 byte displacement. */
6787 tmp_reg = gen_reg_rtx (Pmode);
6788 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6789 plus_constant (save_area,
6790 ix86_varargs_gpr_size + 127)));
6791 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6792 MEM_NOTRAP_P (mem) = 1;
6793 set_mem_alias_set (mem, set);
6794 set_mem_align (mem, BITS_PER_WORD);
6796 /* And finally do the dirty job! */
6797 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6798 GEN_INT (cum->sse_regno), label));
6802 static void
6803 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6805 alias_set_type set = get_varargs_alias_set ();
6806 int i;
6808 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
6810 rtx reg, mem;
6812 mem = gen_rtx_MEM (Pmode,
6813 plus_constant (virtual_incoming_args_rtx,
6814 i * UNITS_PER_WORD));
6815 MEM_NOTRAP_P (mem) = 1;
6816 set_mem_alias_set (mem, set);
6818 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6819 emit_move_insn (mem, reg);
6823 static void
6824 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6825 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6826 int no_rtl)
6828 CUMULATIVE_ARGS next_cum;
6829 tree fntype;
6831 /* This argument doesn't appear to be used anymore. Which is good,
6832 because the old code here didn't suppress rtl generation. */
6833 gcc_assert (!no_rtl);
6835 if (!TARGET_64BIT)
6836 return;
6838 fntype = TREE_TYPE (current_function_decl);
6840 /* For varargs, we do not want to skip the dummy va_dcl argument.
6841 For stdargs, we do want to skip the last named argument. */
6842 next_cum = *cum;
6843 if (stdarg_p (fntype))
6844 function_arg_advance (&next_cum, mode, type, 1);
6846 if (cum->call_abi == MS_ABI)
6847 setup_incoming_varargs_ms_64 (&next_cum);
6848 else
6849 setup_incoming_varargs_64 (&next_cum);
6852 /* Check whether TYPE is a va_list of kind char *. */
6854 static bool
6855 is_va_list_char_pointer (tree type)
6857 tree canonic;
6859 /* For 32-bit it is always true. */
6860 if (!TARGET_64BIT)
6861 return true;
6862 canonic = ix86_canonical_va_list_type (type);
6863 return (canonic == ms_va_list_type_node
6864 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
6867 /* Implement va_start. */
6869 static void
6870 ix86_va_start (tree valist, rtx nextarg)
6872 HOST_WIDE_INT words, n_gpr, n_fpr;
6873 tree f_gpr, f_fpr, f_ovf, f_sav;
6874 tree gpr, fpr, ovf, sav, t;
6875 tree type;
6877 /* Only the 64-bit target needs something special. */
6878 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6880 std_expand_builtin_va_start (valist, nextarg);
6881 return;
6884 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6885 f_fpr = TREE_CHAIN (f_gpr);
6886 f_ovf = TREE_CHAIN (f_fpr);
6887 f_sav = TREE_CHAIN (f_ovf);
6889 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6890 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6891 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6892 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6893 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6895 /* Count number of gp and fp argument registers used. */
6896 words = crtl->args.info.words;
6897 n_gpr = crtl->args.info.regno;
6898 n_fpr = crtl->args.info.sse_regno;
6900 if (cfun->va_list_gpr_size)
6902 type = TREE_TYPE (gpr);
6903 t = build2 (MODIFY_EXPR, type,
6904 gpr, build_int_cst (type, n_gpr * 8));
6905 TREE_SIDE_EFFECTS (t) = 1;
6906 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6909 if (TARGET_SSE && cfun->va_list_fpr_size)
6911 type = TREE_TYPE (fpr);
6912 t = build2 (MODIFY_EXPR, type, fpr,
6913 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6914 TREE_SIDE_EFFECTS (t) = 1;
6915 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6918 /* Find the overflow area. */
6919 type = TREE_TYPE (ovf);
6920 t = make_tree (type, crtl->args.internal_arg_pointer);
6921 if (words != 0)
6922 t = build2 (POINTER_PLUS_EXPR, type, t,
6923 size_int (words * UNITS_PER_WORD));
6924 t = build2 (MODIFY_EXPR, type, ovf, t);
6925 TREE_SIDE_EFFECTS (t) = 1;
6926 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6928 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6930 /* Find the register save area.
6931 The prologue of the function saves it right above the stack frame. */
6932 type = TREE_TYPE (sav);
6933 t = make_tree (type, frame_pointer_rtx);
6934 if (!ix86_varargs_gpr_size)
6935 t = build2 (POINTER_PLUS_EXPR, type, t,
6936 size_int (-8 * X86_64_REGPARM_MAX));
6937 t = build2 (MODIFY_EXPR, type, sav, t);
6938 TREE_SIDE_EFFECTS (t) = 1;
6939 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
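/* As a concrete sketch, after va_start in a hypothetical

     void f (int a, ...)

   under the SysV 64-bit ABI: gp_offset is 8 (one named integer register
   consumed), fp_offset is 48 (8 * X86_64_REGPARM_MAX, no named SSE
   registers used), overflow_arg_area points to where the first
   stack-passed vararg would live, and reg_save_area points to the block
   saved by the prologue. */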
6943 /* Implement va_arg. */
6945 static tree
6946 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6947 gimple_seq *post_p)
6949 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6950 tree f_gpr, f_fpr, f_ovf, f_sav;
6951 tree gpr, fpr, ovf, sav, t;
6952 int size, rsize;
6953 tree lab_false, lab_over = NULL_TREE;
6954 tree addr, t2;
6955 rtx container;
6956 int indirect_p = 0;
6957 tree ptrtype;
6958 enum machine_mode nat_mode;
6959 int arg_boundary;
6961 /* Only the 64-bit target needs something special. */
6962 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6963 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6965 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6966 f_fpr = TREE_CHAIN (f_gpr);
6967 f_ovf = TREE_CHAIN (f_fpr);
6968 f_sav = TREE_CHAIN (f_ovf);
6970 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
6971 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
6972 valist = build_va_arg_indirect_ref (valist);
6973 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6974 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6975 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6977 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6978 if (indirect_p)
6979 type = build_pointer_type (type);
6980 size = int_size_in_bytes (type);
6981 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6983 nat_mode = type_natural_mode (type, NULL);
6984 switch (nat_mode)
6986 case V8SFmode:
6987 case V8SImode:
6988 case V32QImode:
6989 case V16HImode:
6990 case V4DFmode:
6991 case V4DImode:
6992 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6993 if (ix86_cfun_abi () == SYSV_ABI)
6995 container = NULL;
6996 break;
6999 default:
7000 container = construct_container (nat_mode, TYPE_MODE (type),
7001 type, 0, X86_64_REGPARM_MAX,
7002 X86_64_SSE_REGPARM_MAX, intreg,
7004 break;
7007 /* Pull the value out of the saved registers. */
7009 addr = create_tmp_var (ptr_type_node, "addr");
7011 if (container)
7013 int needed_intregs, needed_sseregs;
7014 bool need_temp;
7015 tree int_addr, sse_addr;
7017 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7018 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7020 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7022 need_temp = (!REG_P (container)
7023 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7024 || TYPE_ALIGN (type) > 128));
7026 /* In case we are passing a structure, verify that it is a consecutive
7027 block in the register save area. If not, we need to do moves. */
7028 if (!need_temp && !REG_P (container))
7030 /* Verify that all registers are strictly consecutive. */
7031 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7033 int i;
7035 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7037 rtx slot = XVECEXP (container, 0, i);
7038 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7039 || INTVAL (XEXP (slot, 1)) != i * 16)
7040 need_temp = 1;
7043 else
7045 int i;
7047 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7049 rtx slot = XVECEXP (container, 0, i);
7050 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7051 || INTVAL (XEXP (slot, 1)) != i * 8)
7052 need_temp = 1;
7056 if (!need_temp)
7058 int_addr = addr;
7059 sse_addr = addr;
7061 else
7063 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7064 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7067 /* First ensure that we fit completely in registers. */
7068 if (needed_intregs)
7070 t = build_int_cst (TREE_TYPE (gpr),
7071 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7072 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7073 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7074 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7075 gimplify_and_add (t, pre_p);
7077 if (needed_sseregs)
7079 t = build_int_cst (TREE_TYPE (fpr),
7080 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7081 + X86_64_REGPARM_MAX * 8);
7082 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7083 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7084 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7085 gimplify_and_add (t, pre_p);
7088 /* Compute index to start of area used for integer regs. */
7089 if (needed_intregs)
7091 /* int_addr = gpr + sav; */
7092 t = fold_convert (sizetype, gpr);
7093 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7094 gimplify_assign (int_addr, t, pre_p);
7096 if (needed_sseregs)
7098 /* sse_addr = fpr + sav; */
7099 t = fold_convert (sizetype, fpr);
7100 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7101 gimplify_assign (sse_addr, t, pre_p);
7103 if (need_temp)
7105 int i;
7106 tree temp = create_tmp_var (type, "va_arg_tmp");
7108 /* addr = &temp; */
7109 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7110 gimplify_assign (addr, t, pre_p);
7112 for (i = 0; i < XVECLEN (container, 0); i++)
7114 rtx slot = XVECEXP (container, 0, i);
7115 rtx reg = XEXP (slot, 0);
7116 enum machine_mode mode = GET_MODE (reg);
7117 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
7118 tree addr_type = build_pointer_type (piece_type);
7119 tree daddr_type = build_pointer_type_for_mode (piece_type,
7120 ptr_mode, true);
7121 tree src_addr, src;
7122 int src_offset;
7123 tree dest_addr, dest;
7125 if (SSE_REGNO_P (REGNO (reg)))
7127 src_addr = sse_addr;
7128 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7130 else
7132 src_addr = int_addr;
7133 src_offset = REGNO (reg) * 8;
7135 src_addr = fold_convert (addr_type, src_addr);
7136 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7137 size_int (src_offset));
7138 src = build_va_arg_indirect_ref (src_addr);
7140 dest_addr = fold_convert (daddr_type, addr);
7141 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7142 size_int (INTVAL (XEXP (slot, 1))));
7143 dest = build_va_arg_indirect_ref (dest_addr);
7145 gimplify_assign (dest, src, pre_p);
7149 if (needed_intregs)
7151 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7152 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7153 gimplify_assign (gpr, t, pre_p);
7156 if (needed_sseregs)
7158 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7159 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7160 gimplify_assign (fpr, t, pre_p);
7163 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7165 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7168 /* ... otherwise out of the overflow area. */
7170 /* When we align a parameter on the stack for the caller, if the
7171 parameter alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it
7172 will be aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the
7173 callee here with the caller. */
7174 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7175 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7176 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7178 /* Care for on-stack alignment if needed. */
7179 if (arg_boundary <= 64
7180 || integer_zerop (TYPE_SIZE (type)))
7181 t = ovf;
7182 else
7184 HOST_WIDE_INT align = arg_boundary / 8;
7185 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7186 size_int (align - 1));
7187 t = fold_convert (sizetype, t);
7188 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7189 size_int (-align));
7190 t = fold_convert (TREE_TYPE (ovf), t);
7192 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7193 gimplify_assign (addr, t, pre_p);
7195 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7196 size_int (rsize * UNITS_PER_WORD));
7197 gimplify_assign (unshare_expr (ovf), t, pre_p);
7199 if (container)
7200 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7202 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7203 addr = fold_convert (ptrtype, addr);
7205 if (indirect_p)
7206 addr = build_va_arg_indirect_ref (addr);
7207 return build_va_arg_indirect_ref (addr);
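/* A sketch (illustration, not part of this file) of the pseudo-gimple
   emitted above for a value needing one GP register, assuming
   X86_64_REGPARM_MAX == 6:

     if (gpr >= 6*8) goto lab_false;     // register save area exhausted?
     addr = sav + gpr;  gpr += 8;        // take it from the save area
     goto lab_over;
   lab_false:
     addr = align (ovf);  ovf += size;   // take it from the overflow area
   lab_over:
     ... dereference addr ...                                          */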
7210 /* Return nonzero if OPNUM's MEM should be matched
7211 in movabs* patterns. */
7214 ix86_check_movabs (rtx insn, int opnum)
7216 rtx set, mem;
7218 set = PATTERN (insn);
7219 if (GET_CODE (set) == PARALLEL)
7220 set = XVECEXP (set, 0, 0);
7221 gcc_assert (GET_CODE (set) == SET);
7222 mem = XEXP (set, opnum);
7223 while (GET_CODE (mem) == SUBREG)
7224 mem = SUBREG_REG (mem);
7225 gcc_assert (MEM_P (mem));
7226 return (volatile_ok || !MEM_VOLATILE_P (mem));
7229 /* Initialize the table of extra 80387 mathematical constants. */
7231 static void
7232 init_ext_80387_constants (void)
7234 static const char * cst[5] =
7236 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7237 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7238 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7239 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7240 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7242 int i;
7244 for (i = 0; i < 5; i++)
7246 real_from_string (&ext_80387_constants_table[i], cst[i]);
7247 /* Ensure each constant is rounded to XFmode precision. */
7248 real_convert (&ext_80387_constants_table[i],
7249 XFmode, &ext_80387_constants_table[i]);
7252 ext_80387_constants_init = 1;
7255 /* Return nonzero if the constant is something that can be loaded with
7256 a special instruction. */
7259 standard_80387_constant_p (rtx x)
7261 enum machine_mode mode = GET_MODE (x);
7263 REAL_VALUE_TYPE r;
7265 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7266 return -1;
7268 if (x == CONST0_RTX (mode))
7269 return 1;
7270 if (x == CONST1_RTX (mode))
7271 return 2;
7273 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7275 /* For XFmode constants, try to find a special 80387 instruction when
7276 optimizing for size or on those CPUs that benefit from them. */
7277 if (mode == XFmode
7278 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7280 int i;
7282 if (! ext_80387_constants_init)
7283 init_ext_80387_constants ();
7285 for (i = 0; i < 5; i++)
7286 if (real_identical (&r, &ext_80387_constants_table[i]))
7287 return i + 3;
7290 /* A load of the constant -0.0 or -1.0 will be split into an
7291 fldz;fchs or fld1;fchs sequence. */
7292 if (real_isnegzero (&r))
7293 return 8;
7294 if (real_identical (&r, &dconstm1))
7295 return 9;
7297 return 0;
7300 /* Return the opcode of the special instruction to be used to load
7301 the constant X. */
7303 const char *
7304 standard_80387_constant_opcode (rtx x)
7306 switch (standard_80387_constant_p (x))
7308 case 1:
7309 return "fldz";
7310 case 2:
7311 return "fld1";
7312 case 3:
7313 return "fldlg2";
7314 case 4:
7315 return "fldln2";
7316 case 5:
7317 return "fldl2e";
7318 case 6:
7319 return "fldl2t";
7320 case 7:
7321 return "fldpi";
7322 case 8:
7323 case 9:
7324 return "#";
7325 default:
7326 gcc_unreachable ();
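/* Illustrative sketch (an assumption, not code from this file): a move
   pattern can combine the two helpers above to emit a one-insn load:

     if (standard_80387_constant_p (operands[1]) > 0)
       return standard_80387_constant_opcode (operands[1]);

   e.g. an XFmode CONST_DOUBLE equal to pi yields 7 and thus "fldpi".  */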
7330 /* Return the CONST_DOUBLE representing the 80387 constant that is
7331 loaded by the specified special instruction. The argument IDX
7332 matches the return value from standard_80387_constant_p. */
7335 standard_80387_constant_rtx (int idx)
7337 int i;
7339 if (! ext_80387_constants_init)
7340 init_ext_80387_constants ();
7342 switch (idx)
7344 case 3:
7345 case 4:
7346 case 5:
7347 case 6:
7348 case 7:
7349 i = idx - 3;
7350 break;
7352 default:
7353 gcc_unreachable ();
7356 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7357 XFmode);
7360 /* Return 1 if MODE is a valid mode for SSE. */
7361 static int
7362 standard_sse_mode_p (enum machine_mode mode)
7364 switch (mode)
7366 case V16QImode:
7367 case V8HImode:
7368 case V4SImode:
7369 case V2DImode:
7370 case V4SFmode:
7371 case V2DFmode:
7372 return 1;
7374 default:
7375 return 0;
7379 /* Return 1 if X is all 0s. For all 1s, return 2 if X is in 128-bit
7380 SSE modes and SSE2 is enabled, and return 3 if X is in 256-bit AVX
7381 modes and AVX is enabled. */
7384 standard_sse_constant_p (rtx x)
7386 enum machine_mode mode = GET_MODE (x);
7388 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7389 return 1;
7390 if (vector_all_ones_operand (x, mode))
7392 if (standard_sse_mode_p (mode))
7393 return TARGET_SSE2 ? 2 : -2;
7394 else if (VALID_AVX256_REG_MODE (mode))
7395 return TARGET_AVX ? 3 : -3;
7398 return 0;
7401 /* Return the opcode of the special instruction to be used to load
7402 the constant X. */
7404 const char *
7405 standard_sse_constant_opcode (rtx insn, rtx x)
7407 switch (standard_sse_constant_p (x))
7409 case 1:
7410 switch (get_attr_mode (insn))
7412 case MODE_V4SF:
7413 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7414 case MODE_V2DF:
7415 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7416 case MODE_TI:
7417 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7418 case MODE_V8SF:
7419 return "vxorps\t%x0, %x0, %x0";
7420 case MODE_V4DF:
7421 return "vxorpd\t%x0, %x0, %x0";
7422 case MODE_OI:
7423 return "vpxor\t%x0, %x0, %x0";
7424 default:
7425 gcc_unreachable ();
7427 case 2:
7428 if (TARGET_AVX)
7429 switch (get_attr_mode (insn))
7431 case MODE_V4SF:
7432 case MODE_V2DF:
7433 case MODE_TI:
7434 return "vpcmpeqd\t%0, %0, %0";
7435 break;
7436 default:
7437 gcc_unreachable ();
7439 else
7440 return "pcmpeqd\t%0, %0";
7442 gcc_unreachable ();
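/* Sketch (assumption): for an all-zeros V4SF constant this returns
   "xorps\t%0, %0" ("vxorps\t%0, %0, %0" with AVX), so the load

	movaps	.LC0(%rip), %xmm0	; 0.0 vector from the constant pool

   is replaced by the shorter and faster

	xorps	%xmm0, %xmm0                                           */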
7445 /* Return 1 if OP contains a symbol reference. */
7448 symbolic_reference_mentioned_p (rtx op)
7450 const char *fmt;
7451 int i;
7453 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7454 return 1;
7456 fmt = GET_RTX_FORMAT (GET_CODE (op));
7457 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7459 if (fmt[i] == 'E')
7461 int j;
7463 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7464 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7465 return 1;
7468 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7469 return 1;
7472 return 0;
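/* Usage sketch (an assumption, not from this file): PIC code must not
   use absolute symbolic addresses directly, so a caller can do

     if (flag_pic && symbolic_reference_mentioned_p (disp))
       ... reject the displacement / route it through the GOT ...     */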
7475 /* Return 1 if it is appropriate to emit `ret' instructions in the
7476 body of a function. Do this only if the epilogue is simple, needing a
7477 couple of insns. Prior to reloading, we can't tell how many registers
7478 must be saved, so return 0 then. Return 0 if there is no frame
7479 marker to de-allocate. */
7482 ix86_can_use_return_insn_p (void)
7484 struct ix86_frame frame;
7486 if (! reload_completed || frame_pointer_needed)
7487 return 0;
7489 /* Don't allow more than 32k bytes of pop, since that's all we can do
7490 with one instruction. */
7491 if (crtl->args.pops_args
7492 && crtl->args.size >= 32768)
7493 return 0;
7495 ix86_compute_frame_layout (&frame);
7496 return frame.to_allocate == 0 && (frame.nregs + frame.nsseregs) == 0;
7499 /* Value should be nonzero if functions must have frame pointers.
7500 Zero means the frame pointer need not be set up (and parms may
7501 be accessed via the stack pointer) in functions that seem suitable. */
7503 static bool
7504 ix86_frame_pointer_required (void)
7506 /* If we accessed previous frames, then the generated code expects
7507 to be able to access the saved ebp value in our frame. */
7508 if (cfun->machine->accesses_prev_frame)
7509 return true;
7511 /* Several x86 OSes need a frame pointer for other reasons,
7512 usually pertaining to setjmp. */
7513 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7514 return true;
7516 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7517 the frame pointer by default. Turn it back on now if we've not
7518 got a leaf function. */
7519 if (TARGET_OMIT_LEAF_FRAME_POINTER
7520 && (!current_function_is_leaf
7521 || ix86_current_function_calls_tls_descriptor))
7522 return true;
7524 if (crtl->profile)
7525 return true;
7527 return false;
7530 /* Record that the current function accesses previous call frames. */
7532 void
7533 ix86_setup_frame_addresses (void)
7535 cfun->machine->accesses_prev_frame = 1;
7538 #ifndef USE_HIDDEN_LINKONCE
7539 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7540 # define USE_HIDDEN_LINKONCE 1
7541 # else
7542 # define USE_HIDDEN_LINKONCE 0
7543 # endif
7544 #endif
7546 static int pic_labels_used;
7548 /* Fills in the label name that should be used for a pc thunk for
7549 the given register. */
7551 static void
7552 get_pc_thunk_name (char name[32], unsigned int regno)
7554 gcc_assert (!TARGET_64BIT);
7556 if (USE_HIDDEN_LINKONCE)
7557 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7558 else
7559 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
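/* The thunk named above is emitted by ix86_file_end below; for
   regno == BX it looks like this (sketch of the assembly output):

	__i686.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret

   i.e. it loads the caller's return address -- the PC -- into %ebx.  */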
7563 /* This function generates the pc thunks used for -fpic: each one loads
7564 its register with the return address of the caller and then returns. */
7566 void
7567 ix86_file_end (void)
7569 rtx xops[2];
7570 int regno;
7572 for (regno = 0; regno < 8; ++regno)
7574 char name[32];
7576 if (! ((pic_labels_used >> regno) & 1))
7577 continue;
7579 get_pc_thunk_name (name, regno);
7581 #if TARGET_MACHO
7582 if (TARGET_MACHO)
7584 switch_to_section (darwin_sections[text_coal_section]);
7585 fputs ("\t.weak_definition\t", asm_out_file);
7586 assemble_name (asm_out_file, name);
7587 fputs ("\n\t.private_extern\t", asm_out_file);
7588 assemble_name (asm_out_file, name);
7589 fputs ("\n", asm_out_file);
7590 ASM_OUTPUT_LABEL (asm_out_file, name);
7592 else
7593 #endif
7594 if (USE_HIDDEN_LINKONCE)
7596 tree decl;
7598 decl = build_decl (BUILTINS_LOCATION,
7599 FUNCTION_DECL, get_identifier (name),
7600 error_mark_node);
7601 TREE_PUBLIC (decl) = 1;
7602 TREE_STATIC (decl) = 1;
7603 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
7605 (*targetm.asm_out.unique_section) (decl, 0);
7606 switch_to_section (get_named_section (decl, NULL, 0));
7608 (*targetm.asm_out.globalize_label) (asm_out_file, name);
7609 fputs ("\t.hidden\t", asm_out_file);
7610 assemble_name (asm_out_file, name);
7611 fputc ('\n', asm_out_file);
7612 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7614 else
7616 switch_to_section (text_section);
7617 ASM_OUTPUT_LABEL (asm_out_file, name);
7620 xops[0] = gen_rtx_REG (Pmode, regno);
7621 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7622 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7623 output_asm_insn ("ret", xops);
7626 if (NEED_INDICATE_EXEC_STACK)
7627 file_end_indicate_exec_stack ();
7630 /* Emit code for the SET_GOT patterns. */
7632 const char *
7633 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7635 rtx xops[3];
7637 xops[0] = dest;
7639 if (TARGET_VXWORKS_RTP && flag_pic)
7641 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
7642 xops[2] = gen_rtx_MEM (Pmode,
7643 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7644 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7646 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7647 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7648 an unadorned address. */
7649 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7650 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7651 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7652 return "";
7655 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
7657 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7659 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7661 if (!flag_pic)
7662 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7663 else
7664 output_asm_insn ("call\t%a2", xops);
7666 #if TARGET_MACHO
7667 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7668 is what will be referenced by the Mach-O PIC subsystem. */
7669 if (!label)
7670 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7671 #endif
7673 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7674 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7676 if (flag_pic)
7677 output_asm_insn ("pop%z0\t%0", xops);
7679 else
7681 char name[32];
7682 get_pc_thunk_name (name, REGNO (dest));
7683 pic_labels_used |= 1 << REGNO (dest);
7685 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7686 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7687 output_asm_insn ("call\t%X2", xops);
7688 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7689 is what will be referenced by the Mach-O PIC subsystem. */
7690 #if TARGET_MACHO
7691 if (!label)
7692 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7693 else
7694 targetm.asm_out.internal_label (asm_out_file, "L",
7695 CODE_LABEL_NUMBER (label));
7696 #endif
7699 if (TARGET_MACHO)
7700 return "";
7702 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7703 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7704 else
7705 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7707 return "";
7710 /* Generate a "push" pattern for input ARG. */
7712 static rtx
7713 gen_push (rtx arg)
7715 if (ix86_cfa_state->reg == stack_pointer_rtx)
7716 ix86_cfa_state->offset += UNITS_PER_WORD;
7718 return gen_rtx_SET (VOIDmode,
7719 gen_rtx_MEM (Pmode,
7720 gen_rtx_PRE_DEC (Pmode,
7721 stack_pointer_rtx)),
7722 arg);
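/* The RTL built above is, schematically (sketch):

     (set (mem:SI (pre_dec:SI (reg:SI sp))) arg)

   i.e. an ordinary push; on 64-bit targets Pmode is DImode and the
   push is 8 bytes wide, matching the CFA bookkeeping above.  */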
7725 /* Return the regno of an unused call-clobbered register if one is
7726 available for the entire function, and INVALID_REGNUM otherwise. */
7728 static unsigned int
7729 ix86_select_alt_pic_regnum (void)
7731 if (current_function_is_leaf && !crtl->profile
7732 && !ix86_current_function_calls_tls_descriptor)
7734 int i, drap;
7735 /* Can't use the same register for both PIC and DRAP. */
7736 if (crtl->drap_reg)
7737 drap = REGNO (crtl->drap_reg);
7738 else
7739 drap = -1;
7740 for (i = 2; i >= 0; --i)
7741 if (i != drap && !df_regs_ever_live_p (i))
7742 return i;
7745 return INVALID_REGNUM;
7748 /* Return 1 if we need to save REGNO. */
7749 static int
7750 ix86_save_reg (unsigned int regno, int maybe_eh_return)
7752 if (pic_offset_table_rtx
7753 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7754 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7755 || crtl->profile
7756 || crtl->calls_eh_return
7757 || crtl->uses_const_pool))
7759 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
7760 return 0;
7761 return 1;
7764 if (crtl->calls_eh_return && maybe_eh_return)
7766 unsigned i;
7767 for (i = 0; ; i++)
7769 unsigned test = EH_RETURN_DATA_REGNO (i);
7770 if (test == INVALID_REGNUM)
7771 break;
7772 if (test == regno)
7773 return 1;
7777 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
7778 return 1;
7780 return (df_regs_ever_live_p (regno)
7781 && !call_used_regs[regno]
7782 && !fixed_regs[regno]
7783 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7786 /* Return the number of saved general purpose registers. */
7788 static int
7789 ix86_nsaved_regs (void)
7791 int nregs = 0;
7792 int regno;
7794 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7795 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7796 nregs ++;
7797 return nregs;
7800 /* Return the number of saved SSE registers. */
7802 static int
7803 ix86_nsaved_sseregs (void)
7805 int nregs = 0;
7806 int regno;
7808 if (ix86_cfun_abi () != MS_ABI)
7809 return 0;
7810 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7811 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7812 nregs ++;
7813 return nregs;
7816 /* Given FROM and TO register numbers, say whether this elimination is
7817 allowed. If stack alignment is needed, we can only replace argument
7818 pointer with hard frame pointer, or replace frame pointer with stack
7819 pointer. Otherwise, frame pointer elimination is automatically
7820 handled and all other eliminations are valid. */
7823 ix86_can_eliminate (int from, int to)
7825 if (stack_realign_fp)
7826 return ((from == ARG_POINTER_REGNUM
7827 && to == HARD_FRAME_POINTER_REGNUM)
7828 || (from == FRAME_POINTER_REGNUM
7829 && to == STACK_POINTER_REGNUM));
7830 else
7831 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7834 /* Return the offset between two registers, one to be eliminated, and the other
7835 its replacement, at the start of a routine. */
7837 HOST_WIDE_INT
7838 ix86_initial_elimination_offset (int from, int to)
7840 struct ix86_frame frame;
7841 ix86_compute_frame_layout (&frame);
7843 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7844 return frame.hard_frame_pointer_offset;
7845 else if (from == FRAME_POINTER_REGNUM
7846 && to == HARD_FRAME_POINTER_REGNUM)
7847 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
7848 else
7850 gcc_assert (to == STACK_POINTER_REGNUM);
7852 if (from == ARG_POINTER_REGNUM)
7853 return frame.stack_pointer_offset;
7855 gcc_assert (from == FRAME_POINTER_REGNUM);
7856 return frame.stack_pointer_offset - frame.frame_pointer_offset;
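/* Worked example (sketch): in a 32-bit function with a frame pointer,
   hard_frame_pointer_offset is 8 (return address plus saved %ebp), so
   eliminating ARG_POINTER to HARD_FRAME_POINTER yields offset 8, and
   incoming arguments are addressed as 8(%ebp), 12(%ebp), ...  */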
7860 /* In a dynamically-aligned function, we can't know the offset from
7861 stack pointer to frame pointer, so we must ensure that setjmp
7862 eliminates fp against the hard fp (%ebp) rather than trying to
7863 index from %esp up to the top of the frame across a gap that is
7864 of unknown (at compile-time) size. */
7865 static rtx
7866 ix86_builtin_setjmp_frame_value (void)
7868 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
7871 /* Fill the ix86_frame structure with information about the frame of the currently compiled function. */
7873 static void
7874 ix86_compute_frame_layout (struct ix86_frame *frame)
7876 HOST_WIDE_INT total_size;
7877 unsigned int stack_alignment_needed;
7878 HOST_WIDE_INT offset;
7879 unsigned int preferred_alignment;
7880 HOST_WIDE_INT size = get_frame_size ();
7882 frame->nregs = ix86_nsaved_regs ();
7883 frame->nsseregs = ix86_nsaved_sseregs ();
7884 total_size = size;
7886 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7887 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7889 /* The MS ABI seems to require stack alignment to always be 16 except in
7890 function prologues. */
7891 if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7893 preferred_alignment = 16;
7894 stack_alignment_needed = 16;
7895 crtl->preferred_stack_boundary = 128;
7896 crtl->stack_alignment_needed = 128;
7899 gcc_assert (!size || stack_alignment_needed);
7900 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7901 gcc_assert (preferred_alignment <= stack_alignment_needed);
7903 /* During reload iteration the number of registers saved can change.
7904 Recompute the value as needed. Do not recompute when the number of
7905 registers didn't change, as reload makes multiple calls to this function
7906 and does not expect the decision to change within a single iteration. */
7907 if (!optimize_function_for_size_p (cfun)
7908 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7910 int count = frame->nregs;
7912 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7913 /* The fast prologue uses move instead of push to save registers. This
7914 is significantly longer, but also executes faster as modern hardware
7915 can execute the moves in parallel, but can't do that for push/pop.
7917 Be careful about choosing which prologue to emit: when the function
7918 takes many instructions to execute, we may use the slow version, and
7919 likewise when the function is known to be outside a hot spot (this is
7920 known with feedback only). Weight the size of the function by the
7921 number of registers to save, as it is cheap to use one or two push
7922 instructions but very slow to use many of them. */
7923 if (count)
7924 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7925 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7926 || (flag_branch_probabilities
7927 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7928 cfun->machine->use_fast_prologue_epilogue = false;
7929 else
7930 cfun->machine->use_fast_prologue_epilogue
7931 = !expensive_function_p (count);
7933 if (TARGET_PROLOGUE_USING_MOVE
7934 && cfun->machine->use_fast_prologue_epilogue)
7935 frame->save_regs_using_mov = true;
7936 else
7937 frame->save_regs_using_mov = false;
7940 /* Skip return address and saved base pointer. */
7941 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7943 frame->hard_frame_pointer_offset = offset;
7945 /* Set the offset to the aligned value, because the realigned frame
7946 starts from here. */
7947 if (stack_realign_fp)
7948 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7950 /* Register save area */
7951 offset += frame->nregs * UNITS_PER_WORD;
7953 /* Align SSE reg save area. */
7954 if (frame->nsseregs)
7955 frame->padding0 = ((offset + 16 - 1) & -16) - offset;
7956 else
7957 frame->padding0 = 0;
7959 /* SSE register save area. */
7960 offset += frame->padding0 + frame->nsseregs * 16;
7962 /* Va-arg area */
7963 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7964 offset += frame->va_arg_size;
7966 /* Align start of frame for local function. */
7967 frame->padding1 = ((offset + stack_alignment_needed - 1)
7968 & -stack_alignment_needed) - offset;
7970 offset += frame->padding1;
7972 /* Frame pointer points here. */
7973 frame->frame_pointer_offset = offset;
7975 offset += size;
7977 /* Add the outgoing arguments area. Can be skipped if we eliminated
7978 all the function calls as dead code.
7979 Skipping is however impossible when the function calls alloca. The
7980 alloca expander assumes that the last crtl->outgoing_args_size bytes
7981 of the stack frame are unused. */
7982 if (ACCUMULATE_OUTGOING_ARGS
7983 && (!current_function_is_leaf || cfun->calls_alloca
7984 || ix86_current_function_calls_tls_descriptor))
7986 offset += crtl->outgoing_args_size;
7987 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7989 else
7990 frame->outgoing_arguments_size = 0;
7992 /* Align stack boundary. Only needed if we're calling another function
7993 or using alloca. */
7994 if (!current_function_is_leaf || cfun->calls_alloca
7995 || ix86_current_function_calls_tls_descriptor)
7996 frame->padding2 = ((offset + preferred_alignment - 1)
7997 & -preferred_alignment) - offset;
7998 else
7999 frame->padding2 = 0;
8001 offset += frame->padding2;
8003 /* We've reached end of stack frame. */
8004 frame->stack_pointer_offset = offset;
8006 /* Size prologue needs to allocate. */
8007 frame->to_allocate =
8008 (size + frame->padding1 + frame->padding2
8009 + frame->outgoing_arguments_size + frame->va_arg_size);
8011 if ((!frame->to_allocate && frame->nregs <= 1)
8012 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
8013 frame->save_regs_using_mov = false;
8015 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8016 && current_function_sp_is_unchanging
8017 && current_function_is_leaf
8018 && !ix86_current_function_calls_tls_descriptor)
8020 frame->red_zone_size = frame->to_allocate;
8021 if (frame->save_regs_using_mov)
8022 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8023 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8024 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8026 else
8027 frame->red_zone_size = 0;
8028 frame->to_allocate -= frame->red_zone_size;
8029 frame->stack_pointer_offset -= frame->red_zone_size;
8030 #if 0
8031 fprintf (stderr, "\n");
8032 fprintf (stderr, "size: %ld\n", (long)size);
8033 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
8034 fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
8035 fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
8036 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
8037 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
8038 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
8039 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
8040 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
8041 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
8042 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
8043 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
8044 (long)frame->hard_frame_pointer_offset);
8045 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
8046 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
8047 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
8048 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
8049 #endif
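/* A sketch (illustration only) of the layout computed above, from high
   to low addresses:

	[return address]
	[saved %ebp, if frame_pointer_needed]  <- hard_frame_pointer_offset
	[GP register save area]                   nregs * UNITS_PER_WORD
	[padding0][SSE register save area]        nsseregs * 16, 16-aligned
	[va_arg register save area]
	[padding1]                             <- frame_pointer_offset
	[local variables]
	[outgoing arguments][padding2]         <- stack_pointer_offset  */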
8052 /* Emit code to save registers in the prologue. */
8054 static void
8055 ix86_emit_save_regs (void)
8057 unsigned int regno;
8058 rtx insn;
8060 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8061 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8063 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8064 RTX_FRAME_RELATED_P (insn) = 1;
8068 /* Emit code to save registers using MOV insns. First register
8069 is stored at POINTER + OFFSET. */
8070 static void
8071 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
8073 unsigned int regno;
8074 rtx insn;
8076 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8077 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8079 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
8080 Pmode, offset),
8081 gen_rtx_REG (Pmode, regno));
8082 RTX_FRAME_RELATED_P (insn) = 1;
8083 offset += UNITS_PER_WORD;
8087 /* Emit code to save SSE registers using MOV insns. First register
8088 is stored at POINTER + OFFSET. */
8089 static void
8090 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
8092 unsigned int regno;
8093 rtx insn;
8094 rtx mem;
8096 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8097 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8099 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
8100 set_mem_align (mem, 128);
8101 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
8102 RTX_FRAME_RELATED_P (insn) = 1;
8103 offset += 16;
8107 static GTY(()) rtx queued_cfa_restores;
8109 /* Add a REG_CFA_RESTORE note for REG to INSN, or queue it until the
8110 next stack manipulation insn. Don't add it if the previously
8111 saved value will be left untouched within the stack red zone until
8112 return, as unwinders can find the same value in the register and
8113 on the stack. */
8115 static void
8116 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT red_offset)
8118 if (TARGET_RED_ZONE
8119 && !TARGET_64BIT_MS_ABI
8120 && red_offset + RED_ZONE_SIZE >= 0
8121 && crtl->args.pops_args < 65536)
8122 return;
8124 if (insn)
8126 add_reg_note (insn, REG_CFA_RESTORE, reg);
8127 RTX_FRAME_RELATED_P (insn) = 1;
8129 else
8130 queued_cfa_restores
8131 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
8134 /* Add queued REG_CFA_RESTORE notes, if any, to INSN. */
8136 static void
8137 ix86_add_queued_cfa_restore_notes (rtx insn)
8139 rtx last;
8140 if (!queued_cfa_restores)
8141 return;
8142 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
8144 XEXP (last, 1) = REG_NOTES (insn);
8145 REG_NOTES (insn) = queued_cfa_restores;
8146 queued_cfa_restores = NULL_RTX;
8147 RTX_FRAME_RELATED_P (insn) = 1;
8150 /* Expand prologue or epilogue stack adjustment.
8151 The pattern exists to put a dependency on all ebp-based memory accesses.
8152 STYLE should be negative if instructions should be marked as frame
8153 related, zero if the %r11 register is live and cannot be freely used,
8154 and positive otherwise. */
8156 static void
8157 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
8158 int style, bool set_cfa)
8160 rtx insn;
8162 if (! TARGET_64BIT)
8163 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
8164 else if (x86_64_immediate_operand (offset, DImode))
8165 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
8166 else
8168 rtx r11;
8169 /* r11 is used by indirect sibcall return as well, set before the
8170 epilogue and used after the epilogue. ATM indirect sibcall
8171 shouldn't be used together with huge frame sizes in one
8172 function because of the frame_size check in sibcall.c. */
8173 gcc_assert (style);
8174 r11 = gen_rtx_REG (DImode, R11_REG);
8175 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
8176 if (style < 0)
8177 RTX_FRAME_RELATED_P (insn) = 1;
8178 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
8179 offset));
8182 if (style >= 0)
8183 ix86_add_queued_cfa_restore_notes (insn);
8185 if (set_cfa)
8187 rtx r;
8189 gcc_assert (ix86_cfa_state->reg == src);
8190 ix86_cfa_state->offset += INTVAL (offset);
8191 ix86_cfa_state->reg = dest;
8193 r = gen_rtx_PLUS (Pmode, src, offset);
8194 r = gen_rtx_SET (VOIDmode, dest, r);
8195 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
8196 RTX_FRAME_RELATED_P (insn) = 1;
8198 else if (style < 0)
8199 RTX_FRAME_RELATED_P (insn) = 1;
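/* Call sketch (assumption, not from this file): deallocating the local
   frame in the epilogue is essentially

     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				GEN_INT (frame.to_allocate), style, ...);

   which emits "add $to_allocate, %esp", or materializes the offset in
   %r11 first when it does not fit a signed 32-bit immediate.  */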
8202 /* Find an available register to be used as the dynamic realign
8203 argument pointer register. Such a register will be written in the
8204 prologue and used at the beginning of the body, so it must not be
8205 1. a parameter passing register.
8206 2. the GOT pointer.
8207 We reuse the static-chain register if it is available. Otherwise we
8208 use DI for i386 and R13 for x86-64. We chose R13 since it has a
8209 shorter encoding.
8211 Return: the regno of the chosen register. */
8213 static unsigned int
8214 find_drap_reg (void)
8216 tree decl = cfun->decl;
8218 if (TARGET_64BIT)
8220 /* Use R13 for a nested function or a function that needs a static
8221 chain. Since a function with a tail call may use any caller-saved
8222 register in the epilogue, DRAP must not use a caller-saved
8223 register in that case. */
8224 if ((decl_function_context (decl)
8225 && !DECL_NO_STATIC_CHAIN (decl))
8226 || crtl->tail_call_emit)
8227 return R13_REG;
8229 return R10_REG;
8231 else
8233 /* Use DI for a nested function or a function that needs a static
8234 chain. Since a function with a tail call may use any caller-saved
8235 register in the epilogue, DRAP must not use a caller-saved
8236 register in that case. */
8237 if ((decl_function_context (decl)
8238 && !DECL_NO_STATIC_CHAIN (decl))
8239 || crtl->tail_call_emit)
8240 return DI_REG;
8242 /* Reuse static chain register if it isn't used for parameter
8243 passing. */
8244 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8245 && !lookup_attribute ("fastcall",
8246 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8247 return CX_REG;
8248 else
8249 return DI_REG;
8253 /* Update incoming stack boundary and estimated stack alignment. */
8255 static void
8256 ix86_update_stack_boundary (void)
8258 /* Prefer the one specified at command line. */
8259 ix86_incoming_stack_boundary
8260 = (ix86_user_incoming_stack_boundary
8261 ? ix86_user_incoming_stack_boundary
8262 : ix86_default_incoming_stack_boundary);
8264 /* Incoming stack alignment can be changed on individual functions
8265 via the force_align_arg_pointer attribute. We use the smallest
8266 incoming stack boundary. */
8267 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
8268 && lookup_attribute (ix86_force_align_arg_pointer_string,
8269 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8270 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
8272 /* The incoming stack frame has to be aligned at least at
8273 parm_stack_boundary. */
8274 if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
8275 ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
8277 /* The stack at the entrance of main is aligned by the runtime. We
8278 use the smallest incoming stack boundary. */
8279 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
8280 && DECL_NAME (current_function_decl)
8281 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8282 && DECL_FILE_SCOPE_P (current_function_decl))
8283 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8285 /* x86_64 varargs need 16-byte stack alignment for the register save
8286 area. */
8287 if (TARGET_64BIT
8288 && cfun->stdarg
8289 && crtl->stack_alignment_estimated < 128)
8290 crtl->stack_alignment_estimated = 128;
8293 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
8294 needed or an rtx for DRAP otherwise. */
8296 static rtx
8297 ix86_get_drap_rtx (void)
8299 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8300 crtl->need_drap = true;
8302 if (stack_realign_drap)
8304 /* Assign DRAP to vDRAP and return vDRAP. */
8305 unsigned int regno = find_drap_reg ();
8306 rtx drap_vreg;
8307 rtx arg_ptr;
8308 rtx seq, insn;
8310 arg_ptr = gen_rtx_REG (Pmode, regno);
8311 crtl->drap_reg = arg_ptr;
8313 start_sequence ();
8314 drap_vreg = copy_to_reg (arg_ptr);
8315 seq = get_insns ();
8316 end_sequence ();
8318 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8319 RTX_FRAME_RELATED_P (insn) = 1;
8320 return drap_vreg;
8322 else
8323 return NULL;
8326 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
8328 static rtx
8329 ix86_internal_arg_pointer (void)
8331 return virtual_incoming_args_rtx;
8334 /* Finalize the stack_realign_needed flag, which guides the prologue and
8335 epilogue to be generated in the correct form. */
8336 static void
8337 ix86_finalize_stack_realign_flags (void)
8339 /* Check if stack realignment is really needed after reload, and
8340 store the result in cfun. */
8341 unsigned int incoming_stack_boundary
8342 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8343 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
8344 unsigned int stack_realign = (incoming_stack_boundary
8345 < (current_function_is_leaf
8346 ? crtl->max_used_stack_slot_alignment
8347 : crtl->stack_alignment_needed));
8349 if (crtl->stack_realign_finalized)
8351 /* After stack_realign_needed is finalized, we can no longer
8352 change it. */
8353 gcc_assert (crtl->stack_realign_needed == stack_realign);
8355 else
8357 crtl->stack_realign_needed = stack_realign;
8358 crtl->stack_realign_finalized = true;
8362 /* Expand the prologue into a bunch of separate insns. */
8364 void
8365 ix86_expand_prologue (void)
8367 rtx insn;
8368 bool pic_reg_used;
8369 struct ix86_frame frame;
8370 HOST_WIDE_INT allocate;
8372 ix86_finalize_stack_realign_flags ();
8374 /* DRAP should not coexist with stack_realign_fp */
8375 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8377 /* Initialize CFA state for before the prologue. */
8378 ix86_cfa_state->reg = stack_pointer_rtx;
8379 ix86_cfa_state->offset = INCOMING_FRAME_SP_OFFSET;
8381 ix86_compute_frame_layout (&frame);
8383 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
8384 DRAP is needed and stack realignment is really needed after reload. */
8385 if (crtl->drap_reg && crtl->stack_realign_needed)
8387 rtx x, y;
8388 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8389 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8390 ? 0 : UNITS_PER_WORD);
8392 gcc_assert (stack_realign_drap);
8394 /* Grab the argument pointer. */
8395 x = plus_constant (stack_pointer_rtx,
8396 (UNITS_PER_WORD + param_ptr_offset));
8397 y = crtl->drap_reg;
8399 /* Only need to push the parameter pointer reg if it is a
8400 caller-saved reg. */
8401 if (!call_used_regs[REGNO (crtl->drap_reg)])
8403 /* Push the arg pointer reg. */
8404 insn = emit_insn (gen_push (y));
8405 RTX_FRAME_RELATED_P (insn) = 1;
8408 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
8409 RTX_FRAME_RELATED_P (insn) = 1;
8410 ix86_cfa_state->reg = crtl->drap_reg;
8412 /* Align the stack. */
8413 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8414 stack_pointer_rtx,
8415 GEN_INT (-align_bytes)));
8416 RTX_FRAME_RELATED_P (insn) = 1;
8418 /* Replicate the return address on the stack so that the return
8419 address can be reached via the (argp - 1) slot. This is needed
8420 to implement the RETURN_ADDR_RTX macro and the intrinsic function
8421 expand_builtin_return_addr etc. */
8422 x = crtl->drap_reg;
8423 x = gen_frame_mem (Pmode,
8424 plus_constant (x, -UNITS_PER_WORD));
8425 insn = emit_insn (gen_push (x));
8426 RTX_FRAME_RELATED_P (insn) = 1;
8429 /* Note: AT&T enter does NOT have reversed args. Enter is probably
8430 slower on all targets. Also sdb doesn't like it. */
8432 if (frame_pointer_needed)
8434 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8435 RTX_FRAME_RELATED_P (insn) = 1;
8437 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8438 RTX_FRAME_RELATED_P (insn) = 1;
8440 if (ix86_cfa_state->reg == stack_pointer_rtx)
8441 ix86_cfa_state->reg = hard_frame_pointer_rtx;
8444 if (stack_realign_fp)
8446 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8447 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8449 /* Align the stack. */
8450 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8451 stack_pointer_rtx,
8452 GEN_INT (-align_bytes)));
8453 RTX_FRAME_RELATED_P (insn) = 1;
8456 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
8458 if (!frame.save_regs_using_mov)
8459 ix86_emit_save_regs ();
8460 else
8461 allocate += frame.nregs * UNITS_PER_WORD;
8463 /* When using the red zone we may start register saving before
8464 allocating the stack frame, saving one cycle of the prologue.
8465 However, avoid doing this if we are going to have to probe the
8466 stack, since at least on x86_64 the stack probe can turn into a
8467 call that clobbers a red zone location. */
8468 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8469 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8470 ix86_emit_save_regs_using_mov ((frame_pointer_needed
8471 && !crtl->stack_realign_needed)
8472 ? hard_frame_pointer_rtx
8473 : stack_pointer_rtx,
8474 -frame.nregs * UNITS_PER_WORD);
8476 if (allocate == 0)
8478 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8479 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8480 GEN_INT (-allocate), -1,
8481 ix86_cfa_state->reg == stack_pointer_rtx);
8482 else
8484 /* Only valid for Win32. */
8485 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8486 bool eax_live;
8487 rtx t;
8489 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
8491 if (cfun->machine->call_abi == MS_ABI)
8492 eax_live = false;
8493 else
8494 eax_live = ix86_eax_live_at_start_p ();
8496 if (eax_live)
8498 emit_insn (gen_push (eax));
8499 allocate -= UNITS_PER_WORD;
8502 emit_move_insn (eax, GEN_INT (allocate));
8504 if (TARGET_64BIT)
8505 insn = gen_allocate_stack_worker_64 (eax, eax);
8506 else
8507 insn = gen_allocate_stack_worker_32 (eax, eax);
8508 insn = emit_insn (insn);
8510 if (ix86_cfa_state->reg == stack_pointer_rtx)
8512 ix86_cfa_state->offset += allocate;
8513 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8514 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8515 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
8516 RTX_FRAME_RELATED_P (insn) = 1;
8519 if (eax_live)
8521 if (frame_pointer_needed)
8522 t = plus_constant (hard_frame_pointer_rtx,
8523 allocate
8524 - frame.to_allocate
8525 - frame.nregs * UNITS_PER_WORD);
8526 else
8527 t = plus_constant (stack_pointer_rtx, allocate);
8528 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
8532 if (frame.save_regs_using_mov
8533 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8534 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8536 if (!frame_pointer_needed
8537 || !frame.to_allocate
8538 || crtl->stack_realign_needed)
8539 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8540 frame.to_allocate
8541 + frame.nsseregs * 16 + frame.padding0);
8542 else
8543 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8544 -frame.nregs * UNITS_PER_WORD);
8546 if (!frame_pointer_needed
8547 || !frame.to_allocate
8548 || crtl->stack_realign_needed)
8549 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8550 frame.to_allocate);
8551 else
8552 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8553 - frame.nregs * UNITS_PER_WORD
8554 - frame.nsseregs * 16
8555 - frame.padding0);
8557 pic_reg_used = false;
8558 if (pic_offset_table_rtx
8559 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8560 || crtl->profile))
8562 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8564 if (alt_pic_reg_used != INVALID_REGNUM)
8565 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8567 pic_reg_used = true;
8570 if (pic_reg_used)
8572 if (TARGET_64BIT)
8574 if (ix86_cmodel == CM_LARGE_PIC)
8576 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8577 rtx label = gen_label_rtx ();
8578 emit_label (label);
8579 LABEL_PRESERVE_P (label) = 1;
8580 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8581 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8582 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8583 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8584 pic_offset_table_rtx, tmp_reg));
8586 else
8587 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8589 else
8590 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8593 /* In the pic_reg_used case, make sure that the got load isn't deleted
8594 when mcount needs it. Blockage to avoid call movement across mcount
8595 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
8596 note. */
8597 if (crtl->profile && pic_reg_used)
8598 emit_insn (gen_prologue_use (pic_offset_table_rtx));
8600 if (crtl->drap_reg && !crtl->stack_realign_needed)
8602 /* vDRAP is set up, but after reload it turns out stack realignment
8603 isn't necessary; here we emit prologue code to set up DRAP
8604 without the stack realignment adjustment. */
8605 int drap_bp_offset = UNITS_PER_WORD * 2;
8606 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8607 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8610 /* Prevent instructions from being scheduled into the register save push
8611 sequence when access to the red zone area is done through the frame
8612 pointer. The offset between the frame pointer and the stack pointer is
8613 calculated relative to the value of the stack pointer at the end of the
8614 function prologue, and moving instructions that access the red zone area
8615 via the frame pointer inside the push sequence violates this assumption. */
8616 if (frame_pointer_needed && frame.red_zone_size)
8617 emit_insn (gen_memory_blockage ());
8619 /* Emit cld instruction if stringops are used in the function. */
8620 if (TARGET_CLD && ix86_current_function_needs_cld)
8621 emit_insn (gen_cld ());
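/* For the common frame-pointer case the expansion above amounts to
   (sketch of the 32-bit output):

	pushl	%ebp
	movl	%esp, %ebp
	pushl	<each saved register>	; or movs after the sub below
	subl	$to_allocate, %esp                                     */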
8624 /* Emit code to restore REG using a POP insn. */
8626 static void
8627 ix86_emit_restore_reg_using_pop (rtx reg, HOST_WIDE_INT red_offset)
8629 rtx insn = emit_insn (ix86_gen_pop1 (reg));
8631 if (ix86_cfa_state->reg == crtl->drap_reg
8632 && REGNO (reg) == REGNO (crtl->drap_reg))
8634 /* Previously we'd represented the CFA as an expression
8635 like *(%ebp - 8). We've just popped that value from
8636 the stack, which means we need to reset the CFA to
8637 the drap register. This will remain until we restore
8638 the stack pointer. */
8639 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8640 RTX_FRAME_RELATED_P (insn) = 1;
8641 return;
8644 if (ix86_cfa_state->reg == stack_pointer_rtx)
8646 ix86_cfa_state->offset -= UNITS_PER_WORD;
8647 add_reg_note (insn, REG_CFA_ADJUST_CFA,
8648 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
8649 RTX_FRAME_RELATED_P (insn) = 1;
8652 /* When the frame pointer is the CFA, and we pop it, we are
8653 swapping back to the stack pointer as the CFA. This happens
8654 for stack frames that don't allocate other data, so we assume
8655 the stack pointer is now pointing at the return address, i.e.
8656 the function entry state, which makes the offset be 1 word. */
8657 else if (ix86_cfa_state->reg == hard_frame_pointer_rtx
8658 && reg == hard_frame_pointer_rtx)
8660 ix86_cfa_state->reg = stack_pointer_rtx;
8661 ix86_cfa_state->offset = UNITS_PER_WORD;
8663 add_reg_note (insn, REG_CFA_DEF_CFA,
8664 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8665 GEN_INT (UNITS_PER_WORD)));
8666 RTX_FRAME_RELATED_P (insn) = 1;
8669 ix86_add_cfa_restore_note (insn, reg, red_offset);
8672 /* Emit code to restore saved registers using POP insns. */
8674 static void
8675 ix86_emit_restore_regs_using_pop (HOST_WIDE_INT red_offset)
8677 int regno;
8679 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8680 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8682 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno),
8683 red_offset);
8684 red_offset += UNITS_PER_WORD;
8688 /* Emit code and notes for the LEAVE instruction. */
8690 static void
8691 ix86_emit_leave (HOST_WIDE_INT red_offset)
8693 rtx insn = emit_insn (ix86_gen_leave ());
8695 ix86_add_queued_cfa_restore_notes (insn);
8697 if (ix86_cfa_state->reg == hard_frame_pointer_rtx)
8699 add_reg_note (insn, REG_CFA_ADJUST_CFA,
8700 copy_rtx (XVECEXP (PATTERN (insn), 0, 0)));
8701 RTX_FRAME_RELATED_P (insn) = 1;
8702 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, red_offset);
8706 /* Emit code to restore saved registers using MOV insns. First register
8707 is restored from POINTER + OFFSET. */
8708 static void
8709 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8710 HOST_WIDE_INT red_offset,
8711 int maybe_eh_return)
8713 unsigned int regno;
8714 rtx base_address = gen_rtx_MEM (Pmode, pointer);
8715 rtx insn;
8717 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8718 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8720 rtx reg = gen_rtx_REG (Pmode, regno);
8722 /* Ensure that adjust_address won't be forced to produce a pointer
8723 out of the range allowed by the x86-64 instruction set. */
8724 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8726 rtx r11;
8728 r11 = gen_rtx_REG (DImode, R11_REG);
8729 emit_move_insn (r11, GEN_INT (offset));
8730 emit_insn (gen_adddi3 (r11, r11, pointer));
8731 base_address = gen_rtx_MEM (Pmode, r11);
8732 offset = 0;
8734 insn = emit_move_insn (reg,
8735 adjust_address (base_address, Pmode, offset));
8736 offset += UNITS_PER_WORD;
8738 if (ix86_cfa_state->reg == crtl->drap_reg
8739 && regno == REGNO (crtl->drap_reg))
8741 /* Previously we'd represented the CFA as an expression
8742 like *(%ebp - 8). We've just loaded that value from
8743 the stack, which means we need to reset the CFA to
8744 the drap register. This will remain until we restore
8745 the stack pointer. */
8746 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8747 RTX_FRAME_RELATED_P (insn) = 1;
8749 else
8750 ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
8752 red_offset += UNITS_PER_WORD;
8756 /* Emit code to restore saved SSE registers using MOV insns. First
8757 register is restored from POINTER + OFFSET. */
8758 static void
8759 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8760 HOST_WIDE_INT red_offset,
8761 int maybe_eh_return)
8763 int regno;
8764 rtx base_address = gen_rtx_MEM (TImode, pointer);
8765 rtx mem, insn;
8767 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8768 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8770 rtx reg = gen_rtx_REG (TImode, regno);
8772 /* Ensure that adjust_address won't be forced to produce a pointer
8773 out of the range allowed by the x86-64 instruction set. */
8774 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8776 rtx r11;
8778 r11 = gen_rtx_REG (DImode, R11_REG);
8779 emit_move_insn (r11, GEN_INT (offset));
8780 emit_insn (gen_adddi3 (r11, r11, pointer));
8781 base_address = gen_rtx_MEM (TImode, r11);
8782 offset = 0;
8784 mem = adjust_address (base_address, TImode, offset);
8785 set_mem_align (mem, 128);
8786 insn = emit_move_insn (reg, mem);
8787 offset += 16;
8789 ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
8791 red_offset += 16;
8795 /* Restore function stack, frame, and registers. */
8797 void
8798 ix86_expand_epilogue (int style)
8800 int sp_valid;
8801 struct ix86_frame frame;
8802 HOST_WIDE_INT offset, red_offset;
8803 struct machine_cfa_state cfa_state_save = *ix86_cfa_state;
8804 bool using_drap;
8806 ix86_finalize_stack_realign_flags ();
8808 /* When stack is realigned, SP must be valid. */
8809 sp_valid = (!frame_pointer_needed
8810 || current_function_sp_is_unchanging
8811 || stack_realign_fp);
8813 ix86_compute_frame_layout (&frame);
8815 /* See the comment about red zone and frame
8816 pointer usage in ix86_expand_prologue. */
8817 if (frame_pointer_needed && frame.red_zone_size)
8818 emit_insn (gen_memory_blockage ());
8820 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
8821 gcc_assert (!using_drap || ix86_cfa_state->reg == crtl->drap_reg);
8823 /* Calculate start of saved registers relative to ebp. Special care
8824 must be taken for the normal return case of a function using
8825 eh_return: the eax and edx registers are marked as saved, but not
8826 restored along this path. */
8827 offset = frame.nregs;
8828 if (crtl->calls_eh_return && style != 2)
8829 offset -= 2;
8830 offset *= -UNITS_PER_WORD;
8831 offset -= frame.nsseregs * 16 + frame.padding0;
8833 /* Calculate the start of the saved registers relative to esp on entry
8834 to the function. When realigning the stack, this needs to be the most
8835 negative value possible at runtime. */
8836 red_offset = offset;
8837 if (using_drap)
8838 red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
8839 + UNITS_PER_WORD;
8840 else if (stack_realign_fp)
8841 red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
8842 - UNITS_PER_WORD;
8843 if (frame_pointer_needed)
8844 red_offset -= UNITS_PER_WORD;
8846 /* If we're only restoring one register and sp is not valid then
8847 use a move instruction to restore the register, since it's
8848 less work than reloading sp and popping the register.
8850 The default code results in a stack adjustment using an add/lea
8851 instruction, while this code results in a LEAVE instruction (or discrete
8852 equivalent), so it is profitable in some other cases as well, especially
8853 when there are no registers to restore. We also use this code when
8854 TARGET_USE_LEAVE and there is exactly one register to pop. This
8855 heuristic may need some tuning in the future. */
8856 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
8857 || (TARGET_EPILOGUE_USING_MOVE
8858 && cfun->machine->use_fast_prologue_epilogue
8859 && ((frame.nregs + frame.nsseregs) > 1 || frame.to_allocate))
8860 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs)
8861 && frame.to_allocate)
8862 || (frame_pointer_needed && TARGET_USE_LEAVE
8863 && cfun->machine->use_fast_prologue_epilogue
8864 && (frame.nregs + frame.nsseregs) == 1)
8865 || crtl->calls_eh_return)
8867 /* Restore registers. We can use ebp or esp to address the memory
8868 locations. If both are available, default to ebp, since offsets
8869 are known to be small. The only exception is esp pointing directly
8870 to the end of the block of saved registers, where we may simplify
8871 the addressing mode.
8873 If we are realigning the stack with bp and sp, register restores
8874 can't be addressed by bp; sp must be used instead. */
8876 if (!frame_pointer_needed
8877 || (sp_valid && !frame.to_allocate)
8878 || stack_realign_fp)
8880 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8881 frame.to_allocate, red_offset,
8882 style == 2);
8883 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8884 frame.to_allocate
8885 + frame.nsseregs * 16
8886 + frame.padding0,
8887 red_offset
8888 + frame.nsseregs * 16
8889 + frame.padding0, style == 2);
8891 else
8893 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
8894 offset, red_offset,
8895 style == 2);
8896 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8897 offset
8898 + frame.nsseregs * 16
8899 + frame.padding0,
8900 red_offset
8901 + frame.nsseregs * 16
8902 + frame.padding0, style == 2);
8905 red_offset -= offset;
8907 /* eh_return epilogues need %ecx added to the stack pointer. */
8908 if (style == 2)
8910 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8912 /* Stack align doesn't work with eh_return. */
8913 gcc_assert (!crtl->stack_realign_needed);
8915 if (frame_pointer_needed)
8917 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8918 tmp = plus_constant (tmp, UNITS_PER_WORD);
8919 tmp = emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8921 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8922 tmp = emit_move_insn (hard_frame_pointer_rtx, tmp);
8924 /* Note that we use SA as a temporary CFA, as the return
8925 address is at the proper place relative to it. We
8926 pretend this happens at the FP restore insn because
8927 prior to this insn the FP would be stored at the wrong
8928 offset relative to SA, and after this insn we have no
8929 other reasonable register to use for the CFA. We don't
8930 bother resetting the CFA to the SP for the duration of
8931 the return insn. */
8932 add_reg_note (tmp, REG_CFA_DEF_CFA,
8933 plus_constant (sa, UNITS_PER_WORD));
8934 ix86_add_queued_cfa_restore_notes (tmp);
8935 add_reg_note (tmp, REG_CFA_RESTORE, hard_frame_pointer_rtx);
8936 RTX_FRAME_RELATED_P (tmp) = 1;
8937 ix86_cfa_state->reg = sa;
8938 ix86_cfa_state->offset = UNITS_PER_WORD;
8940 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8941 const0_rtx, style, false);
8943 else
8945 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8946 tmp = plus_constant (tmp, (frame.to_allocate
8947 + frame.nregs * UNITS_PER_WORD
8948 + frame.nsseregs * 16
8949 + frame.padding0));
8950 tmp = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8951 ix86_add_queued_cfa_restore_notes (tmp);
8953 gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
8954 if (ix86_cfa_state->offset != UNITS_PER_WORD)
8956 ix86_cfa_state->offset = UNITS_PER_WORD;
8957 add_reg_note (tmp, REG_CFA_DEF_CFA,
8958 plus_constant (stack_pointer_rtx,
8959 UNITS_PER_WORD));
8960 RTX_FRAME_RELATED_P (tmp) = 1;
8964 else if (!frame_pointer_needed)
8965 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8966 GEN_INT (frame.to_allocate
8967 + frame.nregs * UNITS_PER_WORD
8968 + frame.nsseregs * 16
8969 + frame.padding0),
8970 style, !using_drap);
8971 /* If not an i386, mov & pop is faster than "leave". */
8972 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8973 || !cfun->machine->use_fast_prologue_epilogue)
8974 ix86_emit_leave (red_offset);
8975 else
8977 pro_epilogue_adjust_stack (stack_pointer_rtx,
8978 hard_frame_pointer_rtx,
8979 const0_rtx, style, !using_drap);
8981 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx, red_offset);
8984 else
8986 /* The first step is to deallocate the stack frame so that we can
8987 pop the registers.
8989 If we realign the stack with the frame pointer, then the stack
8990 pointer can't be recovered via lea $offset(%bp), %sp, because
8991 there is a padding area between bp and sp for the realignment.
8992 "add $to_allocate, %sp" must be used instead. */
8993 if (!sp_valid)
8995 gcc_assert (frame_pointer_needed);
8996 gcc_assert (!stack_realign_fp);
8997 pro_epilogue_adjust_stack (stack_pointer_rtx,
8998 hard_frame_pointer_rtx,
8999 GEN_INT (offset), style, false);
9000 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
9001 frame.to_allocate, red_offset,
9002 style == 2);
9003 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9004 GEN_INT (frame.nsseregs * 16),
9005 style, false);
9007 else if (frame.to_allocate || frame.nsseregs)
9009 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
9010 frame.to_allocate, red_offset,
9011 style == 2);
9012 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9013 GEN_INT (frame.to_allocate
9014 + frame.nsseregs * 16
9015 + frame.padding0), style,
9016 !using_drap && !frame_pointer_needed);
9019 ix86_emit_restore_regs_using_pop (red_offset + frame.nsseregs * 16
9020 + frame.padding0);
9021 red_offset -= offset;
9023 if (frame_pointer_needed)
9025 /* Leave results in shorter dependency chains on CPUs that are
9026 able to grok it fast. */
9027 if (TARGET_USE_LEAVE)
9028 ix86_emit_leave (red_offset);
9029 else
9031 /* When stack realignment really happens, recovering the stack
9032 pointer to the hard frame pointer is a must if we are not
9033 using leave. */
9034 if (stack_realign_fp)
9035 pro_epilogue_adjust_stack (stack_pointer_rtx,
9036 hard_frame_pointer_rtx,
9037 const0_rtx, style, !using_drap);
9038 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx,
9039 red_offset);
9044 if (using_drap)
9046 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
9047 ? 0 : UNITS_PER_WORD);
9048 rtx insn;
9050 gcc_assert (stack_realign_drap);
9052 insn = emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
9053 crtl->drap_reg,
9054 GEN_INT (-(UNITS_PER_WORD
9055 + param_ptr_offset))));
9057 ix86_cfa_state->reg = stack_pointer_rtx;
9058 ix86_cfa_state->offset = UNITS_PER_WORD + param_ptr_offset;
9060 add_reg_note (insn, REG_CFA_DEF_CFA,
9061 gen_rtx_PLUS (Pmode, ix86_cfa_state->reg,
9062 GEN_INT (ix86_cfa_state->offset)));
9063 RTX_FRAME_RELATED_P (insn) = 1;
9065 if (param_ptr_offset)
9066 ix86_emit_restore_reg_using_pop (crtl->drap_reg, -UNITS_PER_WORD);
9069 /* Sibcall epilogues don't want a return instruction. */
9070 if (style == 0)
9072 *ix86_cfa_state = cfa_state_save;
9073 return;
9076 if (crtl->args.pops_args && crtl->args.size)
9078 rtx popc = GEN_INT (crtl->args.pops_args);
9080 /* i386 can only pop 64K bytes. If asked to pop more, pop return
9081 address, do explicit add, and jump indirectly to the caller. */
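/* Schematically, the long-pop path below then ends the epilogue with
     popl %ecx          ; pop the return address
     addl $popc, %esp   ; deallocate the >= 64K argument area
     jmp *%ecx          ; return to the caller
   instead of a single "ret $popc".  */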
9083 if (crtl->args.pops_args >= 65536)
9085 rtx ecx = gen_rtx_REG (SImode, CX_REG);
9086 rtx insn;
9088 /* There is no "pascal" calling convention in any 64bit ABI. */
9089 gcc_assert (!TARGET_64BIT);
9091 insn = emit_insn (gen_popsi1 (ecx));
9092 ix86_cfa_state->offset -= UNITS_PER_WORD;
9094 add_reg_note (insn, REG_CFA_ADJUST_CFA,
9095 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
9096 add_reg_note (insn, REG_CFA_REGISTER,
9097 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
9098 RTX_FRAME_RELATED_P (insn) = 1;
9100 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9101 popc, -1, true);
9102 emit_jump_insn (gen_return_indirect_internal (ecx));
9104 else
9105 emit_jump_insn (gen_return_pop_internal (popc));
9107 else
9108 emit_jump_insn (gen_return_internal ());
9110 /* Restore the state back to the state from the prologue,
9111 so that it's correct for the next epilogue. */
9112 *ix86_cfa_state = cfa_state_save;
9115 /* Reset from the function's potential modifications. */
9117 static void
9118 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
9119 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
9121 if (pic_offset_table_rtx)
9122 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
9123 #if TARGET_MACHO
9124 /* Mach-O doesn't support labels at the end of objects, so if
9125 it looks like we might want one, insert a NOP. */
9127 rtx insn = get_last_insn ();
9128 while (insn
9129 && NOTE_P (insn)
9130 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
9131 insn = PREV_INSN (insn);
9132 if (insn
9133 && (LABEL_P (insn)
9134 || (NOTE_P (insn)
9135 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
9136 fputs ("\tnop\n", file);
9138 #endif
9142 /* Extract the parts of an RTL expression that is a valid memory address
9143 for an instruction. Return 0 if the structure of the address is
9144 grossly off. Return -1 if the address contains ASHIFT, so it is not
9145 strictly valid, but is still used for computing the length of a lea instruction. */
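/* As an illustration, the (canonically nested) address
     (plus (plus (mult (reg %ecx) (const_int 4)) (reg %ebx)) (const_int 12))
   decomposes into base = %ebx, index = %ecx, scale = 4 and disp = 12,
   i.e. the AT&T operand 12(%ebx,%ecx,4).  */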
9147 int
9148 ix86_decompose_address (rtx addr, struct ix86_address *out)
9150 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
9151 rtx base_reg, index_reg;
9152 HOST_WIDE_INT scale = 1;
9153 rtx scale_rtx = NULL_RTX;
9154 int retval = 1;
9155 enum ix86_address_seg seg = SEG_DEFAULT;
9157 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
9158 base = addr;
9159 else if (GET_CODE (addr) == PLUS)
9161 rtx addends[4], op;
9162 int n = 0, i;
9164 op = addr;
9165 do
9167 if (n >= 4)
9168 return 0;
9169 addends[n++] = XEXP (op, 1);
9170 op = XEXP (op, 0);
9172 while (GET_CODE (op) == PLUS);
9173 if (n >= 4)
9174 return 0;
9175 addends[n] = op;
9177 for (i = n; i >= 0; --i)
9179 op = addends[i];
9180 switch (GET_CODE (op))
9182 case MULT:
9183 if (index)
9184 return 0;
9185 index = XEXP (op, 0);
9186 scale_rtx = XEXP (op, 1);
9187 break;
9189 case UNSPEC:
9190 if (XINT (op, 1) == UNSPEC_TP
9191 && TARGET_TLS_DIRECT_SEG_REFS
9192 && seg == SEG_DEFAULT)
9193 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
9194 else
9195 return 0;
9196 break;
9198 case REG:
9199 case SUBREG:
9200 if (!base)
9201 base = op;
9202 else if (!index)
9203 index = op;
9204 else
9205 return 0;
9206 break;
9208 case CONST:
9209 case CONST_INT:
9210 case SYMBOL_REF:
9211 case LABEL_REF:
9212 if (disp)
9213 return 0;
9214 disp = op;
9215 break;
9217 default:
9218 return 0;
9222 else if (GET_CODE (addr) == MULT)
9224 index = XEXP (addr, 0); /* index*scale */
9225 scale_rtx = XEXP (addr, 1);
9227 else if (GET_CODE (addr) == ASHIFT)
9229 rtx tmp;
9231 /* We're called for lea too, which implements ashift on occasion. */
9232 index = XEXP (addr, 0);
9233 tmp = XEXP (addr, 1);
9234 if (!CONST_INT_P (tmp))
9235 return 0;
9236 scale = INTVAL (tmp);
9237 if ((unsigned HOST_WIDE_INT) scale > 3)
9238 return 0;
9239 scale = 1 << scale;
9240 retval = -1;
9242 else
9243 disp = addr; /* displacement */
9245 /* Extract the integral value of scale. */
9246 if (scale_rtx)
9248 if (!CONST_INT_P (scale_rtx))
9249 return 0;
9250 scale = INTVAL (scale_rtx);
9253 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
9254 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
9256 /* Avoid useless 0 displacement. */
9257 if (disp == const0_rtx && (base || index))
9258 disp = NULL_RTX;
9260 /* Allow arg pointer and stack pointer as index if there is no scaling. */
9261 if (base_reg && index_reg && scale == 1
9262 && (index_reg == arg_pointer_rtx
9263 || index_reg == frame_pointer_rtx
9264 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
9266 rtx tmp;
9267 tmp = base, base = index, index = tmp;
9268 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
9271 /* Special case: %ebp cannot be encoded as a base without a displacement.
9272 Similarly %r13. */
9273 if (!disp
9274 && base_reg
9275 && (base_reg == hard_frame_pointer_rtx
9276 || base_reg == frame_pointer_rtx
9277 || base_reg == arg_pointer_rtx
9278 || (REG_P (base_reg)
9279 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
9280 || REGNO (base_reg) == R13_REG))))
9281 disp = const0_rtx;
9283 /* Special case: on K6, [%esi] causes the instruction to be vector
9284 decoded. Avoid this by transforming it to [%esi+0].
9285 Reload calls address legitimization without cfun defined, so we need
9286 to test cfun for being non-NULL. */
9287 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
9288 && base_reg && !index_reg && !disp
9289 && REG_P (base_reg)
9290 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
9291 disp = const0_rtx;
9293 /* Special case: encode reg+reg instead of reg*2. */
9294 if (!base && index && scale == 2)
9295 base = index, base_reg = index_reg, scale = 1;
9297 /* Special case: scaling cannot be encoded without base or displacement. */
9298 if (!base && !disp && index && scale != 1)
9299 disp = const0_rtx;
9301 out->base = base;
9302 out->index = index;
9303 out->disp = disp;
9304 out->scale = scale;
9305 out->seg = seg;
9307 return retval;
9310 /* Return the cost of the memory address x.
9311 For i386, it is better to use a complex address than let gcc copy
9312 the address into a reg and make a new pseudo. But not if the address
9313 requires two regs - that would mean more pseudos with longer
9314 lifetimes. */
9315 static int
9316 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
9318 struct ix86_address parts;
9319 int cost = 1;
9320 int ok = ix86_decompose_address (x, &parts);
9322 gcc_assert (ok);
9324 if (parts.base && GET_CODE (parts.base) == SUBREG)
9325 parts.base = SUBREG_REG (parts.base);
9326 if (parts.index && GET_CODE (parts.index) == SUBREG)
9327 parts.index = SUBREG_REG (parts.index);
9329 /* Attempt to minimize the number of registers in the address. */
9330 if ((parts.base
9331 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
9332 || (parts.index
9333 && (!REG_P (parts.index)
9334 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
9335 cost++;
9337 if (parts.base
9338 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
9339 && parts.index
9340 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
9341 && parts.base != parts.index)
9342 cost++;
9344 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to
9345 00_xxx_100b, since its predecode logic can't detect the length of
9346 such instructions and they degenerate to vector decoding. Increase
9347 the cost of such addresses here. The penalty is at least 2 cycles.
9348 It may be worthwhile to split such addresses or even refuse them.
9350 The following addressing modes are affected:
9351 [base+scale*index]
9352 [scale*index+disp]
9353 [base+index]
9355 The first and last cases may be avoidable by explicitly coding a
9356 zero displacement into the memory address, but I don't have an
9357 AMD-K6 machine handy to check this theory. */
9359 if (TARGET_K6
9360 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
9361 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
9362 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
9363 cost += 10;
9365 return cost;
9368 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
9369 this is used to form addresses to local data when -fPIC is in
9370 use. */
9372 static bool
9373 darwin_local_data_pic (rtx disp)
9375 return (GET_CODE (disp) == UNSPEC
9376 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
9379 /* Determine if a given RTX is a valid constant. We already know this
9380 satisfies CONSTANT_P. */
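/* For instance, (const (plus (symbol_ref "foo") (const_int 4))) is a
   legitimate constant, while a SYMBOL_REF carrying a TLS model is not,
   as the cases below spell out.  */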
9382 bool
9383 legitimate_constant_p (rtx x)
9385 switch (GET_CODE (x))
9387 case CONST:
9388 x = XEXP (x, 0);
9390 if (GET_CODE (x) == PLUS)
9392 if (!CONST_INT_P (XEXP (x, 1)))
9393 return false;
9394 x = XEXP (x, 0);
9397 if (TARGET_MACHO && darwin_local_data_pic (x))
9398 return true;
9400 /* Only some unspecs are valid as "constants". */
9401 if (GET_CODE (x) == UNSPEC)
9402 switch (XINT (x, 1))
9404 case UNSPEC_GOT:
9405 case UNSPEC_GOTOFF:
9406 case UNSPEC_PLTOFF:
9407 return TARGET_64BIT;
9408 case UNSPEC_TPOFF:
9409 case UNSPEC_NTPOFF:
9410 x = XVECEXP (x, 0, 0);
9411 return (GET_CODE (x) == SYMBOL_REF
9412 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9413 case UNSPEC_DTPOFF:
9414 x = XVECEXP (x, 0, 0);
9415 return (GET_CODE (x) == SYMBOL_REF
9416 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
9417 default:
9418 return false;
9421 /* We must have drilled down to a symbol. */
9422 if (GET_CODE (x) == LABEL_REF)
9423 return true;
9424 if (GET_CODE (x) != SYMBOL_REF)
9425 return false;
9426 /* FALLTHRU */
9428 case SYMBOL_REF:
9429 /* TLS symbols are never valid. */
9430 if (SYMBOL_REF_TLS_MODEL (x))
9431 return false;
9433 /* DLLIMPORT symbols are never valid. */
9434 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9435 && SYMBOL_REF_DLLIMPORT_P (x))
9436 return false;
9437 break;
9439 case CONST_DOUBLE:
9440 if (GET_MODE (x) == TImode
9441 && x != CONST0_RTX (TImode)
9442 && !TARGET_64BIT)
9443 return false;
9444 break;
9446 case CONST_VECTOR:
9447 if (!standard_sse_constant_p (x))
9448 return false;
9450 default:
9451 break;
9454 /* Otherwise we handle everything else in the move patterns. */
9455 return true;
9458 /* Determine if it's legal to put X into the constant pool. This
9459 is not possible for the address of thread-local symbols, which
9460 is checked above. */
9462 static bool
9463 ix86_cannot_force_const_mem (rtx x)
9465 /* We can always put integral constants and vectors in memory. */
9466 switch (GET_CODE (x))
9468 case CONST_INT:
9469 case CONST_DOUBLE:
9470 case CONST_VECTOR:
9471 return false;
9473 default:
9474 break;
9476 return !legitimate_constant_p (x);
9480 /* Nonzero if the constant value X is a legitimate general operand
9481 when generating PIC code. It is given that flag_pic is on and
9482 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
9484 bool
9485 legitimate_pic_operand_p (rtx x)
9487 rtx inner;
9489 switch (GET_CODE (x))
9491 case CONST:
9492 inner = XEXP (x, 0);
9493 if (GET_CODE (inner) == PLUS
9494 && CONST_INT_P (XEXP (inner, 1)))
9495 inner = XEXP (inner, 0);
9497 /* Only some unspecs are valid as "constants". */
9498 if (GET_CODE (inner) == UNSPEC)
9499 switch (XINT (inner, 1))
9501 case UNSPEC_GOT:
9502 case UNSPEC_GOTOFF:
9503 case UNSPEC_PLTOFF:
9504 return TARGET_64BIT;
9505 case UNSPEC_TPOFF:
9506 x = XVECEXP (inner, 0, 0);
9507 return (GET_CODE (x) == SYMBOL_REF
9508 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9509 case UNSPEC_MACHOPIC_OFFSET:
9510 return legitimate_pic_address_disp_p (x);
9511 default:
9512 return false;
9514 /* FALLTHRU */
9516 case SYMBOL_REF:
9517 case LABEL_REF:
9518 return legitimate_pic_address_disp_p (x);
9520 default:
9521 return true;
9525 /* Determine if a given CONST RTX is a valid memory displacement
9526 in PIC mode. */
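/* A typical valid displacement looks like
     (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))
   which is emitted as foo@GOTOFF and resolved relative to the PIC base.  */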
9528 int
9529 legitimate_pic_address_disp_p (rtx disp)
9531 bool saw_plus;
9533 /* In 64bit mode we can allow direct addresses of symbols and labels
9534 when they are not dynamic symbols. */
9535 if (TARGET_64BIT)
9537 rtx op0 = disp, op1;
9539 switch (GET_CODE (disp))
9541 case LABEL_REF:
9542 return true;
9544 case CONST:
9545 if (GET_CODE (XEXP (disp, 0)) != PLUS)
9546 break;
9547 op0 = XEXP (XEXP (disp, 0), 0);
9548 op1 = XEXP (XEXP (disp, 0), 1);
9549 if (!CONST_INT_P (op1)
9550 || INTVAL (op1) >= 16*1024*1024
9551 || INTVAL (op1) < -16*1024*1024)
9552 break;
9553 if (GET_CODE (op0) == LABEL_REF)
9554 return true;
9555 if (GET_CODE (op0) != SYMBOL_REF)
9556 break;
9557 /* FALLTHRU */
9559 case SYMBOL_REF:
9560 /* TLS references should always be enclosed in UNSPEC. */
9561 if (SYMBOL_REF_TLS_MODEL (op0))
9562 return false;
9563 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
9564 && ix86_cmodel != CM_LARGE_PIC)
9565 return true;
9566 break;
9568 default:
9569 break;
9572 if (GET_CODE (disp) != CONST)
9573 return 0;
9574 disp = XEXP (disp, 0);
9576 if (TARGET_64BIT)
9578 /* It is unsafe to allow PLUS expressions here; this limits the allowed
9579 distance of GOT table references. We should not need these anyway. */
9580 if (GET_CODE (disp) != UNSPEC
9581 || (XINT (disp, 1) != UNSPEC_GOTPCREL
9582 && XINT (disp, 1) != UNSPEC_GOTOFF
9583 && XINT (disp, 1) != UNSPEC_PLTOFF))
9584 return 0;
9586 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
9587 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
9588 return 0;
9589 return 1;
9592 saw_plus = false;
9593 if (GET_CODE (disp) == PLUS)
9595 if (!CONST_INT_P (XEXP (disp, 1)))
9596 return 0;
9597 disp = XEXP (disp, 0);
9598 saw_plus = true;
9601 if (TARGET_MACHO && darwin_local_data_pic (disp))
9602 return 1;
9604 if (GET_CODE (disp) != UNSPEC)
9605 return 0;
9607 switch (XINT (disp, 1))
9609 case UNSPEC_GOT:
9610 if (saw_plus)
9611 return false;
9612 /* We need to check for both symbols and labels because VxWorks loads
9613 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
9614 details. */
9615 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9616 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
9617 case UNSPEC_GOTOFF:
9618 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
9619 While the ABI also specifies a 32bit relocation, we don't produce
9620 it in the small PIC model at all. */
9621 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9622 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
9623 && !TARGET_64BIT)
9624 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
9625 return false;
9626 case UNSPEC_GOTTPOFF:
9627 case UNSPEC_GOTNTPOFF:
9628 case UNSPEC_INDNTPOFF:
9629 if (saw_plus)
9630 return false;
9631 disp = XVECEXP (disp, 0, 0);
9632 return (GET_CODE (disp) == SYMBOL_REF
9633 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
9634 case UNSPEC_NTPOFF:
9635 disp = XVECEXP (disp, 0, 0);
9636 return (GET_CODE (disp) == SYMBOL_REF
9637 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
9638 case UNSPEC_DTPOFF:
9639 disp = XVECEXP (disp, 0, 0);
9640 return (GET_CODE (disp) == SYMBOL_REF
9641 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
9644 return 0;
9647 /* Recognizes RTL expressions that are valid memory addresses for an
9648 instruction. The MODE argument is the machine mode for the MEM
9649 expression that wants to use this address.
9651 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS
9652 should convert common non-canonical forms to canonical form so that
9653 they will be recognized. */
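/* E.g. (plus (reg %esi) (const_int 8)) is accepted below, while an
   address using a scale other than 1, 2, 4 or 8, or a base or index
   that is not in Pmode, is rejected.  */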
9655 static bool
9656 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9657 rtx addr, bool strict)
9659 struct ix86_address parts;
9660 rtx base, index, disp;
9661 HOST_WIDE_INT scale;
9662 const char *reason = NULL;
9663 rtx reason_rtx = NULL_RTX;
9665 if (ix86_decompose_address (addr, &parts) <= 0)
9667 reason = "decomposition failed";
9668 goto report_error;
9671 base = parts.base;
9672 index = parts.index;
9673 disp = parts.disp;
9674 scale = parts.scale;
9676 /* Validate base register.
9678 Don't allow SUBREG's that span more than a word here. It can lead to spill
9679 failures when the base is one word out of a two word structure, which is
9680 represented internally as a DImode int. */
9682 if (base)
9684 rtx reg;
9685 reason_rtx = base;
9687 if (REG_P (base))
9688 reg = base;
9689 else if (GET_CODE (base) == SUBREG
9690 && REG_P (SUBREG_REG (base))
9691 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9692 <= UNITS_PER_WORD)
9693 reg = SUBREG_REG (base);
9694 else
9696 reason = "base is not a register";
9697 goto report_error;
9700 if (GET_MODE (base) != Pmode)
9702 reason = "base is not in Pmode";
9703 goto report_error;
9706 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9707 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9709 reason = "base is not valid";
9710 goto report_error;
9714 /* Validate index register.
9716 Don't allow SUBREG's that span more than a word here -- same as above. */
9718 if (index)
9720 rtx reg;
9721 reason_rtx = index;
9723 if (REG_P (index))
9724 reg = index;
9725 else if (GET_CODE (index) == SUBREG
9726 && REG_P (SUBREG_REG (index))
9727 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9728 <= UNITS_PER_WORD)
9729 reg = SUBREG_REG (index);
9730 else
9732 reason = "index is not a register";
9733 goto report_error;
9736 if (GET_MODE (index) != Pmode)
9738 reason = "index is not in Pmode";
9739 goto report_error;
9742 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9743 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9745 reason = "index is not valid";
9746 goto report_error;
9750 /* Validate scale factor. */
9751 if (scale != 1)
9753 reason_rtx = GEN_INT (scale);
9754 if (!index)
9756 reason = "scale without index";
9757 goto report_error;
9760 if (scale != 2 && scale != 4 && scale != 8)
9762 reason = "scale is not a valid multiplier";
9763 goto report_error;
9767 /* Validate displacement. */
9768 if (disp)
9770 reason_rtx = disp;
9772 if (GET_CODE (disp) == CONST
9773 && GET_CODE (XEXP (disp, 0)) == UNSPEC
9774 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9775 switch (XINT (XEXP (disp, 0), 1))
9777 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
9778 used. While the ABI also specifies 32bit relocations, we don't
9779 produce them at all and use IP-relative addressing instead. */
9780 case UNSPEC_GOT:
9781 case UNSPEC_GOTOFF:
9782 gcc_assert (flag_pic);
9783 if (!TARGET_64BIT)
9784 goto is_legitimate_pic;
9785 reason = "64bit address unspec";
9786 goto report_error;
9788 case UNSPEC_GOTPCREL:
9789 gcc_assert (flag_pic);
9790 goto is_legitimate_pic;
9792 case UNSPEC_GOTTPOFF:
9793 case UNSPEC_GOTNTPOFF:
9794 case UNSPEC_INDNTPOFF:
9795 case UNSPEC_NTPOFF:
9796 case UNSPEC_DTPOFF:
9797 break;
9799 default:
9800 reason = "invalid address unspec";
9801 goto report_error;
9804 else if (SYMBOLIC_CONST (disp)
9805 && (flag_pic
9806 || (TARGET_MACHO
9807 #if TARGET_MACHO
9808 && MACHOPIC_INDIRECT
9809 && !machopic_operand_p (disp)
9810 #endif
9814 is_legitimate_pic:
9815 if (TARGET_64BIT && (index || base))
9817 /* foo@dtpoff(%rX) is ok. */
9818 if (GET_CODE (disp) != CONST
9819 || GET_CODE (XEXP (disp, 0)) != PLUS
9820 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9821 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9822 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9823 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9825 reason = "non-constant pic memory reference";
9826 goto report_error;
9829 else if (! legitimate_pic_address_disp_p (disp))
9831 reason = "displacement is an invalid pic construct";
9832 goto report_error;
9835 /* This code used to verify that a symbolic pic displacement
9836 includes the pic_offset_table_rtx register.
9838 While this is a good idea, unfortunately these constructs may
9839 be created by the "adds using lea" optimization for incorrect
9840 code like:
9842 int a;
9843 int foo(int i)
9845 return *(&a+i);
9848 This code is nonsensical, but results in addressing the
9849 GOT table with a pic_offset_table_rtx base. We can't
9850 just refuse it easily, since it gets matched by the
9851 "addsi3" pattern, which later gets split to a lea when
9852 the output register differs from the input. While this
9853 could be handled by a separate addsi pattern for this
9854 case that never results in a lea, disabling this test
9855 seems to be the easier and correct fix for the crash. */
9857 else if (GET_CODE (disp) != LABEL_REF
9858 && !CONST_INT_P (disp)
9859 && (GET_CODE (disp) != CONST
9860 || !legitimate_constant_p (disp))
9861 && (GET_CODE (disp) != SYMBOL_REF
9862 || !legitimate_constant_p (disp)))
9864 reason = "displacement is not constant";
9865 goto report_error;
9867 else if (TARGET_64BIT
9868 && !x86_64_immediate_operand (disp, VOIDmode))
9870 reason = "displacement is out of range";
9871 goto report_error;
9875 /* Everything looks valid. */
9876 return true;
9878 report_error:
9879 return false;
9882 /* Determine if a given RTX is a valid constant address. */
9884 bool
9885 constant_address_p (rtx x)
9887 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
9890 /* Return a unique alias set for the GOT. */
9892 static alias_set_type
9893 ix86_GOT_alias_set (void)
9895 static alias_set_type set = -1;
9896 if (set == -1)
9897 set = new_alias_set ();
9898 return set;
9901 /* Return a legitimate reference for ORIG (an address) using the
9902 register REG. If REG is 0, a new pseudo is generated.
9904 There are two types of references that must be handled:
9906 1. Global data references must load the address from the GOT, via
9907 the PIC reg. An insn is emitted to do this load, and the reg is
9908 returned.
9910 2. Static data references, constant pool addresses, and code labels
9911 compute the address as an offset from the GOT, whose base is in
9912 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9913 differentiate them from global data objects. The returned
9914 address is the PIC reg + an unspec constant.
9916 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
9917 reg also appears in the address. */
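/* Roughly, for ia32 -fpic the two cases come out as
     movl foo@GOT(%ebx), %reg     ; global data: load address from GOT
     leal bar@GOTOFF(%ebx), %reg  ; static data: PIC reg + offset
   where %ebx holds the PIC base.  */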
9919 static rtx
9920 legitimize_pic_address (rtx orig, rtx reg)
9922 rtx addr = orig;
9923 rtx new_rtx = orig;
9924 rtx base;
9926 #if TARGET_MACHO
9927 if (TARGET_MACHO && !TARGET_64BIT)
9929 if (reg == 0)
9930 reg = gen_reg_rtx (Pmode);
9931 /* Use the generic Mach-O PIC machinery. */
9932 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
9934 #endif
9936 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
9937 new_rtx = addr;
9938 else if (TARGET_64BIT
9939 && ix86_cmodel != CM_SMALL_PIC
9940 && gotoff_operand (addr, Pmode))
9942 rtx tmpreg;
9943 /* This symbol may be referenced via a displacement from the PIC
9944 base address (@GOTOFF). */
9946 if (reload_in_progress)
9947 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9948 if (GET_CODE (addr) == CONST)
9949 addr = XEXP (addr, 0);
9950 if (GET_CODE (addr) == PLUS)
9952 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9953 UNSPEC_GOTOFF);
9954 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9956 else
9957 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9958 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9959 if (!reg)
9960 tmpreg = gen_reg_rtx (Pmode);
9961 else
9962 tmpreg = reg;
9963 emit_move_insn (tmpreg, new_rtx);
9965 if (reg != 0)
9967 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9968 tmpreg, 1, OPTAB_DIRECT);
9969 new_rtx = reg;
9971 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
9973 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9975 /* This symbol may be referenced via a displacement from the PIC
9976 base address (@GOTOFF). */
9978 if (reload_in_progress)
9979 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9980 if (GET_CODE (addr) == CONST)
9981 addr = XEXP (addr, 0);
9982 if (GET_CODE (addr) == PLUS)
9984 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9985 UNSPEC_GOTOFF);
9986 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9988 else
9989 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9990 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9991 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9993 if (reg != 0)
9995 emit_move_insn (reg, new_rtx);
9996 new_rtx = reg;
9999 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
10000 /* We can't use @GOTOFF for text labels on VxWorks;
10001 see gotoff_operand. */
10002 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
10004 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10006 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
10007 return legitimize_dllimport_symbol (addr, true);
10008 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
10009 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
10010 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
10012 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
10013 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
10017 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
10019 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
10020 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10021 new_rtx = gen_const_mem (Pmode, new_rtx);
10022 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10024 if (reg == 0)
10025 reg = gen_reg_rtx (Pmode);
10026 /* Use gen_movsi directly, otherwise the address is loaded
10027 into a register for CSE. We don't want to CSE these addresses;
10028 instead we CSE addresses from the GOT table, so skip this. */
10029 emit_insn (gen_movsi (reg, new_rtx));
10030 new_rtx = reg;
10032 else
10034 /* This symbol must be referenced via a load from the
10035 Global Offset Table (@GOT). */
10037 if (reload_in_progress)
10038 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10039 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
10040 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10041 if (TARGET_64BIT)
10042 new_rtx = force_reg (Pmode, new_rtx);
10043 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10044 new_rtx = gen_const_mem (Pmode, new_rtx);
10045 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10047 if (reg == 0)
10048 reg = gen_reg_rtx (Pmode);
10049 emit_move_insn (reg, new_rtx);
10050 new_rtx = reg;
10053 else
10055 if (CONST_INT_P (addr)
10056 && !x86_64_immediate_operand (addr, VOIDmode))
10058 if (reg)
10060 emit_move_insn (reg, addr);
10061 new_rtx = reg;
10063 else
10064 new_rtx = force_reg (Pmode, addr);
10066 else if (GET_CODE (addr) == CONST)
10068 addr = XEXP (addr, 0);
10070 /* We must match stuff we generate before. Assume the only
10071 unspecs that can get here are ours. Not that we could do
10072 anything with them anyway.... */
10073 if (GET_CODE (addr) == UNSPEC
10074 || (GET_CODE (addr) == PLUS
10075 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
10076 return orig;
10077 gcc_assert (GET_CODE (addr) == PLUS);
10079 if (GET_CODE (addr) == PLUS)
10081 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
10083 /* Check first to see if this is a constant offset from a @GOTOFF
10084 symbol reference. */
10085 if (gotoff_operand (op0, Pmode)
10086 && CONST_INT_P (op1))
10088 if (!TARGET_64BIT)
10090 if (reload_in_progress)
10091 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10092 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
10093 UNSPEC_GOTOFF);
10094 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
10095 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10096 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10098 if (reg != 0)
10100 emit_move_insn (reg, new_rtx);
10101 new_rtx = reg;
10104 else
10106 if (INTVAL (op1) < -16*1024*1024
10107 || INTVAL (op1) >= 16*1024*1024)
10109 if (!x86_64_immediate_operand (op1, Pmode))
10110 op1 = force_reg (Pmode, op1);
10111 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
10115 else
10117 base = legitimize_pic_address (XEXP (addr, 0), reg);
10118 new_rtx = legitimize_pic_address (XEXP (addr, 1),
10119 base == reg ? NULL_RTX : reg);
10121 if (CONST_INT_P (new_rtx))
10122 new_rtx = plus_constant (base, INTVAL (new_rtx));
10123 else
10125 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
10127 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
10128 new_rtx = XEXP (new_rtx, 1);
10130 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
10135 return new_rtx;
10138 /* Load the thread pointer. If TO_REG is true, force it into a register. */
10140 static rtx
10141 get_thread_pointer (int to_reg)
10143 rtx tp, reg, insn;
10145 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
10146 if (!to_reg)
10147 return tp;
10149 reg = gen_reg_rtx (Pmode);
10150 insn = gen_rtx_SET (VOIDmode, reg, tp);
10151 insn = emit_insn (insn);
10153 return reg;
10156 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
10157 false if we expect this to be used for a memory address and true if
10158 we expect to load the address into a register. */
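/* Schematically, for ia32 with GNU TLS the four models come out as:
     global dynamic: leal x@tlsgd(,%ebx,1), %eax; call ___tls_get_addr
     local dynamic:  leal x@tlsldm(%ebx), %eax; call ___tls_get_addr
     initial exec:   movl %gs:0, %reg; addl x@gotntpoff(%ebx), %reg
     local exec:     movl %gs:0, %reg; leal x@ntpoff(%reg), %reg
   The 64-bit and non-GNU variants use different relocations, as the
   cases below show.  */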
10160 static rtx
10161 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
10163 rtx dest, base, off, pic, tp;
10164 int type;
10166 switch (model)
10168 case TLS_MODEL_GLOBAL_DYNAMIC:
10169 dest = gen_reg_rtx (Pmode);
10170 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
10172 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
10174 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
10176 start_sequence ();
10177 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
10178 insns = get_insns ();
10179 end_sequence ();
10181 RTL_CONST_CALL_P (insns) = 1;
10182 emit_libcall_block (insns, dest, rax, x);
10184 else if (TARGET_64BIT && TARGET_GNU2_TLS)
10185 emit_insn (gen_tls_global_dynamic_64 (dest, x));
10186 else
10187 emit_insn (gen_tls_global_dynamic_32 (dest, x));
10189 if (TARGET_GNU2_TLS)
10191 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
10193 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
10195 break;
10197 case TLS_MODEL_LOCAL_DYNAMIC:
10198 base = gen_reg_rtx (Pmode);
10199 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
10201 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
10203 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
10205 start_sequence ();
10206 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
10207 insns = get_insns ();
10208 end_sequence ();
10210 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
10211 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
10212 RTL_CONST_CALL_P (insns) = 1;
10213 emit_libcall_block (insns, base, rax, note);
10215 else if (TARGET_64BIT && TARGET_GNU2_TLS)
10216 emit_insn (gen_tls_local_dynamic_base_64 (base));
10217 else
10218 emit_insn (gen_tls_local_dynamic_base_32 (base));
10220 if (TARGET_GNU2_TLS)
10222 rtx x = ix86_tls_module_base ();
10224 set_unique_reg_note (get_last_insn (), REG_EQUIV,
10225 gen_rtx_MINUS (Pmode, x, tp));
10228 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
10229 off = gen_rtx_CONST (Pmode, off);
10231 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
10233 if (TARGET_GNU2_TLS)
10235 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
10237 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
10240 break;
10242 case TLS_MODEL_INITIAL_EXEC:
10243 if (TARGET_64BIT)
10245 pic = NULL;
10246 type = UNSPEC_GOTNTPOFF;
10248 else if (flag_pic)
10250 if (reload_in_progress)
10251 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10252 pic = pic_offset_table_rtx;
10253 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
10255 else if (!TARGET_ANY_GNU_TLS)
10257 pic = gen_reg_rtx (Pmode);
10258 emit_insn (gen_set_got (pic));
10259 type = UNSPEC_GOTTPOFF;
10261 else
10263 pic = NULL;
10264 type = UNSPEC_INDNTPOFF;
10267 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
10268 off = gen_rtx_CONST (Pmode, off);
10269 if (pic)
10270 off = gen_rtx_PLUS (Pmode, pic, off);
10271 off = gen_const_mem (Pmode, off);
10272 set_mem_alias_set (off, ix86_GOT_alias_set ());
10274 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10276 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
10277 off = force_reg (Pmode, off);
10278 return gen_rtx_PLUS (Pmode, base, off);
10280 else
10282 base = get_thread_pointer (true);
10283 dest = gen_reg_rtx (Pmode);
10284 emit_insn (gen_subsi3 (dest, base, off));
10286 break;
10288 case TLS_MODEL_LOCAL_EXEC:
10289 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
10290 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10291 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
10292 off = gen_rtx_CONST (Pmode, off);
10294 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10296 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
10297 return gen_rtx_PLUS (Pmode, base, off);
10299 else
10301 base = get_thread_pointer (true);
10302 dest = gen_reg_rtx (Pmode);
10303 emit_insn (gen_subsi3 (dest, base, off));
10305 break;
10307 default:
10308 gcc_unreachable ();
10311 return dest;
10314 /* Create or return the unique __imp_DECL dllimport symbol corresponding
10315 to symbol DECL. */
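/* E.g. a reference to a dllimported symbol foo is rewritten into a load
   through the import-table cell __imp__foo (or __imp_foo when no user
   label prefix is in use), which the linker fills in.  */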
10317 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
10318 htab_t dllimport_map;
10320 static tree
10321 get_dllimport_decl (tree decl)
10323 struct tree_map *h, in;
10324 void **loc;
10325 const char *name;
10326 const char *prefix;
10327 size_t namelen, prefixlen;
10328 char *imp_name;
10329 tree to;
10330 rtx rtl;
10332 if (!dllimport_map)
10333 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
10335 in.hash = htab_hash_pointer (decl);
10336 in.base.from = decl;
10337 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
10338 h = (struct tree_map *) *loc;
10339 if (h)
10340 return h->to;
10342 *loc = h = GGC_NEW (struct tree_map);
10343 h->hash = in.hash;
10344 h->base.from = decl;
10345 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
10346 VAR_DECL, NULL, ptr_type_node);
10347 DECL_ARTIFICIAL (to) = 1;
10348 DECL_IGNORED_P (to) = 1;
10349 DECL_EXTERNAL (to) = 1;
10350 TREE_READONLY (to) = 1;
10352 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
10353 name = targetm.strip_name_encoding (name);
10354 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
10355 ? "*__imp_" : "*__imp__";
10356 namelen = strlen (name);
10357 prefixlen = strlen (prefix);
10358 imp_name = (char *) alloca (namelen + prefixlen + 1);
10359 memcpy (imp_name, prefix, prefixlen);
10360 memcpy (imp_name + prefixlen, name, namelen + 1);
10362 name = ggc_alloc_string (imp_name, namelen + prefixlen);
10363 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
10364 SET_SYMBOL_REF_DECL (rtl, to);
10365 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
10367 rtl = gen_const_mem (Pmode, rtl);
10368 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
10370 SET_DECL_RTL (to, rtl);
10371 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
10373 return to;
10376 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
10377 true if we require the result be a register. */
10379 static rtx
10380 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
10382 tree imp_decl;
10383 rtx x;
10385 gcc_assert (SYMBOL_REF_DECL (symbol));
10386 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
10388 x = DECL_RTL (imp_decl);
10389 if (want_reg)
10390 x = force_reg (Pmode, x);
10391 return x;
10394 /* Try machine-dependent ways of modifying an illegitimate address
10395 to be legitimate. If we find one, return the new, valid address.
10396 This macro is used in only one place: `memory_address' in explow.c.
10398 OLDX is the address as it was before break_out_memory_refs was called.
10399 In some cases it is useful to look at this to decide what needs to be done.
10401 It is always safe for this macro to do nothing. It exists to recognize
10402 opportunities to optimize the output.
10404 For the 80386, we handle X+REG by loading X into a register R and
10405 using R+REG. R will go in a general reg and indexing will be used.
10406 However, if REG is a broken-out memory address or multiplication,
10407 nothing needs to be done because REG can certainly go in a general reg.
10409 When -fpic is used, special handling is needed for symbolic references.
10410 See comments by legitimize_pic_address in i386.c for details. */
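/* For instance, (plus (ashift (reg) (const_int 2)) (reg)) is
   canonicalized below into (plus (mult (reg) (const_int 4)) (reg)),
   matching the scaled-index addressing the hardware provides.  */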
10412 static rtx
10413 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
10414 enum machine_mode mode)
10416 int changed = 0;
10417 unsigned log;
10419 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
10420 if (log)
10421 return legitimize_tls_address (x, (enum tls_model) log, false);
10422 if (GET_CODE (x) == CONST
10423 && GET_CODE (XEXP (x, 0)) == PLUS
10424 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10425 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
10427 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
10428 (enum tls_model) log, false);
10429 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10432 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10434 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
10435 return legitimize_dllimport_symbol (x, true);
10436 if (GET_CODE (x) == CONST
10437 && GET_CODE (XEXP (x, 0)) == PLUS
10438 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10439 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
10441 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
10442 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10446 if (flag_pic && SYMBOLIC_CONST (x))
10447 return legitimize_pic_address (x, 0);
10449 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
10450 if (GET_CODE (x) == ASHIFT
10451 && CONST_INT_P (XEXP (x, 1))
10452 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
10454 changed = 1;
10455 log = INTVAL (XEXP (x, 1));
10456 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
10457 GEN_INT (1 << log));
10460 if (GET_CODE (x) == PLUS)
10462 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
10464 if (GET_CODE (XEXP (x, 0)) == ASHIFT
10465 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10466 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
10468 changed = 1;
10469 log = INTVAL (XEXP (XEXP (x, 0), 1));
10470 XEXP (x, 0) = gen_rtx_MULT (Pmode,
10471 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
10472 GEN_INT (1 << log));
10475 if (GET_CODE (XEXP (x, 1)) == ASHIFT
10476 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10477 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
10479 changed = 1;
10480 log = INTVAL (XEXP (XEXP (x, 1), 1));
10481 XEXP (x, 1) = gen_rtx_MULT (Pmode,
10482 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
10483 GEN_INT (1 << log));
10486 /* Put multiply first if it isn't already. */
10487 if (GET_CODE (XEXP (x, 1)) == MULT)
10489 rtx tmp = XEXP (x, 0);
10490 XEXP (x, 0) = XEXP (x, 1);
10491 XEXP (x, 1) = tmp;
10492 changed = 1;
10495 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
10496 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
10497 created by virtual register instantiation, register elimination, and
10498 similar optimizations. */
10499 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
10501 changed = 1;
10502 x = gen_rtx_PLUS (Pmode,
10503 gen_rtx_PLUS (Pmode, XEXP (x, 0),
10504 XEXP (XEXP (x, 1), 0)),
10505 XEXP (XEXP (x, 1), 1));
10508 /* Canonicalize
10509 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
10510 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
10511 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
10512 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10513 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
10514 && CONSTANT_P (XEXP (x, 1)))
10516 rtx constant;
10517 rtx other = NULL_RTX;
10519 if (CONST_INT_P (XEXP (x, 1)))
10521 constant = XEXP (x, 1);
10522 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
10524 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
10526 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
10527 other = XEXP (x, 1);
10529 else
10530 constant = 0;
10532 if (constant)
10534 changed = 1;
10535 x = gen_rtx_PLUS (Pmode,
10536 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
10537 XEXP (XEXP (XEXP (x, 0), 1), 0)),
10538 plus_constant (other, INTVAL (constant)));
10542 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
10543 return x;
10545 if (GET_CODE (XEXP (x, 0)) == MULT)
10547 changed = 1;
10548 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
10551 if (GET_CODE (XEXP (x, 1)) == MULT)
10553 changed = 1;
10554 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
10557 if (changed
10558 && REG_P (XEXP (x, 1))
10559 && REG_P (XEXP (x, 0)))
10560 return x;
10562 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
10564 changed = 1;
10565 x = legitimize_pic_address (x, 0);
10568 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
10569 return x;
10571 if (REG_P (XEXP (x, 0)))
10573 rtx temp = gen_reg_rtx (Pmode);
10574 rtx val = force_operand (XEXP (x, 1), temp);
10575 if (val != temp)
10576 emit_move_insn (temp, val);
10578 XEXP (x, 1) = temp;
10579 return x;
10582 else if (REG_P (XEXP (x, 1)))
10584 rtx temp = gen_reg_rtx (Pmode);
10585 rtx val = force_operand (XEXP (x, 0), temp);
10586 if (val != temp)
10587 emit_move_insn (temp, val);
10589 XEXP (x, 0) = temp;
10590 return x;
10594 return x;
10597 /* Print an integer constant expression in assembler syntax. Addition
10598 and subtraction are the only arithmetic that may appear in these
10599 expressions. FILE is the stdio stream to write to, X is the rtx, and
10600 CODE is the operand print code from the output string. */
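/* E.g. (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) is printed
   as foo@GOTOFF, and a MINUS is printed as a bracketed difference of
   its two operands.  */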
10602 static void
10603 output_pic_addr_const (FILE *file, rtx x, int code)
10605 char buf[256];
10607 switch (GET_CODE (x))
10609 case PC:
10610 gcc_assert (flag_pic);
10611 putc ('.', file);
10612 break;
10614 case SYMBOL_REF:
10615 if (! TARGET_MACHO || TARGET_64BIT)
10616 output_addr_const (file, x);
10617 else
10619 const char *name = XSTR (x, 0);
10621 /* Mark the decl as referenced so that cgraph will
10622 output the function. */
10623 if (SYMBOL_REF_DECL (x))
10624 mark_decl_referenced (SYMBOL_REF_DECL (x));
10626 #if TARGET_MACHO
10627 if (MACHOPIC_INDIRECT
10628 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
10629 name = machopic_indirection_name (x, /*stub_p=*/true);
10630 #endif
10631 assemble_name (file, name);
10633 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
10634 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
10635 fputs ("@PLT", file);
10636 break;
10638 case LABEL_REF:
10639 x = XEXP (x, 0);
10640 /* FALLTHRU */
10641 case CODE_LABEL:
10642 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10643 assemble_name (asm_out_file, buf);
10644 break;
10646 case CONST_INT:
10647 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10648 break;
10650 case CONST:
10651 /* This used to output parentheses around the expression,
10652 but that does not work on the 386 (either ATT or BSD assembler). */
10653 output_pic_addr_const (file, XEXP (x, 0), code);
10654 break;
10656 case CONST_DOUBLE:
10657 if (GET_MODE (x) == VOIDmode)
10659 /* We can use %d if the number is <32 bits and positive. */
10660 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10661 fprintf (file, "0x%lx%08lx",
10662 (unsigned long) CONST_DOUBLE_HIGH (x),
10663 (unsigned long) CONST_DOUBLE_LOW (x));
10664 else
10665 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10667 else
10668 /* We can't handle floating point constants;
10669 PRINT_OPERAND must handle them. */
10670 output_operand_lossage ("floating constant misused");
10671 break;
10673 case PLUS:
10674 /* Some assemblers need integer constants to appear first. */
10675 if (CONST_INT_P (XEXP (x, 0)))
10677 output_pic_addr_const (file, XEXP (x, 0), code);
10678 putc ('+', file);
10679 output_pic_addr_const (file, XEXP (x, 1), code);
10681 else
10683 gcc_assert (CONST_INT_P (XEXP (x, 1)));
10684 output_pic_addr_const (file, XEXP (x, 1), code);
10685 putc ('+', file);
10686 output_pic_addr_const (file, XEXP (x, 0), code);
10688 break;
10690 case MINUS:
10691 if (!TARGET_MACHO)
10692 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10693 output_pic_addr_const (file, XEXP (x, 0), code);
10694 putc ('-', file);
10695 output_pic_addr_const (file, XEXP (x, 1), code);
10696 if (!TARGET_MACHO)
10697 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
10698 break;
10700 case UNSPEC:
10701 gcc_assert (XVECLEN (x, 0) == 1);
10702 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10703 switch (XINT (x, 1))
10705 case UNSPEC_GOT:
10706 fputs ("@GOT", file);
10707 break;
10708 case UNSPEC_GOTOFF:
10709 fputs ("@GOTOFF", file);
10710 break;
10711 case UNSPEC_PLTOFF:
10712 fputs ("@PLTOFF", file);
10713 break;
10714 case UNSPEC_GOTPCREL:
10715 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10716 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10717 break;
10718 case UNSPEC_GOTTPOFF:
10719 /* FIXME: This might be @TPOFF in Sun ld too. */
10720 fputs ("@GOTTPOFF", file);
10721 break;
10722 case UNSPEC_TPOFF:
10723 fputs ("@TPOFF", file);
10724 break;
10725 case UNSPEC_NTPOFF:
10726 if (TARGET_64BIT)
10727 fputs ("@TPOFF", file);
10728 else
10729 fputs ("@NTPOFF", file);
10730 break;
10731 case UNSPEC_DTPOFF:
10732 fputs ("@DTPOFF", file);
10733 break;
10734 case UNSPEC_GOTNTPOFF:
10735 if (TARGET_64BIT)
10736 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10737 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
10738 else
10739 fputs ("@GOTNTPOFF", file);
10740 break;
10741 case UNSPEC_INDNTPOFF:
10742 fputs ("@INDNTPOFF", file);
10743 break;
10744 #if TARGET_MACHO
10745 case UNSPEC_MACHOPIC_OFFSET:
10746 putc ('-', file);
10747 machopic_output_function_base_name (file);
10748 break;
10749 #endif
10750 default:
10751 output_operand_lossage ("invalid UNSPEC as operand");
10752 break;
10754 break;
10756 default:
10757 output_operand_lossage ("invalid expression as operand");
10761 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10762 We need to emit DTP-relative relocations. */
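/* E.g. for SIZE == 8 this emits ".long foo@DTPOFF, 0", following the
   4-byte relocation with a zero word so the value occupies 8 bytes.  */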
10764 static void ATTRIBUTE_UNUSED
10765 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
10767 fputs (ASM_LONG, file);
10768 output_addr_const (file, x);
10769 fputs ("@DTPOFF", file);
10770 switch (size)
10772 case 4:
10773 break;
10774 case 8:
10775 fputs (", 0", file);
10776 break;
10777 default:
10778 gcc_unreachable ();
10782 /* Return true if X is a representation of the PIC register. This copes
10783 with calls from ix86_find_base_term, where the register might have
10784 been replaced by a cselib value. */
10786 static bool
10787 ix86_pic_register_p (rtx x)
10789 if (GET_CODE (x) == VALUE)
10790 return (pic_offset_table_rtx
10791 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
10792 else
10793 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
10796 /* In the name of slightly smaller debug output, and to cater to
10797 general assembler lossage, recognize PIC+GOTOFF and turn it back
10798 into a direct symbol reference.
10800 On Darwin, this is necessary to avoid a crash, because Darwin
10801 has a different PIC label for each routine but the DWARF debugging
10802 information is not associated with any particular routine, so it's
10803 necessary to remove references to the PIC label from RTL stored by
10804 the DWARF output code. */
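/* E.g. for a non-MEM operand,
     (plus (reg %ebx) (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))
   is turned back into the bare (symbol_ref "x").  */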
10806 static rtx
10807 ix86_delegitimize_address (rtx orig_x)
10809 rtx x = orig_x;
10810 /* reg_addend is NULL or a multiple of some register. */
10811 rtx reg_addend = NULL_RTX;
10812 /* const_addend is NULL or a const_int. */
10813 rtx const_addend = NULL_RTX;
10814 /* This is the result, or NULL. */
10815 rtx result = NULL_RTX;
10817 if (MEM_P (x))
10818 x = XEXP (x, 0);
10820 if (TARGET_64BIT)
10822 if (GET_CODE (x) != CONST
10823 || GET_CODE (XEXP (x, 0)) != UNSPEC
10824 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10825 || !MEM_P (orig_x))
10826 return orig_x;
10827 return XVECEXP (XEXP (x, 0), 0, 0);
10830 if (GET_CODE (x) != PLUS
10831 || GET_CODE (XEXP (x, 1)) != CONST)
10832 return orig_x;
10834 if (ix86_pic_register_p (XEXP (x, 0)))
10835 /* %ebx + GOT/GOTOFF */
10837 else if (GET_CODE (XEXP (x, 0)) == PLUS)
10839 /* %ebx + %reg * scale + GOT/GOTOFF */
10840 reg_addend = XEXP (x, 0);
10841 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10842 reg_addend = XEXP (reg_addend, 1);
10843 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10844 reg_addend = XEXP (reg_addend, 0);
10845 else
10846 return orig_x;
10847 if (!REG_P (reg_addend)
10848 && GET_CODE (reg_addend) != MULT
10849 && GET_CODE (reg_addend) != ASHIFT)
10850 return orig_x;
10852 else
10853 return orig_x;
10855 x = XEXP (XEXP (x, 1), 0);
10856 if (GET_CODE (x) == PLUS
10857 && CONST_INT_P (XEXP (x, 1)))
10859 const_addend = XEXP (x, 1);
10860 x = XEXP (x, 0);
10863 if (GET_CODE (x) == UNSPEC
10864 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10865 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10866 result = XVECEXP (x, 0, 0);
10868 if (TARGET_MACHO && darwin_local_data_pic (x)
10869 && !MEM_P (orig_x))
10870 result = XVECEXP (x, 0, 0);
10872 if (! result)
10873 return orig_x;
10875 if (const_addend)
10876 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10877 if (reg_addend)
10878 result = gen_rtx_PLUS (Pmode, reg_addend, result);
10879 return result;
10882 /* If X is a machine specific address (i.e. a symbol or label being
10883 referenced as a displacement from the GOT implemented using an
10884 UNSPEC), then return the base term. Otherwise return X. */
10886 rtx
10887 ix86_find_base_term (rtx x)
10889 rtx term;
10891 if (TARGET_64BIT)
10893 if (GET_CODE (x) != CONST)
10894 return x;
10895 term = XEXP (x, 0);
10896 if (GET_CODE (term) == PLUS
10897 && (CONST_INT_P (XEXP (term, 1))
10898 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10899 term = XEXP (term, 0);
10900 if (GET_CODE (term) != UNSPEC
10901 || XINT (term, 1) != UNSPEC_GOTPCREL)
10902 return x;
10904 return XVECEXP (term, 0, 0);
10907 return ix86_delegitimize_address (x);
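/* Print to FILE the condition-code suffix ("e", "ne", "g", ...) used by
   setcc/cmov/jcc mnemonics for comparison CODE in mode MODE.  If REVERSE,
   print the suffix for the reversed condition.  FP selects the alternate
   spellings that work around assembler problems with fcmov.  */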
10910 static void
10911 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10912 int fp, FILE *file)
10914 const char *suffix;
10916 if (mode == CCFPmode || mode == CCFPUmode)
10918 code = ix86_fp_compare_code_to_integer (code);
10919 mode = CCmode;
10921 if (reverse)
10922 code = reverse_condition (code);
10924 switch (code)
10926 case EQ:
10927 switch (mode)
10929 case CCAmode:
10930 suffix = "a";
10931 break;
10933 case CCCmode:
10934 suffix = "c";
10935 break;
10937 case CCOmode:
10938 suffix = "o";
10939 break;
10941 case CCSmode:
10942 suffix = "s";
10943 break;
10945 default:
10946 suffix = "e";
10948 break;
10949 case NE:
10950 switch (mode)
10952 case CCAmode:
10953 suffix = "na";
10954 break;
10956 case CCCmode:
10957 suffix = "nc";
10958 break;
10960 case CCOmode:
10961 suffix = "no";
10962 break;
10964 case CCSmode:
10965 suffix = "ns";
10966 break;
10968 default:
10969 suffix = "ne";
10971 break;
10972 case GT:
10973 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10974 suffix = "g";
10975 break;
10976 case GTU:
10977 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10978 Those same assemblers have the same but opposite lossage on cmov. */
10979 if (mode == CCmode)
10980 suffix = fp ? "nbe" : "a";
10981 else if (mode == CCCmode)
10982 suffix = "b";
10983 else
10984 gcc_unreachable ();
10985 break;
10986 case LT:
10987 switch (mode)
10989 case CCNOmode:
10990 case CCGOCmode:
10991 suffix = "s";
10992 break;
10994 case CCmode:
10995 case CCGCmode:
10996 suffix = "l";
10997 break;
10999 default:
11000 gcc_unreachable ();
11002 break;
11003 case LTU:
11004 gcc_assert (mode == CCmode || mode == CCCmode);
11005 suffix = "b";
11006 break;
11007 case GE:
11008 switch (mode)
11010 case CCNOmode:
11011 case CCGOCmode:
11012 suffix = "ns";
11013 break;
11015 case CCmode:
11016 case CCGCmode:
11017 suffix = "ge";
11018 break;
11020 default:
11021 gcc_unreachable ();
11023 break;
11024 case GEU:
11025 /* ??? As above. */
11026 gcc_assert (mode == CCmode || mode == CCCmode);
11027 suffix = fp ? "nb" : "ae";
11028 break;
11029 case LE:
11030 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
11031 suffix = "le";
11032 break;
11033 case LEU:
11034 /* ??? As above. */
11035 if (mode == CCmode)
11036 suffix = "be";
11037 else if (mode == CCCmode)
11038 suffix = fp ? "nb" : "ae";
11039 else
11040 gcc_unreachable ();
11041 break;
11042 case UNORDERED:
11043 suffix = fp ? "u" : "p";
11044 break;
11045 case ORDERED:
11046 suffix = fp ? "nu" : "np";
11047 break;
11048 default:
11049 gcc_unreachable ();
11051 fputs (suffix, file);
11054 /* Print the name of register X to FILE based on its machine mode and number.
11055 If CODE is 'w', pretend the mode is HImode.
11056 If CODE is 'b', pretend the mode is QImode.
11057 If CODE is 'k', pretend the mode is SImode.
11058 If CODE is 'q', pretend the mode is DImode.
11059 If CODE is 'x', pretend the mode is V4SFmode.
11060 If CODE is 't', pretend the mode is V8SFmode.
11061 If CODE is 'h', pretend the reg is the 'high' byte register.
11062 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
11063 If CODE is 'd', duplicate the operand for AVX instruction. */
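/* E.g. for operand (reg:SI ax), code 'b' prints %al, 'w' prints %ax,
   'k' prints %eax, 'h' prints %ah, and on a 64-bit target 'q'
   prints %rax.  */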
11066 void
11067 print_reg (rtx x, int code, FILE *file)
11069 const char *reg;
11070 bool duplicated = code == 'd' && TARGET_AVX;
11072 gcc_assert (x == pc_rtx
11073 || (REGNO (x) != ARG_POINTER_REGNUM
11074 && REGNO (x) != FRAME_POINTER_REGNUM
11075 && REGNO (x) != FLAGS_REG
11076 && REGNO (x) != FPSR_REG
11077 && REGNO (x) != FPCR_REG));
11079 if (ASSEMBLER_DIALECT == ASM_ATT)
11080 putc ('%', file);
11082 if (x == pc_rtx)
11084 gcc_assert (TARGET_64BIT);
11085 fputs ("rip", file);
11086 return;
11089 if (code == 'w' || MMX_REG_P (x))
11090 code = 2;
11091 else if (code == 'b')
11092 code = 1;
11093 else if (code == 'k')
11094 code = 4;
11095 else if (code == 'q')
11096 code = 8;
11097 else if (code == 'y')
11098 code = 3;
11099 else if (code == 'h')
11100 code = 0;
11101 else if (code == 'x')
11102 code = 16;
11103 else if (code == 't')
11104 code = 32;
11105 else
11106 code = GET_MODE_SIZE (GET_MODE (x));
11108 /* Irritatingly, AMD extended registers use a different naming
11109 convention from the normal registers. */
11110 if (REX_INT_REG_P (x))
11112 gcc_assert (TARGET_64BIT);
11113 switch (code)
11115 case 0:
11116 error ("extended registers have no high halves");
11117 break;
11118 case 1:
11119 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
11120 break;
11121 case 2:
11122 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
11123 break;
11124 case 4:
11125 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
11126 break;
11127 case 8:
11128 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
11129 break;
11130 default:
11131 error ("unsupported operand size for extended register");
11132 break;
11134 return;
11137 reg = NULL;
11138 switch (code)
11140 case 3:
11141 if (STACK_TOP_P (x))
11143 reg = "st(0)";
11144 break;
11146 /* FALLTHRU */
11147 case 8:
11148 case 4:
11149 case 12:
11150 if (! ANY_FP_REG_P (x))
11151 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
11152 /* FALLTHRU */
11153 case 16:
11154 case 2:
11155 normal:
11156 reg = hi_reg_name[REGNO (x)];
11157 break;
11158 case 1:
11159 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
11160 goto normal;
11161 reg = qi_reg_name[REGNO (x)];
11162 break;
11163 case 0:
11164 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
11165 goto normal;
11166 reg = qi_high_reg_name[REGNO (x)];
11167 break;
11168 case 32:
11169 if (SSE_REG_P (x))
11171 gcc_assert (!duplicated);
11172 putc ('y', file);
11173 fputs (hi_reg_name[REGNO (x)] + 1, file);
11174 return;
11176 break;
11177 default:
11178 gcc_unreachable ();
11181 fputs (reg, file);
11182 if (duplicated)
11184 if (ASSEMBLER_DIALECT == ASM_ATT)
11185 fprintf (file, ", %%%s", reg);
11186 else
11187 fprintf (file, ", %s", reg);
11191 /* Locate some local-dynamic symbol still in use by this function
11192 so that we can print its name in some tls_local_dynamic_base
11193 pattern. */
11195 static int
11196 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
11198 rtx x = *px;
11200 if (GET_CODE (x) == SYMBOL_REF
11201 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
11203 cfun->machine->some_ld_name = XSTR (x, 0);
11204 return 1;
11207 return 0;
11210 static const char *
11211 get_some_local_dynamic_name (void)
11213 rtx insn;
11215 if (cfun->machine->some_ld_name)
11216 return cfun->machine->some_ld_name;
11218 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
11219 if (INSN_P (insn)
11220 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
11221 return cfun->machine->some_ld_name;
11223 gcc_unreachable ();
11226 /* Meaning of CODE:
11227 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
11228 C -- print opcode suffix for set/cmov insn.
11229 c -- like C, but print reversed condition
11230 E,e -- likewise, but for compare-and-branch fused insn.
11231 F,f -- likewise, but for floating-point.
11232 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
11233 otherwise nothing
11234 R -- print the prefix for register names.
11235 z -- print the opcode suffix for the size of the current operand.
11236 Z -- likewise, with special suffixes for x87 instructions.
11237 * -- print a star (in certain assembler syntax)
11238 A -- print an absolute memory reference.
11239 w -- print the operand as if it's a "word" (HImode) even if it isn't.
11240 s -- print a shift double count, followed by the assembler's argument
11241 delimiter.
11242 b -- print the QImode name of the register for the indicated operand.
11243 %b0 would print %al if operands[0] is reg 0.
11244 w -- likewise, print the HImode name of the register.
11245 k -- likewise, print the SImode name of the register.
11246 q -- likewise, print the DImode name of the register.
11247 x -- likewise, print the V4SFmode name of the register.
11248 t -- likewise, print the V8SFmode name of the register.
11249 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
11250 y -- print "st(0)" instead of "st" as a register.
11251 d -- print duplicated register operand for AVX instruction.
11252 D -- print condition for SSE cmp instruction.
11253 P -- if PIC, print an @PLT suffix.
11254 X -- don't print any sort of PIC '@' suffix for a symbol.
11255 & -- print some in-use local-dynamic symbol name.
11256 H -- print a memory address offset by 8; used for SSE high parts
11257 Y -- print condition for SSE5 com* instruction.
11258 + -- print a branch hint as 'cs' or 'ds' prefix
11259 ; -- print a semicolon (after prefixes due to a bug in older gas).
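/* Editor's illustration (assumed template, not from the sources): given
   an insn template "mov%z0\t{%1, %0|%0, %1}" with SImode register
   operands, AT&T output is "movl %eax, %edx" ('z' expands to 'l'),
   while Intel output is "mov edx, eax" -- no size suffix, and operands
   reordered by the {AT&T|Intel} template alternative.  */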
11262 void
11263 print_operand (FILE *file, rtx x, int code)
11265 if (code)
11267 switch (code)
11269 case '*':
11270 if (ASSEMBLER_DIALECT == ASM_ATT)
11271 putc ('*', file);
11272 return;
11274 case '&':
11275 assemble_name (file, get_some_local_dynamic_name ());
11276 return;
11278 case 'A':
11279 switch (ASSEMBLER_DIALECT)
11281 case ASM_ATT:
11282 putc ('*', file);
11283 break;
11285 case ASM_INTEL:
11286 /* Intel syntax. For absolute addresses, registers should not
11287 be surrounded by brackets.  */
11288 if (!REG_P (x))
11290 putc ('[', file);
11291 PRINT_OPERAND (file, x, 0);
11292 putc (']', file);
11293 return;
11295 break;
11297 default:
11298 gcc_unreachable ();
11301 PRINT_OPERAND (file, x, 0);
11302 return;
11305 case 'L':
11306 if (ASSEMBLER_DIALECT == ASM_ATT)
11307 putc ('l', file);
11308 return;
11310 case 'W':
11311 if (ASSEMBLER_DIALECT == ASM_ATT)
11312 putc ('w', file);
11313 return;
11315 case 'B':
11316 if (ASSEMBLER_DIALECT == ASM_ATT)
11317 putc ('b', file);
11318 return;
11320 case 'Q':
11321 if (ASSEMBLER_DIALECT == ASM_ATT)
11322 putc ('l', file);
11323 return;
11325 case 'S':
11326 if (ASSEMBLER_DIALECT == ASM_ATT)
11327 putc ('s', file);
11328 return;
11330 case 'T':
11331 if (ASSEMBLER_DIALECT == ASM_ATT)
11332 putc ('t', file);
11333 return;
11335 case 'z':
11336 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11338 /* Opcodes don't get size suffixes when using Intel syntax.  */
11339 if (ASSEMBLER_DIALECT == ASM_INTEL)
11340 return;
11342 switch (GET_MODE_SIZE (GET_MODE (x)))
11344 case 1:
11345 putc ('b', file);
11346 return;
11348 case 2:
11349 putc ('w', file);
11350 return;
11352 case 4:
11353 putc ('l', file);
11354 return;
11356 case 8:
11357 putc ('q', file);
11358 return;
11360 default:
11361 output_operand_lossage
11362 ("invalid operand size for operand code '%c'", code);
11363 return;
11367 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11368 warning
11369 (0, "non-integer operand used with operand code '%c'", code);
11370 /* FALLTHRU */
11372 case 'Z':
11373 /* 387 opcodes don't get size suffixes when using Intel syntax.  */
11374 if (ASSEMBLER_DIALECT == ASM_INTEL)
11375 return;
11377 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11379 switch (GET_MODE_SIZE (GET_MODE (x)))
11381 case 2:
11382 #ifdef HAVE_AS_IX86_FILDS
11383 putc ('s', file);
11384 #endif
11385 return;
11387 case 4:
11388 putc ('l', file);
11389 return;
11391 case 8:
11392 #ifdef HAVE_AS_IX86_FILDQ
11393 putc ('q', file);
11394 #else
11395 fputs ("ll", file);
11396 #endif
11397 return;
11399 default:
11400 break;
11403 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11405 /* 387 opcodes don't get size suffixes
11406 if the operands are registers. */
11407 if (STACK_REG_P (x))
11408 return;
11410 switch (GET_MODE_SIZE (GET_MODE (x)))
11412 case 4:
11413 putc ('s', file);
11414 return;
11416 case 8:
11417 putc ('l', file);
11418 return;
11420 case 12:
11421 case 16:
11422 putc ('t', file);
11423 return;
11425 default:
11426 break;
11429 else
11431 output_operand_lossage
11432 ("invalid operand type used with operand code '%c'", code);
11433 return;
11436 output_operand_lossage
11437 ("invalid operand size for operand code '%c'", code);
11438 return;
11440 case 'd':
11441 case 'b':
11442 case 'w':
11443 case 'k':
11444 case 'q':
11445 case 'h':
11446 case 't':
11447 case 'y':
11448 case 'x':
11449 case 'X':
11450 case 'P':
11451 break;
11453 case 's':
11454 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
11456 PRINT_OPERAND (file, x, 0);
11457 fputs (", ", file);
11459 return;
11461 case 'D':
11462 /* Little bit of braindamage here. The SSE compare instructions
11463 use completely different names for the comparisons than the
11464 fp conditional moves do.  */
11465 if (TARGET_AVX)
11467 switch (GET_CODE (x))
11469 case EQ:
11470 fputs ("eq", file);
11471 break;
11472 case UNEQ:
11473 fputs ("eq_us", file);
11474 break;
11475 case LT:
11476 fputs ("lt", file);
11477 break;
11478 case UNLT:
11479 fputs ("nge", file);
11480 break;
11481 case LE:
11482 fputs ("le", file);
11483 break;
11484 case UNLE:
11485 fputs ("ngt", file);
11486 break;
11487 case UNORDERED:
11488 fputs ("unord", file);
11489 break;
11490 case NE:
11491 fputs ("neq", file);
11492 break;
11493 case LTGT:
11494 fputs ("neq_oq", file);
11495 break;
11496 case GE:
11497 fputs ("ge", file);
11498 break;
11499 case UNGE:
11500 fputs ("nlt", file);
11501 break;
11502 case GT:
11503 fputs ("gt", file);
11504 break;
11505 case UNGT:
11506 fputs ("nle", file);
11507 break;
11508 case ORDERED:
11509 fputs ("ord", file);
11510 break;
11511 default:
11512 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11513 return;
11516 else
11518 switch (GET_CODE (x))
11520 case EQ:
11521 case UNEQ:
11522 fputs ("eq", file);
11523 break;
11524 case LT:
11525 case UNLT:
11526 fputs ("lt", file);
11527 break;
11528 case LE:
11529 case UNLE:
11530 fputs ("le", file);
11531 break;
11532 case UNORDERED:
11533 fputs ("unord", file);
11534 break;
11535 case NE:
11536 case LTGT:
11537 fputs ("neq", file);
11538 break;
11539 case UNGE:
11540 case GE:
11541 fputs ("nlt", file);
11542 break;
11543 case UNGT:
11544 case GT:
11545 fputs ("nle", file);
11546 break;
11547 case ORDERED:
11548 fputs ("ord", file);
11549 break;
11550 default:
11551 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11552 return;
11555 return;
11556 case 'O':
11557 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11558 if (ASSEMBLER_DIALECT == ASM_ATT)
11560 switch (GET_MODE (x))
11562 case HImode: putc ('w', file); break;
11563 case SImode:
11564 case SFmode: putc ('l', file); break;
11565 case DImode:
11566 case DFmode: putc ('q', file); break;
11567 default: gcc_unreachable ();
11569 putc ('.', file);
11571 #endif
11572 return;
11573 case 'C':
11574 if (!COMPARISON_P (x))
11576 output_operand_lossage ("operand is neither a constant nor a "
11577 "condition code, invalid operand code "
11578 "'C'");
11579 return;
11581 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
11582 return;
11583 case 'F':
11584 if (!COMPARISON_P (x))
11586 output_operand_lossage ("operand is neither a constant nor a "
11587 "condition code, invalid operand code "
11588 "'F'");
11589 return;
11591 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11592 if (ASSEMBLER_DIALECT == ASM_ATT)
11593 putc ('.', file);
11594 #endif
11595 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
11596 return;
11598 /* Like above, but reverse condition */
11599 case 'c':
11600 /* Check to see if argument to %c is really a constant
11601 and not a condition code which needs to be reversed. */
11602 if (!COMPARISON_P (x))
11604 output_operand_lossage ("operand is neither a constant nor a "
11605 "condition code, invalid operand "
11606 "code 'c'");
11607 return;
11609 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
11610 return;
11611 case 'f':
11612 if (!COMPARISON_P (x))
11614 output_operand_lossage ("operand is neither a constant nor a "
11615 "condition code, invalid operand "
11616 "code 'f'");
11617 return;
11619 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11620 if (ASSEMBLER_DIALECT == ASM_ATT)
11621 putc ('.', file);
11622 #endif
11623 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
11624 return;
11626 case 'E':
11627 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
11628 return;
11630 case 'e':
11631 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
11632 return;
11634 case 'H':
11635 /* It doesn't actually matter what mode we use here, as we're
11636 only going to use this for printing. */
11637 x = adjust_address_nv (x, DImode, 8);
11638 break;
11640 case '+':
11642 rtx x;
11644 if (!optimize
11645 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
11646 return;
11648 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
11649 if (x)
11651 int pred_val = INTVAL (XEXP (x, 0));
11653 if (pred_val < REG_BR_PROB_BASE * 45 / 100
11654 || pred_val > REG_BR_PROB_BASE * 55 / 100)
11656 int taken = pred_val > REG_BR_PROB_BASE / 2;
11657 int cputaken = final_forward_branch_p (current_output_insn) == 0;
11659 /* Emit hints only in cases where the default branch prediction
11660 heuristics would fail.  */
11661 if (taken != cputaken)
11663 /* We use 3e (DS) prefix for taken branches and
11664 2e (CS) prefix for not taken branches. */
11665 if (taken)
11666 fputs ("ds ; ", file);
11667 else
11668 fputs ("cs ; ", file);
11672 return;
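/* Editor's worked example (values assumed; REG_BR_PROB_BASE is 10000):
   a forward conditional branch carrying a REG_BR_PROB note of 9000 is
   predicted taken (taken = 1), while the hardware's static rule
   predicts forward branches not taken (cputaken = 0), so a "ds ; "
   prefix is emitted; a note of 5200 falls inside the 45%-55% band and
   produces no hint at all.  */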
11675 case 'Y':
11676 switch (GET_CODE (x))
11678 case NE:
11679 fputs ("neq", file);
11680 break;
11681 case EQ:
11682 fputs ("eq", file);
11683 break;
11684 case GE:
11685 case GEU:
11686 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11687 break;
11688 case GT:
11689 case GTU:
11690 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11691 break;
11692 case LE:
11693 case LEU:
11694 fputs ("le", file);
11695 break;
11696 case LT:
11697 case LTU:
11698 fputs ("lt", file);
11699 break;
11700 case UNORDERED:
11701 fputs ("unord", file);
11702 break;
11703 case ORDERED:
11704 fputs ("ord", file);
11705 break;
11706 case UNEQ:
11707 fputs ("ueq", file);
11708 break;
11709 case UNGE:
11710 fputs ("nlt", file);
11711 break;
11712 case UNGT:
11713 fputs ("nle", file);
11714 break;
11715 case UNLE:
11716 fputs ("ule", file);
11717 break;
11718 case UNLT:
11719 fputs ("ult", file);
11720 break;
11721 case LTGT:
11722 fputs ("une", file);
11723 break;
11724 default:
11725 output_operand_lossage ("operand is not a condition code, invalid operand code 'Y'");
11726 return;
11728 return;
11730 case ';':
11731 #if TARGET_MACHO
11732 fputs (" ; ", file);
11733 #else
11734 fputc (' ', file);
11735 #endif
11736 return;
11738 default:
11739 output_operand_lossage ("invalid operand code '%c'", code);
11743 if (REG_P (x))
11744 print_reg (x, code, file);
11746 else if (MEM_P (x))
11748 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
11749 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11750 && GET_MODE (x) != BLKmode)
11752 const char * size;
11753 switch (GET_MODE_SIZE (GET_MODE (x)))
11755 case 1: size = "BYTE"; break;
11756 case 2: size = "WORD"; break;
11757 case 4: size = "DWORD"; break;
11758 case 8: size = "QWORD"; break;
11759 case 12: size = "XWORD"; break;
11760 case 16:
11761 if (GET_MODE (x) == XFmode)
11762 size = "XWORD";
11763 else
11764 size = "XMMWORD";
11765 break;
11766 default:
11767 gcc_unreachable ();
11770 /* Check for explicit size override (codes 'b', 'w' and 'k') */
11771 if (code == 'b')
11772 size = "BYTE";
11773 else if (code == 'w')
11774 size = "WORD";
11775 else if (code == 'k')
11776 size = "DWORD";
11778 fputs (size, file);
11779 fputs (" PTR ", file);
11782 x = XEXP (x, 0);
11783 /* Avoid (%rip) for call operands. */
11784 if (CONSTANT_ADDRESS_P (x) && code == 'P'
11785 && !CONST_INT_P (x))
11786 output_addr_const (file, x);
11787 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
11788 output_operand_lossage ("invalid constraints for operand");
11789 else
11790 output_address (x);
11793 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
11795 REAL_VALUE_TYPE r;
11796 long l;
11798 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11799 REAL_VALUE_TO_TARGET_SINGLE (r, l);
11801 if (ASSEMBLER_DIALECT == ASM_ATT)
11802 putc ('$', file);
11803 fprintf (file, "0x%08lx", (long unsigned int) l);
11806 /* These float cases don't actually occur as immediate operands. */
11807 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
11809 char dstr[30];
11811 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11812 fprintf (file, "%s", dstr);
11815 else if (GET_CODE (x) == CONST_DOUBLE
11816 && GET_MODE (x) == XFmode)
11818 char dstr[30];
11820 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11821 fprintf (file, "%s", dstr);
11824 else
11826 /* We have patterns that allow zero sets of memory, for instance.
11827 In 64-bit mode, we should probably support all 8-byte vectors,
11828 since we can in fact encode that into an immediate. */
11829 if (GET_CODE (x) == CONST_VECTOR)
11831 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
11832 x = const0_rtx;
11835 if (code != 'P')
11837 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
11839 if (ASSEMBLER_DIALECT == ASM_ATT)
11840 putc ('$', file);
11842 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
11843 || GET_CODE (x) == LABEL_REF)
11845 if (ASSEMBLER_DIALECT == ASM_ATT)
11846 putc ('$', file);
11847 else
11848 fputs ("OFFSET FLAT:", file);
11851 if (CONST_INT_P (x))
11852 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11853 else if (flag_pic)
11854 output_pic_addr_const (file, x, code);
11855 else
11856 output_addr_const (file, x);
11860 /* Print a memory operand whose address is ADDR. */
11862 void
11863 print_operand_address (FILE *file, rtx addr)
11865 struct ix86_address parts;
11866 rtx base, index, disp;
11867 int scale;
11868 int ok = ix86_decompose_address (addr, &parts);
11870 gcc_assert (ok);
11872 base = parts.base;
11873 index = parts.index;
11874 disp = parts.disp;
11875 scale = parts.scale;
11877 switch (parts.seg)
11879 case SEG_DEFAULT:
11880 break;
11881 case SEG_FS:
11882 case SEG_GS:
11883 if (ASSEMBLER_DIALECT == ASM_ATT)
11884 putc ('%', file);
11885 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11886 break;
11887 default:
11888 gcc_unreachable ();
11891 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode.  */
11892 if (TARGET_64BIT && !base && !index)
11894 rtx symbol = disp;
11896 if (GET_CODE (disp) == CONST
11897 && GET_CODE (XEXP (disp, 0)) == PLUS
11898 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11899 symbol = XEXP (XEXP (disp, 0), 0);
11901 if (GET_CODE (symbol) == LABEL_REF
11902 || (GET_CODE (symbol) == SYMBOL_REF
11903 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11904 base = pc_rtx;
11906 if (!base && !index)
11908 /* A displacement-only address requires special attention.  */
11910 if (CONST_INT_P (disp))
11912 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11913 fputs ("ds:", file);
11914 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11916 else if (flag_pic)
11917 output_pic_addr_const (file, disp, 0);
11918 else
11919 output_addr_const (file, disp);
11921 else
11923 if (ASSEMBLER_DIALECT == ASM_ATT)
11925 if (disp)
11927 if (flag_pic)
11928 output_pic_addr_const (file, disp, 0);
11929 else if (GET_CODE (disp) == LABEL_REF)
11930 output_asm_label (disp);
11931 else
11932 output_addr_const (file, disp);
11935 putc ('(', file);
11936 if (base)
11937 print_reg (base, 0, file);
11938 if (index)
11940 putc (',', file);
11941 print_reg (index, 0, file);
11942 if (scale != 1)
11943 fprintf (file, ",%d", scale);
11945 putc (')', file);
11947 else
11949 rtx offset = NULL_RTX;
11951 if (disp)
11953 /* Pull out the offset of a symbol; print any symbol itself. */
11954 if (GET_CODE (disp) == CONST
11955 && GET_CODE (XEXP (disp, 0)) == PLUS
11956 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11958 offset = XEXP (XEXP (disp, 0), 1);
11959 disp = gen_rtx_CONST (VOIDmode,
11960 XEXP (XEXP (disp, 0), 0));
11963 if (flag_pic)
11964 output_pic_addr_const (file, disp, 0);
11965 else if (GET_CODE (disp) == LABEL_REF)
11966 output_asm_label (disp);
11967 else if (CONST_INT_P (disp))
11968 offset = disp;
11969 else
11970 output_addr_const (file, disp);
11973 putc ('[', file);
11974 if (base)
11976 print_reg (base, 0, file);
11977 if (offset)
11979 if (INTVAL (offset) >= 0)
11980 putc ('+', file);
11981 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11984 else if (offset)
11985 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11986 else
11987 putc ('0', file);
11989 if (index)
11991 putc ('+', file);
11992 print_reg (index, 0, file);
11993 if (scale != 1)
11994 fprintf (file, "*%d", scale);
11996 putc (']', file);
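/* Editor's illustration (hypothetical operand): for an address with
   base %eax, index %ebx, scale 4 and displacement 12, the AT&T branch
   above prints "12(%eax,%ebx,4)" while the Intel branch prints
   "[eax+12+ebx*4]".  */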
12001 bool
12002 output_addr_const_extra (FILE *file, rtx x)
12004 rtx op;
12006 if (GET_CODE (x) != UNSPEC)
12007 return false;
12009 op = XVECEXP (x, 0, 0);
12010 switch (XINT (x, 1))
12012 case UNSPEC_GOTTPOFF:
12013 output_addr_const (file, op);
12014 /* FIXME: This might be @TPOFF in Sun ld. */
12015 fputs ("@GOTTPOFF", file);
12016 break;
12017 case UNSPEC_TPOFF:
12018 output_addr_const (file, op);
12019 fputs ("@TPOFF", file);
12020 break;
12021 case UNSPEC_NTPOFF:
12022 output_addr_const (file, op);
12023 if (TARGET_64BIT)
12024 fputs ("@TPOFF", file);
12025 else
12026 fputs ("@NTPOFF", file);
12027 break;
12028 case UNSPEC_DTPOFF:
12029 output_addr_const (file, op);
12030 fputs ("@DTPOFF", file);
12031 break;
12032 case UNSPEC_GOTNTPOFF:
12033 output_addr_const (file, op);
12034 if (TARGET_64BIT)
12035 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12036 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
12037 else
12038 fputs ("@GOTNTPOFF", file);
12039 break;
12040 case UNSPEC_INDNTPOFF:
12041 output_addr_const (file, op);
12042 fputs ("@INDNTPOFF", file);
12043 break;
12044 #if TARGET_MACHO
12045 case UNSPEC_MACHOPIC_OFFSET:
12046 output_addr_const (file, op);
12047 putc ('-', file);
12048 machopic_output_function_base_name (file);
12049 break;
12050 #endif
12052 default:
12053 return false;
12056 return true;
12059 /* Split one or more DImode RTL references into pairs of SImode
12060 references. The RTL can be REG, offsettable MEM, integer constant, or
12061 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
12062 split and "num" is its length. lo_half and hi_half are output arrays
12063 that parallel "operands". */
12065 void
12066 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
12068 while (num--)
12070 rtx op = operands[num];
12072 /* simplify_subreg refuses to split volatile memory addresses,
12073 but we still have to handle them.  */
12074 if (MEM_P (op))
12076 lo_half[num] = adjust_address (op, SImode, 0);
12077 hi_half[num] = adjust_address (op, SImode, 4);
12079 else
12081 lo_half[num] = simplify_gen_subreg (SImode, op,
12082 GET_MODE (op) == VOIDmode
12083 ? DImode : GET_MODE (op), 0);
12084 hi_half[num] = simplify_gen_subreg (SImode, op,
12085 GET_MODE (op) == VOIDmode
12086 ? DImode : GET_MODE (op), 4);
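/* Editor's sketch of the effect (hypothetical operands): for a
   (mem:DI addr) operand, split_di yields (mem:SI addr) and
   (mem:SI addr+4); for a (reg:DI N) pseudo it yields the SImode
   subregs at byte offsets 0 and 4.  */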
12090 /* Split one or more TImode RTL references into pairs of DImode
12091 references. The RTL can be REG, offsettable MEM, integer constant, or
12092 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
12093 split and "num" is its length. lo_half and hi_half are output arrays
12094 that parallel "operands". */
12096 void
12097 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
12099 while (num--)
12101 rtx op = operands[num];
12103 /* simplify_subreg refuses to split volatile memory addresses, but we
12104 still have to handle them.  */
12105 if (MEM_P (op))
12107 lo_half[num] = adjust_address (op, DImode, 0);
12108 hi_half[num] = adjust_address (op, DImode, 8);
12110 else
12112 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
12113 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
12118 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
12119 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
12120 is the expression of the binary operation. The output may either be
12121 emitted here, or returned to the caller, like all output_* functions.
12123 There is no guarantee that the operands are the same mode, as they
12124 might be within FLOAT or FLOAT_EXTEND expressions. */
12126 #ifndef SYSV386_COMPAT
12127 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
12128 wants to fix the assemblers because that causes incompatibility
12129 with gcc. No-one wants to fix gcc because that causes
12130 incompatibility with assemblers... You can use the option of
12131 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
12132 #define SYSV386_COMPAT 1
12133 #endif
12135 const char *
12136 output_387_binary_op (rtx insn, rtx *operands)
12138 static char buf[40];
12139 const char *p;
12140 const char *ssep;
12141 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
12143 #ifdef ENABLE_CHECKING
12144 /* Even if we do not want to check the inputs, this documents the input
12145 constraints, which helps in understanding the following code.  */
12146 if (STACK_REG_P (operands[0])
12147 && ((REG_P (operands[1])
12148 && REGNO (operands[0]) == REGNO (operands[1])
12149 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
12150 || (REG_P (operands[2])
12151 && REGNO (operands[0]) == REGNO (operands[2])
12152 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
12153 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
12154 ; /* ok */
12155 else
12156 gcc_assert (is_sse);
12157 #endif
12159 switch (GET_CODE (operands[3]))
12161 case PLUS:
12162 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12163 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12164 p = "fiadd";
12165 else
12166 p = "fadd";
12167 ssep = "vadd";
12168 break;
12170 case MINUS:
12171 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12172 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12173 p = "fisub";
12174 else
12175 p = "fsub";
12176 ssep = "vsub";
12177 break;
12179 case MULT:
12180 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12181 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12182 p = "fimul";
12183 else
12184 p = "fmul";
12185 ssep = "vmul";
12186 break;
12188 case DIV:
12189 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12190 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12191 p = "fidiv";
12192 else
12193 p = "fdiv";
12194 ssep = "vdiv";
12195 break;
12197 default:
12198 gcc_unreachable ();
12201 if (is_sse)
12203 if (TARGET_AVX)
12205 strcpy (buf, ssep);
12206 if (GET_MODE (operands[0]) == SFmode)
12207 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
12208 else
12209 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
12211 else
12213 strcpy (buf, ssep + 1);
12214 if (GET_MODE (operands[0]) == SFmode)
12215 strcat (buf, "ss\t{%2, %0|%0, %2}");
12216 else
12217 strcat (buf, "sd\t{%2, %0|%0, %2}");
12219 return buf;
12221 strcpy (buf, p);
12223 switch (GET_CODE (operands[3]))
12225 case MULT:
12226 case PLUS:
12227 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
12229 rtx temp = operands[2];
12230 operands[2] = operands[1];
12231 operands[1] = temp;
12234 /* We know that operands[0] == operands[1].  */
12236 if (MEM_P (operands[2]))
12238 p = "%Z2\t%2";
12239 break;
12242 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
12244 if (STACK_TOP_P (operands[0]))
12245 /* How is it that we are storing to a dead operand[2]?
12246 Well, presumably operands[1] is dead too. We can't
12247 store the result to st(0) as st(0) gets popped on this
12248 instruction. Instead store to operands[2] (which I
12249 think has to be st(1)). st(1) will be popped later.
12250 gcc <= 2.8.1 didn't have this check and generated
12251 assembly code that the Unixware assembler rejected. */
12252 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
12253 else
12254 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
12255 break;
12258 if (STACK_TOP_P (operands[0]))
12259 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
12260 else
12261 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
12262 break;
12264 case MINUS:
12265 case DIV:
12266 if (MEM_P (operands[1]))
12268 p = "r%Z1\t%1";
12269 break;
12272 if (MEM_P (operands[2]))
12274 p = "%Z2\t%2";
12275 break;
12278 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
12280 #if SYSV386_COMPAT
12281 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
12282 derived assemblers, confusingly reverse the direction of
12283 the operation for fsub{r} and fdiv{r} when the
12284 destination register is not st(0). The Intel assembler
12285 doesn't have this brain damage. Read !SYSV386_COMPAT to
12286 figure out what the hardware really does. */
12287 if (STACK_TOP_P (operands[0]))
12288 p = "{p\t%0, %2|rp\t%2, %0}";
12289 else
12290 p = "{rp\t%2, %0|p\t%0, %2}";
12291 #else
12292 if (STACK_TOP_P (operands[0]))
12293 /* As above for fmul/fadd, we can't store to st(0). */
12294 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
12295 else
12296 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
12297 #endif
12298 break;
12301 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
12303 #if SYSV386_COMPAT
12304 if (STACK_TOP_P (operands[0]))
12305 p = "{rp\t%0, %1|p\t%1, %0}";
12306 else
12307 p = "{p\t%1, %0|rp\t%0, %1}";
12308 #else
12309 if (STACK_TOP_P (operands[0]))
12310 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
12311 else
12312 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
12313 #endif
12314 break;
12317 if (STACK_TOP_P (operands[0]))
12319 if (STACK_TOP_P (operands[1]))
12320 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
12321 else
12322 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
12323 break;
12325 else if (STACK_TOP_P (operands[1]))
12327 #if SYSV386_COMPAT
12328 p = "{\t%1, %0|r\t%0, %1}";
12329 #else
12330 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
12331 #endif
12333 else
12335 #if SYSV386_COMPAT
12336 p = "{r\t%2, %0|\t%0, %2}";
12337 #else
12338 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
12339 #endif
12341 break;
12343 default:
12344 gcc_unreachable ();
12347 strcat (buf, p);
12348 return buf;
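/* Editor's examples of the templates produced above (hypothetical
   operands): for st(0) = st(0) + DFmode memory, buf becomes
   "fadd%Z2\t%2", which assembles as e.g. "faddl 8(%esp)"; for an AVX
   SFmode add it becomes "vaddss\t{%2, %1, %0|%0, %1, %2}".  */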
12351 /* Return the mode needed for entity ENTITY in the optimize_mode_switching pass.  */
12353 int
12354 ix86_mode_needed (int entity, rtx insn)
12356 enum attr_i387_cw mode;
12358 /* The mode UNINITIALIZED is used to store the control word after a
12359 function call or ASM pattern. The mode ANY specifies that the function
12360 has no requirements on the control word and makes no changes in the
12361 bits we are interested in.  */
12363 if (CALL_P (insn)
12364 || (NONJUMP_INSN_P (insn)
12365 && (asm_noperands (PATTERN (insn)) >= 0
12366 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
12367 return I387_CW_UNINITIALIZED;
12369 if (recog_memoized (insn) < 0)
12370 return I387_CW_ANY;
12372 mode = get_attr_i387_cw (insn);
12374 switch (entity)
12376 case I387_TRUNC:
12377 if (mode == I387_CW_TRUNC)
12378 return mode;
12379 break;
12381 case I387_FLOOR:
12382 if (mode == I387_CW_FLOOR)
12383 return mode;
12384 break;
12386 case I387_CEIL:
12387 if (mode == I387_CW_CEIL)
12388 return mode;
12389 break;
12391 case I387_MASK_PM:
12392 if (mode == I387_CW_MASK_PM)
12393 return mode;
12394 break;
12396 default:
12397 gcc_unreachable ();
12400 return I387_CW_ANY;
12403 /* Output code to initialize control word copies used by trunc?f?i and
12404 rounding patterns. MODE selects the new control word to set up;
12405 the current control word is saved in a scratch stack slot.  */
12407 void
12408 emit_i387_cw_initialization (int mode)
12410 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
12411 rtx new_mode;
12413 enum ix86_stack_slot slot;
12415 rtx reg = gen_reg_rtx (HImode);
12417 emit_insn (gen_x86_fnstcw_1 (stored_mode));
12418 emit_move_insn (reg, copy_rtx (stored_mode));
12420 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
12421 || optimize_function_for_size_p (cfun))
12423 switch (mode)
12425 case I387_CW_TRUNC:
12426 /* round toward zero (truncate) */
12427 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
12428 slot = SLOT_CW_TRUNC;
12429 break;
12431 case I387_CW_FLOOR:
12432 /* round down toward -oo */
12433 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12434 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
12435 slot = SLOT_CW_FLOOR;
12436 break;
12438 case I387_CW_CEIL:
12439 /* round up toward +oo */
12440 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12441 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
12442 slot = SLOT_CW_CEIL;
12443 break;
12445 case I387_CW_MASK_PM:
12446 /* mask precision exception for nearbyint() */
12447 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12448 slot = SLOT_CW_MASK_PM;
12449 break;
12451 default:
12452 gcc_unreachable ();
12455 else
12457 switch (mode)
12459 case I387_CW_TRUNC:
12460 /* round toward zero (truncate) */
12461 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
12462 slot = SLOT_CW_TRUNC;
12463 break;
12465 case I387_CW_FLOOR:
12466 /* round down toward -oo */
12467 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
12468 slot = SLOT_CW_FLOOR;
12469 break;
12471 case I387_CW_CEIL:
12472 /* round up toward +oo */
12473 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
12474 slot = SLOT_CW_CEIL;
12475 break;
12477 case I387_CW_MASK_PM:
12478 /* mask precision exception for nearbyint() */
12479 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12480 slot = SLOT_CW_MASK_PM;
12481 break;
12483 default:
12484 gcc_unreachable ();
12488 gcc_assert (slot < MAX_386_STACK_LOCALS);
12490 new_mode = assign_386_stack_local (HImode, slot);
12491 emit_move_insn (new_mode, reg);
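/* Editor's note on the magic numbers above: in the x87 control word,
   bits 10-11 form the rounding-control field (00 = nearest, 01 = down,
   10 = up, 11 = truncate) and bit 5 is the precision-exception mask,
   so e.g. or-ing in 0x0c00 selects truncation and or-ing in 0x0020
   masks the precision exception.  */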
12494 /* Output code for INSN to convert a float to a signed int. OPERANDS
12495 are the insn operands. The output may be [HSD]Imode and the input
12496 operand may be [SDX]Fmode. */
12498 const char *
12499 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
12501 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12502 int dimode_p = GET_MODE (operands[0]) == DImode;
12503 int round_mode = get_attr_i387_cw (insn);
12505 /* Jump through a hoop or two for DImode, since the hardware has no
12506 non-popping instruction. We used to do this a different way, but
12507 that was somewhat fragile and broke with post-reload splitters. */
12508 if ((dimode_p || fisttp) && !stack_top_dies)
12509 output_asm_insn ("fld\t%y1", operands);
12511 gcc_assert (STACK_TOP_P (operands[1]));
12512 gcc_assert (MEM_P (operands[0]));
12513 gcc_assert (GET_MODE (operands[1]) != TFmode);
12515 if (fisttp)
12516 output_asm_insn ("fisttp%Z0\t%0", operands);
12517 else
12519 if (round_mode != I387_CW_ANY)
12520 output_asm_insn ("fldcw\t%3", operands);
12521 if (stack_top_dies || dimode_p)
12522 output_asm_insn ("fistp%Z0\t%0", operands);
12523 else
12524 output_asm_insn ("fist%Z0\t%0", operands);
12525 if (round_mode != I387_CW_ANY)
12526 output_asm_insn ("fldcw\t%2", operands);
12529 return "";
12532 /* Output code for x87 ffreep insn. The OPNO argument, which may only
12533 have the values zero or one, indicates the ffreep insn's operand
12534 from the OPERANDS array. */
12536 static const char *
12537 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
12539 if (TARGET_USE_FFREEP)
12540 #if HAVE_AS_IX86_FFREEP
12541 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
12542 #else
12544 static char retval[] = ".word\t0xc_df";
12545 int regno = REGNO (operands[opno]);
12547 gcc_assert (FP_REGNO_P (regno));
12549 retval[9] = '0' + (regno - FIRST_STACK_REG);
12550 return retval;
12552 #endif
12554 return opno ? "fstp\t%y1" : "fstp\t%y0";
12558 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
12559 should be used. UNORDERED_P is true when fucom should be used. */
12561 const char *
12562 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
12564 int stack_top_dies;
12565 rtx cmp_op0, cmp_op1;
12566 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
12568 if (eflags_p)
12570 cmp_op0 = operands[0];
12571 cmp_op1 = operands[1];
12573 else
12575 cmp_op0 = operands[1];
12576 cmp_op1 = operands[2];
12579 if (is_sse)
12581 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
12582 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
12583 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
12584 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
12586 if (GET_MODE (operands[0]) == SFmode)
12587 if (unordered_p)
12588 return &ucomiss[TARGET_AVX ? 0 : 1];
12589 else
12590 return &comiss[TARGET_AVX ? 0 : 1];
12591 else
12592 if (unordered_p)
12593 return &ucomisd[TARGET_AVX ? 0 : 1];
12594 else
12595 return &comisd[TARGET_AVX ? 0 : 1];
12598 gcc_assert (STACK_TOP_P (cmp_op0));
12600 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12602 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
12604 if (stack_top_dies)
12606 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
12607 return output_387_ffreep (operands, 1);
12609 else
12610 return "ftst\n\tfnstsw\t%0";
12613 if (STACK_REG_P (cmp_op1)
12614 && stack_top_dies
12615 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
12616 && REGNO (cmp_op1) != FIRST_STACK_REG)
12618 /* If the top of the 387 stack dies, and the other operand is
12619 also a stack register that dies, then this must be an
12620 `fcompp' float compare.  */
12622 if (eflags_p)
12624 /* There is no double popping fcomi variant. Fortunately,
12625 eflags is immune from the fstp's cc clobbering. */
12626 if (unordered_p)
12627 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
12628 else
12629 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
12630 return output_387_ffreep (operands, 0);
12632 else
12634 if (unordered_p)
12635 return "fucompp\n\tfnstsw\t%0";
12636 else
12637 return "fcompp\n\tfnstsw\t%0";
12640 else
12642 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
12644 static const char * const alt[16] =
12646 "fcom%Z2\t%y2\n\tfnstsw\t%0",
12647 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
12648 "fucom%Z2\t%y2\n\tfnstsw\t%0",
12649 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
12651 "ficom%Z2\t%y2\n\tfnstsw\t%0",
12652 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
12653 NULL,
12654 NULL,
12656 "fcomi\t{%y1, %0|%0, %y1}",
12657 "fcomip\t{%y1, %0|%0, %y1}",
12658 "fucomi\t{%y1, %0|%0, %y1}",
12659 "fucomip\t{%y1, %0|%0, %y1}",
12661 NULL,
12662 NULL,
12663 NULL,
12664 NULL
12667 int mask;
12668 const char *ret;
12670 mask = eflags_p << 3;
12671 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
12672 mask |= unordered_p << 1;
12673 mask |= stack_top_dies;
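/* Editor's worked example: eflags_p = 1, a floating cmp_op1 (intmode
   bit 0), unordered_p = 1 and a dying stack top give
   mask = 8 + 0 + 2 + 1 = 11, selecting "fucomip\t{%y1, %0|%0, %y1}"
   from the table above.  */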
12675 gcc_assert (mask < 16);
12676 ret = alt[mask];
12677 gcc_assert (ret);
12679 return ret;
12683 void
12684 ix86_output_addr_vec_elt (FILE *file, int value)
12686 const char *directive = ASM_LONG;
12688 #ifdef ASM_QUAD
12689 if (TARGET_64BIT)
12690 directive = ASM_QUAD;
12691 #else
12692 gcc_assert (!TARGET_64BIT);
12693 #endif
12695 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
12698 void
12699 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12701 const char *directive = ASM_LONG;
12703 #ifdef ASM_QUAD
12704 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12705 directive = ASM_QUAD;
12706 #else
12707 gcc_assert (!TARGET_64BIT);
12708 #endif
12709 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
12710 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12711 fprintf (file, "%s%s%d-%s%d\n",
12712 directive, LPREFIX, value, LPREFIX, rel);
12713 else if (HAVE_AS_GOTOFF_IN_DATA)
12714 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
12715 #if TARGET_MACHO
12716 else if (TARGET_MACHO)
12718 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
12719 machopic_output_function_base_name (file);
12720 fprintf(file, "\n");
12722 #endif
12723 else
12724 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
12725 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
12728 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
12729 for the target. */
12731 void
12732 ix86_expand_clear (rtx dest)
12734 rtx tmp;
12736 /* We play register width games, which are only valid after reload. */
12737 gcc_assert (reload_completed);
12739 /* Avoid HImode and its attendant prefix byte. */
12740 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
12741 dest = gen_rtx_REG (SImode, REGNO (dest));
12742 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
12744 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
12745 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
12747 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12748 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12751 emit_insn (tmp);
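/* Editor's example: clearing (reg:HI ax) after reload is widened to
   SImode, avoiding the 66h operand-size prefix, and (unless
   TARGET_USE_MOV0 is set and we are not optimizing for speed) emits
   "xorl %eax, %eax" wrapped in a PARALLEL with the FLAGS_REG clobber
   built above.  */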
12754 /* X is an unchanging MEM. If it is a constant pool reference, return
12755 the constant pool rtx, else NULL. */
12757 rtx
12758 maybe_get_pool_constant (rtx x)
12760 x = ix86_delegitimize_address (XEXP (x, 0));
12762 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
12763 return get_pool_constant (x);
12765 return NULL_RTX;
12768 void
12769 ix86_expand_move (enum machine_mode mode, rtx operands[])
12771 rtx op0, op1;
12772 enum tls_model model;
12774 op0 = operands[0];
12775 op1 = operands[1];
12777 if (GET_CODE (op1) == SYMBOL_REF)
12779 model = SYMBOL_REF_TLS_MODEL (op1);
12780 if (model)
12782 op1 = legitimize_tls_address (op1, model, true);
12783 op1 = force_operand (op1, op0);
12784 if (op1 == op0)
12785 return;
12787 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12788 && SYMBOL_REF_DLLIMPORT_P (op1))
12789 op1 = legitimize_dllimport_symbol (op1, false);
12791 else if (GET_CODE (op1) == CONST
12792 && GET_CODE (XEXP (op1, 0)) == PLUS
12793 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
12795 rtx addend = XEXP (XEXP (op1, 0), 1);
12796 rtx symbol = XEXP (XEXP (op1, 0), 0);
12797 rtx tmp = NULL;
12799 model = SYMBOL_REF_TLS_MODEL (symbol);
12800 if (model)
12801 tmp = legitimize_tls_address (symbol, model, true);
12802 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12803 && SYMBOL_REF_DLLIMPORT_P (symbol))
12804 tmp = legitimize_dllimport_symbol (symbol, true);
12806 if (tmp)
12808 tmp = force_operand (tmp, NULL);
12809 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
12810 op0, 1, OPTAB_DIRECT);
12811 if (tmp == op0)
12812 return;
12816 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
12818 if (TARGET_MACHO && !TARGET_64BIT)
12820 #if TARGET_MACHO
12821 if (MACHOPIC_PURE)
12823 rtx temp = ((reload_in_progress
12824 || ((op0 && REG_P (op0))
12825 && mode == Pmode))
12826 ? op0 : gen_reg_rtx (Pmode));
12827 op1 = machopic_indirect_data_reference (op1, temp);
12828 op1 = machopic_legitimize_pic_address (op1, mode,
12829 temp == op1 ? 0 : temp);
12831 else if (MACHOPIC_INDIRECT)
12832 op1 = machopic_indirect_data_reference (op1, 0);
12833 if (op0 == op1)
12834 return;
12835 #endif
12837 else
12839 if (MEM_P (op0))
12840 op1 = force_reg (Pmode, op1);
12841 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
12843 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
12844 op1 = legitimize_pic_address (op1, reg);
12845 if (op0 == op1)
12846 return;
12850 else
12852 if (MEM_P (op0)
12853 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12854 || !push_operand (op0, mode))
12855 && MEM_P (op1))
12856 op1 = force_reg (mode, op1);
12858 if (push_operand (op0, mode)
12859 && ! general_no_elim_operand (op1, mode))
12860 op1 = copy_to_mode_reg (mode, op1);
12862 /* Force large constants in 64-bit compilation into a register
12863 to get them CSEd.  */
12864 if (can_create_pseudo_p ()
12865 && (mode == DImode) && TARGET_64BIT
12866 && immediate_operand (op1, mode)
12867 && !x86_64_zext_immediate_operand (op1, VOIDmode)
12868 && !register_operand (op0, mode)
12869 && optimize)
12870 op1 = copy_to_mode_reg (mode, op1);
12872 if (can_create_pseudo_p ()
12873 && FLOAT_MODE_P (mode)
12874 && GET_CODE (op1) == CONST_DOUBLE)
12876 /* If we are loading a floating point constant to a register,
12877 force the value to memory now, since we'll get better code
12878 out the back end. */
12880 op1 = validize_mem (force_const_mem (mode, op1));
12881 if (!register_operand (op0, mode))
12883 rtx temp = gen_reg_rtx (mode);
12884 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12885 emit_move_insn (op0, temp);
12886 return;
12891 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12894 void
12895 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12897 rtx op0 = operands[0], op1 = operands[1];
12898 unsigned int align = GET_MODE_ALIGNMENT (mode);
12900 /* Force constants other than zero into memory. We do not know how
12901 the instructions used to build constants modify the upper 64 bits
12902 of the register; once we have that information we may be able
12903 to handle some of them more efficiently.  */
12904 if (can_create_pseudo_p ()
12905 && register_operand (op0, mode)
12906 && (CONSTANT_P (op1)
12907 || (GET_CODE (op1) == SUBREG
12908 && CONSTANT_P (SUBREG_REG (op1))))
12909 && standard_sse_constant_p (op1) <= 0)
12910 op1 = validize_mem (force_const_mem (mode, op1));
12912 /* We need to check memory alignment for SSE mode since attributes
12913 can make operands unaligned.  */
12914 if (can_create_pseudo_p ()
12915 && SSE_REG_MODE_P (mode)
12916 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12917 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12919 rtx tmp[2];
12921 /* ix86_expand_vector_move_misalign() does not like constants ... */
12922 if (CONSTANT_P (op1)
12923 || (GET_CODE (op1) == SUBREG
12924 && CONSTANT_P (SUBREG_REG (op1))))
12925 op1 = validize_mem (force_const_mem (mode, op1));
12927 /* ... nor both arguments in memory. */
12928 if (!register_operand (op0, mode)
12929 && !register_operand (op1, mode))
12930 op1 = force_reg (mode, op1);
12932 tmp[0] = op0; tmp[1] = op1;
12933 ix86_expand_vector_move_misalign (mode, tmp);
12934 return;
12937 /* If neither operand is already a register, make operand1 one.  */
12938 if (can_create_pseudo_p ()
12939 && !register_operand (op0, mode)
12940 && !register_operand (op1, mode))
12942 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
12943 return;
12946 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12949 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
12950 straight to ix86_expand_vector_move. */
12951 /* Code generation for scalar reg-reg moves of single and double precision data:
12952 if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
12953 movaps reg, reg
12954 else
12955 movss reg, reg
12956 if (x86_sse_partial_reg_dependency == true)
12957 movapd reg, reg
12958 else
12959 movsd reg, reg
12961 Code generation for scalar loads of double precision data:
12962 if (x86_sse_split_regs == true)
12963 movlpd mem, reg (gas syntax)
12964 else
12965 movsd mem, reg
12967 Code generation for unaligned packed loads of single precision data
12968 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12969 if (x86_sse_unaligned_move_optimal)
12970 movups mem, reg
12972 if (x86_sse_partial_reg_dependency == true)
12974 xorps reg, reg
12975 movlps mem, reg
12976 movhps mem+8, reg
12978 else
12980 movlps mem, reg
12981 movhps mem+8, reg
12984 Code generation for unaligned packed loads of double precision data
12985 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12986 if (x86_sse_unaligned_move_optimal)
12987 movupd mem, reg
12989 if (x86_sse_split_regs == true)
12991 movlpd mem, reg
12992 movhpd mem+8, reg
12994 else
12996 movsd mem, reg
12997 movhpd mem+8, reg
13001 void
13002 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
13004 rtx op0, op1, m;
13006 op0 = operands[0];
13007 op1 = operands[1];
13009 if (TARGET_AVX)
13011 switch (GET_MODE_CLASS (mode))
13013 case MODE_VECTOR_INT:
13014 case MODE_INT:
13015 switch (GET_MODE_SIZE (mode))
13017 case 16:
13018 op0 = gen_lowpart (V16QImode, op0);
13019 op1 = gen_lowpart (V16QImode, op1);
13020 emit_insn (gen_avx_movdqu (op0, op1));
13021 break;
13022 case 32:
13023 op0 = gen_lowpart (V32QImode, op0);
13024 op1 = gen_lowpart (V32QImode, op1);
13025 emit_insn (gen_avx_movdqu256 (op0, op1));
13026 break;
13027 default:
13028 gcc_unreachable ();
13030 break;
13031 case MODE_VECTOR_FLOAT:
13032 op0 = gen_lowpart (mode, op0);
13033 op1 = gen_lowpart (mode, op1);
13035 switch (mode)
13037 case V4SFmode:
13038 emit_insn (gen_avx_movups (op0, op1));
13039 break;
13040 case V8SFmode:
13041 emit_insn (gen_avx_movups256 (op0, op1));
13042 break;
13043 case V2DFmode:
13044 emit_insn (gen_avx_movupd (op0, op1));
13045 break;
13046 case V4DFmode:
13047 emit_insn (gen_avx_movupd256 (op0, op1));
13048 break;
13049 default:
13050 gcc_unreachable ();
13052 break;
13054 default:
13055 gcc_unreachable ();
13058 return;
13061 if (MEM_P (op1))
13063 /* If we're optimizing for size, movups is the smallest. */
13064 if (optimize_insn_for_size_p ())
13066 op0 = gen_lowpart (V4SFmode, op0);
13067 op1 = gen_lowpart (V4SFmode, op1);
13068 emit_insn (gen_sse_movups (op0, op1));
13069 return;
13072 /* ??? If we have typed data, then it would appear that using
13073 movdqu is the only way to get unaligned data loaded with
13074 integer type. */
13075 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13077 op0 = gen_lowpart (V16QImode, op0);
13078 op1 = gen_lowpart (V16QImode, op1);
13079 emit_insn (gen_sse2_movdqu (op0, op1));
13080 return;
13083 if (TARGET_SSE2 && mode == V2DFmode)
13085 rtx zero;
13087 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
13089 op0 = gen_lowpart (V2DFmode, op0);
13090 op1 = gen_lowpart (V2DFmode, op1);
13091 emit_insn (gen_sse2_movupd (op0, op1));
13092 return;
13095 /* When SSE registers are split into halves, we can avoid
13096 writing to the top half twice. */
13097 if (TARGET_SSE_SPLIT_REGS)
13099 emit_clobber (op0);
13100 zero = op0;
13102 else
13104 /* ??? Not sure about the best option for the Intel chips.
13105 The following would seem to satisfy; the register is
13106 entirely cleared, breaking the dependency chain. We
13107 then store to the upper half, with a dependency depth
13108 of one. A rumor has it that Intel recommends two movsd
13109 followed by an unpacklpd, but this is unconfirmed. And
13110 given that the dependency depth of the unpacklpd would
13111 still be one, I'm not sure why this would be better. */
13112 zero = CONST0_RTX (V2DFmode);
13115 m = adjust_address (op1, DFmode, 0);
13116 emit_insn (gen_sse2_loadlpd (op0, zero, m));
13117 m = adjust_address (op1, DFmode, 8);
13118 emit_insn (gen_sse2_loadhpd (op0, op0, m));
13120 else
13122 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
13124 op0 = gen_lowpart (V4SFmode, op0);
13125 op1 = gen_lowpart (V4SFmode, op1);
13126 emit_insn (gen_sse_movups (op0, op1));
13127 return;
13130 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
13131 emit_move_insn (op0, CONST0_RTX (mode));
13132 else
13133 emit_clobber (op0);
13135 if (mode != V4SFmode)
13136 op0 = gen_lowpart (V4SFmode, op0);
13137 m = adjust_address (op1, V2SFmode, 0);
13138 emit_insn (gen_sse_loadlps (op0, op0, m));
13139 m = adjust_address (op1, V2SFmode, 8);
13140 emit_insn (gen_sse_loadhps (op0, op0, m));
13143 else if (MEM_P (op0))
13145 /* If we're optimizing for size, movups is the smallest. */
13146 if (optimize_insn_for_size_p ())
13148 op0 = gen_lowpart (V4SFmode, op0);
13149 op1 = gen_lowpart (V4SFmode, op1);
13150 emit_insn (gen_sse_movups (op0, op1));
13151 return;
13154 /* ??? Similar to above, only less clear because of the
13155 "typeless stores" issue.  */
13156 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
13157 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13159 op0 = gen_lowpart (V16QImode, op0);
13160 op1 = gen_lowpart (V16QImode, op1);
13161 emit_insn (gen_sse2_movdqu (op0, op1));
13162 return;
13165 if (TARGET_SSE2 && mode == V2DFmode)
13167 m = adjust_address (op0, DFmode, 0);
13168 emit_insn (gen_sse2_storelpd (m, op1));
13169 m = adjust_address (op0, DFmode, 8);
13170 emit_insn (gen_sse2_storehpd (m, op1));
13172 else
13174 if (mode != V4SFmode)
13175 op1 = gen_lowpart (V4SFmode, op1);
13176 m = adjust_address (op0, V2SFmode, 0);
13177 emit_insn (gen_sse_storelps (m, op1));
13178 m = adjust_address (op0, V2SFmode, 8);
13179 emit_insn (gen_sse_storehps (m, op1));
13182 else
13183 gcc_unreachable ();
13186 /* Expand a push in MODE. This is some mode for which we do not support
13187 proper push instructions, at least from the registers that we expect
13188 the value to live in. */
13190 void
13191 ix86_expand_push (enum machine_mode mode, rtx x)
13193 rtx tmp;
13195 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
13196 GEN_INT (-GET_MODE_SIZE (mode)),
13197 stack_pointer_rtx, 1, OPTAB_DIRECT);
13198 if (tmp != stack_pointer_rtx)
13199 emit_move_insn (stack_pointer_rtx, tmp);
13201 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
13203 /* When we push an operand onto the stack, it has to be aligned at least
13204 at the function argument boundary. However, since we don't have
13205 the argument type, we can't determine the actual argument
13206 boundary.  */
13207 emit_move_insn (tmp, x);
13210 /* Helper function of ix86_fixup_binary_operands to canonicalize
13211 operand order. Returns true if the operands should be swapped. */
13213 static bool
13214 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
13215 rtx operands[])
13217 rtx dst = operands[0];
13218 rtx src1 = operands[1];
13219 rtx src2 = operands[2];
13221 /* If the operation is not commutative, we can't do anything. */
13222 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
13223 return false;
13225 /* Highest priority is that src1 should match dst. */
13226 if (rtx_equal_p (dst, src1))
13227 return false;
13228 if (rtx_equal_p (dst, src2))
13229 return true;
13231 /* Next highest priority is that immediate constants come second. */
13232 if (immediate_operand (src2, mode))
13233 return false;
13234 if (immediate_operand (src1, mode))
13235 return true;
13237 /* Lowest priority is that memory references should come second. */
13238 if (MEM_P (src2))
13239 return false;
13240 if (MEM_P (src1))
13241 return true;
13243 return false;
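/* Editor's example (hypothetical operands): for dst = (reg A),
   src1 = (const_int 5), src2 = (reg A), the predicate above returns
   true, so the caller swaps the sources and the insn matches the
   "src1 matches dst" form A = A + 5.  */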
13247 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
13248 destination to use for the operation. If different from the true
13249 destination in operands[0], a copy operation will be required. */
13251 rtx
13252 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
13253 rtx operands[])
13255 rtx dst = operands[0];
13256 rtx src1 = operands[1];
13257 rtx src2 = operands[2];
13259 /* Canonicalize operand order. */
13260 if (ix86_swap_binary_operands_p (code, mode, operands))
13262 rtx temp;
13264 /* It is invalid to swap operands of different modes. */
13265 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
13267 temp = src1;
13268 src1 = src2;
13269 src2 = temp;
13272 /* The source operands cannot both be in memory.  */
13273 if (MEM_P (src1) && MEM_P (src2))
13275 /* Optimization: Only read from memory once. */
13276 if (rtx_equal_p (src1, src2))
13278 src2 = force_reg (mode, src2);
13279 src1 = src2;
13281 else
13282 src2 = force_reg (mode, src2);
13285 /* If the destination is memory, and we do not have matching source
13286 operands, do things in registers. */
13287 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
13288 dst = gen_reg_rtx (mode);
13290 /* Source 1 cannot be a constant. */
13291 if (CONSTANT_P (src1))
13292 src1 = force_reg (mode, src1);
13294 /* Source 1 cannot be a non-matching memory. */
13295 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13296 src1 = force_reg (mode, src1);
13298 operands[1] = src1;
13299 operands[2] = src2;
13300 return dst;
13303 /* Similarly, but assume that the destination has already been
13304 set up properly. */
13306 void
13307 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
13308 enum machine_mode mode, rtx operands[])
13310 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
13311 gcc_assert (dst == operands[0]);
13314 /* Attempt to expand a binary operator. Make the expansion closer to the
13315 actual machine than just general_operand, which would allow 3 separate
13316 memory references (one output, two input) in a single insn.  */
13318 void
13319 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
13320 rtx operands[])
13322 rtx src1, src2, dst, op, clob;
13324 dst = ix86_fixup_binary_operands (code, mode, operands);
13325 src1 = operands[1];
13326 src2 = operands[2];
13328 /* Emit the instruction. */
13330 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
13331 if (reload_in_progress)
13333 /* Reload doesn't know about the flags register, and doesn't know that
13334 it doesn't want to clobber it. We can only do this with PLUS. */
13335 gcc_assert (code == PLUS);
13336 emit_insn (op);
13338 else
13340 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13341 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13344 /* Fix up the destination if needed. */
13345 if (dst != operands[0])
13346 emit_move_insn (operands[0], dst);
13349 /* Return TRUE or FALSE depending on whether the binary operator meets the
13350 appropriate constraints. */
13352 int
13353 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
13354 rtx operands[3])
13356 rtx dst = operands[0];
13357 rtx src1 = operands[1];
13358 rtx src2 = operands[2];
13360 /* The source operands cannot both be in memory.  */
13361 if (MEM_P (src1) && MEM_P (src2))
13362 return 0;
13364 /* Canonicalize operand order for commutative operators. */
13365 if (ix86_swap_binary_operands_p (code, mode, operands))
13367 rtx temp = src1;
13368 src1 = src2;
13369 src2 = temp;
13372 /* If the destination is memory, we must have a matching source operand. */
13373 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
13374 return 0;
13376 /* Source 1 cannot be a constant. */
13377 if (CONSTANT_P (src1))
13378 return 0;
13380 /* Source 1 cannot be a non-matching memory. */
13381 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13382 return 0;
13384 return 1;
13387 /* Attempt to expand a unary operator. Make the expansion closer to the
13388 actual machine than just general_operand, which would allow 2 separate
13389 memory references (one output, one input) in a single insn.  */
13391 void
13392 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
13393 rtx operands[])
13395 int matching_memory;
13396 rtx src, dst, op, clob;
13398 dst = operands[0];
13399 src = operands[1];
13401 /* If the destination is memory, and we do not have matching source
13402 operands, do things in registers. */
13403 matching_memory = 0;
13404 if (MEM_P (dst))
13406 if (rtx_equal_p (dst, src))
13407 matching_memory = 1;
13408 else
13409 dst = gen_reg_rtx (mode);
13412 /* When source operand is memory, destination must match. */
13413 if (MEM_P (src) && !matching_memory)
13414 src = force_reg (mode, src);
13416 /* Emit the instruction. */
13418 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
13419 if (reload_in_progress || code == NOT)
13421 /* Reload doesn't know about the flags register, and doesn't know that
13422 it doesn't want to clobber it. */
13423 gcc_assert (code == NOT);
13424 emit_insn (op);
13426 else
13428 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13429 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13432 /* Fix up the destination if needed. */
13433 if (dst != operands[0])
13434 emit_move_insn (operands[0], dst);
13437 #define LEA_SEARCH_THRESHOLD 12
13439 /* Search backward for a non-agu definition of register number REGNO1
13440    or register number REGNO2 in INSN's basic block until we either
13441    1. pass LEA_SEARCH_THRESHOLD instructions, or
13442    2. reach the BB boundary, or
13443    3. reach an agu definition.
13444    Return the distance between the non-agu definition point and INSN.
13445    If no definition point is found, return -1.  */
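/* As a rough illustration (not part of the original source): if INSN is

	lea	4(%ecx), %eax

   and an earlier "add %esi, %ecx" is the closest non-AGU definition of
   %ecx, the value returned is the number of real insns between the two,
   provided it stays below LEA_SEARCH_THRESHOLD.  */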
13447 static int
13448 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
13449 rtx insn)
13451 basic_block bb = BLOCK_FOR_INSN (insn);
13452 int distance = 0;
13453 df_ref *def_rec;
13454 enum attr_type insn_type;
13456 if (insn != BB_HEAD (bb))
13458 rtx prev = PREV_INSN (insn);
13459 while (prev && distance < LEA_SEARCH_THRESHOLD)
13461 if (INSN_P (prev))
13463 distance++;
13464 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13465 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13466 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13467 && (regno1 == DF_REF_REGNO (*def_rec)
13468 || regno2 == DF_REF_REGNO (*def_rec)))
13470 insn_type = get_attr_type (prev);
13471 if (insn_type != TYPE_LEA)
13472 goto done;
13475 if (prev == BB_HEAD (bb))
13476 break;
13477 prev = PREV_INSN (prev);
13481 if (distance < LEA_SEARCH_THRESHOLD)
13483 edge e;
13484 edge_iterator ei;
13485 bool simple_loop = false;
13487 FOR_EACH_EDGE (e, ei, bb->preds)
13488 if (e->src == bb)
13490 simple_loop = true;
13491 break;
13494 if (simple_loop)
13496 rtx prev = BB_END (bb);
13497 while (prev
13498 && prev != insn
13499 && distance < LEA_SEARCH_THRESHOLD)
13501 if (INSN_P (prev))
13503 distance++;
13504 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13505 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13506 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13507 && (regno1 == DF_REF_REGNO (*def_rec)
13508 || regno2 == DF_REF_REGNO (*def_rec)))
13510 insn_type = get_attr_type (prev);
13511 if (insn_type != TYPE_LEA)
13512 goto done;
13515 prev = PREV_INSN (prev);
13520 distance = -1;
13522 done:
13523 /* get_attr_type may modify recog data. We want to make sure
13524 that recog data is valid for instruction INSN, on which
13525 distance_non_agu_define is called. INSN is unchanged here. */
13526 extract_insn_cached (insn);
13527 return distance;
13530 /* Return the distance between INSN and the next insn that uses
13531 register number REGNO0 in memory address. Return -1 if no such
13532 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
13534 static int
13535 distance_agu_use (unsigned int regno0, rtx insn)
13537 basic_block bb = BLOCK_FOR_INSN (insn);
13538 int distance = 0;
13539 df_ref *def_rec;
13540 df_ref *use_rec;
13542 if (insn != BB_END (bb))
13544 rtx next = NEXT_INSN (insn);
13545 while (next && distance < LEA_SEARCH_THRESHOLD)
13547 if (INSN_P (next))
13549 distance++;
13551 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13552 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13553 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13554 && regno0 == DF_REF_REGNO (*use_rec))
13556 /* Return DISTANCE if OP0 is used in a memory
13557    address in NEXT.  */
13558 return distance;
13561 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13562 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13563 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13564 && regno0 == DF_REF_REGNO (*def_rec))
13566 /* Return -1 if OP0 is set in NEXT. */
13567 return -1;
13570 if (next == BB_END (bb))
13571 break;
13572 next = NEXT_INSN (next);
13576 if (distance < LEA_SEARCH_THRESHOLD)
13578 edge e;
13579 edge_iterator ei;
13580 bool simple_loop = false;
13582 FOR_EACH_EDGE (e, ei, bb->succs)
13583 if (e->dest == bb)
13585 simple_loop = true;
13586 break;
13589 if (simple_loop)
13591 rtx next = BB_HEAD (bb);
13592 while (next
13593 && next != insn
13594 && distance < LEA_SEARCH_THRESHOLD)
13596 if (INSN_P (next))
13598 distance++;
13600 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13601 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13602 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13603 && regno0 == DF_REF_REGNO (*use_rec))
13605 /* Return DISTANCE if OP0 is used in a memory
13606    address in NEXT.  */
13607 return distance;
13610 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13611 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13612 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13613 && regno0 == DF_REF_REGNO (*def_rec))
13615 /* Return -1 if OP0 is set in NEXT. */
13616 return -1;
13620 next = NEXT_INSN (next);
13625 return -1;
13628 /* Define this macro to tune LEA priority vs ADD; it takes effect when
13629    there is a choice between emitting LEA or ADD.
13630    Negative value: ADD is preferred over LEA
13631    Zero: Neutral
13632    Positive value: LEA is preferred over ADD.  */
13633 #define IX86_LEA_PRIORITY 2
13635 /* Return true if it is ok to optimize an ADD operation to an LEA
13636    operation to avoid consuming the flags register.  For processors
13637    like ATOM, if the destination register of the LEA holds an actual
13638    address which will be used soon, LEA is better; otherwise ADD
13639    is better.  */
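/* A worked example (illustrative, not from the original source): for
   "a = b + c", let dist_define be the distance back to the last non-AGU
   definition of b or c and dist_use the distance forward to the first
   use of a inside an address.  With IX86_LEA_PRIORITY == 2, ADD is
   chosen when no nearby AGU use exists (dist_use <= 0) or when
   dist_define + 2 < dist_use; otherwise the LEA form is kept.  */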
13641 bool
13642 ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13643 rtx insn, rtx operands[])
13645 unsigned int regno0 = true_regnum (operands[0]);
13646 unsigned int regno1 = true_regnum (operands[1]);
13647 unsigned int regno2;
13649 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
13650 return regno0 != regno1;
13652 regno2 = true_regnum (operands[2]);
13654 /* If a = b + c, (a != b && a != c), we must use the lea form.  */
13655 if (regno0 != regno1 && regno0 != regno2)
13656 return true;
13657 else
13659 int dist_define, dist_use;
13660 dist_define = distance_non_agu_define (regno1, regno2, insn);
13661 if (dist_define <= 0)
13662 return true;
13664 /* If this insn has both backward non-agu dependence and forward
13665    agu dependence, the one with the shorter distance takes effect.  */
13666 dist_use = distance_agu_use (regno0, insn);
13667 if (dist_use <= 0
13668 || (dist_define + IX86_LEA_PRIORITY) < dist_use)
13669 return false;
13671 return true;
13675 /* Return true if destination reg of SET_BODY is shift count of
13676 USE_BODY. */
13678 static bool
13679 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
13681 rtx set_dest;
13682 rtx shift_rtx;
13683 int i;
13685 /* Retrieve destination of SET_BODY. */
13686 switch (GET_CODE (set_body))
13688 case SET:
13689 set_dest = SET_DEST (set_body);
13690 if (!set_dest || !REG_P (set_dest))
13691 return false;
13692 break;
13693 case PARALLEL:
13694 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
13695 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
13696 use_body))
13697 return true;
13698 default:
13699 return false;
13700 break;
13703 /* Retrieve shift count of USE_BODY. */
13704 switch (GET_CODE (use_body))
13706 case SET:
13707 shift_rtx = XEXP (use_body, 1);
13708 break;
13709 case PARALLEL:
13710 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
13711 if (ix86_dep_by_shift_count_body (set_body,
13712 XVECEXP (use_body, 0, i)))
13713 return true;
13714 default:
13715 return false;
13716 break;
13719 if (shift_rtx
13720 && (GET_CODE (shift_rtx) == ASHIFT
13721 || GET_CODE (shift_rtx) == LSHIFTRT
13722 || GET_CODE (shift_rtx) == ASHIFTRT
13723 || GET_CODE (shift_rtx) == ROTATE
13724 || GET_CODE (shift_rtx) == ROTATERT))
13726 rtx shift_count = XEXP (shift_rtx, 1);
13728 /* Return true if shift count is dest of SET_BODY. */
13729 if (REG_P (shift_count)
13730 && true_regnum (set_dest) == true_regnum (shift_count))
13731 return true;
13734 return false;
13737 /* Return true if destination reg of SET_INSN is shift count of
13738 USE_INSN. */
13740 bool
13741 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
13743 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
13744 PATTERN (use_insn));
13747 /* Return TRUE or FALSE depending on whether the unary operator meets the
13748 appropriate constraints. */
13750 int
13751 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13752 enum machine_mode mode ATTRIBUTE_UNUSED,
13753 rtx operands[2] ATTRIBUTE_UNUSED)
13755 /* If one of operands is memory, source and destination must match. */
13756 if ((MEM_P (operands[0])
13757 || MEM_P (operands[1]))
13758 && ! rtx_equal_p (operands[0], operands[1]))
13759 return FALSE;
13760 return TRUE;
13763 /* Post-reload splitter for converting an SF or DFmode value in an
13764 SSE register into an unsigned SImode. */
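/* In outline (a sketch inferred from the code below, not a comment from
   the original source): cvttps2dq/cvttpd2dq only produce *signed*
   SImode results, so inputs >= 2^31 are handled as

	large = (value >= 2^31) ? all-ones : 0;
	value -= large & 2^31;
	result = (int) value;
	result ^= large << 31;		-- restore the stripped 2^31 bit.  */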
13766 void
13767 ix86_split_convert_uns_si_sse (rtx operands[])
13769 enum machine_mode vecmode;
13770 rtx value, large, zero_or_two31, input, two31, x;
13772 large = operands[1];
13773 zero_or_two31 = operands[2];
13774 input = operands[3];
13775 two31 = operands[4];
13776 vecmode = GET_MODE (large);
13777 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
13779 /* Load up the value into the low element. We must ensure that the other
13780 elements are valid floats -- zero is the easiest such value. */
13781 if (MEM_P (input))
13783 if (vecmode == V4SFmode)
13784 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
13785 else
13786 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
13788 else
13790 input = gen_rtx_REG (vecmode, REGNO (input));
13791 emit_move_insn (value, CONST0_RTX (vecmode));
13792 if (vecmode == V4SFmode)
13793 emit_insn (gen_sse_movss (value, value, input));
13794 else
13795 emit_insn (gen_sse2_movsd (value, value, input));
13798 emit_move_insn (large, two31);
13799 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
13801 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
13802 emit_insn (gen_rtx_SET (VOIDmode, large, x));
13804 x = gen_rtx_AND (vecmode, zero_or_two31, large);
13805 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
13807 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
13808 emit_insn (gen_rtx_SET (VOIDmode, value, x));
13810 large = gen_rtx_REG (V4SImode, REGNO (large));
13811 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
13813 x = gen_rtx_REG (V4SImode, REGNO (value));
13814 if (vecmode == V4SFmode)
13815 emit_insn (gen_sse2_cvttps2dq (x, value));
13816 else
13817 emit_insn (gen_sse2_cvttpd2dq (x, value));
13818 value = x;
13820 emit_insn (gen_xorv4si3 (value, value, large));
13823 /* Convert an unsigned DImode value into a DFmode, using only SSE.
13824 Expects the 64-bit DImode to be supplied in a pair of integral
13825 registers. Requires SSE2; will use SSE3 if available. For x86_32,
13826 -mfpmath=sse, !optimize_size only. */
13828 void
13829 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
13831 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
13832 rtx int_xmm, fp_xmm;
13833 rtx biases, exponents;
13834 rtx x;
13836 int_xmm = gen_reg_rtx (V4SImode);
13837 if (TARGET_INTER_UNIT_MOVES)
13838 emit_insn (gen_movdi_to_sse (int_xmm, input));
13839 else if (TARGET_SSE_SPLIT_REGS)
13841 emit_clobber (int_xmm);
13842 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
13844 else
13846 x = gen_reg_rtx (V2DImode);
13847 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
13848 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
13851 x = gen_rtx_CONST_VECTOR (V4SImode,
13852 gen_rtvec (4, GEN_INT (0x43300000UL),
13853 GEN_INT (0x45300000UL),
13854 const0_rtx, const0_rtx));
13855 exponents = validize_mem (force_const_mem (V4SImode, x));
13857 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
13858 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
13860 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
13861 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
13862 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
13863 (0x1.0p84 + double(fp_value_hi_xmm)).
13864 Note these exponents differ by 32. */
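/* A worked example (illustrative, not from the original source): for
   input 0x0000001200345678, lo = 0x00345678 and hi = 0x00000012, so the
   two doubles built here are (0x1.0p52 + 0x345678) and
   (0x1.0p84 + 0x12 * 2^32);  after the bias subtraction below, adding
   the two halves reconstructs 0x12 * 2^32 + 0x345678 exactly.  */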
13866 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
13868 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
13869 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
13870 real_ldexp (&bias_lo_rvt, &dconst1, 52);
13871 real_ldexp (&bias_hi_rvt, &dconst1, 84);
13872 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
13873 x = const_double_from_real_value (bias_hi_rvt, DFmode);
13874 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
13875 biases = validize_mem (force_const_mem (V2DFmode, biases));
13876 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
13878 /* Add the upper and lower DFmode values together. */
13879 if (TARGET_SSE3)
13880 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
13881 else
13883 x = copy_to_mode_reg (V2DFmode, fp_xmm);
13884 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
13885 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
13888 ix86_expand_vector_extract (false, target, fp_xmm, 0);
13891 /* Not used, but eases macroization of patterns. */
13892 void
13893 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
13894 rtx input ATTRIBUTE_UNUSED)
13896 gcc_unreachable ();
13899 /* Convert an unsigned SImode value into a DFmode. Only currently used
13900 for SSE, but applicable anywhere. */
13902 void
13903 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
13905 REAL_VALUE_TYPE TWO31r;
13906 rtx x, fp;
13908 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
13909 NULL, 1, OPTAB_DIRECT);
13911 fp = gen_reg_rtx (DFmode);
13912 emit_insn (gen_floatsidf2 (fp, x));
13914 real_ldexp (&TWO31r, &dconst1, 31);
13915 x = const_double_from_real_value (TWO31r, DFmode);
13917 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
13918 if (x != target)
13919 emit_move_insn (target, x);
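/* A worked example (illustrative, not from the original source): for
   input 0xffffffff, the PLUS above wraps to 0x7fffffff, which the
   signed conversion turns into 2147483647.0;  adding 2^31 back yields
   4294967295.0, the correct unsigned value.  */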
13922 /* Convert a signed DImode value into a DFmode. Only used for SSE in
13923 32-bit mode; otherwise we have a direct convert instruction. */
13925 void
13926 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
13928 REAL_VALUE_TYPE TWO32r;
13929 rtx fp_lo, fp_hi, x;
13931 fp_lo = gen_reg_rtx (DFmode);
13932 fp_hi = gen_reg_rtx (DFmode);
13934 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
13936 real_ldexp (&TWO32r, &dconst1, 32);
13937 x = const_double_from_real_value (TWO32r, DFmode);
13938 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
13940 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
13942 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
13943 0, OPTAB_DIRECT);
13944 if (x != target)
13945 emit_move_insn (target, x);
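/* A worked example (illustrative, not from the original source): for
   input -1 (0xffffffffffffffff), the high part converts to -1.0 and is
   scaled to -4294967296.0, the low part converts unsigned to
   4294967295.0, and their sum is the expected -1.0.  */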
13948 /* Convert an unsigned SImode value into a SFmode, using only SSE.
13949 For x86_32, -mfpmath=sse, !optimize_size only. */
13950 void
13951 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
13953 REAL_VALUE_TYPE ONE16r;
13954 rtx fp_hi, fp_lo, int_hi, int_lo, x;
13956 real_ldexp (&ONE16r, &dconst1, 16);
13957 x = const_double_from_real_value (ONE16r, SFmode);
13958 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
13959 NULL, 0, OPTAB_DIRECT);
13960 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
13961 NULL, 0, OPTAB_DIRECT);
13962 fp_hi = gen_reg_rtx (SFmode);
13963 fp_lo = gen_reg_rtx (SFmode);
13964 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
13965 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
13966 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
13967 0, OPTAB_DIRECT);
13968 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
13969 0, OPTAB_DIRECT);
13970 if (!rtx_equal_p (target, fp_hi))
13971 emit_move_insn (target, fp_hi);
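/* A worked example (illustrative, not from the original source): for
   input 0x12345678, int_lo = 0x5678 and int_hi = 0x1234;  both halves
   convert to SFmode exactly, so the only rounding happens in the final
   addition computing 0x1234 * 65536.0 + 0x5678.  */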
13974 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
13975 then replicate the value for all elements of the vector
13976 register. */
13978 static rtx
13979 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
13981 rtvec v;
13982 switch (mode)
13984 case SImode:
13985 gcc_assert (vect);
13986 v = gen_rtvec (4, value, value, value, value);
13987 return gen_rtx_CONST_VECTOR (V4SImode, v);
13989 case DImode:
13990 gcc_assert (vect);
13991 v = gen_rtvec (2, value, value);
13992 return gen_rtx_CONST_VECTOR (V2DImode, v);
13994 case SFmode:
13995 if (vect)
13996 v = gen_rtvec (4, value, value, value, value);
13997 else
13998 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
13999 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
14000 return gen_rtx_CONST_VECTOR (V4SFmode, v);
14002 case DFmode:
14003 if (vect)
14004 v = gen_rtvec (2, value, value);
14005 else
14006 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
14007 return gen_rtx_CONST_VECTOR (V2DFmode, v);
14009 default:
14010 gcc_unreachable ();
14014 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
14015 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
14016 for an SSE register. If VECT is true, then replicate the mask for
14017 all elements of the vector register. If INVERT is true, then create
14018 a mask excluding the sign bit. */
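/* For instance (illustrative, not from the original source): for DFmode
   the mask is 0x8000000000000000 in each requested element, or
   0x7fffffffffffffff when INVERT -- the former isolates the sign bit,
   the latter clears it.  */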
14020 static rtx
14021 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
14023 enum machine_mode vec_mode, imode;
14024 HOST_WIDE_INT hi, lo;
14025 int shift = 63;
14026 rtx v;
14027 rtx mask;
14029 /* Find the sign bit, sign extended to 2*HWI. */
14030 switch (mode)
14032 case SImode:
14033 case SFmode:
14034 imode = SImode;
14035 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
14036 lo = 0x80000000, hi = lo < 0;
14037 break;
14039 case DImode:
14040 case DFmode:
14041 imode = DImode;
14042 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
14043 if (HOST_BITS_PER_WIDE_INT >= 64)
14044 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
14045 else
14046 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
14047 break;
14049 case TImode:
14050 case TFmode:
14051 vec_mode = VOIDmode;
14052 if (HOST_BITS_PER_WIDE_INT >= 64)
14054 imode = TImode;
14055 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
14057 else
14059 rtvec vec;
14061 imode = DImode;
14062 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
14064 if (invert)
14066 lo = ~lo, hi = ~hi;
14067 v = constm1_rtx;
14069 else
14070 v = const0_rtx;
14072 mask = immed_double_const (lo, hi, imode);
14074 vec = gen_rtvec (2, v, mask);
14075 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
14076 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
14078 return v;
14080 break;
14082 default:
14083 gcc_unreachable ();
14086 if (invert)
14087 lo = ~lo, hi = ~hi;
14089 /* Force this value into the low part of a fp vector constant. */
14090 mask = immed_double_const (lo, hi, imode);
14091 mask = gen_lowpart (mode, mask);
14093 if (vec_mode == VOIDmode)
14094 return force_reg (mode, mask);
14096 v = ix86_build_const_vector (mode, vect, mask);
14097 return force_reg (vec_mode, v);
14100 /* Generate code for floating point ABS or NEG. */
14102 void
14103 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
14104 rtx operands[])
14106 rtx mask, set, use, clob, dst, src;
14107 bool use_sse = false;
14108 bool vector_mode = VECTOR_MODE_P (mode);
14109 enum machine_mode elt_mode = mode;
14111 if (vector_mode)
14113 elt_mode = GET_MODE_INNER (mode);
14114 use_sse = true;
14116 else if (mode == TFmode)
14117 use_sse = true;
14118 else if (TARGET_SSE_MATH)
14119 use_sse = SSE_FLOAT_MODE_P (mode);
14121 /* NEG and ABS performed with SSE use bitwise mask operations.
14122 Create the appropriate mask now. */
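/* (Illustrative note, not from the original source: NEG is computed as
   "x ^ signbit-mask" and ABS as "x & ~signbit-mask", which is why the
   mask is built inverted for ABS.)  */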
14123 if (use_sse)
14124 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
14125 else
14126 mask = NULL_RTX;
14128 dst = operands[0];
14129 src = operands[1];
14131 if (vector_mode)
14133 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
14134 set = gen_rtx_SET (VOIDmode, dst, set);
14135 emit_insn (set);
14137 else
14139 set = gen_rtx_fmt_e (code, mode, src);
14140 set = gen_rtx_SET (VOIDmode, dst, set);
14141 if (mask)
14143 use = gen_rtx_USE (VOIDmode, mask);
14144 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14145 emit_insn (gen_rtx_PARALLEL (VOIDmode,
14146 gen_rtvec (3, set, use, clob)));
14148 else
14149 emit_insn (set);
14153 /* Expand a copysign operation. Special case operand 0 being a constant. */
14155 void
14156 ix86_expand_copysign (rtx operands[])
14158 enum machine_mode mode;
14159 rtx dest, op0, op1, mask, nmask;
14161 dest = operands[0];
14162 op0 = operands[1];
14163 op1 = operands[2];
14165 mode = GET_MODE (dest);
14167 if (GET_CODE (op0) == CONST_DOUBLE)
14169 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
14171 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
14172 op0 = simplify_unary_operation (ABS, mode, op0, mode);
14174 if (mode == SFmode || mode == DFmode)
14176 enum machine_mode vmode;
14178 vmode = mode == SFmode ? V4SFmode : V2DFmode;
14180 if (op0 == CONST0_RTX (mode))
14181 op0 = CONST0_RTX (vmode);
14182 else
14184 rtvec v;
14186 if (mode == SFmode)
14187 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
14188 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
14189 else
14190 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
14192 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
14195 else if (op0 != CONST0_RTX (mode))
14196 op0 = force_reg (mode, op0);
14198 mask = ix86_build_signbit_mask (mode, 0, 0);
14200 if (mode == SFmode)
14201 copysign_insn = gen_copysignsf3_const;
14202 else if (mode == DFmode)
14203 copysign_insn = gen_copysigndf3_const;
14204 else
14205 copysign_insn = gen_copysigntf3_const;
14207 emit_insn (copysign_insn (dest, op0, op1, mask));
14209 else
14211 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
14213 nmask = ix86_build_signbit_mask (mode, 0, 1);
14214 mask = ix86_build_signbit_mask (mode, 0, 0);
14216 if (mode == SFmode)
14217 copysign_insn = gen_copysignsf3_var;
14218 else if (mode == DFmode)
14219 copysign_insn = gen_copysigndf3_var;
14220 else
14221 copysign_insn = gen_copysigntf3_var;
14223 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
14227 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
14228 be a constant, and so has already been expanded into a vector constant. */
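/* (Illustrative note, not from the original source: with MASK being the
   sign-bit mask and the insn constraints tying DEST to the sign source,
   the effect is dest = (sign-source & MASK) | op0, e.g.
   copysign (3.0, y) = (y & 0x8000000000000000) | 0x4008000000000000.)  */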
14230 void
14231 ix86_split_copysign_const (rtx operands[])
14233 enum machine_mode mode, vmode;
14234 rtx dest, op0, op1, mask, x;
14236 dest = operands[0];
14237 op0 = operands[1];
14238 op1 = operands[2];
14239 mask = operands[3];
14241 mode = GET_MODE (dest);
14242 vmode = GET_MODE (mask);
14244 dest = simplify_gen_subreg (vmode, dest, mode, 0);
14245 x = gen_rtx_AND (vmode, dest, mask);
14246 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14248 if (op0 != CONST0_RTX (vmode))
14250 x = gen_rtx_IOR (vmode, dest, op0);
14251 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14255 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
14256 so we have to do two masks. */
14258 void
14259 ix86_split_copysign_var (rtx operands[])
14261 enum machine_mode mode, vmode;
14262 rtx dest, scratch, op0, op1, mask, nmask, x;
14264 dest = operands[0];
14265 scratch = operands[1];
14266 op0 = operands[2];
14267 op1 = operands[3];
14268 nmask = operands[4];
14269 mask = operands[5];
14271 mode = GET_MODE (dest);
14272 vmode = GET_MODE (mask);
14274 if (rtx_equal_p (op0, op1))
14276 /* Shouldn't happen often (it's useless, obviously), but when it does
14277 we'd generate incorrect code if we continue below. */
14278 emit_move_insn (dest, op0);
14279 return;
14282 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
14284 gcc_assert (REGNO (op1) == REGNO (scratch));
14286 x = gen_rtx_AND (vmode, scratch, mask);
14287 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
14289 dest = mask;
14290 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
14291 x = gen_rtx_NOT (vmode, dest);
14292 x = gen_rtx_AND (vmode, x, op0);
14293 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14295 else
14297 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
14299 x = gen_rtx_AND (vmode, scratch, mask);
14301 else /* alternative 2,4 */
14303 gcc_assert (REGNO (mask) == REGNO (scratch));
14304 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
14305 x = gen_rtx_AND (vmode, scratch, op1);
14307 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
14309 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
14311 dest = simplify_gen_subreg (vmode, op0, mode, 0);
14312 x = gen_rtx_AND (vmode, dest, nmask);
14314 else /* alternative 3,4 */
14316 gcc_assert (REGNO (nmask) == REGNO (dest));
14317 dest = nmask;
14318 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
14319 x = gen_rtx_AND (vmode, dest, op0);
14321 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14324 x = gen_rtx_IOR (vmode, dest, scratch);
14325 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14328 /* Return TRUE or FALSE depending on whether the first SET in INSN
14329 has source and destination with matching CC modes, and that the
14330 CC mode is at least as constrained as REQ_MODE. */
14332 int
14333 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
14335 rtx set;
14336 enum machine_mode set_mode;
14338 set = PATTERN (insn);
14339 if (GET_CODE (set) == PARALLEL)
14340 set = XVECEXP (set, 0, 0);
14341 gcc_assert (GET_CODE (set) == SET);
14342 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
14344 set_mode = GET_MODE (SET_DEST (set));
14345 switch (set_mode)
14347 case CCNOmode:
14348 if (req_mode != CCNOmode
14349 && (req_mode != CCmode
14350 || XEXP (SET_SRC (set), 1) != const0_rtx))
14351 return 0;
14352 break;
14353 case CCmode:
14354 if (req_mode == CCGCmode)
14355 return 0;
14356 /* FALLTHRU */
14357 case CCGCmode:
14358 if (req_mode == CCGOCmode || req_mode == CCNOmode)
14359 return 0;
14360 /* FALLTHRU */
14361 case CCGOCmode:
14362 if (req_mode == CCZmode)
14363 return 0;
14364 /* FALLTHRU */
14365 case CCAmode:
14366 case CCCmode:
14367 case CCOmode:
14368 case CCSmode:
14369 case CCZmode:
14370 break;
14372 default:
14373 gcc_unreachable ();
14376 return (GET_MODE (SET_SRC (set)) == set_mode);
14379 /* Generate insn patterns to do an integer compare of OPERANDS. */
14381 static rtx
14382 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
14384 enum machine_mode cmpmode;
14385 rtx tmp, flags;
14387 cmpmode = SELECT_CC_MODE (code, op0, op1);
14388 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
14390 /* This is very simple, but making the interface the same as in the
14391 FP case makes the rest of the code easier. */
14392 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
14393 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
14395 /* Return the test that should be put into the flags user, i.e.
14396 the bcc, scc, or cmov instruction. */
14397 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
14400 /* Figure out whether to use ordered or unordered fp comparisons.
14401 Return the appropriate mode to use. */
14403 enum machine_mode
14404 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
14406 /* ??? In order to make all comparisons reversible, we do all comparisons
14407    non-trapping when compiling for IEEE.  Once gcc is able to distinguish
14408    all forms of trapping and nontrapping comparisons, we can make inequality
14409    comparisons trapping again, since that results in better code when using
14410    FCOM based compares.  */
14411 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
14414 enum machine_mode
14415 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
14417 enum machine_mode mode = GET_MODE (op0);
14419 if (SCALAR_FLOAT_MODE_P (mode))
14421 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14422 return ix86_fp_compare_mode (code);
14425 switch (code)
14427 /* Only zero flag is needed. */
14428 case EQ: /* ZF=0 */
14429 case NE: /* ZF!=0 */
14430 return CCZmode;
14431 /* Codes needing carry flag. */
14432 case GEU: /* CF=0 */
14433 case LTU: /* CF=1 */
14434 /* Detect overflow checks. They need just the carry flag. */
14435 if (GET_CODE (op0) == PLUS
14436 && rtx_equal_p (op1, XEXP (op0, 0)))
14437 return CCCmode;
14438 else
14439 return CCmode;
14440 case GTU: /* CF=0 & ZF=0 */
14441 case LEU: /* CF=1 | ZF=1 */
14442 /* Detect overflow checks. They need just the carry flag. */
14443 if (GET_CODE (op0) == MINUS
14444 && rtx_equal_p (op1, XEXP (op0, 0)))
14445 return CCCmode;
14446 else
14447 return CCmode;
14448 /* Codes possibly doable only with the sign flag when
14449    comparing against zero.  */
14450 case GE: /* SF=OF or SF=0 */
14451 case LT: /* SF<>OF or SF=1 */
14452 if (op1 == const0_rtx)
14453 return CCGOCmode;
14454 else
14455 /* For the other cases the carry flag is not required.  */
14456 return CCGCmode;
14457 /* Codes doable only with the sign flag when comparing
14458    against zero, but for which we lack a jump instruction,
14459    so we need to use relational tests against overflow,
14460    which thus needs to be zero.  */
14461 case GT: /* ZF=0 & SF=OF */
14462 case LE: /* ZF=1 | SF<>OF */
14463 if (op1 == const0_rtx)
14464 return CCNOmode;
14465 else
14466 return CCGCmode;
14467 /* The strcmp pattern does (use flags), and combine may ask us for the
14468    proper mode.  */
14469 case USE:
14470 return CCmode;
14471 default:
14472 gcc_unreachable ();
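/* (Illustrative note, not from the original source: the CCCmode
   "overflow check" cases above match code such as "if (a + b < a)" --
   a PLUS compared against one of its own operands -- where only the
   carry flag is meaningful.)  */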
14476 /* Return the fixed registers used for condition codes. */
14478 static bool
14479 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
14481 *p1 = FLAGS_REG;
14482 *p2 = FPSR_REG;
14483 return true;
14486 /* If two condition code modes are compatible, return a condition code
14487 mode which is compatible with both. Otherwise, return
14488 VOIDmode. */
14490 static enum machine_mode
14491 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
14493 if (m1 == m2)
14494 return m1;
14496 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
14497 return VOIDmode;
14499 if ((m1 == CCGCmode && m2 == CCGOCmode)
14500 || (m1 == CCGOCmode && m2 == CCGCmode))
14501 return CCGCmode;
14503 switch (m1)
14505 default:
14506 gcc_unreachable ();
14508 case CCmode:
14509 case CCGCmode:
14510 case CCGOCmode:
14511 case CCNOmode:
14512 case CCAmode:
14513 case CCCmode:
14514 case CCOmode:
14515 case CCSmode:
14516 case CCZmode:
14517 switch (m2)
14519 default:
14520 return VOIDmode;
14522 case CCmode:
14523 case CCGCmode:
14524 case CCGOCmode:
14525 case CCNOmode:
14526 case CCAmode:
14527 case CCCmode:
14528 case CCOmode:
14529 case CCSmode:
14530 case CCZmode:
14531 return CCmode;
14534 case CCFPmode:
14535 case CCFPUmode:
14536 /* These are only compatible with themselves, which we already
14537 checked above. */
14538 return VOIDmode;
14543 /* Return a comparison we can do that is equivalent to
14544    swap_condition (code), apart possibly from orderedness.
14545    Never change orderedness if TARGET_IEEE_FP, returning
14546    UNKNOWN in that case if necessary.  */
14548 static enum rtx_code
14549 ix86_fp_swap_condition (enum rtx_code code)
14551 switch (code)
14553 case GT: /* GTU - CF=0 & ZF=0 */
14554 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
14555 case GE: /* GEU - CF=0 */
14556 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
14557 case UNLT: /* LTU - CF=1 */
14558 return TARGET_IEEE_FP ? UNKNOWN : GT;
14559 case UNLE: /* LEU - CF=1 | ZF=1 */
14560 return TARGET_IEEE_FP ? UNKNOWN : GE;
14561 default:
14562 return swap_condition (code);
14566 /* Return the cost of comparison CODE using the best strategy for performance.
14567    All of the following functions use the number of instructions as a cost metric.
14568    In the future this should be tweaked to compute bytes for optimize_size and
14569    to take into account the performance of various instructions on various CPUs.  */
14571 static int
14572 ix86_fp_comparison_cost (enum rtx_code code)
14574 int arith_cost;
14576 /* The cost of code using bit-twiddling on %ah. */
14577 switch (code)
14579 case UNLE:
14580 case UNLT:
14581 case LTGT:
14582 case GT:
14583 case GE:
14584 case UNORDERED:
14585 case ORDERED:
14586 case UNEQ:
14587 arith_cost = 4;
14588 break;
14589 case LT:
14590 case NE:
14591 case EQ:
14592 case UNGE:
14593 arith_cost = TARGET_IEEE_FP ? 5 : 4;
14594 break;
14595 case LE:
14596 case UNGT:
14597 arith_cost = TARGET_IEEE_FP ? 6 : 4;
14598 break;
14599 default:
14600 gcc_unreachable ();
14603 switch (ix86_fp_comparison_strategy (code))
14605 case IX86_FPCMP_COMI:
14606 return arith_cost > 4 ? 3 : 2;
14607 case IX86_FPCMP_SAHF:
14608 return arith_cost > 4 ? 4 : 3;
14609 default:
14610 return arith_cost;
14614 /* Return the strategy to use for a floating-point compare.  We assume that
14615    fcomi is always preferable where available, since that is also true when
14616    looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */
14618 enum ix86_fpcmp_strategy
14619 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
14621 /* Do fcomi/sahf based test when profitable. */
14623 if (TARGET_CMOVE)
14624 return IX86_FPCMP_COMI;
14626 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
14627 return IX86_FPCMP_SAHF;
14629 return IX86_FPCMP_ARITH;
14632 /* Swap, force into registers, or otherwise massage the two operands
14633 to a fp comparison. The operands are updated in place; the new
14634 comparison code is returned. */
14636 static enum rtx_code
14637 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
14639 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
14640 rtx op0 = *pop0, op1 = *pop1;
14641 enum machine_mode op_mode = GET_MODE (op0);
14642 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
14644 /* All of the unordered compare instructions only work on registers.
14645 The same is true of the fcomi compare instructions. The XFmode
14646 compare instructions require registers except when comparing
14647 against zero or when converting operand 1 from fixed point to
14648 floating point. */
14650 if (!is_sse
14651 && (fpcmp_mode == CCFPUmode
14652 || (op_mode == XFmode
14653 && ! (standard_80387_constant_p (op0) == 1
14654 || standard_80387_constant_p (op1) == 1)
14655 && GET_CODE (op1) != FLOAT)
14656 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
14658 op0 = force_reg (op_mode, op0);
14659 op1 = force_reg (op_mode, op1);
14661 else
14663 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
14664 things around if they appear profitable, otherwise force op0
14665 into a register. */
14667 if (standard_80387_constant_p (op0) == 0
14668 || (MEM_P (op0)
14669 && ! (standard_80387_constant_p (op1) == 0
14670 || MEM_P (op1))))
14672 enum rtx_code new_code = ix86_fp_swap_condition (code);
14673 if (new_code != UNKNOWN)
14675 rtx tmp;
14676 tmp = op0, op0 = op1, op1 = tmp;
14677 code = new_code;
14681 if (!REG_P (op0))
14682 op0 = force_reg (op_mode, op0);
14684 if (CONSTANT_P (op1))
14686 int tmp = standard_80387_constant_p (op1);
14687 if (tmp == 0)
14688 op1 = validize_mem (force_const_mem (op_mode, op1));
14689 else if (tmp == 1)
14691 if (TARGET_CMOVE)
14692 op1 = force_reg (op_mode, op1);
14694 else
14695 op1 = force_reg (op_mode, op1);
14699 /* Try to rearrange the comparison to make it cheaper. */
14700 if (ix86_fp_comparison_cost (code)
14701 > ix86_fp_comparison_cost (swap_condition (code))
14702 && (REG_P (op1) || can_create_pseudo_p ()))
14704 rtx tmp;
14705 tmp = op0, op0 = op1, op1 = tmp;
14706 code = swap_condition (code);
14707 if (!REG_P (op0))
14708 op0 = force_reg (op_mode, op0);
14711 *pop0 = op0;
14712 *pop1 = op1;
14713 return code;
14716 /* Convert comparison codes we use to represent FP comparison to integer
14717 code that will result in proper branch. Return UNKNOWN if no such code
14718 is available. */
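/* (Illustrative note, not from the original source: fcomi and
   fnstsw+sahf place C3/C2/C0 into ZF/PF/CF, the same flags an unsigned
   integer compare sets, which is why GT/GE/UNLT/UNLE map to the
   unsigned codes GTU/GEU/LTU/LEU below.)  */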
14720 enum rtx_code
14721 ix86_fp_compare_code_to_integer (enum rtx_code code)
14723 switch (code)
14725 case GT:
14726 return GTU;
14727 case GE:
14728 return GEU;
14729 case ORDERED:
14730 case UNORDERED:
14731 return code;
14733 case UNEQ:
14734 return EQ;
14736 case UNLT:
14737 return LTU;
14739 case UNLE:
14740 return LEU;
14742 case LTGT:
14743 return NE;
14745 default:
14746 return UNKNOWN;
14750 /* Generate insn patterns to do a floating point compare of OPERANDS. */
14752 static rtx
14753 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
14755 enum machine_mode fpcmp_mode, intcmp_mode;
14756 rtx tmp, tmp2;
14758 fpcmp_mode = ix86_fp_compare_mode (code);
14759 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
14761 /* Do fcomi/sahf based test when profitable. */
14762 switch (ix86_fp_comparison_strategy (code))
14764 case IX86_FPCMP_COMI:
14765 intcmp_mode = fpcmp_mode;
14766 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14767 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
14768 tmp);
14769 emit_insn (tmp);
14770 break;
14772 case IX86_FPCMP_SAHF:
14773 intcmp_mode = fpcmp_mode;
14774 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14775 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
14776 tmp);
14778 if (!scratch)
14779 scratch = gen_reg_rtx (HImode);
14780 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
14781 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
14782 break;
14784 case IX86_FPCMP_ARITH:
14785 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
14786 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14787 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
14788 if (!scratch)
14789 scratch = gen_reg_rtx (HImode);
14790 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
14792 /* In the unordered case, we have to check C2 for NaN's, which
14793 doesn't happen to work out to anything nice combination-wise.
14794 So do some bit twiddling on the value we've got in AH to come
14795 up with an appropriate set of condition codes. */
14797 intcmp_mode = CCNOmode;
14798 switch (code)
14800 case GT:
14801 case UNGT:
14802 if (code == GT || !TARGET_IEEE_FP)
14804 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14805 code = EQ;
14807 else
14809 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14810 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14811 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
14812 intcmp_mode = CCmode;
14813 code = GEU;
14815 break;
14816 case LT:
14817 case UNLT:
14818 if (code == LT && TARGET_IEEE_FP)
14820 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14821 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
14822 intcmp_mode = CCmode;
14823 code = EQ;
14825 else
14827 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
14828 code = NE;
14830 break;
14831 case GE:
14832 case UNGE:
14833 if (code == GE || !TARGET_IEEE_FP)
14835 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
14836 code = EQ;
14838 else
14840 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14841 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14842 GEN_INT (0x01)));
14843 code = NE;
14845 break;
14846 case LE:
14847 case UNLE:
14848 if (code == LE && TARGET_IEEE_FP)
14850 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14851 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14852 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14853 intcmp_mode = CCmode;
14854 code = LTU;
14856 else
14858 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14859 code = NE;
14861 break;
14862 case EQ:
14863 case UNEQ:
14864 if (code == EQ && TARGET_IEEE_FP)
14866 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14867 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14868 intcmp_mode = CCmode;
14869 code = EQ;
14871 else
14873 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14874 code = NE;
14875 break;
14877 break;
14878 case NE:
14879 case LTGT:
14880 if (code == NE && TARGET_IEEE_FP)
14882 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14883 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14884 GEN_INT (0x40)));
14885 code = NE;
14887 else
14889 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14890 code = EQ;
14892 break;
14894 case UNORDERED:
14895 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14896 code = NE;
14897 break;
14898 case ORDERED:
14899 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14900 code = EQ;
14901 break;
14903 default:
14904 gcc_unreachable ();
14906 break;
14908 default:
14909 gcc_unreachable ();
14912 /* Return the test that should be put into the flags user, i.e.
14913 the bcc, scc, or cmov instruction. */
14914 return gen_rtx_fmt_ee (code, VOIDmode,
14915 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14916 const0_rtx);
14919 rtx
14920 ix86_expand_compare (enum rtx_code code)
14922 rtx op0, op1, ret;
14923 op0 = ix86_compare_op0;
14924 op1 = ix86_compare_op1;
14926 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC)
14927 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1);
14929 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
14931 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
14932 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
14934 else
14935 ret = ix86_expand_int_compare (code, op0, op1);
14937 return ret;
14940 void
14941 ix86_expand_branch (enum rtx_code code, rtx label)
14943 rtx tmp;
14945 switch (GET_MODE (ix86_compare_op0))
14947 case SFmode:
14948 case DFmode:
14949 case XFmode:
14950 case QImode:
14951 case HImode:
14952 case SImode:
14953 simple:
14954 tmp = ix86_expand_compare (code);
14955 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14956 gen_rtx_LABEL_REF (VOIDmode, label),
14957 pc_rtx);
14958 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14959 return;
14961 case DImode:
14962 if (TARGET_64BIT)
14963 goto simple;
14964 case TImode:
14965 /* Expand DImode branch into multiple compare+branch. */
14967 rtx lo[2], hi[2], label2;
14968 enum rtx_code code1, code2, code3;
14969 enum machine_mode submode;
14971 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
14973 tmp = ix86_compare_op0;
14974 ix86_compare_op0 = ix86_compare_op1;
14975 ix86_compare_op1 = tmp;
14976 code = swap_condition (code);
14978 if (GET_MODE (ix86_compare_op0) == DImode)
14980 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
14981 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
14982 submode = SImode;
14984 else
14986 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
14987 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
14988 submode = DImode;
14991 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
14992 avoid two branches. This costs one extra insn, so disable when
14993 optimizing for size. */
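/* (Illustrative sketch, not from the original source: for a DImode
   "a == b" this emits roughly

	xorl	hi(b), hi(a)
	xorl	lo(b), lo(a)
	orl	hi(a), lo(a)
	je	label

   instead of two separate compare-and-branch pairs.)  */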
14995 if ((code == EQ || code == NE)
14996 && (!optimize_insn_for_size_p ()
14997 || hi[1] == const0_rtx || lo[1] == const0_rtx))
14999 rtx xor0, xor1;
15001 xor1 = hi[0];
15002 if (hi[1] != const0_rtx)
15003 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
15004 NULL_RTX, 0, OPTAB_WIDEN);
15006 xor0 = lo[0];
15007 if (lo[1] != const0_rtx)
15008 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
15009 NULL_RTX, 0, OPTAB_WIDEN);
15011 tmp = expand_binop (submode, ior_optab, xor1, xor0,
15012 NULL_RTX, 0, OPTAB_WIDEN);
15014 ix86_compare_op0 = tmp;
15015 ix86_compare_op1 = const0_rtx;
15016 ix86_expand_branch (code, label);
15017 return;
15020 /* Otherwise, if we are doing a less-than or greater-than-or-equal
15021    comparison, op1 is a constant, and the low word is zero, then we
15022    can just examine the high word.  Similarly for a low word of -1
15023    and less-than-or-equal or greater-than.  */
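/* (Illustrative note, not from the original source: e.g. the unsigned
   DImode test "a < 0x500000000" has a zero low word, so it reduces to
   the single SImode test "hi(a) < 5" with the same LTU code.)  */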
15025 if (CONST_INT_P (hi[1]))
15026 switch (code)
15028 case LT: case LTU: case GE: case GEU:
15029 if (lo[1] == const0_rtx)
15031 ix86_compare_op0 = hi[0];
15032 ix86_compare_op1 = hi[1];
15033 ix86_expand_branch (code, label);
15034 return;
15036 break;
15037 case LE: case LEU: case GT: case GTU:
15038 if (lo[1] == constm1_rtx)
15040 ix86_compare_op0 = hi[0];
15041 ix86_compare_op1 = hi[1];
15042 ix86_expand_branch (code, label);
15043 return;
15045 break;
15046 default:
15047 break;
15050 /* Otherwise, we need two or three jumps. */
15052 label2 = gen_label_rtx ();
15054 code1 = code;
15055 code2 = swap_condition (code);
15056 code3 = unsigned_condition (code);
15058 switch (code)
15060 case LT: case GT: case LTU: case GTU:
15061 break;
15063 case LE: code1 = LT; code2 = GT; break;
15064 case GE: code1 = GT; code2 = LT; break;
15065 case LEU: code1 = LTU; code2 = GTU; break;
15066 case GEU: code1 = GTU; code2 = LTU; break;
15068 case EQ: code1 = UNKNOWN; code2 = NE; break;
15069 case NE: code2 = UNKNOWN; break;
15071 default:
15072 gcc_unreachable ();
15076 * a < b =>
15077 * if (hi(a) < hi(b)) goto true;
15078 * if (hi(a) > hi(b)) goto false;
15079 * if (lo(a) < lo(b)) goto true;
15080 * false:
15083 ix86_compare_op0 = hi[0];
15084 ix86_compare_op1 = hi[1];
15086 if (code1 != UNKNOWN)
15087 ix86_expand_branch (code1, label);
15088 if (code2 != UNKNOWN)
15089 ix86_expand_branch (code2, label2);
15091 ix86_compare_op0 = lo[0];
15092 ix86_compare_op1 = lo[1];
15093 ix86_expand_branch (code3, label);
15095 if (code2 != UNKNOWN)
15096 emit_label (label2);
15097 return;
15100 default:
15101 /* If we have already emitted a compare insn, go straight to simple.
15102 ix86_expand_compare won't emit anything if ix86_compare_emitted
15103 is non-NULL.  */
15104 gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC);
15105 goto simple;
15109 /* Split branch based on floating point condition. */
15110 void
15111 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
15112 rtx target1, rtx target2, rtx tmp, rtx pushed)
15114 rtx condition;
15115 rtx i;
15117 if (target2 != pc_rtx)
15119 rtx tmp = target2;
15120 code = reverse_condition_maybe_unordered (code);
15121 target2 = target1;
15122 target1 = tmp;
15125 condition = ix86_expand_fp_compare (code, op1, op2,
15126 tmp);
15128 /* Remove pushed operand from stack. */
15129 if (pushed)
15130 ix86_free_from_memory (GET_MODE (pushed));
15132 i = emit_jump_insn (gen_rtx_SET
15133 (VOIDmode, pc_rtx,
15134 gen_rtx_IF_THEN_ELSE (VOIDmode,
15135 condition, target1, target2)));
15136 if (split_branch_probability >= 0)
15137 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
15140 void
15141 ix86_expand_setcc (enum rtx_code code, rtx dest)
15143 rtx ret;
15145 gcc_assert (GET_MODE (dest) == QImode);
15147 ret = ix86_expand_compare (code);
15148 PUT_MODE (ret, QImode);
15149 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
15152 /* Expand a comparison setting or clearing the carry flag.  Return true
15153    when successful, and set *POP to the resulting comparison operation.  */
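/* (Illustrative note, not from the original source: the conversions
   below rewrite conditions so the result lands entirely in CF, e.g.
   "a == 0" becomes the unsigned "a < 1" and "a >= 0" becomes the
   unsigned "a < 0x80000000", enabling the sbb-based branchless
   sequences used further on.)  */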
15154 static bool
15155 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
15157 enum machine_mode mode =
15158 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
15160 /* Do not handle double-word compares (DImode, or TImode on 64-bit),
      which go through a special path.  */
15161 if (mode == (TARGET_64BIT ? TImode : DImode))
15162 return false;
15164 if (SCALAR_FLOAT_MODE_P (mode))
15166 rtx compare_op, compare_seq;
15168 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15170 /* Shortcut: the following common codes never translate
15171    into carry-flag compares.  */
15172 if (code == EQ || code == NE || code == UNEQ || code == LTGT
15173 || code == ORDERED || code == UNORDERED)
15174 return false;
15176 /* These comparisons require the zero flag; swap the operands so they no longer do.  */
15177 if ((code == GT || code == UNLE || code == LE || code == UNGT)
15178 && !TARGET_IEEE_FP)
15180 rtx tmp = op0;
15181 op0 = op1;
15182 op1 = tmp;
15183 code = swap_condition (code);
15186 /* Try to expand the comparison and verify that we end up with a
15187    carry-flag-based comparison.  This fails to be true only when
15188    we decide to expand the comparison using arithmetic, which is
15189    not a common scenario.  */
15190 start_sequence ();
15191 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
15192 compare_seq = get_insns ();
15193 end_sequence ();
15195 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15196 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15197 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
15198 else
15199 code = GET_CODE (compare_op);
15201 if (code != LTU && code != GEU)
15202 return false;
15204 emit_insn (compare_seq);
15205 *pop = compare_op;
15206 return true;
15209 if (!INTEGRAL_MODE_P (mode))
15210 return false;
15212 switch (code)
15214 case LTU:
15215 case GEU:
15216 break;
15218 /* Convert a==0 into (unsigned)a<1. */
15219 case EQ:
15220 case NE:
15221 if (op1 != const0_rtx)
15222 return false;
15223 op1 = const1_rtx;
15224 code = (code == EQ ? LTU : GEU);
15225 break;
15227 /* Convert a>b into b<a or a>=b+1.  */
15228 case GTU:
15229 case LEU:
15230 if (CONST_INT_P (op1))
15232 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
15233 /* Bail out on overflow.  We could still swap the operands, but
15234    that would force loading the constant into a register.  */
15235 if (op1 == const0_rtx
15236 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
15237 return false;
15238 code = (code == GTU ? GEU : LTU);
15240 else
15242 rtx tmp = op1;
15243 op1 = op0;
15244 op0 = tmp;
15245 code = (code == GTU ? LTU : GEU);
15247 break;
15249 /* Convert a>=0 into (unsigned)a<0x80000000. */
15250 case LT:
15251 case GE:
15252 if (mode == DImode || op1 != const0_rtx)
15253 return false;
15254 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15255 code = (code == LT ? GEU : LTU);
15256 break;
15257 case LE:
15258 case GT:
15259 if (mode == DImode || op1 != constm1_rtx)
15260 return false;
15261 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15262 code = (code == LE ? GEU : LTU);
15263 break;
15265 default:
15266 return false;
15268 /* Swapping operands may cause a constant to appear as the first operand.  */
15269 if (!nonimmediate_operand (op0, VOIDmode))
15271 if (!can_create_pseudo_p ())
15272 return false;
15273 op0 = force_reg (mode, op0);
15275 ix86_compare_op0 = op0;
15276 ix86_compare_op1 = op1;
15277 *pop = ix86_expand_compare (code);
15278 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
15279 return true;
15282 int
15283 ix86_expand_int_movcc (rtx operands[])
15285 enum rtx_code code = GET_CODE (operands[1]), compare_code;
15286 rtx compare_seq, compare_op;
15287 enum machine_mode mode = GET_MODE (operands[0]);
15288 bool sign_bit_compare_p = false;
15290 start_sequence ();
15291 ix86_compare_op0 = XEXP (operands[1], 0);
15292 ix86_compare_op1 = XEXP (operands[1], 1);
15293 compare_op = ix86_expand_compare (code);
15294 compare_seq = get_insns ();
15295 end_sequence ();
15297 compare_code = GET_CODE (compare_op);
15299 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
15300 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
15301 sign_bit_compare_p = true;
15303 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
15304 HImode insns, we'd be swallowed in word prefix ops. */
15306 if ((mode != HImode || TARGET_FAST_PREFIX)
15307 && (mode != (TARGET_64BIT ? TImode : DImode))
15308 && CONST_INT_P (operands[2])
15309 && CONST_INT_P (operands[3]))
15311 rtx out = operands[0];
15312 HOST_WIDE_INT ct = INTVAL (operands[2]);
15313 HOST_WIDE_INT cf = INTVAL (operands[3]);
15314 HOST_WIDE_INT diff;
15316 diff = ct - cf;
15317 /* Sign bit compares are better done using shifts than by using
15318    sbb.  */
15319 if (sign_bit_compare_p
15320 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15321 ix86_compare_op1, &compare_op))
15323 /* Detect overlap between destination and compare sources. */
15324 rtx tmp = out;
15326 if (!sign_bit_compare_p)
15328 bool fpcmp = false;
15330 compare_code = GET_CODE (compare_op);
15332 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15333 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15335 fpcmp = true;
15336 compare_code = ix86_fp_compare_code_to_integer (compare_code);
15339 /* To simplify the rest of the code, restrict to the GEU case.  */
15340 if (compare_code == LTU)
15342 HOST_WIDE_INT tmp = ct;
15343 ct = cf;
15344 cf = tmp;
15345 compare_code = reverse_condition (compare_code);
15346 code = reverse_condition (code);
15348 else
15350 if (fpcmp)
15351 PUT_CODE (compare_op,
15352 reverse_condition_maybe_unordered
15353 (GET_CODE (compare_op)));
15354 else
15355 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
15357 diff = ct - cf;
15359 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
15360 || reg_overlap_mentioned_p (out, ix86_compare_op1))
15361 tmp = gen_reg_rtx (mode);
15363 if (mode == DImode)
15364 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
15365 else
15366 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
15368 else
15370 if (code == GT || code == GE)
15371 code = reverse_condition (code);
15372 else
15374 HOST_WIDE_INT tmp = ct;
15375 ct = cf;
15376 cf = tmp;
15377 diff = ct - cf;
15379 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
15380 ix86_compare_op1, VOIDmode, 0, -1);
15383 if (diff == 1)
15386 * cmpl op0,op1
15387 * sbbl dest,dest
15388 * [addl dest, ct]
15390 * Size 5 - 8.
15392 if (ct)
15393 tmp = expand_simple_binop (mode, PLUS,
15394 tmp, GEN_INT (ct),
15395 copy_rtx (tmp), 1, OPTAB_DIRECT);
15397 else if (cf == -1)
15400 * cmpl op0,op1
15401 * sbbl dest,dest
15402 * orl $ct, dest
15404 * Size 8.
15406 tmp = expand_simple_binop (mode, IOR,
15407 tmp, GEN_INT (ct),
15408 copy_rtx (tmp), 1, OPTAB_DIRECT);
15410 else if (diff == -1 && ct)
15413 * cmpl op0,op1
15414 * sbbl dest,dest
15415 * notl dest
15416 * [addl dest, cf]
15418 * Size 8 - 11.
15420 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15421 if (cf)
15422 tmp = expand_simple_binop (mode, PLUS,
15423 copy_rtx (tmp), GEN_INT (cf),
15424 copy_rtx (tmp), 1, OPTAB_DIRECT);
15426 else
15429 * cmpl op0,op1
15430 * sbbl dest,dest
15431 * [notl dest]
15432 * andl cf - ct, dest
15433 * [addl dest, ct]
15435 * Size 8 - 11.
15438 if (cf == 0)
15440 cf = ct;
15441 ct = 0;
15442 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15445 tmp = expand_simple_binop (mode, AND,
15446 copy_rtx (tmp),
15447 gen_int_mode (cf - ct, mode),
15448 copy_rtx (tmp), 1, OPTAB_DIRECT);
15449 if (ct)
15450 tmp = expand_simple_binop (mode, PLUS,
15451 copy_rtx (tmp), GEN_INT (ct),
15452 copy_rtx (tmp), 1, OPTAB_DIRECT);
15455 if (!rtx_equal_p (tmp, out))
15456 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
15458 return 1; /* DONE */
15461 if (diff < 0)
15463 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15465 HOST_WIDE_INT tmp;
15466 tmp = ct, ct = cf, cf = tmp;
15467 diff = -diff;
15469 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15471 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15473 /* We may be reversing an unordered compare to a normal compare, which
15474    is not valid in general (we may convert a non-trapping condition
15475    to a trapping one); however, on i386 we currently emit all
15476    comparisons unordered.  */
15477 compare_code = reverse_condition_maybe_unordered (compare_code);
15478 code = reverse_condition_maybe_unordered (code);
15480 else
15482 compare_code = reverse_condition (compare_code);
15483 code = reverse_condition (code);
15487 compare_code = UNKNOWN;
15488 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
15489 && CONST_INT_P (ix86_compare_op1))
15491 if (ix86_compare_op1 == const0_rtx
15492 && (code == LT || code == GE))
15493 compare_code = code;
15494 else if (ix86_compare_op1 == constm1_rtx)
15496 if (code == LE)
15497 compare_code = LT;
15498 else if (code == GT)
15499 compare_code = GE;
15503 /* Optimize dest = (op0 < 0) ? -1 : cf. */
15504 if (compare_code != UNKNOWN
15505 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
15506 && (cf == -1 || ct == -1))
15508 /* If the lea code below could be used, only optimize
15509    if it results in a 2 insn sequence.  */
15511 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
15512 || diff == 3 || diff == 5 || diff == 9)
15513 || (compare_code == LT && ct == -1)
15514 || (compare_code == GE && cf == -1))
15517 * notl op1 (if necessary)
15518 * sarl $31, op1
15519 * orl cf, op1
15521 if (ct != -1)
15523 cf = ct;
15524 ct = -1;
15525 code = reverse_condition (code);
15528 out = emit_store_flag (out, code, ix86_compare_op0,
15529 ix86_compare_op1, VOIDmode, 0, -1);
15531 out = expand_simple_binop (mode, IOR,
15532 out, GEN_INT (cf),
15533 out, 1, OPTAB_DIRECT);
15534 if (out != operands[0])
15535 emit_move_insn (operands[0], out);
15537 return 1; /* DONE */
15542 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
15543 || diff == 3 || diff == 5 || diff == 9)
15544 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
15545 && (mode != DImode
15546 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
15549 * xorl dest,dest
15550 * cmpl op1,op2
15551 * setcc dest
15552 * lea cf(dest*(ct-cf)),dest
15554 * Size 14.
15556 * This also catches the degenerate setcc-only case.
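/* (E.g. (x < y ? 7 : 4) has diff == 3, so once setcc leaves 0/1 in dest
   the select collapses to "lea 4(%eax,%eax,2), %eax": 4 + 3*0 = 4 or
   4 + 3*1 = 7 -- the register choice here is only illustrative.)  */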
15559 rtx tmp;
15560 int nops;
15562 out = emit_store_flag (out, code, ix86_compare_op0,
15563 ix86_compare_op1, VOIDmode, 0, 1);
15565 nops = 0;
15566 /* On x86_64 the lea instruction operates on Pmode, so we need
15567 to get the arithmetic done in the proper mode to match. */
15568 if (diff == 1)
15569 tmp = copy_rtx (out);
15570 else
15572 rtx out1;
15573 out1 = copy_rtx (out);
15574 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
15575 nops++;
15576 if (diff & 1)
15578 tmp = gen_rtx_PLUS (mode, tmp, out1);
15579 nops++;
15582 if (cf != 0)
15584 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
15585 nops++;
15587 if (!rtx_equal_p (tmp, out))
15589 if (nops == 1)
15590 out = force_operand (tmp, copy_rtx (out));
15591 else
15592 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
15594 if (!rtx_equal_p (out, operands[0]))
15595 emit_move_insn (operands[0], copy_rtx (out));
15597 return 1; /* DONE */
15601 * General case: Jumpful:
15602 * xorl dest,dest cmpl op1, op2
15603 * cmpl op1, op2 movl ct, dest
15604 * setcc dest jcc 1f
15605 * decl dest movl cf, dest
15606 * andl (cf-ct),dest 1:
15607 * addl ct,dest
15609 * Size 20. Size 14.
15611 * This is reasonably steep, but branch mispredict costs are
15612 * high on modern cpus, so consider failing only if optimizing
15613 * for space.
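/* (Stepping through the jumpless column: setcc leaves 0/1, decl turns
   that into -1/0, the "and" keeps cf - ct or 0, and the final add of ct
   produces cf or ct respectively -- the same select as the branchy form,
   with no jcc left to mispredict.)  */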
15616 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15617 && BRANCH_COST (optimize_insn_for_speed_p (),
15618 false) >= 2)
15620 if (cf == 0)
15622 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15624 cf = ct;
15625 ct = 0;
15627 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15629 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15631 /* We may be reversing an unordered compare to a normal compare,
15632 which is not valid in general (we may convert a non-trapping
15633 condition to a trapping one); however, on i386 we currently
15634 emit all comparisons unordered. */
15635 code = reverse_condition_maybe_unordered (code);
15637 else
15639 code = reverse_condition (code);
15640 if (compare_code != UNKNOWN)
15641 compare_code = reverse_condition (compare_code);
15645 if (compare_code != UNKNOWN)
15647 /* notl op1 (if needed)
15648 sarl $31, op1
15649 andl (cf-ct), op1
15650 addl ct, op1
15652 For x < 0 (resp. x <= -1) there will be no notl,
15653 so if possible swap the constants to get rid of the
15654 complement.
15655 True/false will be -1/0 while code below (store flag
15656 followed by decrement) is 0/-1, so the constants need
15657 to be exchanged once more. */
15659 if (compare_code == GE || !cf)
15661 code = reverse_condition (code);
15662 compare_code = LT;
15664 else
15666 HOST_WIDE_INT tmp = cf;
15667 cf = ct;
15668 ct = tmp;
15671 out = emit_store_flag (out, code, ix86_compare_op0,
15672 ix86_compare_op1, VOIDmode, 0, -1);
15674 else
15676 out = emit_store_flag (out, code, ix86_compare_op0,
15677 ix86_compare_op1, VOIDmode, 0, 1);
15679 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
15680 copy_rtx (out), 1, OPTAB_DIRECT);
15683 out = expand_simple_binop (mode, AND, copy_rtx (out),
15684 gen_int_mode (cf - ct, mode),
15685 copy_rtx (out), 1, OPTAB_DIRECT);
15686 if (ct)
15687 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
15688 copy_rtx (out), 1, OPTAB_DIRECT);
15689 if (!rtx_equal_p (out, operands[0]))
15690 emit_move_insn (operands[0], copy_rtx (out));
15692 return 1; /* DONE */
15696 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15698 /* Try a few things more with specific constants and a variable. */
15700 optab op;
15701 rtx var, orig_out, out, tmp;
15703 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
15704 return 0; /* FAIL */
15706 /* If one of the two operands is an interesting constant, load a
15707 constant with the above and mask it in with a logical operation. */
15709 if (CONST_INT_P (operands[2]))
15711 var = operands[3];
15712 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
15713 operands[3] = constm1_rtx, op = and_optab;
15714 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
15715 operands[3] = const0_rtx, op = ior_optab;
15716 else
15717 return 0; /* FAIL */
15719 else if (CONST_INT_P (operands[3]))
15721 var = operands[2];
15722 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
15723 operands[2] = constm1_rtx, op = and_optab;
15724 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
15725 operands[2] = const0_rtx, op = ior_optab;
15726 else
15727 return 0; /* FAIL */
15729 else
15730 return 0; /* FAIL */
15732 orig_out = operands[0];
15733 tmp = gen_reg_rtx (mode);
15734 operands[0] = tmp;
15736 /* Recurse to get the constant loaded. */
15737 if (ix86_expand_int_movcc (operands) == 0)
15738 return 0; /* FAIL */
15740 /* Mask in the interesting variable. */
15741 out = expand_binop (mode, op, var, tmp, orig_out, 0,
15742 OPTAB_WIDEN);
15743 if (!rtx_equal_p (out, orig_out))
15744 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15746 return 1; /* DONE */
15750 * For comparison with above,
15752 * movl cf,dest
15753 * movl ct,tmp
15754 * cmpl op1,op2
15755 * cmovcc tmp,dest
15757 * Size 15.
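/* (cmov cannot take an immediate operand, which is why both arms are
   forced into registers below; the extra register_operand checks tighten
   this further for QImode, where the md patterns are more restrictive.)  */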
15760 if (! nonimmediate_operand (operands[2], mode))
15761 operands[2] = force_reg (mode, operands[2]);
15762 if (! nonimmediate_operand (operands[3], mode))
15763 operands[3] = force_reg (mode, operands[3]);
15765 if (! register_operand (operands[2], VOIDmode)
15766 && (mode == QImode
15767 || ! register_operand (operands[3], VOIDmode)))
15768 operands[2] = force_reg (mode, operands[2]);
15770 if (mode == QImode
15771 && ! register_operand (operands[3], VOIDmode))
15772 operands[3] = force_reg (mode, operands[3]);
15774 emit_insn (compare_seq);
15775 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15776 gen_rtx_IF_THEN_ELSE (mode,
15777 compare_op, operands[2],
15778 operands[3])));
15780 return 1; /* DONE */
15783 /* Swap, force into registers, or otherwise massage the two operands
15784 to an sse comparison with a mask result. Thus we differ a bit from
15785 ix86_prepare_fp_compare_args which expects to produce a flags result.
15787 The DEST operand exists to help determine whether to commute commutative
15788 operators. The POP0/POP1 operands are updated in place. The new
15789 comparison code is returned, or UNKNOWN if not implementable. */
15791 static enum rtx_code
15792 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
15793 rtx *pop0, rtx *pop1)
15795 rtx tmp;
15797 switch (code)
15799 case LTGT:
15800 case UNEQ:
15801 /* We have no LTGT as an operator. We could implement it with
15802 NE & ORDERED, but this requires an extra temporary. It's
15803 not clear that it's worth it. */
15804 return UNKNOWN;
15806 case LT:
15807 case LE:
15808 case UNGT:
15809 case UNGE:
15810 /* These are supported directly. */
15811 break;
15813 case EQ:
15814 case NE:
15815 case UNORDERED:
15816 case ORDERED:
15817 /* For commutative operators, try to canonicalize the destination
15818 operand to be first in the comparison - this helps reload to
15819 avoid extra moves. */
15820 if (!dest || !rtx_equal_p (dest, *pop1))
15821 break;
15822 /* FALLTHRU */
15824 case GE:
15825 case GT:
15826 case UNLE:
15827 case UNLT:
15828 /* These are not supported directly. Swap the comparison operands
15829 to transform into something that is supported. */
15830 tmp = *pop0;
15831 *pop0 = *pop1;
15832 *pop1 = tmp;
15833 code = swap_condition (code);
15834 break;
15836 default:
15837 gcc_unreachable ();
15840 return code;
15843 /* Detect conditional moves that exactly match min/max operational
15844 semantics. Note that this is IEEE safe, as long as we don't
15845 interchange the operands.
15847 Returns FALSE if this conditional move doesn't match a MIN/MAX,
15848 and TRUE if the operation is successful and instructions are emitted. */
15850 static bool
15851 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
15852 rtx cmp_op1, rtx if_true, rtx if_false)
15854 enum machine_mode mode;
15855 bool is_min;
15856 rtx tmp;
15858 if (code == LT)
15860 else if (code == UNGE)
15862 tmp = if_true;
15863 if_true = if_false;
15864 if_false = tmp;
15866 else
15867 return false;
15869 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
15870 is_min = true;
15871 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
15872 is_min = false;
15873 else
15874 return false;
15876 mode = GET_MODE (dest);
15878 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
15879 but MODE may be a vector mode and thus not appropriate. */
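/* (In other words, this is a conservative stand-in for those checks:
   when NaNs or signed zeros may matter, a bare SMIN/SMAX rtx would let
   later passes commute or simplify the operands, so the UNSPEC pins down
   the exact operand-order-sensitive min/max hardware semantics.)  */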
15880 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
15882 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
15883 rtvec v;
15885 if_true = force_reg (mode, if_true);
15886 v = gen_rtvec (2, if_true, if_false);
15887 tmp = gen_rtx_UNSPEC (mode, v, u);
15889 else
15891 code = is_min ? SMIN : SMAX;
15892 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
15895 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
15896 return true;
15899 /* Expand an sse vector comparison. Return the register with the result. */
15901 static rtx
15902 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
15903 rtx op_true, rtx op_false)
15905 enum machine_mode mode = GET_MODE (dest);
15906 rtx x;
15908 cmp_op0 = force_reg (mode, cmp_op0);
15909 if (!nonimmediate_operand (cmp_op1, mode))
15910 cmp_op1 = force_reg (mode, cmp_op1);
15912 if (optimize
15913 || reg_overlap_mentioned_p (dest, op_true)
15914 || reg_overlap_mentioned_p (dest, op_false))
15915 dest = gen_reg_rtx (mode);
15917 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
15918 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15920 return dest;
15923 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
15924 operations. This is used for both scalar and vector conditional moves. */
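/* (The general form below is the classic branchless select
       dest = (cmp AND op_true) OR (NOT cmp AND op_false),
   which relies on CMP being an all-ones/all-zeros mask per element, as
   ix86_expand_sse_cmp produces; the first two branches are the same
   identity specialized for an arm that is known to be zero.)  */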
15926 static void
15927 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
15929 enum machine_mode mode = GET_MODE (dest);
15930 rtx t2, t3, x;
15932 if (op_false == CONST0_RTX (mode))
15934 op_true = force_reg (mode, op_true);
15935 x = gen_rtx_AND (mode, cmp, op_true);
15936 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15938 else if (op_true == CONST0_RTX (mode))
15940 op_false = force_reg (mode, op_false);
15941 x = gen_rtx_NOT (mode, cmp);
15942 x = gen_rtx_AND (mode, x, op_false);
15943 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15945 else if (TARGET_SSE5)
15947 rtx pcmov = gen_rtx_SET (mode, dest,
15948 gen_rtx_IF_THEN_ELSE (mode, cmp,
15949 op_true,
15950 op_false));
15951 emit_insn (pcmov);
15953 else
15955 op_true = force_reg (mode, op_true);
15956 op_false = force_reg (mode, op_false);
15958 t2 = gen_reg_rtx (mode);
15959 if (optimize)
15960 t3 = gen_reg_rtx (mode);
15961 else
15962 t3 = dest;
15964 x = gen_rtx_AND (mode, op_true, cmp);
15965 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
15967 x = gen_rtx_NOT (mode, cmp);
15968 x = gen_rtx_AND (mode, x, op_false);
15969 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
15971 x = gen_rtx_IOR (mode, t3, t2);
15972 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15976 /* Expand a floating-point conditional move. Return true if successful. */
15979 ix86_expand_fp_movcc (rtx operands[])
15981 enum machine_mode mode = GET_MODE (operands[0]);
15982 enum rtx_code code = GET_CODE (operands[1]);
15983 rtx tmp, compare_op;
15985 ix86_compare_op0 = XEXP (operands[1], 0);
15986 ix86_compare_op1 = XEXP (operands[1], 1);
15987 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
15989 enum machine_mode cmode;
15991 /* Since we've no cmove for sse registers, don't force bad register
15992 allocation just to gain access to it. Deny movcc when the
15993 comparison mode doesn't match the move mode. */
15994 cmode = GET_MODE (ix86_compare_op0);
15995 if (cmode == VOIDmode)
15996 cmode = GET_MODE (ix86_compare_op1);
15997 if (cmode != mode)
15998 return 0;
16000 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
16001 &ix86_compare_op0,
16002 &ix86_compare_op1);
16003 if (code == UNKNOWN)
16004 return 0;
16006 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
16007 ix86_compare_op1, operands[2],
16008 operands[3]))
16009 return 1;
16011 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
16012 ix86_compare_op1, operands[2], operands[3]);
16013 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
16014 return 1;
16017 /* The floating point conditional move instructions don't directly
16018 support conditions resulting from a signed integer comparison. */
16020 compare_op = ix86_expand_compare (code);
16021 if (!fcmov_comparison_operator (compare_op, VOIDmode))
16023 tmp = gen_reg_rtx (QImode);
16024 ix86_expand_setcc (code, tmp);
16025 code = NE;
16026 ix86_compare_op0 = tmp;
16027 ix86_compare_op1 = const0_rtx;
16028 compare_op = ix86_expand_compare (code);
16031 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
16032 gen_rtx_IF_THEN_ELSE (mode, compare_op,
16033 operands[2], operands[3])));
16035 return 1;
16038 /* Expand a floating-point vector conditional move; a vcond operation
16039 rather than a movcc operation. */
16041 bool
16042 ix86_expand_fp_vcond (rtx operands[])
16044 enum rtx_code code = GET_CODE (operands[3]);
16045 rtx cmp;
16047 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
16048 &operands[4], &operands[5]);
16049 if (code == UNKNOWN)
16050 return false;
16052 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
16053 operands[5], operands[1], operands[2]))
16054 return true;
16056 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
16057 operands[1], operands[2]);
16058 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
16059 return true;
16062 /* Expand a signed/unsigned integral vector conditional move. */
16064 bool
16065 ix86_expand_int_vcond (rtx operands[])
16067 enum machine_mode mode = GET_MODE (operands[0]);
16068 enum rtx_code code = GET_CODE (operands[3]);
16069 bool negate = false;
16070 rtx x, cop0, cop1;
16072 cop0 = operands[4];
16073 cop1 = operands[5];
16075 /* SSE5 supports all of the comparisons on all vector int types. */
16076 if (!TARGET_SSE5)
16078 /* Canonicalize the comparison to EQ, GT, GTU. */
16079 switch (code)
16081 case EQ:
16082 case GT:
16083 case GTU:
16084 break;
16086 case NE:
16087 case LE:
16088 case LEU:
16089 code = reverse_condition (code);
16090 negate = true;
16091 break;
16093 case GE:
16094 case GEU:
16095 code = reverse_condition (code);
16096 negate = true;
16097 /* FALLTHRU */
16099 case LT:
16100 case LTU:
16101 code = swap_condition (code);
16102 x = cop0, cop0 = cop1, cop1 = x;
16103 break;
16105 default:
16106 gcc_unreachable ();
16109 /* Only SSE4.1/SSE4.2 supports V2DImode. */
16110 if (mode == V2DImode)
16112 switch (code)
16114 case EQ:
16115 /* SSE4.1 supports EQ. */
16116 if (!TARGET_SSE4_1)
16117 return false;
16118 break;
16120 case GT:
16121 case GTU:
16122 /* SSE4.2 supports GT/GTU. */
16123 if (!TARGET_SSE4_2)
16124 return false;
16125 break;
16127 default:
16128 gcc_unreachable ();
16132 /* Unsigned parallel compare is not supported by the hardware. Play some
16133 tricks to turn this into a signed comparison against 0. */
16134 if (code == GTU)
16136 cop0 = force_reg (mode, cop0);
16138 switch (mode)
16140 case V4SImode:
16141 case V2DImode:
16143 rtx t1, t2, mask;
16145 /* Perform a parallel modulo subtraction. */
16146 t1 = gen_reg_rtx (mode);
16147 emit_insn ((mode == V4SImode
16148 ? gen_subv4si3
16149 : gen_subv2di3) (t1, cop0, cop1));
16151 /* Extract the original sign bit of op0. */
16152 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
16153 true, false);
16154 t2 = gen_reg_rtx (mode);
16155 emit_insn ((mode == V4SImode
16156 ? gen_andv4si3
16157 : gen_andv2di3) (t2, cop0, mask));
16159 /* XOR it back into the result of the subtraction. This results
16160 in the sign bit set iff we saw unsigned underflow. */
16161 x = gen_reg_rtx (mode);
16162 emit_insn ((mode == V4SImode
16163 ? gen_xorv4si3
16164 : gen_xorv2di3) (x, t1, t2));
16166 code = GT;
16168 break;
16170 case V16QImode:
16171 case V8HImode:
16172 /* Perform a parallel unsigned saturating subtraction. */
16173 x = gen_reg_rtx (mode);
16174 emit_insn (gen_rtx_SET (VOIDmode, x,
16175 gen_rtx_US_MINUS (mode, cop0, cop1)));
16177 code = EQ;
16178 negate = !negate;
16179 break;
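/* (Why this works: "psubus" computes max (a - b, 0), which is zero
   exactly when a <=u b.  Comparing that result against zero therefore
   yields the a <=u b mask, and flipping NEGATE swaps the select arms
   to recover the original GTU sense.)  */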
16181 default:
16182 gcc_unreachable ();
16185 cop0 = x;
16186 cop1 = CONST0_RTX (mode);
16190 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
16191 operands[1+negate], operands[2-negate]);
16193 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
16194 operands[2-negate]);
16195 return true;
16198 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
16199 true if we should do zero extension, else sign extension. HIGH_P is
16200 true if we want the N/2 high elements, else the low elements. */
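/* (E.g. for V8HImode inputs, interleaving each 16-bit element with 16
   bits taken from SE is what realizes the extension: SE is a zero vector
   for zero extension, or the 0 > OP[1] comparison mask -- each element's
   sign bit replicated -- for sign extension.)  */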
16202 void
16203 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16205 enum machine_mode imode = GET_MODE (operands[1]);
16206 rtx (*unpack)(rtx, rtx, rtx);
16207 rtx se, dest;
16209 switch (imode)
16211 case V16QImode:
16212 if (high_p)
16213 unpack = gen_vec_interleave_highv16qi;
16214 else
16215 unpack = gen_vec_interleave_lowv16qi;
16216 break;
16217 case V8HImode:
16218 if (high_p)
16219 unpack = gen_vec_interleave_highv8hi;
16220 else
16221 unpack = gen_vec_interleave_lowv8hi;
16222 break;
16223 case V4SImode:
16224 if (high_p)
16225 unpack = gen_vec_interleave_highv4si;
16226 else
16227 unpack = gen_vec_interleave_lowv4si;
16228 break;
16229 default:
16230 gcc_unreachable ();
16233 dest = gen_lowpart (imode, operands[0]);
16235 if (unsigned_p)
16236 se = force_reg (imode, CONST0_RTX (imode));
16237 else
16238 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
16239 operands[1], pc_rtx, pc_rtx);
16241 emit_insn (unpack (dest, operands[1], se));
16244 /* This function performs the same task as ix86_expand_sse_unpack,
16245 but with SSE4.1 instructions. */
16247 void
16248 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16250 enum machine_mode imode = GET_MODE (operands[1]);
16251 rtx (*unpack)(rtx, rtx);
16252 rtx src, dest;
16254 switch (imode)
16256 case V16QImode:
16257 if (unsigned_p)
16258 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
16259 else
16260 unpack = gen_sse4_1_extendv8qiv8hi2;
16261 break;
16262 case V8HImode:
16263 if (unsigned_p)
16264 unpack = gen_sse4_1_zero_extendv4hiv4si2;
16265 else
16266 unpack = gen_sse4_1_extendv4hiv4si2;
16267 break;
16268 case V4SImode:
16269 if (unsigned_p)
16270 unpack = gen_sse4_1_zero_extendv2siv2di2;
16271 else
16272 unpack = gen_sse4_1_extendv2siv2di2;
16273 break;
16274 default:
16275 gcc_unreachable ();
16278 dest = operands[0];
16279 if (high_p)
16281 /* Shift higher 8 bytes to lower 8 bytes. */
16282 src = gen_reg_rtx (imode);
16283 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
16284 gen_lowpart (TImode, operands[1]),
16285 GEN_INT (64)));
16287 else
16288 src = operands[1];
16290 emit_insn (unpack (dest, src));
16293 /* This function performs the same task as ix86_expand_sse_unpack,
16294 but with sse5 instructions. */
16296 void
16297 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16299 enum machine_mode imode = GET_MODE (operands[1]);
16300 int pperm_bytes[16];
16301 int i;
16302 int h = (high_p) ? 8 : 0;
16303 int h2;
16304 int sign_extend;
16305 rtvec v = rtvec_alloc (16);
16306 rtvec vs;
16307 rtx x, p;
16308 rtx op0 = operands[0], op1 = operands[1];
16310 switch (imode)
16312 case V16QImode:
16313 vs = rtvec_alloc (8);
16314 h2 = (high_p) ? 8 : 0;
16315 for (i = 0; i < 8; i++)
16317 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
16318 pperm_bytes[2*i+1] = ((unsigned_p)
16319 ? PPERM_ZERO
16320 : PPERM_SIGN | PPERM_SRC2 | i | h);
16323 for (i = 0; i < 16; i++)
16324 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16326 for (i = 0; i < 8; i++)
16327 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16329 p = gen_rtx_PARALLEL (VOIDmode, vs);
16330 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16331 if (unsigned_p)
16332 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
16333 else
16334 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
16335 break;
16337 case V8HImode:
16338 vs = rtvec_alloc (4);
16339 h2 = (high_p) ? 4 : 0;
16340 for (i = 0; i < 4; i++)
16342 sign_extend = ((unsigned_p)
16343 ? PPERM_ZERO
16344 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
16345 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
16346 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
16347 pperm_bytes[4*i+2] = sign_extend;
16348 pperm_bytes[4*i+3] = sign_extend;
16351 for (i = 0; i < 16; i++)
16352 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16354 for (i = 0; i < 4; i++)
16355 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16357 p = gen_rtx_PARALLEL (VOIDmode, vs);
16358 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16359 if (unsigned_p)
16360 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
16361 else
16362 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
16363 break;
16365 case V4SImode:
16366 vs = rtvec_alloc (2);
16367 h2 = (high_p) ? 2 : 0;
16368 for (i = 0; i < 2; i++)
16370 sign_extend = ((unsigned_p)
16371 ? PPERM_ZERO
16372 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
16373 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
16374 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
16375 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
16376 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
16377 pperm_bytes[8*i+4] = sign_extend;
16378 pperm_bytes[8*i+5] = sign_extend;
16379 pperm_bytes[8*i+6] = sign_extend;
16380 pperm_bytes[8*i+7] = sign_extend;
16383 for (i = 0; i < 16; i++)
16384 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16386 for (i = 0; i < 2; i++)
16387 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16389 p = gen_rtx_PARALLEL (VOIDmode, vs);
16390 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16391 if (unsigned_p)
16392 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
16393 else
16394 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
16395 break;
16397 default:
16398 gcc_unreachable ();
16401 return;
16404 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
16405 next narrower integer vector type */
16406 void
16407 ix86_expand_sse5_pack (rtx operands[3])
16409 enum machine_mode imode = GET_MODE (operands[0]);
16410 int pperm_bytes[16];
16411 int i;
16412 rtvec v = rtvec_alloc (16);
16413 rtx x;
16414 rtx op0 = operands[0];
16415 rtx op1 = operands[1];
16416 rtx op2 = operands[2];
16418 switch (imode)
16420 case V16QImode:
16421 for (i = 0; i < 8; i++)
16423 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
16424 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
16427 for (i = 0; i < 16; i++)
16428 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16430 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16431 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
16432 break;
16434 case V8HImode:
16435 for (i = 0; i < 4; i++)
16437 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
16438 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
16439 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
16440 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
16443 for (i = 0; i < 16; i++)
16444 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16446 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16447 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
16448 break;
16450 case V4SImode:
16451 for (i = 0; i < 2; i++)
16453 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
16454 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
16455 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
16456 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
16457 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
16458 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
16459 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
16460 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
16463 for (i = 0; i < 16; i++)
16464 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16466 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16467 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
16468 break;
16470 default:
16471 gcc_unreachable ();
16474 return;
16477 /* Expand conditional increment or decrement using adc/sbb instructions.
16478 The default case using setcc followed by the conditional move can be
16479 done by generic code. */
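/* (E.g. x = y + (a <u b): once the compare leaves the condition in the
   carry flag, a single adc-style instruction folds the truth value
   straight into the addition, and sbb covers the decrement direction;
   the code below only arranges the right constant and carry polarity.)  */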
16481 ix86_expand_int_addcc (rtx operands[])
16483 enum rtx_code code = GET_CODE (operands[1]);
16484 rtx compare_op;
16485 rtx val = const0_rtx;
16486 bool fpcmp = false;
16487 enum machine_mode mode = GET_MODE (operands[0]);
16489 ix86_compare_op0 = XEXP (operands[1], 0);
16490 ix86_compare_op1 = XEXP (operands[1], 1);
16491 if (operands[3] != const1_rtx
16492 && operands[3] != constm1_rtx)
16493 return 0;
16494 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16495 ix86_compare_op1, &compare_op))
16496 return 0;
16497 code = GET_CODE (compare_op);
16499 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16500 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16502 fpcmp = true;
16503 code = ix86_fp_compare_code_to_integer (code);
16506 if (code != LTU)
16508 val = constm1_rtx;
16509 if (fpcmp)
16510 PUT_CODE (compare_op,
16511 reverse_condition_maybe_unordered
16512 (GET_CODE (compare_op)));
16513 else
16514 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
16516 PUT_MODE (compare_op, mode);
16518 /* Construct either adc or sbb insn. */
16519 if ((code == LTU) == (operands[3] == constm1_rtx))
16521 switch (GET_MODE (operands[0]))
16523 case QImode:
16524 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
16525 break;
16526 case HImode:
16527 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
16528 break;
16529 case SImode:
16530 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
16531 break;
16532 case DImode:
16533 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16534 break;
16535 default:
16536 gcc_unreachable ();
16539 else
16541 switch (GET_MODE (operands[0]))
16543 case QImode:
16544 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
16545 break;
16546 case HImode:
16547 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
16548 break;
16549 case SImode:
16550 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
16551 break;
16552 case DImode:
16553 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16554 break;
16555 default:
16556 gcc_unreachable ();
16559 return 1; /* DONE */
16563 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
16564 works for floating point parameters and non-offsettable memories.
16565 For pushes, it returns just stack offsets; the values will be saved
16566 in the right order. At most four parts are generated. */
16568 static int
16569 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
16571 int size;
16573 if (!TARGET_64BIT)
16574 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
16575 else
16576 size = (GET_MODE_SIZE (mode) + 4) / 8;
16578 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
16579 gcc_assert (size >= 2 && size <= 4);
16581 /* Optimize constant pool references to immediates. This is used by fp
16582 moves, which force all constants to memory to allow combining. */
16583 if (MEM_P (operand) && MEM_READONLY_P (operand))
16585 rtx tmp = maybe_get_pool_constant (operand);
16586 if (tmp)
16587 operand = tmp;
16590 if (MEM_P (operand) && !offsettable_memref_p (operand))
16592 /* The only non-offsettable memories we handle are pushes. */
16593 int ok = push_operand (operand, VOIDmode);
16595 gcc_assert (ok);
16597 operand = copy_rtx (operand);
16598 PUT_MODE (operand, Pmode);
16599 parts[0] = parts[1] = parts[2] = parts[3] = operand;
16600 return size;
16603 if (GET_CODE (operand) == CONST_VECTOR)
16605 enum machine_mode imode = int_mode_for_mode (mode);
16606 /* Caution: if we looked through a constant pool memory above,
16607 the operand may actually have a different mode now. That's
16608 ok, since we want to pun this all the way back to an integer. */
16609 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
16610 gcc_assert (operand != NULL);
16611 mode = imode;
16614 if (!TARGET_64BIT)
16616 if (mode == DImode)
16617 split_di (&operand, 1, &parts[0], &parts[1]);
16618 else
16620 int i;
16622 if (REG_P (operand))
16624 gcc_assert (reload_completed);
16625 for (i = 0; i < size; i++)
16626 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
16628 else if (offsettable_memref_p (operand))
16630 operand = adjust_address (operand, SImode, 0);
16631 parts[0] = operand;
16632 for (i = 1; i < size; i++)
16633 parts[i] = adjust_address (operand, SImode, 4 * i);
16635 else if (GET_CODE (operand) == CONST_DOUBLE)
16637 REAL_VALUE_TYPE r;
16638 long l[4];
16640 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16641 switch (mode)
16643 case TFmode:
16644 real_to_target (l, &r, mode);
16645 parts[3] = gen_int_mode (l[3], SImode);
16646 parts[2] = gen_int_mode (l[2], SImode);
16647 break;
16648 case XFmode:
16649 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
16650 parts[2] = gen_int_mode (l[2], SImode);
16651 break;
16652 case DFmode:
16653 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16654 break;
16655 default:
16656 gcc_unreachable ();
16658 parts[1] = gen_int_mode (l[1], SImode);
16659 parts[0] = gen_int_mode (l[0], SImode);
16661 else
16662 gcc_unreachable ();
16665 else
16667 if (mode == TImode)
16668 split_ti (&operand, 1, &parts[0], &parts[1]);
16669 if (mode == XFmode || mode == TFmode)
16671 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
16672 if (REG_P (operand))
16674 gcc_assert (reload_completed);
16675 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
16676 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
16678 else if (offsettable_memref_p (operand))
16680 operand = adjust_address (operand, DImode, 0);
16681 parts[0] = operand;
16682 parts[1] = adjust_address (operand, upper_mode, 8);
16684 else if (GET_CODE (operand) == CONST_DOUBLE)
16686 REAL_VALUE_TYPE r;
16687 long l[4];
16689 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16690 real_to_target (l, &r, mode);
16692 /* Do not use shift by 32 to avoid warning on 32bit systems. */
16693 if (HOST_BITS_PER_WIDE_INT >= 64)
16694 parts[0]
16695 = gen_int_mode
16696 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16697 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16698 DImode);
16699 else
16700 parts[0] = immed_double_const (l[0], l[1], DImode);
16702 if (upper_mode == SImode)
16703 parts[1] = gen_int_mode (l[2], SImode);
16704 else if (HOST_BITS_PER_WIDE_INT >= 64)
16705 parts[1]
16706 = gen_int_mode
16707 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16708 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16709 DImode);
16710 else
16711 parts[1] = immed_double_const (l[2], l[3], DImode);
16713 else
16714 gcc_unreachable ();
16718 return size;
16721 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
16722 All required insns are emitted here directly. Operands 2-5 receive
16723 the destination parts in the correct order; operands 6-9 receive the
16724 corresponding source parts. */
16726 void
16727 ix86_split_long_move (rtx operands[])
16729 rtx part[2][4];
16730 int nparts, i, j;
16731 int push = 0;
16732 int collisions = 0;
16733 enum machine_mode mode = GET_MODE (operands[0]);
16734 bool collisionparts[4];
16736 /* The DFmode expanders may ask us to move a double.
16737 For a 64-bit target this is a single move. By hiding that fact
16738 here we simplify the i386.md splitters. */
16739 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
16741 /* Optimize constant pool references to immediates. This is used by
16742 fp moves, which force all constants to memory to allow combining. */
16744 if (MEM_P (operands[1])
16745 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16746 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16747 operands[1] = get_pool_constant (XEXP (operands[1], 0));
16748 if (push_operand (operands[0], VOIDmode))
16750 operands[0] = copy_rtx (operands[0]);
16751 PUT_MODE (operands[0], Pmode);
16753 else
16754 operands[0] = gen_lowpart (DImode, operands[0]);
16755 operands[1] = gen_lowpart (DImode, operands[1]);
16756 emit_move_insn (operands[0], operands[1]);
16757 return;
16760 /* The only non-offsettable memory we handle is push. */
16761 if (push_operand (operands[0], VOIDmode))
16762 push = 1;
16763 else
16764 gcc_assert (!MEM_P (operands[0])
16765 || offsettable_memref_p (operands[0]));
16767 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16768 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
16770 /* When emitting a push, take care with source operands on the stack. */
16771 if (push && MEM_P (operands[1])
16772 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
16773 for (i = 0; i < nparts - 1; i++)
16774 part[1][i] = change_address (part[1][i],
16775 GET_MODE (part[1][i]),
16776 XEXP (part[1][i + 1], 0));
16778 /* We need to do the copy in the right order in case an address register
16779 of the source overlaps the destination. */
16780 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
16782 rtx tmp;
16784 for (i = 0; i < nparts; i++)
16786 collisionparts[i]
16787 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
16788 if (collisionparts[i])
16789 collisions++;
16792 /* Collision in the middle part can be handled by reordering. */
16793 if (collisions == 1 && nparts == 3 && collisionparts [1])
16795 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16796 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16798 else if (collisions == 1
16799 && nparts == 4
16800 && (collisionparts [1] || collisionparts [2]))
16802 if (collisionparts [1])
16804 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16805 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16807 else
16809 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
16810 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
16814 /* If there are more collisions, we can't handle them by reordering.
16815 Do an lea to the last part and use only one colliding move. */
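/* (That is, the source address is materialized once into the register
   that will be written last, so every earlier part can be loaded through
   that base with simple word offsets and only the final move still
   collides.)  */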
16816 else if (collisions > 1)
16818 rtx base;
16820 collisions = 1;
16822 base = part[0][nparts - 1];
16824 /* Handle the case when the last part isn't valid for lea.
16825 Happens in 64-bit mode storing the 12-byte XFmode. */
16826 if (GET_MODE (base) != Pmode)
16827 base = gen_rtx_REG (Pmode, REGNO (base));
16829 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
16830 part[1][0] = replace_equiv_address (part[1][0], base);
16831 for (i = 1; i < nparts; i++)
16833 tmp = plus_constant (base, UNITS_PER_WORD * i);
16834 part[1][i] = replace_equiv_address (part[1][i], tmp);
16839 if (push)
16841 if (!TARGET_64BIT)
16843 if (nparts == 3)
16845 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
16846 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
16847 emit_move_insn (part[0][2], part[1][2]);
16849 else if (nparts == 4)
16851 emit_move_insn (part[0][3], part[1][3]);
16852 emit_move_insn (part[0][2], part[1][2]);
16855 else
16857 /* In 64-bit mode we don't have a 32-bit push available. If this is
16858 a register, that is OK - we will just use the larger counterpart. We
16859 also retype memory - these come from an attempt to avoid a REX prefix
16860 on moving the second half of a TFmode value. */
16861 if (GET_MODE (part[1][1]) == SImode)
16863 switch (GET_CODE (part[1][1]))
16865 case MEM:
16866 part[1][1] = adjust_address (part[1][1], DImode, 0);
16867 break;
16869 case REG:
16870 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16871 break;
16873 default:
16874 gcc_unreachable ();
16877 if (GET_MODE (part[1][0]) == SImode)
16878 part[1][0] = part[1][1];
16881 emit_move_insn (part[0][1], part[1][1]);
16882 emit_move_insn (part[0][0], part[1][0]);
16883 return;
16886 /* Choose the correct order so we do not overwrite the source before it is copied. */
16887 if ((REG_P (part[0][0])
16888 && REG_P (part[1][1])
16889 && (REGNO (part[0][0]) == REGNO (part[1][1])
16890 || (nparts == 3
16891 && REGNO (part[0][0]) == REGNO (part[1][2]))
16892 || (nparts == 4
16893 && REGNO (part[0][0]) == REGNO (part[1][3]))))
16894 || (collisions > 0
16895 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
16897 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16899 operands[2 + i] = part[0][j];
16900 operands[6 + i] = part[1][j];
16903 else
16905 for (i = 0; i < nparts; i++)
16907 operands[2 + i] = part[0][i];
16908 operands[6 + i] = part[1][i];
16912 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
16913 if (optimize_insn_for_size_p ())
16915 for (j = 0; j < nparts - 1; j++)
16916 if (CONST_INT_P (operands[6 + j])
16917 && operands[6 + j] != const0_rtx
16918 && REG_P (operands[2 + j]))
16919 for (i = j; i < nparts - 1; i++)
16920 if (CONST_INT_P (operands[7 + i])
16921 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
16922 operands[7 + i] = operands[2 + j];
16925 for (i = 0; i < nparts; i++)
16926 emit_move_insn (operands[2 + i], operands[6 + i]);
16928 return;
16931 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16932 left shift by a constant, either using a single shift or
16933 a sequence of add instructions. */
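/* (Since x + x == x << 1, COUNT back-to-back adds realize the whole
   shift; the cost test below picks that form only when optimizing for
   speed and the add chain is no costlier than one constant shift.)  */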
16935 static void
16936 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
16938 if (count == 1)
16940 emit_insn ((mode == DImode
16941 ? gen_addsi3
16942 : gen_adddi3) (operand, operand, operand));
16944 else if (!optimize_insn_for_size_p ()
16945 && count * ix86_cost->add <= ix86_cost->shift_const)
16947 int i;
16948 for (i=0; i<count; i++)
16950 emit_insn ((mode == DImode
16951 ? gen_addsi3
16952 : gen_adddi3) (operand, operand, operand));
16955 else
16956 emit_insn ((mode == DImode
16957 ? gen_ashlsi3
16958 : gen_ashldi3) (operand, operand, GEN_INT (count)));
16961 void
16962 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16964 rtx low[2], high[2];
16965 int count;
16966 const int single_width = mode == DImode ? 32 : 64;
16968 if (CONST_INT_P (operands[2]))
16970 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16971 count = INTVAL (operands[2]) & (single_width * 2 - 1);
16973 if (count >= single_width)
16975 emit_move_insn (high[0], low[1]);
16976 emit_move_insn (low[0], const0_rtx);
16978 if (count > single_width)
16979 ix86_expand_ashl_const (high[0], count - single_width, mode);
16981 else
16983 if (!rtx_equal_p (operands[0], operands[1]))
16984 emit_move_insn (operands[0], operands[1]);
16985 emit_insn ((mode == DImode
16986 ? gen_x86_shld
16987 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
16988 ix86_expand_ashl_const (low[0], count, mode);
16990 return;
16993 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16995 if (operands[1] == const1_rtx)
16997 /* Assuming we've chosen QImode-capable registers, 1 << N
16998 can be done with two 32/64-bit shifts, no branches, no cmoves. */
16999 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
17001 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
17003 ix86_expand_clear (low[0]);
17004 ix86_expand_clear (high[0]);
17005 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
17007 d = gen_lowpart (QImode, low[0]);
17008 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
17009 s = gen_rtx_EQ (QImode, flags, const0_rtx);
17010 emit_insn (gen_rtx_SET (VOIDmode, d, s));
17012 d = gen_lowpart (QImode, high[0]);
17013 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
17014 s = gen_rtx_NE (QImode, flags, const0_rtx);
17015 emit_insn (gen_rtx_SET (VOIDmode, d, s));
17018 /* Otherwise, we can get the same results by manually performing
17019 a bit extract operation on bit 5/6, and then performing the two
17020 shifts. The two methods of getting 0/1 into low/high are exactly
17021 the same size. Avoiding the shift in the bit extract case helps
17022 pentium4 a bit; no one else seems to care much either way. */
17023 else
17025 rtx x;
17027 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
17028 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
17029 else
17030 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
17031 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
17033 emit_insn ((mode == DImode
17034 ? gen_lshrsi3
17035 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
17036 emit_insn ((mode == DImode
17037 ? gen_andsi3
17038 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
17039 emit_move_insn (low[0], high[0]);
17040 emit_insn ((mode == DImode
17041 ? gen_xorsi3
17042 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
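/* (At this point low/high hold, as 0/1 values, the predicates
   count < 32/64 and count >= 32/64; the common shifts below then rely
   on the hardware masking the shift count to the word size, so the
   single set bit lands in the correct half either way.)  */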
17045 emit_insn ((mode == DImode
17046 ? gen_ashlsi3
17047 : gen_ashldi3) (low[0], low[0], operands[2]));
17048 emit_insn ((mode == DImode
17049 ? gen_ashlsi3
17050 : gen_ashldi3) (high[0], high[0], operands[2]));
17051 return;
17054 if (operands[1] == constm1_rtx)
17056 /* For -1 << N, we can avoid the shld instruction, because we
17057 know that we're shifting 0...31/63 ones into a -1. */
17058 emit_move_insn (low[0], constm1_rtx);
17059 if (optimize_insn_for_size_p ())
17060 emit_move_insn (high[0], low[0]);
17061 else
17062 emit_move_insn (high[0], constm1_rtx);
17064 else
17066 if (!rtx_equal_p (operands[0], operands[1]))
17067 emit_move_insn (operands[0], operands[1]);
17069 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17070 emit_insn ((mode == DImode
17071 ? gen_x86_shld
17072 : gen_x86_64_shld) (high[0], low[0], operands[2]));
17075 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
17077 if (TARGET_CMOVE && scratch)
17079 ix86_expand_clear (scratch);
17080 emit_insn ((mode == DImode
17081 ? gen_x86_shift_adj_1
17082 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
17083 scratch));
17085 else
17086 emit_insn ((mode == DImode
17087 ? gen_x86_shift_adj_2
17088 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
17091 void
17092 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
17094 rtx low[2], high[2];
17095 int count;
17096 const int single_width = mode == DImode ? 32 : 64;
17098 if (CONST_INT_P (operands[2]))
17100 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17101 count = INTVAL (operands[2]) & (single_width * 2 - 1);
17103 if (count == single_width * 2 - 1)
17105 emit_move_insn (high[0], high[1]);
17106 emit_insn ((mode == DImode
17107 ? gen_ashrsi3
17108 : gen_ashrdi3) (high[0], high[0],
17109 GEN_INT (single_width - 1)));
17110 emit_move_insn (low[0], high[0]);
17113 else if (count >= single_width)
17115 emit_move_insn (low[0], high[1]);
17116 emit_move_insn (high[0], low[0]);
17117 emit_insn ((mode == DImode
17118 ? gen_ashrsi3
17119 : gen_ashrdi3) (high[0], high[0],
17120 GEN_INT (single_width - 1)));
17121 if (count > single_width)
17122 emit_insn ((mode == DImode
17123 ? gen_ashrsi3
17124 : gen_ashrdi3) (low[0], low[0],
17125 GEN_INT (count - single_width)));
17127 else
17129 if (!rtx_equal_p (operands[0], operands[1]))
17130 emit_move_insn (operands[0], operands[1]);
17131 emit_insn ((mode == DImode
17132 ? gen_x86_shrd
17133 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17134 emit_insn ((mode == DImode
17135 ? gen_ashrsi3
17136 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
17139 else
17141 if (!rtx_equal_p (operands[0], operands[1]))
17142 emit_move_insn (operands[0], operands[1]);
17144 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17146 emit_insn ((mode == DImode
17147 ? gen_x86_shrd
17148 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17149 emit_insn ((mode == DImode
17150 ? gen_ashrsi3
17151 : gen_ashrdi3) (high[0], high[0], operands[2]));
17153 if (TARGET_CMOVE && scratch)
17155 emit_move_insn (scratch, high[0]);
17156 emit_insn ((mode == DImode
17157 ? gen_ashrsi3
17158 : gen_ashrdi3) (scratch, scratch,
17159 GEN_INT (single_width - 1)));
17160 emit_insn ((mode == DImode
17161 ? gen_x86_shift_adj_1
17162 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17163 scratch));
17165 else
17166 emit_insn ((mode == DImode
17167 ? gen_x86_shift_adj_3
17168 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
17172 void
17173 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
17175 rtx low[2], high[2];
17176 int count;
17177 const int single_width = mode == DImode ? 32 : 64;
17179 if (CONST_INT_P (operands[2]))
17181 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17182 count = INTVAL (operands[2]) & (single_width * 2 - 1);
17184 if (count >= single_width)
17186 emit_move_insn (low[0], high[1]);
17187 ix86_expand_clear (high[0]);
17189 if (count > single_width)
17190 emit_insn ((mode == DImode
17191 ? gen_lshrsi3
17192 : gen_lshrdi3) (low[0], low[0],
17193 GEN_INT (count - single_width)));
17195 else
17197 if (!rtx_equal_p (operands[0], operands[1]))
17198 emit_move_insn (operands[0], operands[1]);
17199 emit_insn ((mode == DImode
17200 ? gen_x86_shrd
17201 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17202 emit_insn ((mode == DImode
17203 ? gen_lshrsi3
17204 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
17207 else
17209 if (!rtx_equal_p (operands[0], operands[1]))
17210 emit_move_insn (operands[0], operands[1]);
17212 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17214 emit_insn ((mode == DImode
17215 ? gen_x86_shrd
17216 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17217 emit_insn ((mode == DImode
17218 ? gen_lshrsi3
17219 : gen_lshrdi3) (high[0], high[0], operands[2]));
17221 /* Heh. By reversing the arguments, we can reuse this pattern. */
17222 if (TARGET_CMOVE && scratch)
17224 ix86_expand_clear (scratch);
17225 emit_insn ((mode == DImode
17226 ? gen_x86_shift_adj_1
17227 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17228 scratch));
17230 else
17231 emit_insn ((mode == DImode
17232 ? gen_x86_shift_adj_2
17233 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
17237 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
17238 static void
17239 predict_jump (int prob)
17241 rtx insn = get_last_insn ();
17242 gcc_assert (JUMP_P (insn));
17243 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
17246 /* Helper function for the string operations below. Test VARIABLE for
17247 whether it is aligned to VALUE bytes; if so, jump to the label. */
17248 static rtx
17249 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
17251 rtx label = gen_label_rtx ();
17252 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
17253 if (GET_MODE (variable) == DImode)
17254 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
17255 else
17256 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
17257 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
17258 1, label);
17259 if (epilogue)
17260 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17261 else
17262 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17263 return label;
17266 /* Subtract VALUE from COUNTREG. */
17267 static void
17268 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
17270 if (GET_MODE (countreg) == DImode)
17271 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
17272 else
17273 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
17276 /* Zero extend the possibly-SImode EXP to a Pmode register. */
17278 ix86_zero_extend_to_Pmode (rtx exp)
17280 rtx r;
17281 if (GET_MODE (exp) == VOIDmode)
17282 return force_reg (Pmode, exp);
17283 if (GET_MODE (exp) == Pmode)
17284 return copy_to_mode_reg (Pmode, exp);
17285 r = gen_reg_rtx (Pmode);
17286 emit_insn (gen_zero_extendsidi2 (r, exp));
17287 return r;
17290 /* Divide COUNTREG by SCALE. */
17291 static rtx
17292 scale_counter (rtx countreg, int scale)
17294 rtx sc;
17295 rtx piece_size_mask;
17297 if (scale == 1)
17298 return countreg;
17299 if (CONST_INT_P (countreg))
17300 return GEN_INT (INTVAL (countreg) / scale);
17301 gcc_assert (REG_P (countreg));
17303 piece_size_mask = GEN_INT (scale - 1);
17304 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
17305 GEN_INT (exact_log2 (scale)),
17306 NULL, 1, OPTAB_DIRECT);
17307 return sc;
17310 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
17311 DImode for constant loop counts. */
17313 static enum machine_mode
17314 counter_mode (rtx count_exp)
17316 if (GET_MODE (count_exp) != VOIDmode)
17317 return GET_MODE (count_exp);
17318 if (!CONST_INT_P (count_exp))
17319 return Pmode;
17320 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
17321 return DImode;
17322 return SImode;
17325 /* When SRCPTR is non-NULL, output a simple loop to move memory
17326 pointed to by SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times;
17327 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output
17328 the equivalent loop to set the memory to VALUE (assumed to be in MODE).
17330 The size is rounded down to a whole number of chunks moved at once.
17331 SRCMEM and DESTMEM provide the MEM rtx to feed proper aliasing info. */
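/* (The emitted control flow is roughly, eliding the mode handling and
   branch-prediction notes below:

	size = count & -(chunk_size * unroll);  iter = 0;
	if (size == 0) goto out;   -- guard emitted for byte-sized chunks
     top:
	copy or set UNROLL chunks at dest + iter (and src + iter);
	iter += chunk_size * unroll;
	if (iter < size) goto top;
     out:
	dest += iter;  (and src += iter;)  )  */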
17334 static void
17335 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
17336 rtx destptr, rtx srcptr, rtx value,
17337 rtx count, enum machine_mode mode, int unroll,
17338 int expected_size)
17340 rtx out_label, top_label, iter, tmp;
17341 enum machine_mode iter_mode = counter_mode (count);
17342 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
17343 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
17344 rtx size;
17345 rtx x_addr;
17346 rtx y_addr;
17347 int i;
17349 top_label = gen_label_rtx ();
17350 out_label = gen_label_rtx ();
17351 iter = gen_reg_rtx (iter_mode);
17353 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
17354 NULL, 1, OPTAB_DIRECT);
17355 /* Those two should combine. */
17356 if (piece_size == const1_rtx)
17358 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
17359 true, out_label);
17360 predict_jump (REG_BR_PROB_BASE * 10 / 100);
17362 emit_move_insn (iter, const0_rtx);
17364 emit_label (top_label);
17366 tmp = convert_modes (Pmode, iter_mode, iter, true);
17367 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
17368 destmem = change_address (destmem, mode, x_addr);
17370 if (srcmem)
17372 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
17373 srcmem = change_address (srcmem, mode, y_addr);
17375 /* When unrolling for chips that reorder memory reads and writes,
17376 we can save registers by using a single temporary. Also, using
17377 4 temporaries is overkill in 32-bit mode. */
17378 if (!TARGET_64BIT && 0)
17380 for (i = 0; i < unroll; i++)
17382 if (i)
17384 destmem =
17385 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17386 srcmem =
17387 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17389 emit_move_insn (destmem, srcmem);
17392 else
17394 rtx tmpreg[4];
17395 gcc_assert (unroll <= 4);
17396 for (i = 0; i < unroll; i++)
17398 tmpreg[i] = gen_reg_rtx (mode);
17399 if (i)
17401 srcmem =
17402 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17404 emit_move_insn (tmpreg[i], srcmem);
17406 for (i = 0; i < unroll; i++)
17408 if (i)
17410 destmem =
17411 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17413 emit_move_insn (destmem, tmpreg[i]);
17417 else
17418 for (i = 0; i < unroll; i++)
17420 if (i)
17421 destmem =
17422 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17423 emit_move_insn (destmem, value);
17426 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
17427 true, OPTAB_LIB_WIDEN);
17428 if (tmp != iter)
17429 emit_move_insn (iter, tmp);
17431 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
17432 true, top_label);
17433 if (expected_size != -1)
17435 expected_size /= GET_MODE_SIZE (mode) * unroll;
17436 if (expected_size == 0)
17437 predict_jump (0);
17438 else if (expected_size > REG_BR_PROB_BASE)
17439 predict_jump (REG_BR_PROB_BASE - 1);
17440 else
17441 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
17443 else
17444 predict_jump (REG_BR_PROB_BASE * 80 / 100);
17445 iter = ix86_zero_extend_to_Pmode (iter);
17446 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
17447 true, OPTAB_LIB_WIDEN);
17448 if (tmp != destptr)
17449 emit_move_insn (destptr, tmp);
17450 if (srcptr)
17452 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
17453 true, OPTAB_LIB_WIDEN);
17454 if (tmp != srcptr)
17455 emit_move_insn (srcptr, tmp);
17457 emit_label (out_label);
17460 /* Output a "rep; mov" instruction.
17461 Arguments have the same meaning as for the previous function. */
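/* (A note on DESTEXP/SRCEXP below: the rep_mov pattern expects, besides
   the count register, expressions for the final pointer values, so for
   a MODE wider than QImode they are built as ptr + (countreg << log2
   size) rather than plain ptr + countreg.)  */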
17462 static void
17463 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
17464 rtx destptr, rtx srcptr,
17465 rtx count,
17466 enum machine_mode mode)
17468 rtx destexp;
17469 rtx srcexp;
17470 rtx countreg;
17472 /* If the size is known, it is shorter to use rep movs. */
17473 if (mode == QImode && CONST_INT_P (count)
17474 && !(INTVAL (count) & 3))
17475 mode = SImode;
17477 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17478 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17479 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
17480 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
17481 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17482 if (mode != QImode)
17484 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17485 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17486 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17487 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
17488 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17489 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
17491 else
17493 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17494 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
17496 if (CONST_INT_P (count))
17498 count = GEN_INT (INTVAL (count)
17499 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17500 destmem = shallow_copy_rtx (destmem);
17501 srcmem = shallow_copy_rtx (srcmem);
17502 set_mem_size (destmem, count);
17503 set_mem_size (srcmem, count);
17505 else
17507 if (MEM_SIZE (destmem))
17508 set_mem_size (destmem, NULL_RTX);
17509 if (MEM_SIZE (srcmem))
17510 set_mem_size (srcmem, NULL_RTX);
17512 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
17513 destexp, srcexp));
17516 /* Output a "rep; stos" instruction.
17517 Arguments have the same meaning as for the previous function. */
17518 static void
17519 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
17520 rtx count, enum machine_mode mode,
17521 rtx orig_value)
17523 rtx destexp;
17524 rtx countreg;
17526 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17527 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17528 value = force_reg (mode, gen_lowpart (mode, value));
17529 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17530 if (mode != QImode)
17532 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17533 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17534 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17536 else
17537 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17538 if (orig_value == const0_rtx && CONST_INT_P (count))
17540 count = GEN_INT (INTVAL (count)
17541 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17542 destmem = shallow_copy_rtx (destmem);
17543 set_mem_size (destmem, count);
17545 else if (MEM_SIZE (destmem))
17546 set_mem_size (destmem, NULL_RTX);
17547 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
17550 static void
17551 emit_strmov (rtx destmem, rtx srcmem,
17552 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
17554 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
17555 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
17556 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17559 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
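/* Editorial worked example: if the residual count is 7 and max_size is 8,
   the constant path below emits one SImode move (offset 0), one HImode
   move (offset 4) and one QImode move (offset 6); each set bit of the
   residual count selects exactly one power-of-two sized move.  */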
17560 static void
17561 expand_movmem_epilogue (rtx destmem, rtx srcmem,
17562 rtx destptr, rtx srcptr, rtx count, int max_size)
17564 rtx src, dest;
17565 if (CONST_INT_P (count))
17567 HOST_WIDE_INT countval = INTVAL (count);
17568 int offset = 0;
17570 if ((countval & 0x10) && max_size > 16)
17572 if (TARGET_64BIT)
17574 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17575 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
17577 else
17578 gcc_unreachable ();
17579 offset += 16;
17581 if ((countval & 0x08) && max_size > 8)
17583 if (TARGET_64BIT)
17584 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17585 else
17587 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17588 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
17590 offset += 8;
17592 if ((countval & 0x04) && max_size > 4)
17594 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17595 offset += 4;
17597 if ((countval & 0x02) && max_size > 2)
17599 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
17600 offset += 2;
17602 if ((countval & 0x01) && max_size > 1)
17604 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
17605 offset += 1;
17607 return;
17609 if (max_size > 8)
17611 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
17612 count, 1, OPTAB_DIRECT);
17613 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
17614 count, QImode, 1, 4);
17615 return;
17618 /* When there are stringops, we can cheaply increase dest and src pointers.
17619 Otherwise we save code size by maintaining offset (zero is readily
17620 available from preceding rep operation) and using x86 addressing modes. */
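/* Editorial sketch of the two strategies: with TARGET_SINGLE_STRINGOP each
   tail move below is a movs instruction, which advances both pointers as a
   side effect.  Without it, the pointers stay fixed, a separate offset
   register starting at zero is bumped by 4/2/1, and the moves use
   base+offset addressing such as (srcptr,offset).  */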
17622 if (TARGET_SINGLE_STRINGOP)
17624 if (max_size > 4)
17626 rtx label = ix86_expand_aligntest (count, 4, true);
17627 src = change_address (srcmem, SImode, srcptr);
17628 dest = change_address (destmem, SImode, destptr);
17629 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17630 emit_label (label);
17631 LABEL_NUSES (label) = 1;
17633 if (max_size > 2)
17635 rtx label = ix86_expand_aligntest (count, 2, true);
17636 src = change_address (srcmem, HImode, srcptr);
17637 dest = change_address (destmem, HImode, destptr);
17638 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17639 emit_label (label);
17640 LABEL_NUSES (label) = 1;
17642 if (max_size > 1)
17644 rtx label = ix86_expand_aligntest (count, 1, true);
17645 src = change_address (srcmem, QImode, srcptr);
17646 dest = change_address (destmem, QImode, destptr);
17647 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17648 emit_label (label);
17649 LABEL_NUSES (label) = 1;
17652 else
17654 rtx offset = force_reg (Pmode, const0_rtx);
17655 rtx tmp;
17657 if (max_size > 4)
17659 rtx label = ix86_expand_aligntest (count, 4, true);
17660 src = change_address (srcmem, SImode, srcptr);
17661 dest = change_address (destmem, SImode, destptr);
17662 emit_move_insn (dest, src);
17663 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
17664 true, OPTAB_LIB_WIDEN);
17665 if (tmp != offset)
17666 emit_move_insn (offset, tmp);
17667 emit_label (label);
17668 LABEL_NUSES (label) = 1;
17670 if (max_size > 2)
17672 rtx label = ix86_expand_aligntest (count, 2, true);
17673 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17674 src = change_address (srcmem, HImode, tmp);
17675 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17676 dest = change_address (destmem, HImode, tmp);
17677 emit_move_insn (dest, src);
17678 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
17679 true, OPTAB_LIB_WIDEN);
17680 if (tmp != offset)
17681 emit_move_insn (offset, tmp);
17682 emit_label (label);
17683 LABEL_NUSES (label) = 1;
17685 if (max_size > 1)
17687 rtx label = ix86_expand_aligntest (count, 1, true);
17688 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17689 src = change_address (srcmem, QImode, tmp);
17690 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17691 dest = change_address (destmem, QImode, tmp);
17692 emit_move_insn (dest, src);
17693 emit_label (label);
17694 LABEL_NUSES (label) = 1;
17699 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
17700 static void
17701 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17702 rtx count, int max_size)
17704 count =
17705 expand_simple_binop (counter_mode (count), AND, count,
17706 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
17707 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17708 gen_lowpart (QImode, value), count, QImode,
17709 1, max_size / 2);
17712 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
17713 static void
17714 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
17716 rtx dest;
17718 if (CONST_INT_P (count))
17720 HOST_WIDE_INT countval = INTVAL (count);
17721 int offset = 0;
17723 if ((countval & 0x10) && max_size > 16)
17725 if (TARGET_64BIT)
17727 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17728 emit_insn (gen_strset (destptr, dest, value));
17729 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17730 emit_insn (gen_strset (destptr, dest, value));
17732 else
17733 gcc_unreachable ();
17734 offset += 16;
17736 if ((countval & 0x08) && max_size > 8)
17738 if (TARGET_64BIT)
17740 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17741 emit_insn (gen_strset (destptr, dest, value));
17743 else
17745 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17746 emit_insn (gen_strset (destptr, dest, value));
17747 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17748 emit_insn (gen_strset (destptr, dest, value));
17750 offset += 8;
17752 if ((countval & 0x04) && max_size > 4)
17754 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17755 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17756 offset += 4;
17758 if ((countval & 0x02) && max_size > 2)
17760 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17761 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17762 offset += 2;
17764 if ((countval & 0x01) && max_size > 1)
17766 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17767 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17768 offset += 1;
17770 return;
17772 if (max_size > 32)
17774 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
17775 return;
17777 if (max_size > 16)
17779 rtx label = ix86_expand_aligntest (count, 16, true);
17780 if (TARGET_64BIT)
17782 dest = change_address (destmem, DImode, destptr);
17783 emit_insn (gen_strset (destptr, dest, value));
17784 emit_insn (gen_strset (destptr, dest, value));
17786 else
17788 dest = change_address (destmem, SImode, destptr);
17789 emit_insn (gen_strset (destptr, dest, value));
17790 emit_insn (gen_strset (destptr, dest, value));
17791 emit_insn (gen_strset (destptr, dest, value));
17792 emit_insn (gen_strset (destptr, dest, value));
17794 emit_label (label);
17795 LABEL_NUSES (label) = 1;
17797 if (max_size > 8)
17799 rtx label = ix86_expand_aligntest (count, 8, true);
17800 if (TARGET_64BIT)
17802 dest = change_address (destmem, DImode, destptr);
17803 emit_insn (gen_strset (destptr, dest, value));
17805 else
17807 dest = change_address (destmem, SImode, destptr);
17808 emit_insn (gen_strset (destptr, dest, value));
17809 emit_insn (gen_strset (destptr, dest, value));
17811 emit_label (label);
17812 LABEL_NUSES (label) = 1;
17814 if (max_size > 4)
17816 rtx label = ix86_expand_aligntest (count, 4, true);
17817 dest = change_address (destmem, SImode, destptr);
17818 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17819 emit_label (label);
17820 LABEL_NUSES (label) = 1;
17822 if (max_size > 2)
17824 rtx label = ix86_expand_aligntest (count, 2, true);
17825 dest = change_address (destmem, HImode, destptr);
17826 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17827 emit_label (label);
17828 LABEL_NUSES (label) = 1;
17830 if (max_size > 1)
17832 rtx label = ix86_expand_aligntest (count, 1, true);
17833 dest = change_address (destmem, QImode, destptr);
17834 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17835 emit_label (label);
17836 LABEL_NUSES (label) = 1;
17840 /* Copy enough bytes from SRC to DEST to align DEST, known to be aligned by ALIGN,
17841 to DESIRED_ALIGNMENT.  */
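/* Editorial worked example: if DESTPTR ends in the bits ...101 and
   DESIRED_ALIGNMENT is 8, the tests below copy 1 byte (the pointer now
   ends in ...110), then 2 bytes (...000), and skip the 4-byte step, so
   the destination ends up 8-byte aligned after 3 bytes were copied.  */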
17842 static void
17843 expand_movmem_prologue (rtx destmem, rtx srcmem,
17844 rtx destptr, rtx srcptr, rtx count,
17845 int align, int desired_alignment)
17847 if (align <= 1 && desired_alignment > 1)
17849 rtx label = ix86_expand_aligntest (destptr, 1, false);
17850 srcmem = change_address (srcmem, QImode, srcptr);
17851 destmem = change_address (destmem, QImode, destptr);
17852 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17853 ix86_adjust_counter (count, 1);
17854 emit_label (label);
17855 LABEL_NUSES (label) = 1;
17857 if (align <= 2 && desired_alignment > 2)
17859 rtx label = ix86_expand_aligntest (destptr, 2, false);
17860 srcmem = change_address (srcmem, HImode, srcptr);
17861 destmem = change_address (destmem, HImode, destptr);
17862 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17863 ix86_adjust_counter (count, 2);
17864 emit_label (label);
17865 LABEL_NUSES (label) = 1;
17867 if (align <= 4 && desired_alignment > 4)
17869 rtx label = ix86_expand_aligntest (destptr, 4, false);
17870 srcmem = change_address (srcmem, SImode, srcptr);
17871 destmem = change_address (destmem, SImode, destptr);
17872 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17873 ix86_adjust_counter (count, 4);
17874 emit_label (label);
17875 LABEL_NUSES (label) = 1;
17877 gcc_assert (desired_alignment <= 8);
17880 /* Copy enough bytes from SRC to DST to align DST to DESIRED_ALIGN.
17881 ALIGN_BYTES is how many bytes need to be copied until DST is aligned.  */
17882 static rtx
17883 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
17884 int desired_align, int align_bytes)
17886 rtx src = *srcp;
17887 rtx src_size, dst_size;
17888 int off = 0;
17889 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
17890 if (src_align_bytes >= 0)
17891 src_align_bytes = desired_align - src_align_bytes;
17892 src_size = MEM_SIZE (src);
17893 dst_size = MEM_SIZE (dst);
17894 if (align_bytes & 1)
17896 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17897 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
17898 off = 1;
17899 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17901 if (align_bytes & 2)
17903 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17904 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
17905 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17906 set_mem_align (dst, 2 * BITS_PER_UNIT);
17907 if (src_align_bytes >= 0
17908 && (src_align_bytes & 1) == (align_bytes & 1)
17909 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
17910 set_mem_align (src, 2 * BITS_PER_UNIT);
17911 off = 2;
17912 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17914 if (align_bytes & 4)
17916 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17917 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
17918 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17919 set_mem_align (dst, 4 * BITS_PER_UNIT);
17920 if (src_align_bytes >= 0)
17922 unsigned int src_align = 0;
17923 if ((src_align_bytes & 3) == (align_bytes & 3))
17924 src_align = 4;
17925 else if ((src_align_bytes & 1) == (align_bytes & 1))
17926 src_align = 2;
17927 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17928 set_mem_align (src, src_align * BITS_PER_UNIT);
17930 off = 4;
17931 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17933 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17934 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
17935 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17936 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17937 if (src_align_bytes >= 0)
17939 unsigned int src_align = 0;
17940 if ((src_align_bytes & 7) == (align_bytes & 7))
17941 src_align = 8;
17942 else if ((src_align_bytes & 3) == (align_bytes & 3))
17943 src_align = 4;
17944 else if ((src_align_bytes & 1) == (align_bytes & 1))
17945 src_align = 2;
17946 if (src_align > (unsigned int) desired_align)
17947 src_align = desired_align;
17948 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17949 set_mem_align (src, src_align * BITS_PER_UNIT);
17951 if (dst_size)
17952 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17953 if (src_size)
17954 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
17955 *srcp = src;
17956 return dst;
17959 /* Set enough bytes at DEST to align DEST, known to be aligned by ALIGN,
17960 to DESIRED_ALIGNMENT.  */
17961 static void
17962 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
17963 int align, int desired_alignment)
17965 if (align <= 1 && desired_alignment > 1)
17967 rtx label = ix86_expand_aligntest (destptr, 1, false);
17968 destmem = change_address (destmem, QImode, destptr);
17969 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
17970 ix86_adjust_counter (count, 1);
17971 emit_label (label);
17972 LABEL_NUSES (label) = 1;
17974 if (align <= 2 && desired_alignment > 2)
17976 rtx label = ix86_expand_aligntest (destptr, 2, false);
17977 destmem = change_address (destmem, HImode, destptr);
17978 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
17979 ix86_adjust_counter (count, 2);
17980 emit_label (label);
17981 LABEL_NUSES (label) = 1;
17983 if (align <= 4 && desired_alignment > 4)
17985 rtx label = ix86_expand_aligntest (destptr, 4, false);
17986 destmem = change_address (destmem, SImode, destptr);
17987 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
17988 ix86_adjust_counter (count, 4);
17989 emit_label (label);
17990 LABEL_NUSES (label) = 1;
17992 gcc_assert (desired_alignment <= 8);
17995 /* Set enough bytes at DST to align DST, known to be aligned by ALIGN, to
17996 DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need to be stored.  */
17997 static rtx
17998 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
17999 int desired_align, int align_bytes)
18001 int off = 0;
18002 rtx dst_size = MEM_SIZE (dst);
18003 if (align_bytes & 1)
18005 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
18006 off = 1;
18007 emit_insn (gen_strset (destreg, dst,
18008 gen_lowpart (QImode, value)));
18010 if (align_bytes & 2)
18012 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
18013 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
18014 set_mem_align (dst, 2 * BITS_PER_UNIT);
18015 off = 2;
18016 emit_insn (gen_strset (destreg, dst,
18017 gen_lowpart (HImode, value)));
18019 if (align_bytes & 4)
18021 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
18022 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
18023 set_mem_align (dst, 4 * BITS_PER_UNIT);
18024 off = 4;
18025 emit_insn (gen_strset (destreg, dst,
18026 gen_lowpart (SImode, value)));
18028 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
18029 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
18030 set_mem_align (dst, desired_align * BITS_PER_UNIT);
18031 if (dst_size)
18032 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
18033 return dst;
18036 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
18037 static enum stringop_alg
18038 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
18039 int *dynamic_check)
18041 const struct stringop_algs * algs;
18042 bool optimize_for_speed;
18043 /* Algorithms using the rep prefix want at least edi and ecx;
18044 additionally, memset wants eax and memcpy wants esi. Don't
18045 consider such algorithms if the user has appropriated those
18046 registers for their own purposes. */
18047 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
18048 || (memset
18049 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
18051 #define ALG_USABLE_P(alg) (rep_prefix_usable \
18052 || (alg != rep_prefix_1_byte \
18053 && alg != rep_prefix_4_byte \
18054 && alg != rep_prefix_8_byte))
18055 const struct processor_costs *cost;
18057 /* Even if the string operation call is cold, we still might spend a lot
18058 of time processing large blocks. */
18059 if (optimize_function_for_size_p (cfun)
18060 || (optimize_insn_for_size_p ()
18061 && expected_size != -1 && expected_size < 256))
18062 optimize_for_speed = false;
18063 else
18064 optimize_for_speed = true;
18066 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
18068 *dynamic_check = -1;
18069 if (memset)
18070 algs = &cost->memset[TARGET_64BIT != 0];
18071 else
18072 algs = &cost->memcpy[TARGET_64BIT != 0];
18073 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
18074 return stringop_alg;
18075 /* rep; movq or rep; movl is the smallest variant. */
18076 else if (!optimize_for_speed)
18078 if (!count || (count & 3))
18079 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
18080 else
18081 return rep_prefix_usable ? rep_prefix_4_byte : loop;
18083 /* Very tiny blocks are best handled via the loop; REP is expensive to set up. */
18085 else if (expected_size != -1 && expected_size < 4)
18086 return loop_1_byte;
18087 else if (expected_size != -1)
18089 unsigned int i;
18090 enum stringop_alg alg = libcall;
18091 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
18093 /* We get here if the algorithms that were not libcall-based
18094 were rep-prefix based and we are unable to use rep prefixes
18095 based on global register usage. Break out of the loop and
18096 use the heuristic below. */
18097 if (algs->size[i].max == 0)
18098 break;
18099 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
18101 enum stringop_alg candidate = algs->size[i].alg;
18103 if (candidate != libcall && ALG_USABLE_P (candidate))
18104 alg = candidate;
18105 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
18106 last non-libcall inline algorithm. */
18107 if (TARGET_INLINE_ALL_STRINGOPS)
18109 /* When the current size is best copied by a libcall, but we
18110 are still forced to inline, run the heuristic below that
18111 will pick code for medium-sized blocks.  */
18112 if (alg != libcall)
18113 return alg;
18114 break;
18116 else if (ALG_USABLE_P (candidate))
18117 return candidate;
18120 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
18122 /* When asked to inline the call anyway, try to pick a meaningful choice.
18123 We look for the maximal size of block that is faster to copy by hand,
18124 and take blocks of at most that size, guessing that the average size
18125 will be roughly half of the block.
18127 If this turns out to be bad, we might simply specify the preferred
18128 choice in ix86_costs. */
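/* Editorial illustration: suppose the cost table says unrolled_loop is the
   fastest non-libcall algorithm for blocks of up to 1024 bytes.  The code
   below then recurses with expected_size = 512 to pick the algorithm, and
   with -minline-stringops-dynamically additionally sets *dynamic_check to
   1024 so that larger blocks branch to the library call at run time.  */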
18129 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18130 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
18132 int max = -1;
18133 enum stringop_alg alg;
18134 int i;
18135 bool any_alg_usable_p = true;
18137 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
18139 enum stringop_alg candidate = algs->size[i].alg;
18140 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
18142 if (candidate != libcall && candidate
18143 && ALG_USABLE_P (candidate))
18144 max = algs->size[i].max;
18146 /* If there aren't any usable algorithms, then recursing on
18147 smaller sizes isn't going to find anything. Just return the
18148 simple byte-at-a-time copy loop. */
18149 if (!any_alg_usable_p)
18151 /* Pick something reasonable. */
18152 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18153 *dynamic_check = 128;
18154 return loop_1_byte;
18156 if (max == -1)
18157 max = 4096;
18158 alg = decide_alg (count, max / 2, memset, dynamic_check);
18159 gcc_assert (*dynamic_check == -1);
18160 gcc_assert (alg != libcall);
18161 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18162 *dynamic_check = max;
18163 return alg;
18165 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
18166 #undef ALG_USABLE_P
18169 /* Decide on alignment. We know that the operand is already aligned to ALIGN
18170 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
18171 static int
18172 decide_alignment (int align,
18173 enum stringop_alg alg,
18174 int expected_size)
18176 int desired_align = 0;
18177 switch (alg)
18179 case no_stringop:
18180 gcc_unreachable ();
18181 case loop:
18182 case unrolled_loop:
18183 desired_align = GET_MODE_SIZE (Pmode);
18184 break;
18185 case rep_prefix_8_byte:
18186 desired_align = 8;
18187 break;
18188 case rep_prefix_4_byte:
18189 /* PentiumPro has special logic that triggers for 8-byte-aligned blocks,
18190 copying a whole cache line at once. */
18191 if (TARGET_PENTIUMPRO)
18192 desired_align = 8;
18193 else
18194 desired_align = 4;
18195 break;
18196 case rep_prefix_1_byte:
18197 /* PentiumPro has special logic that triggers for 8-byte-aligned blocks,
18198 copying a whole cache line at once. */
18199 if (TARGET_PENTIUMPRO)
18200 desired_align = 8;
18201 else
18202 desired_align = 1;
18203 break;
18204 case loop_1_byte:
18205 desired_align = 1;
18206 break;
18207 case libcall:
18208 return 0;
18211 if (optimize_size)
18212 desired_align = 1;
18213 if (desired_align < align)
18214 desired_align = align;
18215 if (expected_size != -1 && expected_size < 4)
18216 desired_align = align;
18217 return desired_align;
18220 /* Return the smallest power of 2 greater than VAL. */
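/* Editorial note: the result is strictly greater than VAL, e.g. the
   function maps 3 to 4 but also 4 to 8; callers therefore typically pass
   size_needed - 1 when they want the smallest power of two that is
   greater than or equal to size_needed.  */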
18221 static int
18222 smallest_pow2_greater_than (int val)
18224 int ret = 1;
18225 while (ret <= val)
18226 ret <<= 1;
18227 return ret;
18230 /* Expand string move (memcpy) operation. Use i386 string operations when
18231 profitable. expand_setmem contains similar code. The code depends upon
18232 architecture, block size and alignment, but always has the same
18233 overall structure:
18235 1) Prologue guard: Conditional that jumps up to epilogues for small
18236 blocks that can be handled by epilogue alone. This is faster but
18237 also needed for correctness, since the prologue assumes the block is larger
18238 than the desired alignment.
18240 Optional dynamic check for size and libcall for large
18241 blocks is emitted here too, with -minline-stringops-dynamically.
18243 2) Prologue: copy first few bytes in order to get destination aligned
18244 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
18245 DESIRED_ALIGN, and up to DESIRED_ALIGN - ALIGN bytes can be copied.
18246 We emit either a jump tree on power of two sized blocks, or a byte loop.
18248 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
18249 with specified algorithm.
18251 4) Epilogue: code copying tail of the block that is too small to be
18252 handled by main body (or up to size guarded by prologue guard). */
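/* Editorial sketch (assuming a rep-prefix algorithm and a non-constant
   count) of the shape of the emitted code:

       cmp count, epilogue_size_needed               (1) guard
       jb epilogue
       <copy 1/2/4 bytes until dest is aligned>      (2) prologue
       rep movs{b,l,q}                               (3) main body
     epilogue:
       <copy count & (epilogue_size_needed - 1)
        remaining tail bytes>                        (4) epilogue  */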
18255 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
18256 rtx expected_align_exp, rtx expected_size_exp)
18258 rtx destreg;
18259 rtx srcreg;
18260 rtx label = NULL;
18261 rtx tmp;
18262 rtx jump_around_label = NULL;
18263 HOST_WIDE_INT align = 1;
18264 unsigned HOST_WIDE_INT count = 0;
18265 HOST_WIDE_INT expected_size = -1;
18266 int size_needed = 0, epilogue_size_needed;
18267 int desired_align = 0, align_bytes = 0;
18268 enum stringop_alg alg;
18269 int dynamic_check;
18270 bool need_zero_guard = false;
18272 if (CONST_INT_P (align_exp))
18273 align = INTVAL (align_exp);
18274 /* i386 can do misaligned access at reasonably increased cost. */
18275 if (CONST_INT_P (expected_align_exp)
18276 && INTVAL (expected_align_exp) > align)
18277 align = INTVAL (expected_align_exp);
18278 /* ALIGN is the minimum of destination and source alignment, but we care here
18279 just about destination alignment. */
18280 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
18281 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
18283 if (CONST_INT_P (count_exp))
18284 count = expected_size = INTVAL (count_exp);
18285 if (CONST_INT_P (expected_size_exp) && count == 0)
18286 expected_size = INTVAL (expected_size_exp);
18288 /* Make sure we don't need to care about overflow later on. */
18289 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18290 return 0;
18292 /* Step 0: Decide on preferred algorithm, desired alignment and
18293 size of chunks to be copied by main loop. */
18295 alg = decide_alg (count, expected_size, false, &dynamic_check);
18296 desired_align = decide_alignment (align, alg, expected_size);
18298 if (!TARGET_ALIGN_STRINGOPS)
18299 align = desired_align;
18301 if (alg == libcall)
18302 return 0;
18303 gcc_assert (alg != no_stringop);
18304 if (!count)
18305 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
18306 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18307 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
18308 switch (alg)
18310 case libcall:
18311 case no_stringop:
18312 gcc_unreachable ();
18313 case loop:
18314 need_zero_guard = true;
18315 size_needed = GET_MODE_SIZE (Pmode);
18316 break;
18317 case unrolled_loop:
18318 need_zero_guard = true;
18319 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
18320 break;
18321 case rep_prefix_8_byte:
18322 size_needed = 8;
18323 break;
18324 case rep_prefix_4_byte:
18325 size_needed = 4;
18326 break;
18327 case rep_prefix_1_byte:
18328 size_needed = 1;
18329 break;
18330 case loop_1_byte:
18331 need_zero_guard = true;
18332 size_needed = 1;
18333 break;
18336 epilogue_size_needed = size_needed;
18338 /* Step 1: Prologue guard. */
18340 /* Alignment code needs count to be in register. */
18341 if (CONST_INT_P (count_exp) && desired_align > align)
18343 if (INTVAL (count_exp) > desired_align
18344 && INTVAL (count_exp) > size_needed)
18346 align_bytes
18347 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18348 if (align_bytes <= 0)
18349 align_bytes = 0;
18350 else
18351 align_bytes = desired_align - align_bytes;
18353 if (align_bytes == 0)
18354 count_exp = force_reg (counter_mode (count_exp), count_exp);
18356 gcc_assert (desired_align >= 1 && align >= 1);
18358 /* Ensure that alignment prologue won't copy past end of block. */
18359 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18361 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18362 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18363 Make sure it is a power of 2. */
18364 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18366 if (count)
18368 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18370 /* If the main algorithm works on QImode, no epilogue is needed.
18371 For small sizes just don't align anything. */
18372 if (size_needed == 1)
18373 desired_align = align;
18374 else
18375 goto epilogue;
18378 else
18380 label = gen_label_rtx ();
18381 emit_cmp_and_jump_insns (count_exp,
18382 GEN_INT (epilogue_size_needed),
18383 LTU, 0, counter_mode (count_exp), 1, label);
18384 if (expected_size == -1 || expected_size < epilogue_size_needed)
18385 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18386 else
18387 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18391 /* Emit code to decide at runtime whether a library call or inline code
18392 should be used. */
18393 if (dynamic_check != -1)
18395 if (CONST_INT_P (count_exp))
18397 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
18399 emit_block_move_via_libcall (dst, src, count_exp, false);
18400 count_exp = const0_rtx;
18401 goto epilogue;
18404 else
18406 rtx hot_label = gen_label_rtx ();
18407 jump_around_label = gen_label_rtx ();
18408 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18409 LEU, 0, GET_MODE (count_exp), 1, hot_label);
18410 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18411 emit_block_move_via_libcall (dst, src, count_exp, false);
18412 emit_jump (jump_around_label);
18413 emit_label (hot_label);
18417 /* Step 2: Alignment prologue. */
18419 if (desired_align > align)
18421 if (align_bytes == 0)
18423 /* Except for the first move in the epilogue, we no longer know
18424 the constant offset in the aliasing info.  It doesn't seem worth
18425 the pain to maintain it for the first move, so throw away
18426 the info early. */
18427 src = change_address (src, BLKmode, srcreg);
18428 dst = change_address (dst, BLKmode, destreg);
18429 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
18430 desired_align);
18432 else
18434 /* If we know how many bytes need to be stored before dst is
18435 sufficiently aligned, maintain aliasing info accurately. */
18436 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
18437 desired_align, align_bytes);
18438 count_exp = plus_constant (count_exp, -align_bytes);
18439 count -= align_bytes;
18441 if (need_zero_guard
18442 && (count < (unsigned HOST_WIDE_INT) size_needed
18443 || (align_bytes == 0
18444 && count < ((unsigned HOST_WIDE_INT) size_needed
18445 + desired_align - align))))
18447 /* It is possible that we copied enough so the main loop will not
18448 execute. */
18449 gcc_assert (size_needed > 1);
18450 if (label == NULL_RTX)
18451 label = gen_label_rtx ();
18452 emit_cmp_and_jump_insns (count_exp,
18453 GEN_INT (size_needed),
18454 LTU, 0, counter_mode (count_exp), 1, label);
18455 if (expected_size == -1
18456 || expected_size < (desired_align - align) / 2 + size_needed)
18457 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18458 else
18459 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18462 if (label && size_needed == 1)
18464 emit_label (label);
18465 LABEL_NUSES (label) = 1;
18466 label = NULL;
18467 epilogue_size_needed = 1;
18469 else if (label == NULL_RTX)
18470 epilogue_size_needed = size_needed;
18472 /* Step 3: Main loop. */
18474 switch (alg)
18476 case libcall:
18477 case no_stringop:
18478 gcc_unreachable ();
18479 case loop_1_byte:
18480 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18481 count_exp, QImode, 1, expected_size);
18482 break;
18483 case loop:
18484 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18485 count_exp, Pmode, 1, expected_size);
18486 break;
18487 case unrolled_loop:
18488 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
18489 registers for 4 temporaries anyway. */
18490 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18491 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
18492 expected_size);
18493 break;
18494 case rep_prefix_8_byte:
18495 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18496 DImode);
18497 break;
18498 case rep_prefix_4_byte:
18499 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18500 SImode);
18501 break;
18502 case rep_prefix_1_byte:
18503 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18504 QImode);
18505 break;
18507 /* Properly adjust the offsets of the src and dest memory for aliasing. */
18508 if (CONST_INT_P (count_exp))
18510 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
18511 (count / size_needed) * size_needed);
18512 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18513 (count / size_needed) * size_needed);
18515 else
18517 src = change_address (src, BLKmode, srcreg);
18518 dst = change_address (dst, BLKmode, destreg);
18521 /* Step 4: Epilogue to copy the remaining bytes. */
18522 epilogue:
18523 if (label)
18525 /* When the main loop is done, COUNT_EXP might hold original count,
18526 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
18527 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
18528 bytes. Compensate if needed. */
18530 if (size_needed < epilogue_size_needed)
18532 tmp =
18533 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18534 GEN_INT (size_needed - 1), count_exp, 1,
18535 OPTAB_DIRECT);
18536 if (tmp != count_exp)
18537 emit_move_insn (count_exp, tmp);
18539 emit_label (label);
18540 LABEL_NUSES (label) = 1;
18543 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18544 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
18545 epilogue_size_needed);
18546 if (jump_around_label)
18547 emit_label (jump_around_label);
18548 return 1;
18551 /* Helper function for memset.  For the QImode value 0xXY produce
18552 0xXYXYXYXY of the width specified by MODE.  This is essentially
18553 a * 0x01010101, but we can do slightly better than
18554 synth_mult by unwinding the sequence by hand on CPUs with
18555 slow multiply. */
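/* Editorial example: promoting the QImode value 0xAB to SImode yields
   0xABABABAB, computed either as reg * 0x01010101 where multiply is
   cheap, or by unwinding the multiplication into shifts:

       reg |= reg << 8;    (or a byte insert via movsi_insv_1)
       reg |= reg << 16;

   which is what the two branches below implement; DImode adds one more
   reg |= reg << 32 step.  */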
18556 static rtx
18557 promote_duplicated_reg (enum machine_mode mode, rtx val)
18559 enum machine_mode valmode = GET_MODE (val);
18560 rtx tmp;
18561 int nops = mode == DImode ? 3 : 2;
18563 gcc_assert (mode == SImode || mode == DImode);
18564 if (val == const0_rtx)
18565 return copy_to_mode_reg (mode, const0_rtx);
18566 if (CONST_INT_P (val))
18568 HOST_WIDE_INT v = INTVAL (val) & 255;
18570 v |= v << 8;
18571 v |= v << 16;
18572 if (mode == DImode)
18573 v |= (v << 16) << 16;
18574 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
18577 if (valmode == VOIDmode)
18578 valmode = QImode;
18579 if (valmode != QImode)
18580 val = gen_lowpart (QImode, val);
18581 if (mode == QImode)
18582 return val;
18583 if (!TARGET_PARTIAL_REG_STALL)
18584 nops--;
18585 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
18586 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
18587 <= (ix86_cost->shift_const + ix86_cost->add) * nops
18588 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
18590 rtx reg = convert_modes (mode, QImode, val, true);
18591 tmp = promote_duplicated_reg (mode, const1_rtx);
18592 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
18593 OPTAB_DIRECT);
18595 else
18597 rtx reg = convert_modes (mode, QImode, val, true);
18599 if (!TARGET_PARTIAL_REG_STALL)
18600 if (mode == SImode)
18601 emit_insn (gen_movsi_insv_1 (reg, reg));
18602 else
18603 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
18604 else
18606 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
18607 NULL, 1, OPTAB_DIRECT);
18608 reg =
18609 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18611 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
18612 NULL, 1, OPTAB_DIRECT);
18613 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18614 if (mode == SImode)
18615 return reg;
18616 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
18617 NULL, 1, OPTAB_DIRECT);
18618 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18619 return reg;
18623 /* Duplicate the value VAL using promote_duplicated_reg into the maximal
18624 size needed by the main loop copying SIZE_NEEDED chunks and by the
18625 prologue getting the alignment from ALIGN to DESIRED_ALIGN. */
18626 static rtx
18627 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
18629 rtx promoted_val;
18631 if (TARGET_64BIT
18632 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
18633 promoted_val = promote_duplicated_reg (DImode, val);
18634 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
18635 promoted_val = promote_duplicated_reg (SImode, val);
18636 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
18637 promoted_val = promote_duplicated_reg (HImode, val);
18638 else
18639 promoted_val = val;
18641 return promoted_val;
18644 /* Expand string set operation (memset; bzero is the VAL == 0 case).  Use
18645 i386 string operations when profitable.  See the expand_movmem comment
18646 for an explanation of the individual steps performed. */
18648 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
18649 rtx expected_align_exp, rtx expected_size_exp)
18651 rtx destreg;
18652 rtx label = NULL;
18653 rtx tmp;
18654 rtx jump_around_label = NULL;
18655 HOST_WIDE_INT align = 1;
18656 unsigned HOST_WIDE_INT count = 0;
18657 HOST_WIDE_INT expected_size = -1;
18658 int size_needed = 0, epilogue_size_needed;
18659 int desired_align = 0, align_bytes = 0;
18660 enum stringop_alg alg;
18661 rtx promoted_val = NULL;
18662 bool force_loopy_epilogue = false;
18663 int dynamic_check;
18664 bool need_zero_guard = false;
18666 if (CONST_INT_P (align_exp))
18667 align = INTVAL (align_exp);
18668 /* i386 can do misaligned access at reasonably increased cost. */
18669 if (CONST_INT_P (expected_align_exp)
18670 && INTVAL (expected_align_exp) > align)
18671 align = INTVAL (expected_align_exp);
18672 if (CONST_INT_P (count_exp))
18673 count = expected_size = INTVAL (count_exp);
18674 if (CONST_INT_P (expected_size_exp) && count == 0)
18675 expected_size = INTVAL (expected_size_exp);
18677 /* Make sure we don't need to care about overflow later on. */
18678 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18679 return 0;
18681 /* Step 0: Decide on preferred algorithm, desired alignment and
18682 size of chunks to be copied by main loop. */
18684 alg = decide_alg (count, expected_size, true, &dynamic_check);
18685 desired_align = decide_alignment (align, alg, expected_size);
18687 if (!TARGET_ALIGN_STRINGOPS)
18688 align = desired_align;
18690 if (alg == libcall)
18691 return 0;
18692 gcc_assert (alg != no_stringop);
18693 if (!count)
18694 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18695 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18696 switch (alg)
18698 case libcall:
18699 case no_stringop:
18700 gcc_unreachable ();
18701 case loop:
18702 need_zero_guard = true;
18703 size_needed = GET_MODE_SIZE (Pmode);
18704 break;
18705 case unrolled_loop:
18706 need_zero_guard = true;
18707 size_needed = GET_MODE_SIZE (Pmode) * 4;
18708 break;
18709 case rep_prefix_8_byte:
18710 size_needed = 8;
18711 break;
18712 case rep_prefix_4_byte:
18713 size_needed = 4;
18714 break;
18715 case rep_prefix_1_byte:
18716 size_needed = 1;
18717 break;
18718 case loop_1_byte:
18719 need_zero_guard = true;
18720 size_needed = 1;
18721 break;
18723 epilogue_size_needed = size_needed;
18725 /* Step 1: Prologue guard. */
18727 /* Alignment code needs count to be in register. */
18728 if (CONST_INT_P (count_exp) && desired_align > align)
18730 if (INTVAL (count_exp) > desired_align
18731 && INTVAL (count_exp) > size_needed)
18733 align_bytes
18734 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18735 if (align_bytes <= 0)
18736 align_bytes = 0;
18737 else
18738 align_bytes = desired_align - align_bytes;
18740 if (align_bytes == 0)
18742 enum machine_mode mode = SImode;
18743 if (TARGET_64BIT && (count & ~0xffffffff))
18744 mode = DImode;
18745 count_exp = force_reg (mode, count_exp);
18748 /* Do the cheap promotion to allow better CSE across the
18749 main loop and epilogue (i.e. one load of the big constant in
18750 front of all the code). */
18751 if (CONST_INT_P (val_exp))
18752 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18753 desired_align, align);
18754 /* Ensure that alignment prologue won't copy past end of block. */
18755 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18757 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18758 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18759 Make sure it is a power of 2. */
18760 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18762 /* To improve performance of small blocks, we jump around the VAL
18763 promoting code.  This means that if the promoted VAL is not a constant,
18764 we might not use it in the epilogue and have to use the byte
18765 loop variant. */
18766 if (epilogue_size_needed > 2 && !promoted_val)
18767 force_loopy_epilogue = true;
18768 if (count)
18770 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18772 /* If the main algorithm works on QImode, no epilogue is needed.
18773 For small sizes just don't align anything. */
18774 if (size_needed == 1)
18775 desired_align = align;
18776 else
18777 goto epilogue;
18780 else
18782 label = gen_label_rtx ();
18783 emit_cmp_and_jump_insns (count_exp,
18784 GEN_INT (epilogue_size_needed),
18785 LTU, 0, counter_mode (count_exp), 1, label);
18786 if (expected_size == -1 || expected_size <= epilogue_size_needed)
18787 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18788 else
18789 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18792 if (dynamic_check != -1)
18794 rtx hot_label = gen_label_rtx ();
18795 jump_around_label = gen_label_rtx ();
18796 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18797 LEU, 0, counter_mode (count_exp), 1, hot_label);
18798 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18799 set_storage_via_libcall (dst, count_exp, val_exp, false);
18800 emit_jump (jump_around_label);
18801 emit_label (hot_label);
18804 /* Step 2: Alignment prologue. */
18806 /* Do the expensive promotion once we branched off the small blocks. */
18807 if (!promoted_val)
18808 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18809 desired_align, align);
18810 gcc_assert (desired_align >= 1 && align >= 1);
18812 if (desired_align > align)
18814 if (align_bytes == 0)
18816 /* Except for the first move in the epilogue, we no longer know
18817 the constant offset in the aliasing info.  It doesn't seem worth
18818 the pain to maintain it for the first move, so throw away
18819 the info early. */
18820 dst = change_address (dst, BLKmode, destreg);
18821 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
18822 desired_align);
18824 else
18826 /* If we know how many bytes need to be stored before dst is
18827 sufficiently aligned, maintain aliasing info accurately. */
18828 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
18829 desired_align, align_bytes);
18830 count_exp = plus_constant (count_exp, -align_bytes);
18831 count -= align_bytes;
18833 if (need_zero_guard
18834 && (count < (unsigned HOST_WIDE_INT) size_needed
18835 || (align_bytes == 0
18836 && count < ((unsigned HOST_WIDE_INT) size_needed
18837 + desired_align - align))))
18839 /* It is possible that we copied enough so the main loop will not
18840 execute. */
18841 gcc_assert (size_needed > 1);
18842 if (label == NULL_RTX)
18843 label = gen_label_rtx ();
18844 emit_cmp_and_jump_insns (count_exp,
18845 GEN_INT (size_needed),
18846 LTU, 0, counter_mode (count_exp), 1, label);
18847 if (expected_size == -1
18848 || expected_size < (desired_align - align) / 2 + size_needed)
18849 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18850 else
18851 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18854 if (label && size_needed == 1)
18856 emit_label (label);
18857 LABEL_NUSES (label) = 1;
18858 label = NULL;
18859 promoted_val = val_exp;
18860 epilogue_size_needed = 1;
18862 else if (label == NULL_RTX)
18863 epilogue_size_needed = size_needed;
18865 /* Step 3: Main loop. */
18867 switch (alg)
18869 case libcall:
18870 case no_stringop:
18871 gcc_unreachable ();
18872 case loop_1_byte:
18873 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18874 count_exp, QImode, 1, expected_size);
18875 break;
18876 case loop:
18877 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18878 count_exp, Pmode, 1, expected_size);
18879 break;
18880 case unrolled_loop:
18881 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18882 count_exp, Pmode, 4, expected_size);
18883 break;
18884 case rep_prefix_8_byte:
18885 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18886 DImode, val_exp);
18887 break;
18888 case rep_prefix_4_byte:
18889 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18890 SImode, val_exp);
18891 break;
18892 case rep_prefix_1_byte:
18893 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18894 QImode, val_exp);
18895 break;
18897 /* Properly adjust the offsets of the src and dest memory for aliasing. */
18898 if (CONST_INT_P (count_exp))
18899 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18900 (count / size_needed) * size_needed);
18901 else
18902 dst = change_address (dst, BLKmode, destreg);
18904 /* Step 4: Epilogue to copy the remaining bytes. */
18906 if (label)
18908 /* When the main loop is done, COUNT_EXP might hold original count,
18909 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
18910 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
18911 bytes. Compensate if needed. */
18913 if (size_needed < epilogue_size_needed)
18915 tmp =
18916 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18917 GEN_INT (size_needed - 1), count_exp, 1,
18918 OPTAB_DIRECT);
18919 if (tmp != count_exp)
18920 emit_move_insn (count_exp, tmp);
18922 emit_label (label);
18923 LABEL_NUSES (label) = 1;
18925 epilogue:
18926 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18928 if (force_loopy_epilogue)
18929 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
18930 epilogue_size_needed);
18931 else
18932 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
18933 epilogue_size_needed);
18935 if (jump_around_label)
18936 emit_label (jump_around_label);
18937 return 1;
18940 /* Expand the appropriate insns for doing strlen if not just doing
18941 repnz; scasb
18943 out = result, initialized with the start address
18944 align_rtx = alignment of the address.
18945 scratch = scratch register, initialized with the start address when
18946 not aligned, otherwise undefined
18948 This is just the body. It needs the initializations mentioned above and
18949 some address computing at the end. These things are done in i386.md. */
18951 static void
18952 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
18954 int align;
18955 rtx tmp;
18956 rtx align_2_label = NULL_RTX;
18957 rtx align_3_label = NULL_RTX;
18958 rtx align_4_label = gen_label_rtx ();
18959 rtx end_0_label = gen_label_rtx ();
18960 rtx mem;
18961 rtx tmpreg = gen_reg_rtx (SImode);
18962 rtx scratch = gen_reg_rtx (SImode);
18963 rtx cmp;
18965 align = 0;
18966 if (CONST_INT_P (align_rtx))
18967 align = INTVAL (align_rtx);
18969 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
18971 /* Is there a known alignment and is it less than 4? */
18972 if (align < 4)
18974 rtx scratch1 = gen_reg_rtx (Pmode);
18975 emit_move_insn (scratch1, out);
18976 /* Is there a known alignment and is it not 2? */
18977 if (align != 2)
18979 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
18980 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
18982 /* Leave just the two low bits. */
18983 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
18984 NULL_RTX, 0, OPTAB_WIDEN);
18986 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18987 Pmode, 1, align_4_label);
18988 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
18989 Pmode, 1, align_2_label);
18990 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
18991 Pmode, 1, align_3_label);
18993 else
18995 /* Since the alignment is 2, we have to check 2 or 0 bytes;
18996 check whether it is aligned to a 4-byte boundary. */
18998 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
18999 NULL_RTX, 0, OPTAB_WIDEN);
19001 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
19002 Pmode, 1, align_4_label);
19005 mem = change_address (src, QImode, out);
19007 /* Now compare the bytes. */
19009 /* Compare the first 1..3 unaligned bytes on a byte-by-byte basis. */
19010 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
19011 QImode, 1, end_0_label);
19013 /* Increment the address. */
19014 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
19016 /* Not needed with an alignment of 2 */
19017 if (align != 2)
19019 emit_label (align_2_label);
19021 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
19022 end_0_label);
19024 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
19026 emit_label (align_3_label);
19029 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
19030 end_0_label);
19032 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
19035 /* Generate a loop to check 4 bytes at a time.  It is not a good idea
19036 to align this loop; that only makes the program larger and does not
19037 help to speed it up. */
19038 emit_label (align_4_label);
19040 mem = change_address (src, SImode, out);
19041 emit_move_insn (scratch, mem);
19042 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
19044 /* This formula yields a nonzero result iff one of the bytes is zero.
19045 This saves three branches inside the loop and many cycles. */
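/* Editorial worked example of the test
   (x - 0x01010101) & ~x & 0x80808080:
   for x = 0x41004141 (a zero byte in bits 16..23),
   x - 0x01010101 = 0x3FFF4040 and ~x = 0xBEFFBEBE, so the masked
   result is 0x00800000, nonzero, and the loop exits.  For
   x = 0x41414141 the result is 0 and the loop continues.  A byte with
   the high bit already set cannot produce a false hit, because ~x
   clears its 0x80 position.  */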
19047 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
19048 emit_insn (gen_one_cmplsi2 (scratch, scratch));
19049 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
19050 emit_insn (gen_andsi3 (tmpreg, tmpreg,
19051 gen_int_mode (0x80808080, SImode)));
19052 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
19053 align_4_label);
19055 if (TARGET_CMOVE)
19057 rtx reg = gen_reg_rtx (SImode);
19058 rtx reg2 = gen_reg_rtx (Pmode);
19059 emit_move_insn (reg, tmpreg);
19060 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
19062 /* If zero is not in the first two bytes, move two bytes forward. */
19063 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19064 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19065 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
19066 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
19067 gen_rtx_IF_THEN_ELSE (SImode, tmp,
19068 reg,
19069 tmpreg)));
19070 /* Emit lea manually to avoid clobbering of flags. */
19071 emit_insn (gen_rtx_SET (SImode, reg2,
19072 gen_rtx_PLUS (Pmode, out, const2_rtx)));
19074 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19075 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
19076 emit_insn (gen_rtx_SET (VOIDmode, out,
19077 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
19078 reg2,
19079 out)));
19082 else
19084 rtx end_2_label = gen_label_rtx ();
19085 /* Is zero in the first two bytes? */
19087 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19088 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19089 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
19090 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19091 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
19092 pc_rtx);
19093 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19094 JUMP_LABEL (tmp) = end_2_label;
19096 /* Not in the first two. Move two bytes forward. */
19097 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
19098 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
19100 emit_label (end_2_label);
19104 /* Avoid a branch in fixing up the final byte offset. */
19105 tmpreg = gen_lowpart (QImode, tmpreg);
19106 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
19107 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
19108 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
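/* Editorial trace: OUT is now 4 (or, after the two-byte step above, 6)
   bytes past the start of the word, and bit 7 of TMPREG's low byte is
   set iff the zero was in the first byte of the remaining pair.  Adding
   the byte to itself moves that bit into the carry flag, so
   out - 3 - CF gives out - 4 when the zero was in byte 0 and out - 3
   when it was in byte 1, landing exactly on the terminating zero in
   both cases.  */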
19110 emit_label (end_0_label);
19113 /* Expand strlen. */
19116 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
19118 rtx addr, scratch1, scratch2, scratch3, scratch4;
19120 /* The generic case of the strlen expander is long.  Avoid expanding
19121 it unless TARGET_INLINE_ALL_STRINGOPS. */
19123 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19124 && !TARGET_INLINE_ALL_STRINGOPS
19125 && !optimize_insn_for_size_p ()
19126 && (!CONST_INT_P (align) || INTVAL (align) < 4))
19127 return 0;
19129 addr = force_reg (Pmode, XEXP (src, 0));
19130 scratch1 = gen_reg_rtx (Pmode);
19132 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19133 && !optimize_insn_for_size_p ())
19135 /* Well, it seems that some optimizer does not combine a call like
19136 foo (strlen (bar), strlen (bar));
19137 when the move and the subtraction are done here.  It does calculate
19138 the length just once when these instructions are done inside
19139 output_strlen_unroll().  But since &bar[strlen (bar)] is
19140 often used, and this uses one fewer register for the lifetime of
19141 output_strlen_unroll(), this is better. */
19143 emit_move_insn (out, addr);
19145 ix86_expand_strlensi_unroll_1 (out, src, align);
19147 /* strlensi_unroll_1 returns the address of the zero at the end of
19148 the string, like memchr(), so compute the length by subtracting
19149 the start address. */
19150 emit_insn ((*ix86_gen_sub3) (out, out, addr));
19152 else
19154 rtx unspec;
19156 /* Can't use this if the user has appropriated eax, ecx, or edi. */
19157 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
19158 return false;
19160 scratch2 = gen_reg_rtx (Pmode);
19161 scratch3 = gen_reg_rtx (Pmode);
19162 scratch4 = force_reg (Pmode, constm1_rtx);
19164 emit_move_insn (scratch3, addr);
19165 eoschar = force_reg (QImode, eoschar);
19167 src = replace_equiv_address_nv (src, scratch3);
19169 /* If .md starts supporting :P, this can be done in .md. */
19170 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
19171 scratch4), UNSPEC_SCAS);
19172 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
19173 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
19174 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
19176 return 1;
19179 /* For a given symbol (function), construct code to compute the address of
19180 its PLT entry in the large x86-64 PIC model. */
19182 construct_plt_address (rtx symbol)
19184 rtx tmp = gen_reg_rtx (Pmode);
19185 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
19187 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
19188 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
19190 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
19191 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
19192 return tmp;
19195 void
19196 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
19197 rtx callarg2,
19198 rtx pop, int sibcall)
19200 rtx use = NULL, call;
19202 if (pop == const0_rtx)
19203 pop = NULL;
19204 gcc_assert (!TARGET_64BIT || !pop);
19206 if (TARGET_MACHO && !TARGET_64BIT)
19208 #if TARGET_MACHO
19209 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
19210 fnaddr = machopic_indirect_call_target (fnaddr);
19211 #endif
19213 else
19215 /* Static functions and indirect calls don't need the pic register. */
19216 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
19217 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19218 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
19219 use_reg (&use, pic_offset_table_rtx);
19222 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
19224 rtx al = gen_rtx_REG (QImode, AX_REG);
19225 emit_move_insn (al, callarg2);
19226 use_reg (&use, al);
19229 if (ix86_cmodel == CM_LARGE_PIC
19230 && MEM_P (fnaddr)
19231 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19232 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
19233 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
19234 else if (sibcall
19235 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
19236 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
19238 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19239 fnaddr = gen_rtx_MEM (QImode, fnaddr);
19242 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
19243 if (retval)
19244 call = gen_rtx_SET (VOIDmode, retval, call);
19245 if (pop)
19247 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
19248 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
19249 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
19251 if (TARGET_64BIT
19252 && ix86_cfun_abi () == MS_ABI
19253 && (!callarg2 || INTVAL (callarg2) != -2))
19255 /* We need to represent that the XMM6-XMM15, SI and DI registers
19256 are clobbered by SYSV calls. */
19257 static int clobbered_registers[] = {
19258 XMM6_REG, XMM7_REG, XMM8_REG,
19259 XMM9_REG, XMM10_REG, XMM11_REG,
19260 XMM12_REG, XMM13_REG, XMM14_REG,
19261 XMM15_REG, SI_REG, DI_REG
19263 unsigned int i;
19264 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
19265 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
19266 UNSPEC_MS_TO_SYSV_CALL);
19268 vec[0] = call;
19269 vec[1] = unspec;
19270 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
19271 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
19272 ? TImode : DImode,
19273 gen_rtx_REG
19274 (SSE_REGNO_P (clobbered_registers[i])
19275 ? TImode : DImode,
19276 clobbered_registers[i]));
19278 call = gen_rtx_PARALLEL (VOIDmode,
19279 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
19280 + 2, vec));
19283 call = emit_call_insn (call);
19284 if (use)
19285 CALL_INSN_FUNCTION_USAGE (call) = use;
19289 /* Clear stack slot assignments remembered from previous functions.
19290 This is called from INIT_EXPANDERS once before RTL is emitted for each
19291 function. */
19293 static struct machine_function *
19294 ix86_init_machine_status (void)
19296 struct machine_function *f;
19298 f = GGC_CNEW (struct machine_function);
19299 f->use_fast_prologue_epilogue_nregs = -1;
19300 f->tls_descriptor_call_expanded_p = 0;
19301 f->call_abi = ix86_abi;
19303 return f;
19306 /* Return a MEM corresponding to a stack slot with mode MODE.
19307 Allocate a new slot if necessary.
19309 The RTL for a function can have several slots available: N is
19310 which slot to use. */
19312 rtx
19313 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
19315 struct stack_local_entry *s;
19317 gcc_assert (n < MAX_386_STACK_LOCALS);
19319 /* Virtual slot is valid only before vregs are instantiated. */
19320 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
19322 for (s = ix86_stack_locals; s; s = s->next)
19323 if (s->mode == mode && s->n == n)
19324 return copy_rtx (s->rtl);
19326 s = (struct stack_local_entry *)
19327 ggc_alloc (sizeof (struct stack_local_entry));
19328 s->n = n;
19329 s->mode = mode;
19330 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
19332 s->next = ix86_stack_locals;
19333 ix86_stack_locals = s;
19334 return s->rtl;
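/* Illustrative sketch (annotation, not part of the source): a caller
   that needs a scratch stack word, e.g. to spill the FPU control word,
   would do something like

       rtx slot = assign_386_stack_local (HImode, SLOT_CW_STORED);
       emit_move_insn (slot, reg);

   Repeated requests with the same (mode, n) pair return copies of the
   same slot rather than fresh stack space.  SLOT_CW_STORED is assumed
   here to be a member of enum ix86_stack_slot.  */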
19337 /* Construct the SYMBOL_REF for the tls_get_addr function. */
19339 static GTY(()) rtx ix86_tls_symbol;
19340 static rtx
19341 ix86_tls_get_addr (void)
19344 if (!ix86_tls_symbol)
19346 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
19347 (TARGET_ANY_GNU_TLS
19348 && !TARGET_64BIT)
19349 ? "___tls_get_addr"
19350 : "__tls_get_addr");
19353 return ix86_tls_symbol;
19356 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
19358 static GTY(()) rtx ix86_tls_module_base_symbol;
19359 static rtx
19360 ix86_tls_module_base (void)
19363 if (!ix86_tls_module_base_symbol)
19365 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
19366 "_TLS_MODULE_BASE_");
19367 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
19368 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
19371 return ix86_tls_module_base_symbol;
19374 /* Calculate the length of the memory address in the instruction
19375 encoding. Does not include the one-byte modrm, opcode, or prefix. */
19377 int
19378 memory_address_length (rtx addr)
19380 struct ix86_address parts;
19381 rtx base, index, disp;
19382 int len;
19383 int ok;
19385 if (GET_CODE (addr) == PRE_DEC
19386 || GET_CODE (addr) == POST_INC
19387 || GET_CODE (addr) == PRE_MODIFY
19388 || GET_CODE (addr) == POST_MODIFY)
19389 return 0;
19391 ok = ix86_decompose_address (addr, &parts);
19392 gcc_assert (ok);
19394 if (parts.base && GET_CODE (parts.base) == SUBREG)
19395 parts.base = SUBREG_REG (parts.base);
19396 if (parts.index && GET_CODE (parts.index) == SUBREG)
19397 parts.index = SUBREG_REG (parts.index);
19399 base = parts.base;
19400 index = parts.index;
19401 disp = parts.disp;
19402 len = 0;
19404 /* Rule of thumb:
19405 - esp as the base always wants an index,
19406 - ebp as the base always wants a displacement,
19407 - r12 as the base always wants an index,
19408 - r13 as the base always wants a displacement. */
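/* Illustrative sketch (annotation, not part of the source): concrete
   encodings behind the rules above, counting only the bytes this
   function reports (those beyond opcode and modrm):

       movl (%eax), %ecx    ->  8b 08           len 0
       movl (%esp), %ecx    ->  8b 0c 24        len 1 (SIB byte)
       movl (%ebp), %ecx    ->  8b 4d 00        len 1 (disp8)
       movl 4(%esp), %ecx   ->  8b 4c 24 04     len 2 (SIB + disp8)  */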
19410 /* Register Indirect. */
19411 if (base && !index && !disp)
19413 /* esp (for its index) and ebp (for its displacement) need
19414 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
19415 code. */
19416 if (REG_P (addr)
19417 && (addr == arg_pointer_rtx
19418 || addr == frame_pointer_rtx
19419 || REGNO (addr) == SP_REG
19420 || REGNO (addr) == BP_REG
19421 || REGNO (addr) == R12_REG
19422 || REGNO (addr) == R13_REG))
19423 len = 1;
19426 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
19427 is not disp32, but disp32(%rip), so encoding a plain
19428 disp32 needs a SIB byte, unless print_operand_address
19429 optimizes it into disp32(%rip) or (%rip) is implied
19430 by an UNSPEC. */
19431 else if (disp && !base && !index)
19433 len = 4;
19434 if (TARGET_64BIT)
19436 rtx symbol = disp;
19438 if (GET_CODE (disp) == CONST)
19439 symbol = XEXP (disp, 0);
19440 if (GET_CODE (symbol) == PLUS
19441 && CONST_INT_P (XEXP (symbol, 1)))
19442 symbol = XEXP (symbol, 0);
19444 if (GET_CODE (symbol) != LABEL_REF
19445 && (GET_CODE (symbol) != SYMBOL_REF
19446 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
19447 && (GET_CODE (symbol) != UNSPEC
19448 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
19449 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
19450 len += 1;
19454 else
19456 /* Find the length of the displacement constant. */
19457 if (disp)
19459 if (base && satisfies_constraint_K (disp))
19460 len = 1;
19461 else
19462 len = 4;
19464 /* ebp always wants a displacement. Similarly r13. */
19465 else if (base && REG_P (base)
19466 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
19467 len = 1;
19469 /* An index requires the two-byte modrm form.... */
19470 if (index
19471 /* ...like esp (or r12), which always wants an index. */
19472 || base == arg_pointer_rtx
19473 || base == frame_pointer_rtx
19474 || (base && REG_P (base)
19475 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
19476 len += 1;
19479 switch (parts.seg)
19481 case SEG_FS:
19482 case SEG_GS:
19483 len += 1;
19484 break;
19485 default:
19486 break;
19489 return len;
19492 /* Compute default value for "length_immediate" attribute. When SHORTFORM
19493 is set, expect that the insn has an 8-bit immediate alternative. */
19494 int
19495 ix86_attr_length_immediate_default (rtx insn, int shortform)
19497 int len = 0;
19498 int i;
19499 extract_insn_cached (insn);
19500 for (i = recog_data.n_operands - 1; i >= 0; --i)
19501 if (CONSTANT_P (recog_data.operand[i]))
19503 enum attr_mode mode = get_attr_mode (insn);
19505 gcc_assert (!len);
19506 if (shortform && CONST_INT_P (recog_data.operand[i]))
19508 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
19509 switch (mode)
19511 case MODE_QI:
19512 len = 1;
19513 continue;
19514 case MODE_HI:
19515 ival = trunc_int_for_mode (ival, HImode);
19516 break;
19517 case MODE_SI:
19518 ival = trunc_int_for_mode (ival, SImode);
19519 break;
19520 default:
19521 break;
19523 if (IN_RANGE (ival, -128, 127))
19525 len = 1;
19526 continue;
19529 switch (mode)
19531 case MODE_QI:
19532 len = 1;
19533 break;
19534 case MODE_HI:
19535 len = 2;
19536 break;
19537 case MODE_SI:
19538 len = 4;
19539 break;
19540 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
19541 case MODE_DI:
19542 len = 4;
19543 break;
19544 default:
19545 fatal_insn ("unknown insn mode", insn);
19548 return len;
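/* Illustrative sketch (annotation, not part of the source): for
   `addl $3, %eax' with SHORTFORM set, 3 fits in -128..127, so the insn
   can use the imm8 alternative (opcode 83 /0) and the result is 1.
   For `addl $1000, %eax' the immediate takes 4 bytes (opcode 81 /0).
   A DImode `addq $1000, %rax' also yields 4, since such immediates are
   32-bit sign-extended fields.  */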
19550 /* Compute default value for "length_address" attribute. */
19551 int
19552 ix86_attr_length_address_default (rtx insn)
19554 int i;
19556 if (get_attr_type (insn) == TYPE_LEA)
19558 rtx set = PATTERN (insn), addr;
19560 if (GET_CODE (set) == PARALLEL)
19561 set = XVECEXP (set, 0, 0);
19563 gcc_assert (GET_CODE (set) == SET);
19565 addr = SET_SRC (set);
19566 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
19568 if (GET_CODE (addr) == ZERO_EXTEND)
19569 addr = XEXP (addr, 0);
19570 if (GET_CODE (addr) == SUBREG)
19571 addr = SUBREG_REG (addr);
19574 return memory_address_length (addr);
19577 extract_insn_cached (insn);
19578 for (i = recog_data.n_operands - 1; i >= 0; --i)
19579 if (MEM_P (recog_data.operand[i]))
19581 constrain_operands_cached (reload_completed);
19582 if (which_alternative != -1)
19584 const char *constraints = recog_data.constraints[i];
19585 int alt = which_alternative;
19587 while (*constraints == '=' || *constraints == '+')
19588 constraints++;
19589 while (alt-- > 0)
19590 while (*constraints++ != ',')
19591 ;
19592 /* Skip ignored operands. */
19593 if (*constraints == 'X')
19594 continue;
19596 return memory_address_length (XEXP (recog_data.operand[i], 0));
19598 return 0;
19601 /* Compute default value for "length_vex" attribute. It includes
19602 2 or 3 byte VEX prefix and 1 opcode byte. */
19604 int
19605 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
19606 int has_vex_w)
19608 int i;
19610 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX W bit
19611 requires the 3-byte form. */
19612 if (!has_0f_opcode || has_vex_w)
19613 return 3 + 1;
19615 /* We can always use the 2-byte VEX prefix in 32-bit code. */
19616 if (!TARGET_64BIT)
19617 return 2 + 1;
19619 extract_insn_cached (insn);
19621 for (i = recog_data.n_operands - 1; i >= 0; --i)
19622 if (REG_P (recog_data.operand[i]))
19624 /* REX.W bit uses 3 byte VEX prefix. */
19625 if (GET_MODE (recog_data.operand[i]) == DImode
19626 && GENERAL_REG_P (recog_data.operand[i]))
19627 return 3 + 1;
19629 else
19631 /* REX.X or REX.B bits use 3 byte VEX prefix. */
19632 if (MEM_P (recog_data.operand[i])
19633 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
19634 return 3 + 1;
19637 return 2 + 1;
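/* Illustrative sketch (annotation, not part of the source):
   `vaddps %xmm2, %xmm1, %xmm0' in 64-bit code needs neither REX.W nor
   the REX.X/REX.B bits, so the 2-byte VEX prefix applies and this
   reports 2 + 1 = 3.  A DImode general-register operand (REX.W), or a
   memory operand whose base or index is one of %r8-%r15 (REX.X/REX.B),
   forces the 3-byte prefix: 3 + 1 = 4.  */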
19640 /* Return the maximum number of instructions a cpu can issue. */
19642 static int
19643 ix86_issue_rate (void)
19645 switch (ix86_tune)
19647 case PROCESSOR_PENTIUM:
19648 case PROCESSOR_ATOM:
19649 case PROCESSOR_K6:
19650 return 2;
19652 case PROCESSOR_PENTIUMPRO:
19653 case PROCESSOR_PENTIUM4:
19654 case PROCESSOR_ATHLON:
19655 case PROCESSOR_K8:
19656 case PROCESSOR_AMDFAM10:
19657 case PROCESSOR_NOCONA:
19658 case PROCESSOR_GENERIC32:
19659 case PROCESSOR_GENERIC64:
19660 return 3;
19662 case PROCESSOR_CORE2:
19663 return 4;
19665 default:
19666 return 1;
19670 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
19671 set by DEP_INSN and nothing else set by DEP_INSN. */
19673 static int
19674 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19676 rtx set, set2;
19678 /* Simplify the test for uninteresting insns. */
19679 if (insn_type != TYPE_SETCC
19680 && insn_type != TYPE_ICMOV
19681 && insn_type != TYPE_FCMOV
19682 && insn_type != TYPE_IBR)
19683 return 0;
19685 if ((set = single_set (dep_insn)) != 0)
19687 set = SET_DEST (set);
19688 set2 = NULL_RTX;
19690 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
19691 && XVECLEN (PATTERN (dep_insn), 0) == 2
19692 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
19693 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
19695 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
19696 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
19698 else
19699 return 0;
19701 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
19702 return 0;
19704 /* This test is true if the dependent insn reads the flags but
19705 not any other potentially set register. */
19706 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
19707 return 0;
19709 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
19710 return 0;
19712 return 1;
19715 /* Return true iff USE_INSN has a memory address with operands set by
19716 SET_INSN. */
19718 bool
19719 ix86_agi_dependent (rtx set_insn, rtx use_insn)
19721 int i;
19722 extract_insn_cached (use_insn);
19723 for (i = recog_data.n_operands - 1; i >= 0; --i)
19724 if (MEM_P (recog_data.operand[i]))
19726 rtx addr = XEXP (recog_data.operand[i], 0);
19727 return modified_in_p (addr, set_insn) != 0;
19729 return false;
19732 static int
19733 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
19735 enum attr_type insn_type, dep_insn_type;
19736 enum attr_memory memory;
19737 rtx set, set2;
19738 int dep_insn_code_number;
19740 /* Anti and output dependencies have zero cost on all CPUs. */
19741 if (REG_NOTE_KIND (link) != 0)
19742 return 0;
19744 dep_insn_code_number = recog_memoized (dep_insn);
19746 /* If we can't recognize the insns, we can't really do anything. */
19747 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
19748 return cost;
19750 insn_type = get_attr_type (insn);
19751 dep_insn_type = get_attr_type (dep_insn);
19753 switch (ix86_tune)
19755 case PROCESSOR_PENTIUM:
19756 /* Address Generation Interlock adds a cycle of latency. */
19757 if (insn_type == TYPE_LEA)
19759 rtx addr = PATTERN (insn);
19761 if (GET_CODE (addr) == PARALLEL)
19762 addr = XVECEXP (addr, 0, 0);
19764 gcc_assert (GET_CODE (addr) == SET);
19766 addr = SET_SRC (addr);
19767 if (modified_in_p (addr, dep_insn))
19768 cost += 1;
19770 else if (ix86_agi_dependent (dep_insn, insn))
19771 cost += 1;
19773 /* ??? Compares pair with jump/setcc. */
19774 if (ix86_flags_dependent (insn, dep_insn, insn_type))
19775 cost = 0;
19777 /* Floating point stores require the value to be ready one cycle earlier. */
19778 if (insn_type == TYPE_FMOV
19779 && get_attr_memory (insn) == MEMORY_STORE
19780 && !ix86_agi_dependent (dep_insn, insn))
19781 cost += 1;
19782 break;
19784 case PROCESSOR_PENTIUMPRO:
19785 memory = get_attr_memory (insn);
19787 /* INT->FP conversion is expensive. */
19788 if (get_attr_fp_int_src (dep_insn))
19789 cost += 5;
19791 /* There is one extra cycle of latency between an FP op and a store. */
19792 if (insn_type == TYPE_FMOV
19793 && (set = single_set (dep_insn)) != NULL_RTX
19794 && (set2 = single_set (insn)) != NULL_RTX
19795 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
19796 && MEM_P (SET_DEST (set2)))
19797 cost += 1;
19799 /* Show the ability of the reorder buffer to hide the latency of a load
19800 by executing it in parallel with the previous instruction when the
19801 previous instruction is not needed to compute the address. */
19802 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19803 && !ix86_agi_dependent (dep_insn, insn))
19805 /* Claim moves take one cycle, as the core can issue one load
19806 at a time and the next load can start a cycle later. */
19807 if (dep_insn_type == TYPE_IMOV
19808 || dep_insn_type == TYPE_FMOV)
19809 cost = 1;
19810 else if (cost > 1)
19811 cost--;
19813 break;
19815 case PROCESSOR_K6:
19816 memory = get_attr_memory (insn);
19818 /* The esp dependency is resolved before the instruction is really
19819 finished. */
19820 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
19821 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
19822 return 1;
19824 /* INT->FP conversion is expensive. */
19825 if (get_attr_fp_int_src (dep_insn))
19826 cost += 5;
19828 /* Show the ability of the reorder buffer to hide the latency of a load
19829 by executing it in parallel with the previous instruction when the
19830 previous instruction is not needed to compute the address. */
19831 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19832 && !ix86_agi_dependent (dep_insn, insn))
19834 /* Claim moves take one cycle, as the core can issue one load
19835 at a time and the next load can start a cycle later. */
19836 if (dep_insn_type == TYPE_IMOV
19837 || dep_insn_type == TYPE_FMOV)
19838 cost = 1;
19839 else if (cost > 2)
19840 cost -= 2;
19841 else
19842 cost = 1;
19844 break;
19846 case PROCESSOR_ATHLON:
19847 case PROCESSOR_K8:
19848 case PROCESSOR_AMDFAM10:
19849 case PROCESSOR_ATOM:
19850 case PROCESSOR_GENERIC32:
19851 case PROCESSOR_GENERIC64:
19852 memory = get_attr_memory (insn);
19854 /* Show the ability of the reorder buffer to hide the latency of a load
19855 by executing it in parallel with the previous instruction when the
19856 previous instruction is not needed to compute the address. */
19857 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19858 && !ix86_agi_dependent (dep_insn, insn))
19860 enum attr_unit unit = get_attr_unit (insn);
19861 int loadcost = 3;
19863 /* Because of the difference between the length of integer and
19864 floating unit pipeline preparation stages, the memory operands
19865 for floating point are cheaper.
19867 ??? For Athlon the difference is most probably 2. */
19868 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
19869 loadcost = 3;
19870 else
19871 loadcost = TARGET_ATHLON ? 2 : 0;
19873 if (cost >= loadcost)
19874 cost -= loadcost;
19875 else
19876 cost = 0;
19879 default:
19880 break;
19883 return cost;
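/* Illustrative sketch (annotation, not part of the source): on
   PROCESSOR_PENTIUM, the pair

       addl $4, %ebx          # dep_insn: writes %ebx
       movl (%ebx), %eax      # insn: uses %ebx in its address

   hits the AGI path above (ix86_agi_dependent returns true), so the
   reported cost becomes cost + 1 and the scheduler tries to separate
   the two insns.  */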
19886 /* How many alternative schedules to try. This should be as wide as the
19887 scheduling freedom in the DFA, but no wider. Making this value too
19888 large results in extra work for the scheduler. */
19890 static int
19891 ia32_multipass_dfa_lookahead (void)
19893 switch (ix86_tune)
19895 case PROCESSOR_PENTIUM:
19896 return 2;
19898 case PROCESSOR_PENTIUMPRO:
19899 case PROCESSOR_K6:
19900 return 1;
19902 default:
19903 return 0;
19908 /* Compute the alignment given to a constant that is being placed in memory.
19909 EXP is the constant and ALIGN is the alignment that the object would
19910 ordinarily have.
19911 The value of this function is used instead of that alignment to align
19912 the object. */
19914 int
19915 ix86_constant_alignment (tree exp, int align)
19917 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
19918 || TREE_CODE (exp) == INTEGER_CST)
19920 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
19921 return 64;
19922 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
19923 return 128;
19925 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
19926 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
19927 return BITS_PER_WORD;
19929 return align;
19932 /* Compute the alignment for a static variable.
19933 TYPE is the data type, and ALIGN is the alignment that
19934 the object would ordinarily have. The value of this function is used
19935 instead of that alignment to align the object. */
19937 int
19938 ix86_data_alignment (tree type, int align)
19940 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
19942 if (AGGREGATE_TYPE_P (type)
19943 && TYPE_SIZE (type)
19944 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19945 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
19946 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
19947 && align < max_align)
19948 align = max_align;
19950 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
19951 to a 16-byte boundary. */
19952 if (TARGET_64BIT)
19954 if (AGGREGATE_TYPE_P (type)
19955 && TYPE_SIZE (type)
19956 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19957 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
19958 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19959 return 128;
19962 if (TREE_CODE (type) == ARRAY_TYPE)
19964 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19965 return 64;
19966 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19967 return 128;
19969 else if (TREE_CODE (type) == COMPLEX_TYPE)
19972 if (TYPE_MODE (type) == DCmode && align < 64)
19973 return 64;
19974 if ((TYPE_MODE (type) == XCmode
19975 || TYPE_MODE (type) == TCmode) && align < 128)
19976 return 128;
19978 else if ((TREE_CODE (type) == RECORD_TYPE
19979 || TREE_CODE (type) == UNION_TYPE
19980 || TREE_CODE (type) == QUAL_UNION_TYPE)
19981 && TYPE_FIELDS (type))
19983 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19984 return 64;
19985 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19986 return 128;
19988 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19989 || TREE_CODE (type) == INTEGER_TYPE)
19991 if (TYPE_MODE (type) == DFmode && align < 64)
19992 return 64;
19993 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19994 return 128;
19997 return align;
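/* Illustrative sketch (annotation, not part of the source): on x86-64,
   `static double d[2]' is a 16-byte aggregate, so the ABI clause above
   returns 128-bit alignment even though `double' alone only asks for
   64.  A scalar `static double x' hits the REAL_TYPE case instead and
   gets 64-bit alignment, keeping DFmode accesses naturally aligned.  */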
20000 /* Compute the alignment for a local variable or a stack slot. EXP is
20001 the data type or decl itself, MODE is the widest mode available and
20002 ALIGN is the alignment that the object would ordinarily have. The
20003 value of this macro is used instead of that alignment to align the
20004 object. */
20006 unsigned int
20007 ix86_local_alignment (tree exp, enum machine_mode mode,
20008 unsigned int align)
20010 tree type, decl;
20012 if (exp && DECL_P (exp))
20014 type = TREE_TYPE (exp);
20015 decl = exp;
20017 else
20019 type = exp;
20020 decl = NULL;
20023 /* Don't do dynamic stack realignment for long long objects with
20024 -mpreferred-stack-boundary=2. */
20025 if (!TARGET_64BIT
20026 && align == 64
20027 && ix86_preferred_stack_boundary < 64
20028 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
20029 && (!type || !TYPE_USER_ALIGN (type))
20030 && (!decl || !DECL_USER_ALIGN (decl)))
20031 align = 32;
20033 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
20034 register in MODE. Return the larger of the XFmode and DFmode
20035 alignments. */
20036 if (!type)
20038 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
20039 align = GET_MODE_ALIGNMENT (DFmode);
20040 return align;
20043 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
20044 to a 16-byte boundary. */
20045 if (TARGET_64BIT)
20047 if (AGGREGATE_TYPE_P (type)
20048 && TYPE_SIZE (type)
20049 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20050 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
20051 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
20052 return 128;
20054 if (TREE_CODE (type) == ARRAY_TYPE)
20056 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
20057 return 64;
20058 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
20059 return 128;
20061 else if (TREE_CODE (type) == COMPLEX_TYPE)
20063 if (TYPE_MODE (type) == DCmode && align < 64)
20064 return 64;
20065 if ((TYPE_MODE (type) == XCmode
20066 || TYPE_MODE (type) == TCmode) && align < 128)
20067 return 128;
20069 else if ((TREE_CODE (type) == RECORD_TYPE
20070 || TREE_CODE (type) == UNION_TYPE
20071 || TREE_CODE (type) == QUAL_UNION_TYPE)
20072 && TYPE_FIELDS (type))
20074 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
20075 return 64;
20076 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
20077 return 128;
20079 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20080 || TREE_CODE (type) == INTEGER_TYPE)
20083 if (TYPE_MODE (type) == DFmode && align < 64)
20084 return 64;
20085 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
20086 return 128;
20088 return align;
20091 /* Emit RTL insns to initialize the variable parts of a trampoline.
20092 FNADDR is an RTX for the address of the function's pure code.
20093 CXT is an RTX for the static chain value for the function. */
20094 void
20095 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
20097 if (!TARGET_64BIT)
20099 /* Compute offset from the end of the jmp to the target function. */
20100 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
20101 plus_constant (tramp, 10),
20102 NULL_RTX, 1, OPTAB_DIRECT);
20103 emit_move_insn (gen_rtx_MEM (QImode, tramp),
20104 gen_int_mode (0xb9, QImode));
20105 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
20106 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
20107 gen_int_mode (0xe9, QImode));
20108 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
20110 else
20112 int offset = 0;
20113 /* Try to load address using shorter movl instead of movabs.
20114 We may want to support movq for kernel mode, but the kernel does not use
20115 trampolines at the moment. */
20116 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
20118 fnaddr = copy_to_mode_reg (DImode, fnaddr);
20119 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20120 gen_int_mode (0xbb41, HImode));
20121 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
20122 gen_lowpart (SImode, fnaddr));
20123 offset += 6;
20125 else
20127 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20128 gen_int_mode (0xbb49, HImode));
20129 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
20130 fnaddr);
20131 offset += 10;
20133 /* Load static chain using movabs to r10. */
20134 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20135 gen_int_mode (0xba49, HImode));
20136 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
20137 cxt);
20138 offset += 10;
20139 /* Jump to r11.  */
20140 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20141 gen_int_mode (0xff49, HImode));
20142 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
20143 gen_int_mode (0xe3, QImode));
20144 offset += 3;
20145 gcc_assert (offset <= TRAMPOLINE_SIZE);
20148 #ifdef ENABLE_EXECUTE_STACK
20149 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
20150 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
20151 #endif
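/* Illustrative sketch (annotation, not part of the source): the bytes
   emitted above.  32-bit trampoline (10 bytes):

       b9 <cxt:4>         movl   $cxt, %ecx
       e9 <disp:4>        jmp    fnaddr        # disp relative to tramp+10

   64-bit trampoline, movabs path (23 bytes):

       49 bb <fnaddr:8>   movabs $fnaddr, %r11
       49 ba <cxt:8>      movabs $cxt, %r10
       49 ff e3           jmp    *%r11

   When fnaddr fits in an unsigned 32-bit immediate, the first insn is
   instead `41 bb <fnaddr:4>' (movl into %r11d), saving four bytes.  */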
20154 /* Codes for all the SSE/MMX builtins. */
20155 enum ix86_builtins
20157 IX86_BUILTIN_ADDPS,
20158 IX86_BUILTIN_ADDSS,
20159 IX86_BUILTIN_DIVPS,
20160 IX86_BUILTIN_DIVSS,
20161 IX86_BUILTIN_MULPS,
20162 IX86_BUILTIN_MULSS,
20163 IX86_BUILTIN_SUBPS,
20164 IX86_BUILTIN_SUBSS,
20166 IX86_BUILTIN_CMPEQPS,
20167 IX86_BUILTIN_CMPLTPS,
20168 IX86_BUILTIN_CMPLEPS,
20169 IX86_BUILTIN_CMPGTPS,
20170 IX86_BUILTIN_CMPGEPS,
20171 IX86_BUILTIN_CMPNEQPS,
20172 IX86_BUILTIN_CMPNLTPS,
20173 IX86_BUILTIN_CMPNLEPS,
20174 IX86_BUILTIN_CMPNGTPS,
20175 IX86_BUILTIN_CMPNGEPS,
20176 IX86_BUILTIN_CMPORDPS,
20177 IX86_BUILTIN_CMPUNORDPS,
20178 IX86_BUILTIN_CMPEQSS,
20179 IX86_BUILTIN_CMPLTSS,
20180 IX86_BUILTIN_CMPLESS,
20181 IX86_BUILTIN_CMPNEQSS,
20182 IX86_BUILTIN_CMPNLTSS,
20183 IX86_BUILTIN_CMPNLESS,
20184 IX86_BUILTIN_CMPNGTSS,
20185 IX86_BUILTIN_CMPNGESS,
20186 IX86_BUILTIN_CMPORDSS,
20187 IX86_BUILTIN_CMPUNORDSS,
20189 IX86_BUILTIN_COMIEQSS,
20190 IX86_BUILTIN_COMILTSS,
20191 IX86_BUILTIN_COMILESS,
20192 IX86_BUILTIN_COMIGTSS,
20193 IX86_BUILTIN_COMIGESS,
20194 IX86_BUILTIN_COMINEQSS,
20195 IX86_BUILTIN_UCOMIEQSS,
20196 IX86_BUILTIN_UCOMILTSS,
20197 IX86_BUILTIN_UCOMILESS,
20198 IX86_BUILTIN_UCOMIGTSS,
20199 IX86_BUILTIN_UCOMIGESS,
20200 IX86_BUILTIN_UCOMINEQSS,
20202 IX86_BUILTIN_CVTPI2PS,
20203 IX86_BUILTIN_CVTPS2PI,
20204 IX86_BUILTIN_CVTSI2SS,
20205 IX86_BUILTIN_CVTSI642SS,
20206 IX86_BUILTIN_CVTSS2SI,
20207 IX86_BUILTIN_CVTSS2SI64,
20208 IX86_BUILTIN_CVTTPS2PI,
20209 IX86_BUILTIN_CVTTSS2SI,
20210 IX86_BUILTIN_CVTTSS2SI64,
20212 IX86_BUILTIN_MAXPS,
20213 IX86_BUILTIN_MAXSS,
20214 IX86_BUILTIN_MINPS,
20215 IX86_BUILTIN_MINSS,
20217 IX86_BUILTIN_LOADUPS,
20218 IX86_BUILTIN_STOREUPS,
20219 IX86_BUILTIN_MOVSS,
20221 IX86_BUILTIN_MOVHLPS,
20222 IX86_BUILTIN_MOVLHPS,
20223 IX86_BUILTIN_LOADHPS,
20224 IX86_BUILTIN_LOADLPS,
20225 IX86_BUILTIN_STOREHPS,
20226 IX86_BUILTIN_STORELPS,
20228 IX86_BUILTIN_MASKMOVQ,
20229 IX86_BUILTIN_MOVMSKPS,
20230 IX86_BUILTIN_PMOVMSKB,
20232 IX86_BUILTIN_MOVNTPS,
20233 IX86_BUILTIN_MOVNTQ,
20235 IX86_BUILTIN_LOADDQU,
20236 IX86_BUILTIN_STOREDQU,
20238 IX86_BUILTIN_PACKSSWB,
20239 IX86_BUILTIN_PACKSSDW,
20240 IX86_BUILTIN_PACKUSWB,
20242 IX86_BUILTIN_PADDB,
20243 IX86_BUILTIN_PADDW,
20244 IX86_BUILTIN_PADDD,
20245 IX86_BUILTIN_PADDQ,
20246 IX86_BUILTIN_PADDSB,
20247 IX86_BUILTIN_PADDSW,
20248 IX86_BUILTIN_PADDUSB,
20249 IX86_BUILTIN_PADDUSW,
20250 IX86_BUILTIN_PSUBB,
20251 IX86_BUILTIN_PSUBW,
20252 IX86_BUILTIN_PSUBD,
20253 IX86_BUILTIN_PSUBQ,
20254 IX86_BUILTIN_PSUBSB,
20255 IX86_BUILTIN_PSUBSW,
20256 IX86_BUILTIN_PSUBUSB,
20257 IX86_BUILTIN_PSUBUSW,
20259 IX86_BUILTIN_PAND,
20260 IX86_BUILTIN_PANDN,
20261 IX86_BUILTIN_POR,
20262 IX86_BUILTIN_PXOR,
20264 IX86_BUILTIN_PAVGB,
20265 IX86_BUILTIN_PAVGW,
20267 IX86_BUILTIN_PCMPEQB,
20268 IX86_BUILTIN_PCMPEQW,
20269 IX86_BUILTIN_PCMPEQD,
20270 IX86_BUILTIN_PCMPGTB,
20271 IX86_BUILTIN_PCMPGTW,
20272 IX86_BUILTIN_PCMPGTD,
20274 IX86_BUILTIN_PMADDWD,
20276 IX86_BUILTIN_PMAXSW,
20277 IX86_BUILTIN_PMAXUB,
20278 IX86_BUILTIN_PMINSW,
20279 IX86_BUILTIN_PMINUB,
20281 IX86_BUILTIN_PMULHUW,
20282 IX86_BUILTIN_PMULHW,
20283 IX86_BUILTIN_PMULLW,
20285 IX86_BUILTIN_PSADBW,
20286 IX86_BUILTIN_PSHUFW,
20288 IX86_BUILTIN_PSLLW,
20289 IX86_BUILTIN_PSLLD,
20290 IX86_BUILTIN_PSLLQ,
20291 IX86_BUILTIN_PSRAW,
20292 IX86_BUILTIN_PSRAD,
20293 IX86_BUILTIN_PSRLW,
20294 IX86_BUILTIN_PSRLD,
20295 IX86_BUILTIN_PSRLQ,
20296 IX86_BUILTIN_PSLLWI,
20297 IX86_BUILTIN_PSLLDI,
20298 IX86_BUILTIN_PSLLQI,
20299 IX86_BUILTIN_PSRAWI,
20300 IX86_BUILTIN_PSRADI,
20301 IX86_BUILTIN_PSRLWI,
20302 IX86_BUILTIN_PSRLDI,
20303 IX86_BUILTIN_PSRLQI,
20305 IX86_BUILTIN_PUNPCKHBW,
20306 IX86_BUILTIN_PUNPCKHWD,
20307 IX86_BUILTIN_PUNPCKHDQ,
20308 IX86_BUILTIN_PUNPCKLBW,
20309 IX86_BUILTIN_PUNPCKLWD,
20310 IX86_BUILTIN_PUNPCKLDQ,
20312 IX86_BUILTIN_SHUFPS,
20314 IX86_BUILTIN_RCPPS,
20315 IX86_BUILTIN_RCPSS,
20316 IX86_BUILTIN_RSQRTPS,
20317 IX86_BUILTIN_RSQRTPS_NR,
20318 IX86_BUILTIN_RSQRTSS,
20319 IX86_BUILTIN_RSQRTF,
20320 IX86_BUILTIN_SQRTPS,
20321 IX86_BUILTIN_SQRTPS_NR,
20322 IX86_BUILTIN_SQRTSS,
20324 IX86_BUILTIN_UNPCKHPS,
20325 IX86_BUILTIN_UNPCKLPS,
20327 IX86_BUILTIN_ANDPS,
20328 IX86_BUILTIN_ANDNPS,
20329 IX86_BUILTIN_ORPS,
20330 IX86_BUILTIN_XORPS,
20332 IX86_BUILTIN_EMMS,
20333 IX86_BUILTIN_LDMXCSR,
20334 IX86_BUILTIN_STMXCSR,
20335 IX86_BUILTIN_SFENCE,
20337 /* 3DNow! Original */
20338 IX86_BUILTIN_FEMMS,
20339 IX86_BUILTIN_PAVGUSB,
20340 IX86_BUILTIN_PF2ID,
20341 IX86_BUILTIN_PFACC,
20342 IX86_BUILTIN_PFADD,
20343 IX86_BUILTIN_PFCMPEQ,
20344 IX86_BUILTIN_PFCMPGE,
20345 IX86_BUILTIN_PFCMPGT,
20346 IX86_BUILTIN_PFMAX,
20347 IX86_BUILTIN_PFMIN,
20348 IX86_BUILTIN_PFMUL,
20349 IX86_BUILTIN_PFRCP,
20350 IX86_BUILTIN_PFRCPIT1,
20351 IX86_BUILTIN_PFRCPIT2,
20352 IX86_BUILTIN_PFRSQIT1,
20353 IX86_BUILTIN_PFRSQRT,
20354 IX86_BUILTIN_PFSUB,
20355 IX86_BUILTIN_PFSUBR,
20356 IX86_BUILTIN_PI2FD,
20357 IX86_BUILTIN_PMULHRW,
20359 /* 3DNow! Athlon Extensions */
20360 IX86_BUILTIN_PF2IW,
20361 IX86_BUILTIN_PFNACC,
20362 IX86_BUILTIN_PFPNACC,
20363 IX86_BUILTIN_PI2FW,
20364 IX86_BUILTIN_PSWAPDSI,
20365 IX86_BUILTIN_PSWAPDSF,
20367 /* SSE2 */
20368 IX86_BUILTIN_ADDPD,
20369 IX86_BUILTIN_ADDSD,
20370 IX86_BUILTIN_DIVPD,
20371 IX86_BUILTIN_DIVSD,
20372 IX86_BUILTIN_MULPD,
20373 IX86_BUILTIN_MULSD,
20374 IX86_BUILTIN_SUBPD,
20375 IX86_BUILTIN_SUBSD,
20377 IX86_BUILTIN_CMPEQPD,
20378 IX86_BUILTIN_CMPLTPD,
20379 IX86_BUILTIN_CMPLEPD,
20380 IX86_BUILTIN_CMPGTPD,
20381 IX86_BUILTIN_CMPGEPD,
20382 IX86_BUILTIN_CMPNEQPD,
20383 IX86_BUILTIN_CMPNLTPD,
20384 IX86_BUILTIN_CMPNLEPD,
20385 IX86_BUILTIN_CMPNGTPD,
20386 IX86_BUILTIN_CMPNGEPD,
20387 IX86_BUILTIN_CMPORDPD,
20388 IX86_BUILTIN_CMPUNORDPD,
20389 IX86_BUILTIN_CMPEQSD,
20390 IX86_BUILTIN_CMPLTSD,
20391 IX86_BUILTIN_CMPLESD,
20392 IX86_BUILTIN_CMPNEQSD,
20393 IX86_BUILTIN_CMPNLTSD,
20394 IX86_BUILTIN_CMPNLESD,
20395 IX86_BUILTIN_CMPORDSD,
20396 IX86_BUILTIN_CMPUNORDSD,
20398 IX86_BUILTIN_COMIEQSD,
20399 IX86_BUILTIN_COMILTSD,
20400 IX86_BUILTIN_COMILESD,
20401 IX86_BUILTIN_COMIGTSD,
20402 IX86_BUILTIN_COMIGESD,
20403 IX86_BUILTIN_COMINEQSD,
20404 IX86_BUILTIN_UCOMIEQSD,
20405 IX86_BUILTIN_UCOMILTSD,
20406 IX86_BUILTIN_UCOMILESD,
20407 IX86_BUILTIN_UCOMIGTSD,
20408 IX86_BUILTIN_UCOMIGESD,
20409 IX86_BUILTIN_UCOMINEQSD,
20411 IX86_BUILTIN_MAXPD,
20412 IX86_BUILTIN_MAXSD,
20413 IX86_BUILTIN_MINPD,
20414 IX86_BUILTIN_MINSD,
20416 IX86_BUILTIN_ANDPD,
20417 IX86_BUILTIN_ANDNPD,
20418 IX86_BUILTIN_ORPD,
20419 IX86_BUILTIN_XORPD,
20421 IX86_BUILTIN_SQRTPD,
20422 IX86_BUILTIN_SQRTSD,
20424 IX86_BUILTIN_UNPCKHPD,
20425 IX86_BUILTIN_UNPCKLPD,
20427 IX86_BUILTIN_SHUFPD,
20429 IX86_BUILTIN_LOADUPD,
20430 IX86_BUILTIN_STOREUPD,
20431 IX86_BUILTIN_MOVSD,
20433 IX86_BUILTIN_LOADHPD,
20434 IX86_BUILTIN_LOADLPD,
20436 IX86_BUILTIN_CVTDQ2PD,
20437 IX86_BUILTIN_CVTDQ2PS,
20439 IX86_BUILTIN_CVTPD2DQ,
20440 IX86_BUILTIN_CVTPD2PI,
20441 IX86_BUILTIN_CVTPD2PS,
20442 IX86_BUILTIN_CVTTPD2DQ,
20443 IX86_BUILTIN_CVTTPD2PI,
20445 IX86_BUILTIN_CVTPI2PD,
20446 IX86_BUILTIN_CVTSI2SD,
20447 IX86_BUILTIN_CVTSI642SD,
20449 IX86_BUILTIN_CVTSD2SI,
20450 IX86_BUILTIN_CVTSD2SI64,
20451 IX86_BUILTIN_CVTSD2SS,
20452 IX86_BUILTIN_CVTSS2SD,
20453 IX86_BUILTIN_CVTTSD2SI,
20454 IX86_BUILTIN_CVTTSD2SI64,
20456 IX86_BUILTIN_CVTPS2DQ,
20457 IX86_BUILTIN_CVTPS2PD,
20458 IX86_BUILTIN_CVTTPS2DQ,
20460 IX86_BUILTIN_MOVNTI,
20461 IX86_BUILTIN_MOVNTPD,
20462 IX86_BUILTIN_MOVNTDQ,
20464 IX86_BUILTIN_MOVQ128,
20466 /* SSE2 MMX */
20467 IX86_BUILTIN_MASKMOVDQU,
20468 IX86_BUILTIN_MOVMSKPD,
20469 IX86_BUILTIN_PMOVMSKB128,
20471 IX86_BUILTIN_PACKSSWB128,
20472 IX86_BUILTIN_PACKSSDW128,
20473 IX86_BUILTIN_PACKUSWB128,
20475 IX86_BUILTIN_PADDB128,
20476 IX86_BUILTIN_PADDW128,
20477 IX86_BUILTIN_PADDD128,
20478 IX86_BUILTIN_PADDQ128,
20479 IX86_BUILTIN_PADDSB128,
20480 IX86_BUILTIN_PADDSW128,
20481 IX86_BUILTIN_PADDUSB128,
20482 IX86_BUILTIN_PADDUSW128,
20483 IX86_BUILTIN_PSUBB128,
20484 IX86_BUILTIN_PSUBW128,
20485 IX86_BUILTIN_PSUBD128,
20486 IX86_BUILTIN_PSUBQ128,
20487 IX86_BUILTIN_PSUBSB128,
20488 IX86_BUILTIN_PSUBSW128,
20489 IX86_BUILTIN_PSUBUSB128,
20490 IX86_BUILTIN_PSUBUSW128,
20492 IX86_BUILTIN_PAND128,
20493 IX86_BUILTIN_PANDN128,
20494 IX86_BUILTIN_POR128,
20495 IX86_BUILTIN_PXOR128,
20497 IX86_BUILTIN_PAVGB128,
20498 IX86_BUILTIN_PAVGW128,
20500 IX86_BUILTIN_PCMPEQB128,
20501 IX86_BUILTIN_PCMPEQW128,
20502 IX86_BUILTIN_PCMPEQD128,
20503 IX86_BUILTIN_PCMPGTB128,
20504 IX86_BUILTIN_PCMPGTW128,
20505 IX86_BUILTIN_PCMPGTD128,
20507 IX86_BUILTIN_PMADDWD128,
20509 IX86_BUILTIN_PMAXSW128,
20510 IX86_BUILTIN_PMAXUB128,
20511 IX86_BUILTIN_PMINSW128,
20512 IX86_BUILTIN_PMINUB128,
20514 IX86_BUILTIN_PMULUDQ,
20515 IX86_BUILTIN_PMULUDQ128,
20516 IX86_BUILTIN_PMULHUW128,
20517 IX86_BUILTIN_PMULHW128,
20518 IX86_BUILTIN_PMULLW128,
20520 IX86_BUILTIN_PSADBW128,
20521 IX86_BUILTIN_PSHUFHW,
20522 IX86_BUILTIN_PSHUFLW,
20523 IX86_BUILTIN_PSHUFD,
20525 IX86_BUILTIN_PSLLDQI128,
20526 IX86_BUILTIN_PSLLWI128,
20527 IX86_BUILTIN_PSLLDI128,
20528 IX86_BUILTIN_PSLLQI128,
20529 IX86_BUILTIN_PSRAWI128,
20530 IX86_BUILTIN_PSRADI128,
20531 IX86_BUILTIN_PSRLDQI128,
20532 IX86_BUILTIN_PSRLWI128,
20533 IX86_BUILTIN_PSRLDI128,
20534 IX86_BUILTIN_PSRLQI128,
20536 IX86_BUILTIN_PSLLDQ128,
20537 IX86_BUILTIN_PSLLW128,
20538 IX86_BUILTIN_PSLLD128,
20539 IX86_BUILTIN_PSLLQ128,
20540 IX86_BUILTIN_PSRAW128,
20541 IX86_BUILTIN_PSRAD128,
20542 IX86_BUILTIN_PSRLW128,
20543 IX86_BUILTIN_PSRLD128,
20544 IX86_BUILTIN_PSRLQ128,
20546 IX86_BUILTIN_PUNPCKHBW128,
20547 IX86_BUILTIN_PUNPCKHWD128,
20548 IX86_BUILTIN_PUNPCKHDQ128,
20549 IX86_BUILTIN_PUNPCKHQDQ128,
20550 IX86_BUILTIN_PUNPCKLBW128,
20551 IX86_BUILTIN_PUNPCKLWD128,
20552 IX86_BUILTIN_PUNPCKLDQ128,
20553 IX86_BUILTIN_PUNPCKLQDQ128,
20555 IX86_BUILTIN_CLFLUSH,
20556 IX86_BUILTIN_MFENCE,
20557 IX86_BUILTIN_LFENCE,
20559 IX86_BUILTIN_BSRSI,
20560 IX86_BUILTIN_BSRDI,
20561 IX86_BUILTIN_RDPMC,
20562 IX86_BUILTIN_RDTSC,
20563 IX86_BUILTIN_RDTSCP,
20564 IX86_BUILTIN_ROLQI,
20565 IX86_BUILTIN_ROLHI,
20566 IX86_BUILTIN_RORQI,
20567 IX86_BUILTIN_RORHI,
20569 /* SSE3. */
20570 IX86_BUILTIN_ADDSUBPS,
20571 IX86_BUILTIN_HADDPS,
20572 IX86_BUILTIN_HSUBPS,
20573 IX86_BUILTIN_MOVSHDUP,
20574 IX86_BUILTIN_MOVSLDUP,
20575 IX86_BUILTIN_ADDSUBPD,
20576 IX86_BUILTIN_HADDPD,
20577 IX86_BUILTIN_HSUBPD,
20578 IX86_BUILTIN_LDDQU,
20580 IX86_BUILTIN_MONITOR,
20581 IX86_BUILTIN_MWAIT,
20583 /* SSSE3. */
20584 IX86_BUILTIN_PHADDW,
20585 IX86_BUILTIN_PHADDD,
20586 IX86_BUILTIN_PHADDSW,
20587 IX86_BUILTIN_PHSUBW,
20588 IX86_BUILTIN_PHSUBD,
20589 IX86_BUILTIN_PHSUBSW,
20590 IX86_BUILTIN_PMADDUBSW,
20591 IX86_BUILTIN_PMULHRSW,
20592 IX86_BUILTIN_PSHUFB,
20593 IX86_BUILTIN_PSIGNB,
20594 IX86_BUILTIN_PSIGNW,
20595 IX86_BUILTIN_PSIGND,
20596 IX86_BUILTIN_PALIGNR,
20597 IX86_BUILTIN_PABSB,
20598 IX86_BUILTIN_PABSW,
20599 IX86_BUILTIN_PABSD,
20601 IX86_BUILTIN_PHADDW128,
20602 IX86_BUILTIN_PHADDD128,
20603 IX86_BUILTIN_PHADDSW128,
20604 IX86_BUILTIN_PHSUBW128,
20605 IX86_BUILTIN_PHSUBD128,
20606 IX86_BUILTIN_PHSUBSW128,
20607 IX86_BUILTIN_PMADDUBSW128,
20608 IX86_BUILTIN_PMULHRSW128,
20609 IX86_BUILTIN_PSHUFB128,
20610 IX86_BUILTIN_PSIGNB128,
20611 IX86_BUILTIN_PSIGNW128,
20612 IX86_BUILTIN_PSIGND128,
20613 IX86_BUILTIN_PALIGNR128,
20614 IX86_BUILTIN_PABSB128,
20615 IX86_BUILTIN_PABSW128,
20616 IX86_BUILTIN_PABSD128,
20618 /* AMDFAM10 - SSE4A New Instructions. */
20619 IX86_BUILTIN_MOVNTSD,
20620 IX86_BUILTIN_MOVNTSS,
20621 IX86_BUILTIN_EXTRQI,
20622 IX86_BUILTIN_EXTRQ,
20623 IX86_BUILTIN_INSERTQI,
20624 IX86_BUILTIN_INSERTQ,
20626 /* SSE4.1. */
20627 IX86_BUILTIN_BLENDPD,
20628 IX86_BUILTIN_BLENDPS,
20629 IX86_BUILTIN_BLENDVPD,
20630 IX86_BUILTIN_BLENDVPS,
20631 IX86_BUILTIN_PBLENDVB128,
20632 IX86_BUILTIN_PBLENDW128,
20634 IX86_BUILTIN_DPPD,
20635 IX86_BUILTIN_DPPS,
20637 IX86_BUILTIN_INSERTPS128,
20639 IX86_BUILTIN_MOVNTDQA,
20640 IX86_BUILTIN_MPSADBW128,
20641 IX86_BUILTIN_PACKUSDW128,
20642 IX86_BUILTIN_PCMPEQQ,
20643 IX86_BUILTIN_PHMINPOSUW128,
20645 IX86_BUILTIN_PMAXSB128,
20646 IX86_BUILTIN_PMAXSD128,
20647 IX86_BUILTIN_PMAXUD128,
20648 IX86_BUILTIN_PMAXUW128,
20650 IX86_BUILTIN_PMINSB128,
20651 IX86_BUILTIN_PMINSD128,
20652 IX86_BUILTIN_PMINUD128,
20653 IX86_BUILTIN_PMINUW128,
20655 IX86_BUILTIN_PMOVSXBW128,
20656 IX86_BUILTIN_PMOVSXBD128,
20657 IX86_BUILTIN_PMOVSXBQ128,
20658 IX86_BUILTIN_PMOVSXWD128,
20659 IX86_BUILTIN_PMOVSXWQ128,
20660 IX86_BUILTIN_PMOVSXDQ128,
20662 IX86_BUILTIN_PMOVZXBW128,
20663 IX86_BUILTIN_PMOVZXBD128,
20664 IX86_BUILTIN_PMOVZXBQ128,
20665 IX86_BUILTIN_PMOVZXWD128,
20666 IX86_BUILTIN_PMOVZXWQ128,
20667 IX86_BUILTIN_PMOVZXDQ128,
20669 IX86_BUILTIN_PMULDQ128,
20670 IX86_BUILTIN_PMULLD128,
20672 IX86_BUILTIN_ROUNDPD,
20673 IX86_BUILTIN_ROUNDPS,
20674 IX86_BUILTIN_ROUNDSD,
20675 IX86_BUILTIN_ROUNDSS,
20677 IX86_BUILTIN_PTESTZ,
20678 IX86_BUILTIN_PTESTC,
20679 IX86_BUILTIN_PTESTNZC,
20681 IX86_BUILTIN_VEC_INIT_V2SI,
20682 IX86_BUILTIN_VEC_INIT_V4HI,
20683 IX86_BUILTIN_VEC_INIT_V8QI,
20684 IX86_BUILTIN_VEC_EXT_V2DF,
20685 IX86_BUILTIN_VEC_EXT_V2DI,
20686 IX86_BUILTIN_VEC_EXT_V4SF,
20687 IX86_BUILTIN_VEC_EXT_V4SI,
20688 IX86_BUILTIN_VEC_EXT_V8HI,
20689 IX86_BUILTIN_VEC_EXT_V2SI,
20690 IX86_BUILTIN_VEC_EXT_V4HI,
20691 IX86_BUILTIN_VEC_EXT_V16QI,
20692 IX86_BUILTIN_VEC_SET_V2DI,
20693 IX86_BUILTIN_VEC_SET_V4SF,
20694 IX86_BUILTIN_VEC_SET_V4SI,
20695 IX86_BUILTIN_VEC_SET_V8HI,
20696 IX86_BUILTIN_VEC_SET_V4HI,
20697 IX86_BUILTIN_VEC_SET_V16QI,
20699 IX86_BUILTIN_VEC_PACK_SFIX,
20701 /* SSE4.2. */
20702 IX86_BUILTIN_CRC32QI,
20703 IX86_BUILTIN_CRC32HI,
20704 IX86_BUILTIN_CRC32SI,
20705 IX86_BUILTIN_CRC32DI,
20707 IX86_BUILTIN_PCMPESTRI128,
20708 IX86_BUILTIN_PCMPESTRM128,
20709 IX86_BUILTIN_PCMPESTRA128,
20710 IX86_BUILTIN_PCMPESTRC128,
20711 IX86_BUILTIN_PCMPESTRO128,
20712 IX86_BUILTIN_PCMPESTRS128,
20713 IX86_BUILTIN_PCMPESTRZ128,
20714 IX86_BUILTIN_PCMPISTRI128,
20715 IX86_BUILTIN_PCMPISTRM128,
20716 IX86_BUILTIN_PCMPISTRA128,
20717 IX86_BUILTIN_PCMPISTRC128,
20718 IX86_BUILTIN_PCMPISTRO128,
20719 IX86_BUILTIN_PCMPISTRS128,
20720 IX86_BUILTIN_PCMPISTRZ128,
20722 IX86_BUILTIN_PCMPGTQ,
20724 /* AES instructions */
20725 IX86_BUILTIN_AESENC128,
20726 IX86_BUILTIN_AESENCLAST128,
20727 IX86_BUILTIN_AESDEC128,
20728 IX86_BUILTIN_AESDECLAST128,
20729 IX86_BUILTIN_AESIMC128,
20730 IX86_BUILTIN_AESKEYGENASSIST128,
20732 /* PCLMUL instruction */
20733 IX86_BUILTIN_PCLMULQDQ128,
20735 /* AVX */
20736 IX86_BUILTIN_ADDPD256,
20737 IX86_BUILTIN_ADDPS256,
20738 IX86_BUILTIN_ADDSUBPD256,
20739 IX86_BUILTIN_ADDSUBPS256,
20740 IX86_BUILTIN_ANDPD256,
20741 IX86_BUILTIN_ANDPS256,
20742 IX86_BUILTIN_ANDNPD256,
20743 IX86_BUILTIN_ANDNPS256,
20744 IX86_BUILTIN_BLENDPD256,
20745 IX86_BUILTIN_BLENDPS256,
20746 IX86_BUILTIN_BLENDVPD256,
20747 IX86_BUILTIN_BLENDVPS256,
20748 IX86_BUILTIN_DIVPD256,
20749 IX86_BUILTIN_DIVPS256,
20750 IX86_BUILTIN_DPPS256,
20751 IX86_BUILTIN_HADDPD256,
20752 IX86_BUILTIN_HADDPS256,
20753 IX86_BUILTIN_HSUBPD256,
20754 IX86_BUILTIN_HSUBPS256,
20755 IX86_BUILTIN_MAXPD256,
20756 IX86_BUILTIN_MAXPS256,
20757 IX86_BUILTIN_MINPD256,
20758 IX86_BUILTIN_MINPS256,
20759 IX86_BUILTIN_MULPD256,
20760 IX86_BUILTIN_MULPS256,
20761 IX86_BUILTIN_ORPD256,
20762 IX86_BUILTIN_ORPS256,
20763 IX86_BUILTIN_SHUFPD256,
20764 IX86_BUILTIN_SHUFPS256,
20765 IX86_BUILTIN_SUBPD256,
20766 IX86_BUILTIN_SUBPS256,
20767 IX86_BUILTIN_XORPD256,
20768 IX86_BUILTIN_XORPS256,
20769 IX86_BUILTIN_CMPSD,
20770 IX86_BUILTIN_CMPSS,
20771 IX86_BUILTIN_CMPPD,
20772 IX86_BUILTIN_CMPPS,
20773 IX86_BUILTIN_CMPPD256,
20774 IX86_BUILTIN_CMPPS256,
20775 IX86_BUILTIN_CVTDQ2PD256,
20776 IX86_BUILTIN_CVTDQ2PS256,
20777 IX86_BUILTIN_CVTPD2PS256,
20778 IX86_BUILTIN_CVTPS2DQ256,
20779 IX86_BUILTIN_CVTPS2PD256,
20780 IX86_BUILTIN_CVTTPD2DQ256,
20781 IX86_BUILTIN_CVTPD2DQ256,
20782 IX86_BUILTIN_CVTTPS2DQ256,
20783 IX86_BUILTIN_EXTRACTF128PD256,
20784 IX86_BUILTIN_EXTRACTF128PS256,
20785 IX86_BUILTIN_EXTRACTF128SI256,
20786 IX86_BUILTIN_VZEROALL,
20787 IX86_BUILTIN_VZEROUPPER,
20788 IX86_BUILTIN_VZEROUPPER_REX64,
20789 IX86_BUILTIN_VPERMILVARPD,
20790 IX86_BUILTIN_VPERMILVARPS,
20791 IX86_BUILTIN_VPERMILVARPD256,
20792 IX86_BUILTIN_VPERMILVARPS256,
20793 IX86_BUILTIN_VPERMILPD,
20794 IX86_BUILTIN_VPERMILPS,
20795 IX86_BUILTIN_VPERMILPD256,
20796 IX86_BUILTIN_VPERMILPS256,
20797 IX86_BUILTIN_VPERM2F128PD256,
20798 IX86_BUILTIN_VPERM2F128PS256,
20799 IX86_BUILTIN_VPERM2F128SI256,
20800 IX86_BUILTIN_VBROADCASTSS,
20801 IX86_BUILTIN_VBROADCASTSD256,
20802 IX86_BUILTIN_VBROADCASTSS256,
20803 IX86_BUILTIN_VBROADCASTPD256,
20804 IX86_BUILTIN_VBROADCASTPS256,
20805 IX86_BUILTIN_VINSERTF128PD256,
20806 IX86_BUILTIN_VINSERTF128PS256,
20807 IX86_BUILTIN_VINSERTF128SI256,
20808 IX86_BUILTIN_LOADUPD256,
20809 IX86_BUILTIN_LOADUPS256,
20810 IX86_BUILTIN_STOREUPD256,
20811 IX86_BUILTIN_STOREUPS256,
20812 IX86_BUILTIN_LDDQU256,
20813 IX86_BUILTIN_MOVNTDQ256,
20814 IX86_BUILTIN_MOVNTPD256,
20815 IX86_BUILTIN_MOVNTPS256,
20816 IX86_BUILTIN_LOADDQU256,
20817 IX86_BUILTIN_STOREDQU256,
20818 IX86_BUILTIN_MASKLOADPD,
20819 IX86_BUILTIN_MASKLOADPS,
20820 IX86_BUILTIN_MASKSTOREPD,
20821 IX86_BUILTIN_MASKSTOREPS,
20822 IX86_BUILTIN_MASKLOADPD256,
20823 IX86_BUILTIN_MASKLOADPS256,
20824 IX86_BUILTIN_MASKSTOREPD256,
20825 IX86_BUILTIN_MASKSTOREPS256,
20826 IX86_BUILTIN_MOVSHDUP256,
20827 IX86_BUILTIN_MOVSLDUP256,
20828 IX86_BUILTIN_MOVDDUP256,
20830 IX86_BUILTIN_SQRTPD256,
20831 IX86_BUILTIN_SQRTPS256,
20832 IX86_BUILTIN_SQRTPS_NR256,
20833 IX86_BUILTIN_RSQRTPS256,
20834 IX86_BUILTIN_RSQRTPS_NR256,
20836 IX86_BUILTIN_RCPPS256,
20838 IX86_BUILTIN_ROUNDPD256,
20839 IX86_BUILTIN_ROUNDPS256,
20841 IX86_BUILTIN_UNPCKHPD256,
20842 IX86_BUILTIN_UNPCKLPD256,
20843 IX86_BUILTIN_UNPCKHPS256,
20844 IX86_BUILTIN_UNPCKLPS256,
20846 IX86_BUILTIN_SI256_SI,
20847 IX86_BUILTIN_PS256_PS,
20848 IX86_BUILTIN_PD256_PD,
20849 IX86_BUILTIN_SI_SI256,
20850 IX86_BUILTIN_PS_PS256,
20851 IX86_BUILTIN_PD_PD256,
20853 IX86_BUILTIN_VTESTZPD,
20854 IX86_BUILTIN_VTESTCPD,
20855 IX86_BUILTIN_VTESTNZCPD,
20856 IX86_BUILTIN_VTESTZPS,
20857 IX86_BUILTIN_VTESTCPS,
20858 IX86_BUILTIN_VTESTNZCPS,
20859 IX86_BUILTIN_VTESTZPD256,
20860 IX86_BUILTIN_VTESTCPD256,
20861 IX86_BUILTIN_VTESTNZCPD256,
20862 IX86_BUILTIN_VTESTZPS256,
20863 IX86_BUILTIN_VTESTCPS256,
20864 IX86_BUILTIN_VTESTNZCPS256,
20865 IX86_BUILTIN_PTESTZ256,
20866 IX86_BUILTIN_PTESTC256,
20867 IX86_BUILTIN_PTESTNZC256,
20869 IX86_BUILTIN_MOVMSKPD256,
20870 IX86_BUILTIN_MOVMSKPS256,
20872 /* TFmode support builtins. */
20873 IX86_BUILTIN_INFQ,
20874 IX86_BUILTIN_HUGE_VALQ,
20875 IX86_BUILTIN_FABSQ,
20876 IX86_BUILTIN_COPYSIGNQ,
20878 /* SSE5 instructions */
20879 IX86_BUILTIN_FMADDSS,
20880 IX86_BUILTIN_FMADDSD,
20881 IX86_BUILTIN_FMADDPS,
20882 IX86_BUILTIN_FMADDPD,
20883 IX86_BUILTIN_FMSUBSS,
20884 IX86_BUILTIN_FMSUBSD,
20885 IX86_BUILTIN_FMSUBPS,
20886 IX86_BUILTIN_FMSUBPD,
20887 IX86_BUILTIN_FNMADDSS,
20888 IX86_BUILTIN_FNMADDSD,
20889 IX86_BUILTIN_FNMADDPS,
20890 IX86_BUILTIN_FNMADDPD,
20891 IX86_BUILTIN_FNMSUBSS,
20892 IX86_BUILTIN_FNMSUBSD,
20893 IX86_BUILTIN_FNMSUBPS,
20894 IX86_BUILTIN_FNMSUBPD,
20895 IX86_BUILTIN_PCMOV,
20896 IX86_BUILTIN_PCMOV_V2DI,
20897 IX86_BUILTIN_PCMOV_V4SI,
20898 IX86_BUILTIN_PCMOV_V8HI,
20899 IX86_BUILTIN_PCMOV_V16QI,
20900 IX86_BUILTIN_PCMOV_V4SF,
20901 IX86_BUILTIN_PCMOV_V2DF,
20902 IX86_BUILTIN_PPERM,
20903 IX86_BUILTIN_PERMPS,
20904 IX86_BUILTIN_PERMPD,
20905 IX86_BUILTIN_PMACSSWW,
20906 IX86_BUILTIN_PMACSWW,
20907 IX86_BUILTIN_PMACSSWD,
20908 IX86_BUILTIN_PMACSWD,
20909 IX86_BUILTIN_PMACSSDD,
20910 IX86_BUILTIN_PMACSDD,
20911 IX86_BUILTIN_PMACSSDQL,
20912 IX86_BUILTIN_PMACSSDQH,
20913 IX86_BUILTIN_PMACSDQL,
20914 IX86_BUILTIN_PMACSDQH,
20915 IX86_BUILTIN_PMADCSSWD,
20916 IX86_BUILTIN_PMADCSWD,
20917 IX86_BUILTIN_PHADDBW,
20918 IX86_BUILTIN_PHADDBD,
20919 IX86_BUILTIN_PHADDBQ,
20920 IX86_BUILTIN_PHADDWD,
20921 IX86_BUILTIN_PHADDWQ,
20922 IX86_BUILTIN_PHADDDQ,
20923 IX86_BUILTIN_PHADDUBW,
20924 IX86_BUILTIN_PHADDUBD,
20925 IX86_BUILTIN_PHADDUBQ,
20926 IX86_BUILTIN_PHADDUWD,
20927 IX86_BUILTIN_PHADDUWQ,
20928 IX86_BUILTIN_PHADDUDQ,
20929 IX86_BUILTIN_PHSUBBW,
20930 IX86_BUILTIN_PHSUBWD,
20931 IX86_BUILTIN_PHSUBDQ,
20932 IX86_BUILTIN_PROTB,
20933 IX86_BUILTIN_PROTW,
20934 IX86_BUILTIN_PROTD,
20935 IX86_BUILTIN_PROTQ,
20936 IX86_BUILTIN_PROTB_IMM,
20937 IX86_BUILTIN_PROTW_IMM,
20938 IX86_BUILTIN_PROTD_IMM,
20939 IX86_BUILTIN_PROTQ_IMM,
20940 IX86_BUILTIN_PSHLB,
20941 IX86_BUILTIN_PSHLW,
20942 IX86_BUILTIN_PSHLD,
20943 IX86_BUILTIN_PSHLQ,
20944 IX86_BUILTIN_PSHAB,
20945 IX86_BUILTIN_PSHAW,
20946 IX86_BUILTIN_PSHAD,
20947 IX86_BUILTIN_PSHAQ,
20948 IX86_BUILTIN_FRCZSS,
20949 IX86_BUILTIN_FRCZSD,
20950 IX86_BUILTIN_FRCZPS,
20951 IX86_BUILTIN_FRCZPD,
20952 IX86_BUILTIN_CVTPH2PS,
20953 IX86_BUILTIN_CVTPS2PH,
20955 IX86_BUILTIN_COMEQSS,
20956 IX86_BUILTIN_COMNESS,
20957 IX86_BUILTIN_COMLTSS,
20958 IX86_BUILTIN_COMLESS,
20959 IX86_BUILTIN_COMGTSS,
20960 IX86_BUILTIN_COMGESS,
20961 IX86_BUILTIN_COMUEQSS,
20962 IX86_BUILTIN_COMUNESS,
20963 IX86_BUILTIN_COMULTSS,
20964 IX86_BUILTIN_COMULESS,
20965 IX86_BUILTIN_COMUGTSS,
20966 IX86_BUILTIN_COMUGESS,
20967 IX86_BUILTIN_COMORDSS,
20968 IX86_BUILTIN_COMUNORDSS,
20969 IX86_BUILTIN_COMFALSESS,
20970 IX86_BUILTIN_COMTRUESS,
20972 IX86_BUILTIN_COMEQSD,
20973 IX86_BUILTIN_COMNESD,
20974 IX86_BUILTIN_COMLTSD,
20975 IX86_BUILTIN_COMLESD,
20976 IX86_BUILTIN_COMGTSD,
20977 IX86_BUILTIN_COMGESD,
20978 IX86_BUILTIN_COMUEQSD,
20979 IX86_BUILTIN_COMUNESD,
20980 IX86_BUILTIN_COMULTSD,
20981 IX86_BUILTIN_COMULESD,
20982 IX86_BUILTIN_COMUGTSD,
20983 IX86_BUILTIN_COMUGESD,
20984 IX86_BUILTIN_COMORDSD,
20985 IX86_BUILTIN_COMUNORDSD,
20986 IX86_BUILTIN_COMFALSESD,
20987 IX86_BUILTIN_COMTRUESD,
20989 IX86_BUILTIN_COMEQPS,
20990 IX86_BUILTIN_COMNEPS,
20991 IX86_BUILTIN_COMLTPS,
20992 IX86_BUILTIN_COMLEPS,
20993 IX86_BUILTIN_COMGTPS,
20994 IX86_BUILTIN_COMGEPS,
20995 IX86_BUILTIN_COMUEQPS,
20996 IX86_BUILTIN_COMUNEPS,
20997 IX86_BUILTIN_COMULTPS,
20998 IX86_BUILTIN_COMULEPS,
20999 IX86_BUILTIN_COMUGTPS,
21000 IX86_BUILTIN_COMUGEPS,
21001 IX86_BUILTIN_COMORDPS,
21002 IX86_BUILTIN_COMUNORDPS,
21003 IX86_BUILTIN_COMFALSEPS,
21004 IX86_BUILTIN_COMTRUEPS,
21006 IX86_BUILTIN_COMEQPD,
21007 IX86_BUILTIN_COMNEPD,
21008 IX86_BUILTIN_COMLTPD,
21009 IX86_BUILTIN_COMLEPD,
21010 IX86_BUILTIN_COMGTPD,
21011 IX86_BUILTIN_COMGEPD,
21012 IX86_BUILTIN_COMUEQPD,
21013 IX86_BUILTIN_COMUNEPD,
21014 IX86_BUILTIN_COMULTPD,
21015 IX86_BUILTIN_COMULEPD,
21016 IX86_BUILTIN_COMUGTPD,
21017 IX86_BUILTIN_COMUGEPD,
21018 IX86_BUILTIN_COMORDPD,
21019 IX86_BUILTIN_COMUNORDPD,
21020 IX86_BUILTIN_COMFALSEPD,
21021 IX86_BUILTIN_COMTRUEPD,
21023 IX86_BUILTIN_PCOMEQUB,
21024 IX86_BUILTIN_PCOMNEUB,
21025 IX86_BUILTIN_PCOMLTUB,
21026 IX86_BUILTIN_PCOMLEUB,
21027 IX86_BUILTIN_PCOMGTUB,
21028 IX86_BUILTIN_PCOMGEUB,
21029 IX86_BUILTIN_PCOMFALSEUB,
21030 IX86_BUILTIN_PCOMTRUEUB,
21031 IX86_BUILTIN_PCOMEQUW,
21032 IX86_BUILTIN_PCOMNEUW,
21033 IX86_BUILTIN_PCOMLTUW,
21034 IX86_BUILTIN_PCOMLEUW,
21035 IX86_BUILTIN_PCOMGTUW,
21036 IX86_BUILTIN_PCOMGEUW,
21037 IX86_BUILTIN_PCOMFALSEUW,
21038 IX86_BUILTIN_PCOMTRUEUW,
21039 IX86_BUILTIN_PCOMEQUD,
21040 IX86_BUILTIN_PCOMNEUD,
21041 IX86_BUILTIN_PCOMLTUD,
21042 IX86_BUILTIN_PCOMLEUD,
21043 IX86_BUILTIN_PCOMGTUD,
21044 IX86_BUILTIN_PCOMGEUD,
21045 IX86_BUILTIN_PCOMFALSEUD,
21046 IX86_BUILTIN_PCOMTRUEUD,
21047 IX86_BUILTIN_PCOMEQUQ,
21048 IX86_BUILTIN_PCOMNEUQ,
21049 IX86_BUILTIN_PCOMLTUQ,
21050 IX86_BUILTIN_PCOMLEUQ,
21051 IX86_BUILTIN_PCOMGTUQ,
21052 IX86_BUILTIN_PCOMGEUQ,
21053 IX86_BUILTIN_PCOMFALSEUQ,
21054 IX86_BUILTIN_PCOMTRUEUQ,
21056 IX86_BUILTIN_PCOMEQB,
21057 IX86_BUILTIN_PCOMNEB,
21058 IX86_BUILTIN_PCOMLTB,
21059 IX86_BUILTIN_PCOMLEB,
21060 IX86_BUILTIN_PCOMGTB,
21061 IX86_BUILTIN_PCOMGEB,
21062 IX86_BUILTIN_PCOMFALSEB,
21063 IX86_BUILTIN_PCOMTRUEB,
21064 IX86_BUILTIN_PCOMEQW,
21065 IX86_BUILTIN_PCOMNEW,
21066 IX86_BUILTIN_PCOMLTW,
21067 IX86_BUILTIN_PCOMLEW,
21068 IX86_BUILTIN_PCOMGTW,
21069 IX86_BUILTIN_PCOMGEW,
21070 IX86_BUILTIN_PCOMFALSEW,
21071 IX86_BUILTIN_PCOMTRUEW,
21072 IX86_BUILTIN_PCOMEQD,
21073 IX86_BUILTIN_PCOMNED,
21074 IX86_BUILTIN_PCOMLTD,
21075 IX86_BUILTIN_PCOMLED,
21076 IX86_BUILTIN_PCOMGTD,
21077 IX86_BUILTIN_PCOMGED,
21078 IX86_BUILTIN_PCOMFALSED,
21079 IX86_BUILTIN_PCOMTRUED,
21080 IX86_BUILTIN_PCOMEQQ,
21081 IX86_BUILTIN_PCOMNEQ,
21082 IX86_BUILTIN_PCOMLTQ,
21083 IX86_BUILTIN_PCOMLEQ,
21084 IX86_BUILTIN_PCOMGTQ,
21085 IX86_BUILTIN_PCOMGEQ,
21086 IX86_BUILTIN_PCOMFALSEQ,
21087 IX86_BUILTIN_PCOMTRUEQ,
21089 IX86_BUILTIN_MAX
21092 /* Table for the ix86 builtin decls. */
21093 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
21095 /* Table of all of the builtin functions that are possible with different ISAs
21096 but are waiting to be built until a function is declared to use that
21097 ISA. */
21098 struct GTY(()) builtin_isa {
21099 tree type; /* builtin type to use in the declaration */
21100 const char *name; /* function name */
21101 int isa; /* isa_flags this builtin is defined for */
21102 bool const_p; /* true if the declaration is constant */
21105 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
21108 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
21109 * of isa_flags the builtin requires in the ix86_builtins_isa array. Stores the
21110 * function decl in the ix86_builtins array. Returns the function decl, or
21111 * NULL_TREE if the builtin was not added.
21113 * If the front end has a special hook for builtin functions, delay adding
21114 * builtin functions that aren't in the current ISA until the ISA is changed
21115 * with function-specific optimization. Doing so can save about 300K for the
21116 * default compiler. When the builtin is expanded, check at that time whether
21117 * it is valid.
21119 * If the front end doesn't have a special hook, record all builtins, even
21120 * those that aren't in the current ISA, in case the user uses function-
21121 * specific options for a different ISA; otherwise we would get scope errors
21122 * if a builtin were added in the middle of a function scope. */
21124 static inline tree
21125 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
21127 tree decl = NULL_TREE;
21129 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
21131 ix86_builtins_isa[(int) code].isa = mask;
21133 if ((mask & ix86_isa_flags) != 0
21134 || (lang_hooks.builtin_function
21135 == lang_hooks.builtin_function_ext_scope))
21138 decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
21139 NULL_TREE);
21140 ix86_builtins[(int) code] = decl;
21141 ix86_builtins_isa[(int) code].type = NULL_TREE;
21143 else
21145 ix86_builtins[(int) code] = NULL_TREE;
21146 ix86_builtins_isa[(int) code].const_p = false;
21147 ix86_builtins_isa[(int) code].type = type;
21148 ix86_builtins_isa[(int) code].name = name;
21152 return decl;
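/* Illustrative sketch (annotation, not part of the source): the builtin
   setup code later in this file registers builtins through this helper
   roughly as

       def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
                    void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);

   The type-node name is an assumption for illustration; the mask defers
   the declaration until code compiled with -msse (or a target attribute
   enabling it) actually needs the builtin.  */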
21155 /* Like def_builtin, but also marks the function decl "const". */
21157 static inline tree
21158 def_builtin_const (int mask, const char *name, tree type,
21159 enum ix86_builtins code)
21161 tree decl = def_builtin (mask, name, type, code);
21162 if (decl)
21163 TREE_READONLY (decl) = 1;
21164 else
21165 ix86_builtins_isa[(int) code].const_p = true;
21167 return decl;
21170 /* Add any new builtin functions for a given ISA that may not have been
21171 declared. This saves a bit of space compared to adding all of the
21172 declarations to the tree, even if we didn't use them. */
21174 static void
21175 ix86_add_new_builtins (int isa)
21177 int i;
21178 tree decl;
21180 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
21182 if ((ix86_builtins_isa[i].isa & isa) != 0
21183 && ix86_builtins_isa[i].type != NULL_TREE)
21185 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
21186 ix86_builtins_isa[i].type,
21187 i, BUILT_IN_MD, NULL,
21188 NULL_TREE);
21190 ix86_builtins[i] = decl;
21191 ix86_builtins_isa[i].type = NULL_TREE;
21192 if (ix86_builtins_isa[i].const_p)
21193 TREE_READONLY (decl) = 1;
21198 /* Bits for builtin_description.flag. */
21200 /* Set when we don't support the comparison natively, and should
21201 swap_comparison in order to support it. */
21202 #define BUILTIN_DESC_SWAP_OPERANDS 1
21204 struct builtin_description
21206 const unsigned int mask;
21207 const enum insn_code icode;
21208 const char *const name;
21209 const enum ix86_builtins code;
21210 const enum rtx_code comparison;
21211 const int flag;
21214 static const struct builtin_description bdesc_comi[] =
21216 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
21217 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
21218 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
21219 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
21220 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
21221 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
21222 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
21223 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
21224 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
21225 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
21226 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
21227 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
21228 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
21229 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
21230 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
21231 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
21232 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
21233 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
21234 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
21235 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
21236 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
21237 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
21238 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
21239 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
21242 static const struct builtin_description bdesc_pcmpestr[] =
21244 /* SSE4.2 */
21245 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
21246 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
21247 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
21248 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
21249 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
21250 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
21251 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
21254 static const struct builtin_description bdesc_pcmpistr[] =
21256 /* SSE4.2 */
21257 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
21258 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
21259 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
21260 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
21261 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
21262 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
21263 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
21266 /* Special builtin types */
21267 enum ix86_special_builtin_type
21268 {
21269 SPECIAL_FTYPE_UNKNOWN,
21270 VOID_FTYPE_VOID,
21271 UINT64_FTYPE_VOID,
21272 UINT64_FTYPE_PUNSIGNED,
21273 V32QI_FTYPE_PCCHAR,
21274 V16QI_FTYPE_PCCHAR,
21275 V8SF_FTYPE_PCV4SF,
21276 V8SF_FTYPE_PCFLOAT,
21277 V4DF_FTYPE_PCV2DF,
21278 V4DF_FTYPE_PCDOUBLE,
21279 V4SF_FTYPE_PCFLOAT,
21280 V2DF_FTYPE_PCDOUBLE,
21281 V8SF_FTYPE_PCV8SF_V8SF,
21282 V4DF_FTYPE_PCV4DF_V4DF,
21283 V4SF_FTYPE_V4SF_PCV2SF,
21284 V4SF_FTYPE_PCV4SF_V4SF,
21285 V2DF_FTYPE_V2DF_PCDOUBLE,
21286 V2DF_FTYPE_PCV2DF_V2DF,
21287 V2DI_FTYPE_PV2DI,
21288 VOID_FTYPE_PV2SF_V4SF,
21289 VOID_FTYPE_PV4DI_V4DI,
21290 VOID_FTYPE_PV2DI_V2DI,
21291 VOID_FTYPE_PCHAR_V32QI,
21292 VOID_FTYPE_PCHAR_V16QI,
21293 VOID_FTYPE_PFLOAT_V8SF,
21294 VOID_FTYPE_PFLOAT_V4SF,
21295 VOID_FTYPE_PDOUBLE_V4DF,
21296 VOID_FTYPE_PDOUBLE_V2DF,
21297 VOID_FTYPE_PDI_DI,
21298 VOID_FTYPE_PINT_INT,
21299 VOID_FTYPE_PV8SF_V8SF_V8SF,
21300 VOID_FTYPE_PV4DF_V4DF_V4DF,
21301 VOID_FTYPE_PV4SF_V4SF_V4SF,
21302 VOID_FTYPE_PV2DF_V2DF_V2DF
21303 };
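/* Decoding the enumerators above (an informal sketch): each name spells
   "return type _FTYPE_ argument types", with a P prefix meaning pointer
   and PC pointer-to-const.  V4SF_FTYPE_V4SF_PCV2SF therefore describes
   the C-level signature used by __builtin_ia32_loadhps:

     typedef float v4sf __attribute__ ((vector_size (16)));
     typedef float v2sf __attribute__ ((vector_size (8)));

     v4sf loadhps (v4sf, const v2sf *);

   and VOID_FTYPE_PV2SF_V4SF is its store-side dual,
   void storehps (v2sf *, v4sf).  */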
21305 /* Builtin types */
21306 enum ix86_builtin_type
21307 {
21308 FTYPE_UNKNOWN,
21309 FLOAT128_FTYPE_FLOAT128,
21310 FLOAT_FTYPE_FLOAT,
21311 FLOAT128_FTYPE_FLOAT128_FLOAT128,
21312 INT_FTYPE_V8SF_V8SF_PTEST,
21313 INT_FTYPE_V4DI_V4DI_PTEST,
21314 INT_FTYPE_V4DF_V4DF_PTEST,
21315 INT_FTYPE_V4SF_V4SF_PTEST,
21316 INT_FTYPE_V2DI_V2DI_PTEST,
21317 INT_FTYPE_V2DF_V2DF_PTEST,
21318 INT_FTYPE_INT,
21319 UINT64_FTYPE_INT,
21320 INT64_FTYPE_INT64,
21321 INT64_FTYPE_V4SF,
21322 INT64_FTYPE_V2DF,
21323 INT_FTYPE_V16QI,
21324 INT_FTYPE_V8QI,
21325 INT_FTYPE_V8SF,
21326 INT_FTYPE_V4DF,
21327 INT_FTYPE_V4SF,
21328 INT_FTYPE_V2DF,
21329 V16QI_FTYPE_V16QI,
21330 V8SI_FTYPE_V8SF,
21331 V8SI_FTYPE_V4SI,
21332 V8HI_FTYPE_V8HI,
21333 V8HI_FTYPE_V16QI,
21334 V8QI_FTYPE_V8QI,
21335 V8SF_FTYPE_V8SF,
21336 V8SF_FTYPE_V8SI,
21337 V8SF_FTYPE_V4SF,
21338 V4SI_FTYPE_V4SI,
21339 V4SI_FTYPE_V16QI,
21340 V4SI_FTYPE_V8SI,
21341 V4SI_FTYPE_V8HI,
21342 V4SI_FTYPE_V4DF,
21343 V4SI_FTYPE_V4SF,
21344 V4SI_FTYPE_V2DF,
21345 V4HI_FTYPE_V4HI,
21346 V4DF_FTYPE_V4DF,
21347 V4DF_FTYPE_V4SI,
21348 V4DF_FTYPE_V4SF,
21349 V4DF_FTYPE_V2DF,
21350 V4SF_FTYPE_V4DF,
21351 V4SF_FTYPE_V4SF,
21352 V4SF_FTYPE_V4SF_VEC_MERGE,
21353 V4SF_FTYPE_V8SF,
21354 V4SF_FTYPE_V4SI,
21355 V4SF_FTYPE_V2DF,
21356 V2DI_FTYPE_V2DI,
21357 V2DI_FTYPE_V16QI,
21358 V2DI_FTYPE_V8HI,
21359 V2DI_FTYPE_V4SI,
21360 V2DF_FTYPE_V2DF,
21361 V2DF_FTYPE_V2DF_VEC_MERGE,
21362 V2DF_FTYPE_V4SI,
21363 V2DF_FTYPE_V4DF,
21364 V2DF_FTYPE_V4SF,
21365 V2DF_FTYPE_V2SI,
21366 V2SI_FTYPE_V2SI,
21367 V2SI_FTYPE_V4SF,
21368 V2SI_FTYPE_V2SF,
21369 V2SI_FTYPE_V2DF,
21370 V2SF_FTYPE_V2SF,
21371 V2SF_FTYPE_V2SI,
21372 V16QI_FTYPE_V16QI_V16QI,
21373 V16QI_FTYPE_V8HI_V8HI,
21374 V8QI_FTYPE_V8QI_V8QI,
21375 V8QI_FTYPE_V4HI_V4HI,
21376 V8HI_FTYPE_V8HI_V8HI,
21377 V8HI_FTYPE_V8HI_V8HI_COUNT,
21378 V8HI_FTYPE_V16QI_V16QI,
21379 V8HI_FTYPE_V4SI_V4SI,
21380 V8HI_FTYPE_V8HI_SI_COUNT,
21381 V8SF_FTYPE_V8SF_V8SF,
21382 V8SF_FTYPE_V8SF_V8SI,
21383 V4SI_FTYPE_V4SI_V4SI,
21384 V4SI_FTYPE_V4SI_V4SI_COUNT,
21385 V4SI_FTYPE_V8HI_V8HI,
21386 V4SI_FTYPE_V4SF_V4SF,
21387 V4SI_FTYPE_V2DF_V2DF,
21388 V4SI_FTYPE_V4SI_SI_COUNT,
21389 V4HI_FTYPE_V4HI_V4HI,
21390 V4HI_FTYPE_V4HI_V4HI_COUNT,
21391 V4HI_FTYPE_V8QI_V8QI,
21392 V4HI_FTYPE_V2SI_V2SI,
21393 V4HI_FTYPE_V4HI_SI_COUNT,
21394 V4DF_FTYPE_V4DF_V4DF,
21395 V4DF_FTYPE_V4DF_V4DI,
21396 V4SF_FTYPE_V4SF_V4SF,
21397 V4SF_FTYPE_V4SF_V4SF_SWAP,
21398 V4SF_FTYPE_V4SF_V4SI,
21399 V4SF_FTYPE_V4SF_V2SI,
21400 V4SF_FTYPE_V4SF_V2DF,
21401 V4SF_FTYPE_V4SF_DI,
21402 V4SF_FTYPE_V4SF_SI,
21403 V2DI_FTYPE_V2DI_V2DI,
21404 V2DI_FTYPE_V2DI_V2DI_COUNT,
21405 V2DI_FTYPE_V16QI_V16QI,
21406 V2DI_FTYPE_V4SI_V4SI,
21407 V2DI_FTYPE_V2DI_V16QI,
21408 V2DI_FTYPE_V2DF_V2DF,
21409 V2DI_FTYPE_V2DI_SI_COUNT,
21410 V2SI_FTYPE_V2SI_V2SI,
21411 V2SI_FTYPE_V2SI_V2SI_COUNT,
21412 V2SI_FTYPE_V4HI_V4HI,
21413 V2SI_FTYPE_V2SF_V2SF,
21414 V2SI_FTYPE_V2SI_SI_COUNT,
21415 V2DF_FTYPE_V2DF_V2DF,
21416 V2DF_FTYPE_V2DF_V2DF_SWAP,
21417 V2DF_FTYPE_V2DF_V4SF,
21418 V2DF_FTYPE_V2DF_V2DI,
21419 V2DF_FTYPE_V2DF_DI,
21420 V2DF_FTYPE_V2DF_SI,
21421 V2SF_FTYPE_V2SF_V2SF,
21422 V1DI_FTYPE_V1DI_V1DI,
21423 V1DI_FTYPE_V1DI_V1DI_COUNT,
21424 V1DI_FTYPE_V8QI_V8QI,
21425 V1DI_FTYPE_V2SI_V2SI,
21426 V1DI_FTYPE_V1DI_SI_COUNT,
21427 UINT64_FTYPE_UINT64_UINT64,
21428 UINT_FTYPE_UINT_UINT,
21429 UINT_FTYPE_UINT_USHORT,
21430 UINT_FTYPE_UINT_UCHAR,
21431 UINT16_FTYPE_UINT16_INT,
21432 UINT8_FTYPE_UINT8_INT,
21433 V8HI_FTYPE_V8HI_INT,
21434 V4SI_FTYPE_V4SI_INT,
21435 V4HI_FTYPE_V4HI_INT,
21436 V8SF_FTYPE_V8SF_INT,
21437 V4SI_FTYPE_V8SI_INT,
21438 V4SF_FTYPE_V8SF_INT,
21439 V2DF_FTYPE_V4DF_INT,
21440 V4DF_FTYPE_V4DF_INT,
21441 V4SF_FTYPE_V4SF_INT,
21442 V2DI_FTYPE_V2DI_INT,
21443 V2DI2TI_FTYPE_V2DI_INT,
21444 V2DF_FTYPE_V2DF_INT,
21445 V16QI_FTYPE_V16QI_V16QI_V16QI,
21446 V8SF_FTYPE_V8SF_V8SF_V8SF,
21447 V4DF_FTYPE_V4DF_V4DF_V4DF,
21448 V4SF_FTYPE_V4SF_V4SF_V4SF,
21449 V2DF_FTYPE_V2DF_V2DF_V2DF,
21450 V16QI_FTYPE_V16QI_V16QI_INT,
21451 V8SI_FTYPE_V8SI_V8SI_INT,
21452 V8SI_FTYPE_V8SI_V4SI_INT,
21453 V8HI_FTYPE_V8HI_V8HI_INT,
21454 V8SF_FTYPE_V8SF_V8SF_INT,
21455 V8SF_FTYPE_V8SF_V4SF_INT,
21456 V4SI_FTYPE_V4SI_V4SI_INT,
21457 V4DF_FTYPE_V4DF_V4DF_INT,
21458 V4DF_FTYPE_V4DF_V2DF_INT,
21459 V4SF_FTYPE_V4SF_V4SF_INT,
21460 V2DI_FTYPE_V2DI_V2DI_INT,
21461 V2DI2TI_FTYPE_V2DI_V2DI_INT,
21462 V1DI2DI_FTYPE_V1DI_V1DI_INT,
21463 V2DF_FTYPE_V2DF_V2DF_INT,
21464 V2DI_FTYPE_V2DI_UINT_UINT,
21465 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
21466 };
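/* The trailing tags above do not change the signature proper; they tell
   the expander how to treat it (a loose summary, not the authoritative
   rules):

     _COUNT      the last operand is a shift count, either an immediate
                 (SI_COUNT) or a vector register (e.g. V8HI_COUNT);
     _SWAP       the two operands are exchanged before expansion, which
                 is how the cmpgt* builtins reuse the cmplt* patterns;
     _VEC_MERGE  a one-operand scalar insn whose result is merged back
                 into the destination, e.g. sqrtss;
     V2DI2TI     a V2DI value reinterpreted as TImode, used for the
                 whole-register byte shifts pslldq/psrldq.  */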
21468 /* Special builtins with variable number of arguments. */
21469 static const struct builtin_description bdesc_special_args[] =
21470 {
21471 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
21472 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
21474 /* MMX */
21475 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
21477 /* 3DNow! */
21478 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
21480 /* SSE */
21481 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21482 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21483 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21485 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21486 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21487 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21488 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21490 /* SSE or 3DNow!A */
21491 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21492 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
21494 /* SSE2 */
21495 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21496 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21497 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21498 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
21499 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21500 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
21501 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
21502 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
21503 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21505 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21506 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21508 /* SSE3 */
21509 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21511 /* SSE4.1 */
21512 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
21514 /* SSE4A */
21515 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21516 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21518 /* AVX */
21519 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
21520 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
21521 { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
21523 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21524 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21525 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21526 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
21527 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
21529 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21530 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21531 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21532 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21533 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21534 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
21535 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21537 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
21538 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21539 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21541 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
21542 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
21543 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
21544 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
21545 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
21546 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
21547 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
21548 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
21549 };
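/* A hedged example of what lands in the table above: builtins that read
   or write memory, which are expanded along a separate path from the
   pure register builtins in bdesc_args below.  From user code:

     #include <emmintrin.h>

     void copy_unaligned (double *dst, const double *src)
     {
       __m128d v = _mm_loadu_pd (src);   // __builtin_ia32_loadupd
       _mm_storeu_pd (dst, v);           // __builtin_ia32_storeupd
     }
*/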
21551 /* Builtins with variable number of arguments. */
21552 static const struct builtin_description bdesc_args[] =
21553 {
21554 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
21555 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
21556 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
21557 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
21558 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
21559 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
21560 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
21562 /* MMX */
21563 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21564 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21565 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21566 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21567 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21568 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21570 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21571 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21572 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21573 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21574 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21575 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21576 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21577 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21579 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21580 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21582 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21583 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21584 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21585 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21587 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21588 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21589 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21590 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21591 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21592 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21594 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21595 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21596 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21597 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21598 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21599 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21601 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21602 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
21603 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21605 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
21607 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21608 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21609 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21610 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21611 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21612 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21614 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21615 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21616 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21617 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21618 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21619 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21621 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21622 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21623 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21624 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21626 /* 3DNow! */
21627 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21628 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21629 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21630 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21632 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21633 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21634 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21635 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21636 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21637 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21638 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21639 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21640 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21641 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21642 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21643 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21644 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21645 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21646 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21648 /* 3DNow!A */
21649 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21650 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21651 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21652 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21653 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21654 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21656 /* SSE */
21657 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
21658 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21659 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21660 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21661 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21662 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21663 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21664 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21665 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21666 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21667 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21668 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21670 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21672 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21673 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21674 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21675 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21676 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21677 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21678 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21679 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21681 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21682 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21683 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21684 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21685 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21686 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21687 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21688 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21689 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21690 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21691 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
21692 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21693 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21694 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21695 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21696 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21697 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21698 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21699 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21700 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21701 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21702 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21704 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21705 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21706 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21707 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21709 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21710 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21711 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21712 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21714 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21715 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21716 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21717 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21718 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21720 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
21721 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
21722 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
21724 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
21726 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21727 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21728 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21730 /* SSE MMX or 3DNow!A */
21731 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21732 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21733 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21735 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21736 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21737 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21738 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21740 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
21741 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
21743 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
21745 /* SSE2 */
21746 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21748 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
21749 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
21750 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
21751 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
21752 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
21754 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21755 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21756 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
21757 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21758 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21760 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
21762 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21763 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21764 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21765 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21767 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21768 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
21769 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21771 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21772 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21773 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21774 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21775 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21776 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21777 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21778 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21780 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21781 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21782 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21783 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21784 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
21785 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21786 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21787 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21788 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21789 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21790 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21791 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21792 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21793 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21794 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21795 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21796 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21797 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21798 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21799 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21801 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21802 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21803 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21804 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21806 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21807 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21808 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21809 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21811 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21812 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21813 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21815 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
21817 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21818 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21819 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21820 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21821 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21822 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21823 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21824 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21826 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21827 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21828 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21829 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21830 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21831 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21832 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21833 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21835 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21836 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21838 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21839 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21840 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21841 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21843 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21844 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21846 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21847 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21848 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21849 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21850 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21851 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21853 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21854 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21855 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21856 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21858 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21859 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21860 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21861 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21862 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21863 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21864 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21865 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21867 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21868 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21869 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21871 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21872 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
21874 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
21875 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21877 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
21879 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
21880 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
21881 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
21882 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
21884 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21885 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21886 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21887 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21888 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21889 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21890 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21892 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21893 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21894 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21895 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21896 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21897 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21898 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21900 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21901 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21902 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21903 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21905 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
21906 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21907 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21909 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
21911 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
21912 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
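/* The two nameless (0) entries just above are given their user-visible
   names by hand elsewhere in this file, as __builtin_fabsq and
   __builtin_copysignq; a hedged user-level view:

     __float128 q_abs (__float128 x)
     {
       return __builtin_fabsq (x);   // CODE_FOR_abstf2
     }
*/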
21914 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21916 /* SSE2 MMX */
21917 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21918 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21920 /* SSE3 */
21921 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21922 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21924 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21925 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21926 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21927 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21928 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21929 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
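/* Illustrative sketch (assumes -msse3): the hadd/hsub rows back the
   pmmintrin.h horizontal-arithmetic intrinsics:

     #include <pmmintrin.h>

     __m128
     pairwise_sums (__m128 a, __m128 b)
     {
       // haddps: { a0+a1, a2+a3, b0+b1, b2+b3 }
       return _mm_hadd_ps (a, b);
     }
*/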
21931 /* SSSE3 */
21932 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
21933 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
21934 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21935 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
21936 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
21937 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21939 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21940 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21941 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21942 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21943 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21944 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21945 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21946 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21947 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21948 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21949 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21950 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21951 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
21952 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
21953 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21954 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21955 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21956 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21957 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21958 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21959 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21960 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21961 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21962 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21964 /* SSSE3. */
21965 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
21966 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
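/* Illustrative sketch (assumes -mssse3): the palignr rows carry the
   2TI/2DI tags because the insn acts on the full 128-/64-bit value,
   and tmmintrin.h converts the byte offset into bits:

     #include <tmmintrin.h>

     __m128i
     concat_shift (__m128i hi, __m128i lo)
     {
       return _mm_alignr_epi8 (hi, lo, 4);   // palignr128, offset 4 * 8 bits
     }
*/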
21968 /* SSE4.1 */
21969 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21970 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21971 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
21972 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
21973 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21974 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21975 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21976 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
21977 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
21978 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
21980 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21981 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21982 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21983 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21984 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21985 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21986 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21987 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21988 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21989 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21990 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21991 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21992 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21994 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21995 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21996 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21997 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21998 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21999 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22000 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22001 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22002 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22003 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22004 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
22005 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
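/* A sketch of how the two SSE4.1 multiply rows differ (assumes
   -msse4.1):

     #include <smmintrin.h>

     __m128i
     mul_demo (__m128i a, __m128i b)
     {
       __m128i lo   = _mm_mullo_epi32 (a, b);  // pmulld128: 4 x low 32 bits
       __m128i wide = _mm_mul_epi32 (a, b);    // pmuldq128: 2 x 64-bit products
       return _mm_add_epi64 (wide, lo);
     }
*/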
22007 /* SSE4.1 and SSE5 */
22008 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
22009 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
22010 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22011 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22013 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
22014 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
22015 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
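/* The three ptest rows share one insn pattern; the rtx code (EQ, LTU,
   GTU) picks which flag the expander tests.  Illustrative use
   (assumes -msse4.1):

     #include <smmintrin.h>

     int
     all_zero_under_mask (__m128i v, __m128i mask)
     {
       return _mm_testz_si128 (v, mask);   // ZF from ptest
     }
*/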
22017 /* SSE4.2 */
22018 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22019 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
22020 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
22021 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
22022 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
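/* Illustrative sketch (assumes -msse4.2): the crc32 rows accumulate a
   CRC-32C value one operand at a time:

     #include <smmintrin.h>

     unsigned int
     crc32c_bytes (unsigned int crc, const unsigned char *p, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         crc = _mm_crc32_u8 (crc, p[i]);   // crc32qi
       return crc;
     }
*/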
22024 /* SSE4A */
22025 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
22026 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
22027 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
22028 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
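/* Illustrative sketch (assumes -msse4a; ammintrin.h): the immediate
   forms take a bit-field length and index:

     #include <ammintrin.h>

     __m128i
     extract_field (__m128i v)
     {
       return _mm_extracti_si64 (v, 8, 4);   // extrqi: 8 bits from bit 4
     }
*/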
22030 /* AES */
22031 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
22032 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
22034 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22035 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22036 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22037 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
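/* Illustrative sketch (assumes -maes -msse2; wmmintrin.h): one AES
   round per aesenc row, final round via aesenclast:

     #include <wmmintrin.h>

     __m128i
     aes_two_rounds (__m128i state, __m128i rk1, __m128i rk2)
     {
       state = _mm_aesenc_si128 (state, rk1);
       return _mm_aesenclast_si128 (state, rk2);
     }
*/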
22039 /* PCLMUL */
22040 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
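/* Illustrative sketch (assumes -mpclmul -msse2; wmmintrin.h): the
   immediate selects which 64-bit half of each operand feeds the
   carry-less multiply:

     #include <wmmintrin.h>

     __m128i
     clmul_low (__m128i a, __m128i b)
     {
       return _mm_clmulepi64_si128 (a, b, 0x00);   // low half of a and of b
     }
*/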
22042 /* AVX */
22043 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22044 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22045 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22046 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22047 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22048 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22049 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22050 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22051 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22052 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22053 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22054 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22055 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22056 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22057 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22058 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22059 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22060 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22061 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22062 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22063 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22064 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22065 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22066 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22067 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22068 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22070 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
22071 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
22072 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
22073 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
22075 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22076 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22077 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
22078 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
22079 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22080 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22081 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22082 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22083 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22084 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22085 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22086 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22087 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22088 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
22089 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
22090 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
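/* Illustrative sketch (assumes -mavx): the vextractf128 rows move one
   128-bit half of a 256-bit register:

     #include <immintrin.h>

     __m128d
     high_half (__m256d v)
     {
       return _mm256_extractf128_pd (v, 1);   // upper 128 bits
     }
*/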
22091 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
22092 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
22093 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
22094 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
22095 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
22096 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
22097 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
22098 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
22099 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22100 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22101 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
22102 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
22103 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
22104 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
22105 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
22106 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
22107 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
22108 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
22110 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22111 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22112 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
22114 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
22115 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22116 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22117 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22118 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22120 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22122 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
22123 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
22125 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22126 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22127 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22128 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22130 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
22131 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
22132 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
22133 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
22134 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
22135 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
22137 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22138 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22139 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22140 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22141 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22142 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22143 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22144 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22145 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22146 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22147 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22148 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22149 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22150 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22151 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22153 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
22154 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
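/* Illustrative use of the AVX arithmetic and movmsk rows above
   (assumes -mavx):

     #include <immintrin.h>

     int
     any_negative_sum (__m256d a, __m256d b)
     {
       __m256d sum = _mm256_add_pd (a, b);     // addpd256
       return _mm256_movemask_pd (sum) != 0;   // movmskpd256 sign bits
     }
*/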
22155 };

22157 /* SSE5 */
22158 enum multi_arg_type {
22159 MULTI_ARG_UNKNOWN,
22160 MULTI_ARG_3_SF,
22161 MULTI_ARG_3_DF,
22162 MULTI_ARG_3_DI,
22163 MULTI_ARG_3_SI,
22164 MULTI_ARG_3_SI_DI,
22165 MULTI_ARG_3_HI,
22166 MULTI_ARG_3_HI_SI,
22167 MULTI_ARG_3_QI,
22168 MULTI_ARG_3_PERMPS,
22169 MULTI_ARG_3_PERMPD,
22170 MULTI_ARG_2_SF,
22171 MULTI_ARG_2_DF,
22172 MULTI_ARG_2_DI,
22173 MULTI_ARG_2_SI,
22174 MULTI_ARG_2_HI,
22175 MULTI_ARG_2_QI,
22176 MULTI_ARG_2_DI_IMM,
22177 MULTI_ARG_2_SI_IMM,
22178 MULTI_ARG_2_HI_IMM,
22179 MULTI_ARG_2_QI_IMM,
22180 MULTI_ARG_2_SF_CMP,
22181 MULTI_ARG_2_DF_CMP,
22182 MULTI_ARG_2_DI_CMP,
22183 MULTI_ARG_2_SI_CMP,
22184 MULTI_ARG_2_HI_CMP,
22185 MULTI_ARG_2_QI_CMP,
22186 MULTI_ARG_2_DI_TF,
22187 MULTI_ARG_2_SI_TF,
22188 MULTI_ARG_2_HI_TF,
22189 MULTI_ARG_2_QI_TF,
22190 MULTI_ARG_2_SF_TF,
22191 MULTI_ARG_2_DF_TF,
22192 MULTI_ARG_1_SF,
22193 MULTI_ARG_1_DF,
22194 MULTI_ARG_1_DI,
22195 MULTI_ARG_1_SI,
22196 MULTI_ARG_1_HI,
22197 MULTI_ARG_1_QI,
22198 MULTI_ARG_1_SI_DI,
22199 MULTI_ARG_1_HI_DI,
22200 MULTI_ARG_1_HI_SI,
22201 MULTI_ARG_1_QI_DI,
22202 MULTI_ARG_1_QI_SI,
22203 MULTI_ARG_1_QI_HI,
22204 MULTI_ARG_1_PH2PS,
22205 MULTI_ARG_1_PS2PH
22206 };

22208 static const struct builtin_description bdesc_multi_arg[] =
22209 {
22210 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22211 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22212 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22213 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22214 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22215 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22216 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22217 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22218 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22219 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22220 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22221 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22222 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22223 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22224 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22225 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
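/* Illustrative sketch, assuming -msse5 and GCC's bmmintrin.h wrappers
   for these rows (SSE5 never shipped in silicon, so this is
   compile-time only):

     #include <bmmintrin.h>

     __m128
     fma_demo (__m128 a, __m128 b, __m128 c)
     {
       return _mm_macc_ps (a, b, c);   // fmaddps: a * b + c per lane
     }
*/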
22226 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
22227 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
22228 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
22229 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
22230 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi", IX86_BUILTIN_PCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
22231 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
22232 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
22233 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
22234 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, UNKNOWN, (int)MULTI_ARG_3_PERMPS },
22235 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, UNKNOWN, (int)MULTI_ARG_3_PERMPD },
22236 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
22237 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
22238 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22239 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22240 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
22241 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
22242 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22243 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22244 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22245 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22246 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22247 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22248 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22249 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
22250 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
22251 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
22252 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
22253 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
22254 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
22255 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
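/* The _IMM rows require a compile-time rotate count.  A minimal
   sketch calling the builtin directly with GCC vector types (assumes
   -msse5):

     typedef int v4si __attribute__ ((vector_size (16)));

     v4si
     rotl3 (v4si x)
     {
       return __builtin_ia32_protdi (x, 3);   // rotate each lane left by 3
     }
*/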
22256 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22257 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
22258 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
22259 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
22260 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22261 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
22262 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
22263 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
22264 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
22265 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
22266 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
22267 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
22268 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int)MULTI_ARG_1_PH2PS },
22269 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int)MULTI_ARG_1_PS2PH },
22270 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22271 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
22272 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
22273 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22274 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
22275 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22276 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22277 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
22278 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
22279 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22280 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
22281 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22282 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22283 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22284 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22286 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
22287 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22288 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22289 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
22290 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
22291 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
22292 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
22293 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22294 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22295 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22296 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22297 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22298 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22299 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22300 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22301 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
22303 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
22304 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22305 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22306 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
22307 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
22308 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
22309 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
22310 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22311 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22312 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22313 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22314 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22315 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22316 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22317 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22318 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22320 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
22321 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22322 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22323 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
22324 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
22325 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
22326 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
22327 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22328 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22329 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22330 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22331 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22332 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22333 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22334 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22335 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
22337 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
22338 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22339 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22340 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
22341 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
22342 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
22343 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
22344 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22345 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22346 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22347 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22348 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22349 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22350 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22351 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22352 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22354 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
22355 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22356 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22357 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
22358 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
22359 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
22360 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
22362 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
22363 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22364 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22365 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
22366 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
22367 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
22368 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
22370 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
22371 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22372 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22373 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
22374 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
22375 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
22376 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
22378 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22379 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22380 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22381 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
22382 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
22383 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
22384 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
22386 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3, "__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
22387 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3, "__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22388 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3, "__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22389 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
22390 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
22391 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
22392 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
22394 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
22395 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22396 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22397 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
22398 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
22399 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
22400 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
22402 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
22403 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22404 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22405 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
22406 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
22407 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
22408 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
22410 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22411 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22412 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22413 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
22414 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
22415 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
22416 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
22418 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
22419 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
22420 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
22421 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
22422 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
22423 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
22424 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
22425 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
22427 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22428 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22429 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22430 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22431 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub", IX86_BUILTIN_PCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22432 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw", IX86_BUILTIN_PCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22433 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud", IX86_BUILTIN_PCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22434 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq", IX86_BUILTIN_PCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22436 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22437 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22438 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22439 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22440 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22441 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22442 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22443 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22444 };
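/* Editorial aside (not in the original source): each builtin_description
   row above packs, in order, the ISA mask the builtin belongs to, the
   insn pattern (CODE_FOR_*) used to expand it, its user-visible name,
   its IX86_BUILTIN_* code, the comparison rtx_code folded into the
   pattern, and the MULTI_ARG_* shape that the registration loop at the
   end of ix86_init_mmx_sse_builtins maps to a function type.  The
   __builtin_ia32_pcomltuq row, for instance, expands through
   CODE_FOR_sse5_maskcmp_unsv2di3 with an LTU comparison and, via
   MULTI_ARG_2_DI_CMP, receives the type v2di_ftype_v2di_v2di.  */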
22446 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
22447 not in the current target ISA, so that the user can compile particular
22448 modules with target-specific options that differ from the command-line
22449 options. */
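/* Editorial sketch of why unconditional registration matters (the
   intrinsic-to-builtin mapping below is assumed from the <smmintrin.h>
   of this era, not taken from this file): with the function-specific
   target attribute, one translation unit can enable an ISA per function,
   so the builtin must already be declared even when the command line
   never enabled SSE4.1:

     #include <smmintrin.h>

     __attribute__((target ("sse4.1")))
     static __m128i
     max32 (__m128i x, __m128i y)
     {
       return _mm_max_epi32 (x, y);   // expands via __builtin_ia32_pmaxsd128
     }
*/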
22450 static void
22451 ix86_init_mmx_sse_builtins (void)
22452 {
22453 const struct builtin_description * d;
22454 size_t i;
22456 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
22457 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
22458 tree V1DI_type_node
22459 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
22460 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
22461 tree V2DI_type_node
22462 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
22463 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
22464 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
22465 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
22466 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
22467 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
22468 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
22470 tree pchar_type_node = build_pointer_type (char_type_node);
22471 tree pcchar_type_node
22472 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
22473 tree pfloat_type_node = build_pointer_type (float_type_node);
22474 tree pcfloat_type_node
22475 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
22476 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
22477 tree pcv2sf_type_node
22478 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
22479 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
22480 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
22482 /* Comparisons. */
22483 tree int_ftype_v4sf_v4sf
22484 = build_function_type_list (integer_type_node,
22485 V4SF_type_node, V4SF_type_node, NULL_TREE);
22486 tree v4si_ftype_v4sf_v4sf
22487 = build_function_type_list (V4SI_type_node,
22488 V4SF_type_node, V4SF_type_node, NULL_TREE);
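/* Reading aid (editorial): each *_ftype_* tree spells out a C prototype,
   return type first, argument types following, NULL_TREE terminated.
   int_ftype_v4sf_v4sf above is the shape `int f (__v4sf, __v4sf)', used
   for the comi/ucomi comparison builtins registered near the end of this
   function.  */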
22489 /* MMX/SSE/integer conversions. */
22490 tree int_ftype_v4sf
22491 = build_function_type_list (integer_type_node,
22492 V4SF_type_node, NULL_TREE);
22493 tree int64_ftype_v4sf
22494 = build_function_type_list (long_long_integer_type_node,
22495 V4SF_type_node, NULL_TREE);
22496 tree int_ftype_v8qi
22497 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
22498 tree v4sf_ftype_v4sf_int
22499 = build_function_type_list (V4SF_type_node,
22500 V4SF_type_node, integer_type_node, NULL_TREE);
22501 tree v4sf_ftype_v4sf_int64
22502 = build_function_type_list (V4SF_type_node,
22503 V4SF_type_node, long_long_integer_type_node,
22504 NULL_TREE);
22505 tree v4sf_ftype_v4sf_v2si
22506 = build_function_type_list (V4SF_type_node,
22507 V4SF_type_node, V2SI_type_node, NULL_TREE);
22509 /* Miscellaneous. */
22510 tree v8qi_ftype_v4hi_v4hi
22511 = build_function_type_list (V8QI_type_node,
22512 V4HI_type_node, V4HI_type_node, NULL_TREE);
22513 tree v4hi_ftype_v2si_v2si
22514 = build_function_type_list (V4HI_type_node,
22515 V2SI_type_node, V2SI_type_node, NULL_TREE);
22516 tree v4sf_ftype_v4sf_v4sf_int
22517 = build_function_type_list (V4SF_type_node,
22518 V4SF_type_node, V4SF_type_node,
22519 integer_type_node, NULL_TREE);
22520 tree v2si_ftype_v4hi_v4hi
22521 = build_function_type_list (V2SI_type_node,
22522 V4HI_type_node, V4HI_type_node, NULL_TREE);
22523 tree v4hi_ftype_v4hi_int
22524 = build_function_type_list (V4HI_type_node,
22525 V4HI_type_node, integer_type_node, NULL_TREE);
22526 tree v2si_ftype_v2si_int
22527 = build_function_type_list (V2SI_type_node,
22528 V2SI_type_node, integer_type_node, NULL_TREE);
22529 tree v1di_ftype_v1di_int
22530 = build_function_type_list (V1DI_type_node,
22531 V1DI_type_node, integer_type_node, NULL_TREE);
22533 tree void_ftype_void
22534 = build_function_type (void_type_node, void_list_node);
22535 tree void_ftype_unsigned
22536 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22537 tree void_ftype_unsigned_unsigned
22538 = build_function_type_list (void_type_node, unsigned_type_node,
22539 unsigned_type_node, NULL_TREE);
22540 tree void_ftype_pcvoid_unsigned_unsigned
22541 = build_function_type_list (void_type_node, const_ptr_type_node,
22542 unsigned_type_node, unsigned_type_node,
22543 NULL_TREE);
22544 tree unsigned_ftype_void
22545 = build_function_type (unsigned_type_node, void_list_node);
22546 tree v2si_ftype_v4sf
22547 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
22548 /* Loads/stores. */
22549 tree void_ftype_v8qi_v8qi_pchar
22550 = build_function_type_list (void_type_node,
22551 V8QI_type_node, V8QI_type_node,
22552 pchar_type_node, NULL_TREE);
22553 tree v4sf_ftype_pcfloat
22554 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
22555 tree v4sf_ftype_v4sf_pcv2sf
22556 = build_function_type_list (V4SF_type_node,
22557 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
22558 tree void_ftype_pv2sf_v4sf
22559 = build_function_type_list (void_type_node,
22560 pv2sf_type_node, V4SF_type_node, NULL_TREE);
22561 tree void_ftype_pfloat_v4sf
22562 = build_function_type_list (void_type_node,
22563 pfloat_type_node, V4SF_type_node, NULL_TREE);
22564 tree void_ftype_pdi_di
22565 = build_function_type_list (void_type_node,
22566 pdi_type_node, long_long_unsigned_type_node,
22567 NULL_TREE);
22568 tree void_ftype_pv2di_v2di
22569 = build_function_type_list (void_type_node,
22570 pv2di_type_node, V2DI_type_node, NULL_TREE);
22571 /* Normal vector unops. */
22572 tree v4sf_ftype_v4sf
22573 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
22574 tree v16qi_ftype_v16qi
22575 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
22576 tree v8hi_ftype_v8hi
22577 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
22578 tree v4si_ftype_v4si
22579 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
22580 tree v8qi_ftype_v8qi
22581 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
22582 tree v4hi_ftype_v4hi
22583 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
22585 /* Normal vector binops. */
22586 tree v4sf_ftype_v4sf_v4sf
22587 = build_function_type_list (V4SF_type_node,
22588 V4SF_type_node, V4SF_type_node, NULL_TREE);
22589 tree v8qi_ftype_v8qi_v8qi
22590 = build_function_type_list (V8QI_type_node,
22591 V8QI_type_node, V8QI_type_node, NULL_TREE);
22592 tree v4hi_ftype_v4hi_v4hi
22593 = build_function_type_list (V4HI_type_node,
22594 V4HI_type_node, V4HI_type_node, NULL_TREE);
22595 tree v2si_ftype_v2si_v2si
22596 = build_function_type_list (V2SI_type_node,
22597 V2SI_type_node, V2SI_type_node, NULL_TREE);
22598 tree v1di_ftype_v1di_v1di
22599 = build_function_type_list (V1DI_type_node,
22600 V1DI_type_node, V1DI_type_node, NULL_TREE);
22601 tree v1di_ftype_v1di_v1di_int
22602 = build_function_type_list (V1DI_type_node,
22603 V1DI_type_node, V1DI_type_node,
22604 integer_type_node, NULL_TREE);
22605 tree v2si_ftype_v2sf
22606 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
22607 tree v2sf_ftype_v2si
22608 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
22609 tree v2si_ftype_v2si
22610 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
22611 tree v2sf_ftype_v2sf
22612 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
22613 tree v2sf_ftype_v2sf_v2sf
22614 = build_function_type_list (V2SF_type_node,
22615 V2SF_type_node, V2SF_type_node, NULL_TREE);
22616 tree v2si_ftype_v2sf_v2sf
22617 = build_function_type_list (V2SI_type_node,
22618 V2SF_type_node, V2SF_type_node, NULL_TREE);
22619 tree pint_type_node = build_pointer_type (integer_type_node);
22620 tree pdouble_type_node = build_pointer_type (double_type_node);
22621 tree pcdouble_type_node = build_pointer_type (
22622 build_type_variant (double_type_node, 1, 0));
22623 tree int_ftype_v2df_v2df
22624 = build_function_type_list (integer_type_node,
22625 V2DF_type_node, V2DF_type_node, NULL_TREE);
22627 tree void_ftype_pcvoid
22628 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
22629 tree v4sf_ftype_v4si
22630 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
22631 tree v4si_ftype_v4sf
22632 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
22633 tree v2df_ftype_v4si
22634 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
22635 tree v4si_ftype_v2df
22636 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
22637 tree v4si_ftype_v2df_v2df
22638 = build_function_type_list (V4SI_type_node,
22639 V2DF_type_node, V2DF_type_node, NULL_TREE);
22640 tree v2si_ftype_v2df
22641 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
22642 tree v4sf_ftype_v2df
22643 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
22644 tree v2df_ftype_v2si
22645 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
22646 tree v2df_ftype_v4sf
22647 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
22648 tree int_ftype_v2df
22649 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
22650 tree int64_ftype_v2df
22651 = build_function_type_list (long_long_integer_type_node,
22652 V2DF_type_node, NULL_TREE);
22653 tree v2df_ftype_v2df_int
22654 = build_function_type_list (V2DF_type_node,
22655 V2DF_type_node, integer_type_node, NULL_TREE);
22656 tree v2df_ftype_v2df_int64
22657 = build_function_type_list (V2DF_type_node,
22658 V2DF_type_node, long_long_integer_type_node,
22659 NULL_TREE);
22660 tree v4sf_ftype_v4sf_v2df
22661 = build_function_type_list (V4SF_type_node,
22662 V4SF_type_node, V2DF_type_node, NULL_TREE);
22663 tree v2df_ftype_v2df_v4sf
22664 = build_function_type_list (V2DF_type_node,
22665 V2DF_type_node, V4SF_type_node, NULL_TREE);
22666 tree v2df_ftype_v2df_v2df_int
22667 = build_function_type_list (V2DF_type_node,
22668 V2DF_type_node, V2DF_type_node,
22669 integer_type_node,
22670 NULL_TREE);
22671 tree v2df_ftype_v2df_pcdouble
22672 = build_function_type_list (V2DF_type_node,
22673 V2DF_type_node, pcdouble_type_node, NULL_TREE);
22674 tree void_ftype_pdouble_v2df
22675 = build_function_type_list (void_type_node,
22676 pdouble_type_node, V2DF_type_node, NULL_TREE);
22677 tree void_ftype_pint_int
22678 = build_function_type_list (void_type_node,
22679 pint_type_node, integer_type_node, NULL_TREE);
22680 tree void_ftype_v16qi_v16qi_pchar
22681 = build_function_type_list (void_type_node,
22682 V16QI_type_node, V16QI_type_node,
22683 pchar_type_node, NULL_TREE);
22684 tree v2df_ftype_pcdouble
22685 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
22686 tree v2df_ftype_v2df_v2df
22687 = build_function_type_list (V2DF_type_node,
22688 V2DF_type_node, V2DF_type_node, NULL_TREE);
22689 tree v16qi_ftype_v16qi_v16qi
22690 = build_function_type_list (V16QI_type_node,
22691 V16QI_type_node, V16QI_type_node, NULL_TREE);
22692 tree v8hi_ftype_v8hi_v8hi
22693 = build_function_type_list (V8HI_type_node,
22694 V8HI_type_node, V8HI_type_node, NULL_TREE);
22695 tree v4si_ftype_v4si_v4si
22696 = build_function_type_list (V4SI_type_node,
22697 V4SI_type_node, V4SI_type_node, NULL_TREE);
22698 tree v2di_ftype_v2di_v2di
22699 = build_function_type_list (V2DI_type_node,
22700 V2DI_type_node, V2DI_type_node, NULL_TREE);
22701 tree v2di_ftype_v2df_v2df
22702 = build_function_type_list (V2DI_type_node,
22703 V2DF_type_node, V2DF_type_node, NULL_TREE);
22704 tree v2df_ftype_v2df
22705 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
22706 tree v2di_ftype_v2di_int
22707 = build_function_type_list (V2DI_type_node,
22708 V2DI_type_node, integer_type_node, NULL_TREE);
22709 tree v2di_ftype_v2di_v2di_int
22710 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22711 V2DI_type_node, integer_type_node, NULL_TREE);
22712 tree v4si_ftype_v4si_int
22713 = build_function_type_list (V4SI_type_node,
22714 V4SI_type_node, integer_type_node, NULL_TREE);
22715 tree v8hi_ftype_v8hi_int
22716 = build_function_type_list (V8HI_type_node,
22717 V8HI_type_node, integer_type_node, NULL_TREE);
22718 tree v4si_ftype_v8hi_v8hi
22719 = build_function_type_list (V4SI_type_node,
22720 V8HI_type_node, V8HI_type_node, NULL_TREE);
22721 tree v1di_ftype_v8qi_v8qi
22722 = build_function_type_list (V1DI_type_node,
22723 V8QI_type_node, V8QI_type_node, NULL_TREE);
22724 tree v1di_ftype_v2si_v2si
22725 = build_function_type_list (V1DI_type_node,
22726 V2SI_type_node, V2SI_type_node, NULL_TREE);
22727 tree v2di_ftype_v16qi_v16qi
22728 = build_function_type_list (V2DI_type_node,
22729 V16QI_type_node, V16QI_type_node, NULL_TREE);
22730 tree v2di_ftype_v4si_v4si
22731 = build_function_type_list (V2DI_type_node,
22732 V4SI_type_node, V4SI_type_node, NULL_TREE);
22733 tree int_ftype_v16qi
22734 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
22735 tree v16qi_ftype_pcchar
22736 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
22737 tree void_ftype_pchar_v16qi
22738 = build_function_type_list (void_type_node,
22739 pchar_type_node, V16QI_type_node, NULL_TREE);
22741 tree v2di_ftype_v2di_unsigned_unsigned
22742 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22743 unsigned_type_node, unsigned_type_node,
22744 NULL_TREE);
22745 tree v2di_ftype_v2di_v2di_unsigned_unsigned
22746 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
22747 unsigned_type_node, unsigned_type_node,
22748 NULL_TREE);
22749 tree v2di_ftype_v2di_v16qi
22750 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
22751 NULL_TREE);
22752 tree v2df_ftype_v2df_v2df_v2df
22753 = build_function_type_list (V2DF_type_node,
22754 V2DF_type_node, V2DF_type_node,
22755 V2DF_type_node, NULL_TREE);
22756 tree v4sf_ftype_v4sf_v4sf_v4sf
22757 = build_function_type_list (V4SF_type_node,
22758 V4SF_type_node, V4SF_type_node,
22759 V4SF_type_node, NULL_TREE);
22760 tree v8hi_ftype_v16qi
22761 = build_function_type_list (V8HI_type_node, V16QI_type_node,
22762 NULL_TREE);
22763 tree v4si_ftype_v16qi
22764 = build_function_type_list (V4SI_type_node, V16QI_type_node,
22765 NULL_TREE);
22766 tree v2di_ftype_v16qi
22767 = build_function_type_list (V2DI_type_node, V16QI_type_node,
22768 NULL_TREE);
22769 tree v4si_ftype_v8hi
22770 = build_function_type_list (V4SI_type_node, V8HI_type_node,
22771 NULL_TREE);
22772 tree v2di_ftype_v8hi
22773 = build_function_type_list (V2DI_type_node, V8HI_type_node,
22774 NULL_TREE);
22775 tree v2di_ftype_v4si
22776 = build_function_type_list (V2DI_type_node, V4SI_type_node,
22777 NULL_TREE);
22778 tree v2di_ftype_pv2di
22779 = build_function_type_list (V2DI_type_node, pv2di_type_node,
22780 NULL_TREE);
22781 tree v16qi_ftype_v16qi_v16qi_int
22782 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22783 V16QI_type_node, integer_type_node,
22784 NULL_TREE);
22785 tree v16qi_ftype_v16qi_v16qi_v16qi
22786 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22787 V16QI_type_node, V16QI_type_node,
22788 NULL_TREE);
22789 tree v8hi_ftype_v8hi_v8hi_int
22790 = build_function_type_list (V8HI_type_node, V8HI_type_node,
22791 V8HI_type_node, integer_type_node,
22792 NULL_TREE);
22793 tree v4si_ftype_v4si_v4si_int
22794 = build_function_type_list (V4SI_type_node, V4SI_type_node,
22795 V4SI_type_node, integer_type_node,
22796 NULL_TREE);
22797 tree int_ftype_v2di_v2di
22798 = build_function_type_list (integer_type_node,
22799 V2DI_type_node, V2DI_type_node,
22800 NULL_TREE);
22801 tree int_ftype_v16qi_int_v16qi_int_int
22802 = build_function_type_list (integer_type_node,
22803 V16QI_type_node,
22804 integer_type_node,
22805 V16QI_type_node,
22806 integer_type_node,
22807 integer_type_node,
22808 NULL_TREE);
22809 tree v16qi_ftype_v16qi_int_v16qi_int_int
22810 = build_function_type_list (V16QI_type_node,
22811 V16QI_type_node,
22812 integer_type_node,
22813 V16QI_type_node,
22814 integer_type_node,
22815 integer_type_node,
22816 NULL_TREE);
22817 tree int_ftype_v16qi_v16qi_int
22818 = build_function_type_list (integer_type_node,
22819 V16QI_type_node,
22820 V16QI_type_node,
22821 integer_type_node,
22822 NULL_TREE);
22824 /* SSE5 instructions. */
22825 tree v2di_ftype_v2di_v2di_v2di
22826 = build_function_type_list (V2DI_type_node,
22827 V2DI_type_node,
22828 V2DI_type_node,
22829 V2DI_type_node,
22830 NULL_TREE);
22832 tree v4si_ftype_v4si_v4si_v4si
22833 = build_function_type_list (V4SI_type_node,
22834 V4SI_type_node,
22835 V4SI_type_node,
22836 V4SI_type_node,
22837 NULL_TREE);
22839 tree v4si_ftype_v4si_v4si_v2di
22840 = build_function_type_list (V4SI_type_node,
22841 V4SI_type_node,
22842 V4SI_type_node,
22843 V2DI_type_node,
22844 NULL_TREE);
22846 tree v8hi_ftype_v8hi_v8hi_v8hi
22847 = build_function_type_list (V8HI_type_node,
22848 V8HI_type_node,
22849 V8HI_type_node,
22850 V8HI_type_node,
22851 NULL_TREE);
22853 tree v8hi_ftype_v8hi_v8hi_v4si
22854 = build_function_type_list (V8HI_type_node,
22855 V8HI_type_node,
22856 V8HI_type_node,
22857 V4SI_type_node,
22858 NULL_TREE);
22860 tree v2df_ftype_v2df_v2df_v16qi
22861 = build_function_type_list (V2DF_type_node,
22862 V2DF_type_node,
22863 V2DF_type_node,
22864 V16QI_type_node,
22865 NULL_TREE);
22867 tree v4sf_ftype_v4sf_v4sf_v16qi
22868 = build_function_type_list (V4SF_type_node,
22869 V4SF_type_node,
22870 V4SF_type_node,
22871 V16QI_type_node,
22872 NULL_TREE);
22874 tree v2di_ftype_v2di_si
22875 = build_function_type_list (V2DI_type_node,
22876 V2DI_type_node,
22877 integer_type_node,
22878 NULL_TREE);
22880 tree v4si_ftype_v4si_si
22881 = build_function_type_list (V4SI_type_node,
22882 V4SI_type_node,
22883 integer_type_node,
22884 NULL_TREE);
22886 tree v8hi_ftype_v8hi_si
22887 = build_function_type_list (V8HI_type_node,
22888 V8HI_type_node,
22889 integer_type_node,
22890 NULL_TREE);
22892 tree v16qi_ftype_v16qi_si
22893 = build_function_type_list (V16QI_type_node,
22894 V16QI_type_node,
22895 integer_type_node,
22896 NULL_TREE);
22897 tree v4sf_ftype_v4hi
22898 = build_function_type_list (V4SF_type_node,
22899 V4HI_type_node,
22900 NULL_TREE);
22902 tree v4hi_ftype_v4sf
22903 = build_function_type_list (V4HI_type_node,
22904 V4SF_type_node,
22905 NULL_TREE);
22907 tree v2di_ftype_v2di
22908 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
22910 tree v16qi_ftype_v8hi_v8hi
22911 = build_function_type_list (V16QI_type_node,
22912 V8HI_type_node, V8HI_type_node,
22913 NULL_TREE);
22914 tree v8hi_ftype_v4si_v4si
22915 = build_function_type_list (V8HI_type_node,
22916 V4SI_type_node, V4SI_type_node,
22917 NULL_TREE);
22918 tree v8hi_ftype_v16qi_v16qi
22919 = build_function_type_list (V8HI_type_node,
22920 V16QI_type_node, V16QI_type_node,
22921 NULL_TREE);
22922 tree v4hi_ftype_v8qi_v8qi
22923 = build_function_type_list (V4HI_type_node,
22924 V8QI_type_node, V8QI_type_node,
22925 NULL_TREE);
22926 tree unsigned_ftype_unsigned_uchar
22927 = build_function_type_list (unsigned_type_node,
22928 unsigned_type_node,
22929 unsigned_char_type_node,
22930 NULL_TREE);
22931 tree unsigned_ftype_unsigned_ushort
22932 = build_function_type_list (unsigned_type_node,
22933 unsigned_type_node,
22934 short_unsigned_type_node,
22935 NULL_TREE);
22936 tree unsigned_ftype_unsigned_unsigned
22937 = build_function_type_list (unsigned_type_node,
22938 unsigned_type_node,
22939 unsigned_type_node,
22940 NULL_TREE);
22941 tree uint64_ftype_uint64_uint64
22942 = build_function_type_list (long_long_unsigned_type_node,
22943 long_long_unsigned_type_node,
22944 long_long_unsigned_type_node,
22945 NULL_TREE);
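/* Editorial note: these unsigned shapes serve the SSE4.2 CRC32 builtins.
   Assuming the usual <smmintrin.h> wrappers, _mm_crc32_u8 lowers to
   `unsigned int __builtin_ia32_crc32qi (unsigned int, unsigned char)',
   with the u16, u32 and u64 variants following the other three shapes
   declared just above.  */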
22946 tree float_ftype_float
22947 = build_function_type_list (float_type_node,
22948 float_type_node,
22949 NULL_TREE);
22951 /* AVX builtins. */
22952 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
22953 V32QImode);
22954 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
22955 V8SImode);
22956 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
22957 V8SFmode);
22958 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
22959 V4DImode);
22960 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
22961 V4DFmode);
22962 tree v8sf_ftype_v8sf
22963 = build_function_type_list (V8SF_type_node,
22964 V8SF_type_node,
22965 NULL_TREE);
22966 tree v8si_ftype_v8sf
22967 = build_function_type_list (V8SI_type_node,
22968 V8SF_type_node,
22969 NULL_TREE);
22970 tree v8sf_ftype_v8si
22971 = build_function_type_list (V8SF_type_node,
22972 V8SI_type_node,
22973 NULL_TREE);
22974 tree v4si_ftype_v4df
22975 = build_function_type_list (V4SI_type_node,
22976 V4DF_type_node,
22977 NULL_TREE);
22978 tree v4df_ftype_v4df
22979 = build_function_type_list (V4DF_type_node,
22980 V4DF_type_node,
22981 NULL_TREE);
22982 tree v4df_ftype_v4si
22983 = build_function_type_list (V4DF_type_node,
22984 V4SI_type_node,
22985 NULL_TREE);
22986 tree v4df_ftype_v4sf
22987 = build_function_type_list (V4DF_type_node,
22988 V4SF_type_node,
22989 NULL_TREE);
22990 tree v4sf_ftype_v4df
22991 = build_function_type_list (V4SF_type_node,
22992 V4DF_type_node,
22993 NULL_TREE);
22994 tree v8sf_ftype_v8sf_v8sf
22995 = build_function_type_list (V8SF_type_node,
22996 V8SF_type_node, V8SF_type_node,
22997 NULL_TREE);
22998 tree v4df_ftype_v4df_v4df
22999 = build_function_type_list (V4DF_type_node,
23000 V4DF_type_node, V4DF_type_node,
23001 NULL_TREE);
23002 tree v8sf_ftype_v8sf_int
23003 = build_function_type_list (V8SF_type_node,
23004 V8SF_type_node, integer_type_node,
23005 NULL_TREE);
23006 tree v4si_ftype_v8si_int
23007 = build_function_type_list (V4SI_type_node,
23008 V8SI_type_node, integer_type_node,
23009 NULL_TREE);
23010 tree v4df_ftype_v4df_int
23011 = build_function_type_list (V4DF_type_node,
23012 V4DF_type_node, integer_type_node,
23013 NULL_TREE);
23014 tree v4sf_ftype_v8sf_int
23015 = build_function_type_list (V4SF_type_node,
23016 V8SF_type_node, integer_type_node,
23017 NULL_TREE);
23018 tree v2df_ftype_v4df_int
23019 = build_function_type_list (V2DF_type_node,
23020 V4DF_type_node, integer_type_node,
23021 NULL_TREE);
23022 tree v8sf_ftype_v8sf_v8sf_int
23023 = build_function_type_list (V8SF_type_node,
23024 V8SF_type_node, V8SF_type_node,
23025 integer_type_node,
23026 NULL_TREE);
23027 tree v8sf_ftype_v8sf_v8sf_v8sf
23028 = build_function_type_list (V8SF_type_node,
23029 V8SF_type_node, V8SF_type_node,
23030 V8SF_type_node,
23031 NULL_TREE);
23032 tree v4df_ftype_v4df_v4df_v4df
23033 = build_function_type_list (V4DF_type_node,
23034 V4DF_type_node, V4DF_type_node,
23035 V4DF_type_node,
23036 NULL_TREE);
23037 tree v8si_ftype_v8si_v8si_int
23038 = build_function_type_list (V8SI_type_node,
23039 V8SI_type_node, V8SI_type_node,
23040 integer_type_node,
23041 NULL_TREE);
23042 tree v4df_ftype_v4df_v4df_int
23043 = build_function_type_list (V4DF_type_node,
23044 V4DF_type_node, V4DF_type_node,
23045 integer_type_node,
23046 NULL_TREE);
23047 tree v8sf_ftype_pcfloat
23048 = build_function_type_list (V8SF_type_node,
23049 pcfloat_type_node,
23050 NULL_TREE);
23051 tree v4df_ftype_pcdouble
23052 = build_function_type_list (V4DF_type_node,
23053 pcdouble_type_node,
23054 NULL_TREE);
23055 tree pcv4sf_type_node
23056 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
23057 tree pcv2df_type_node
23058 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
23059 tree v8sf_ftype_pcv4sf
23060 = build_function_type_list (V8SF_type_node,
23061 pcv4sf_type_node,
23062 NULL_TREE);
23063 tree v4df_ftype_pcv2df
23064 = build_function_type_list (V4DF_type_node,
23065 pcv2df_type_node,
23066 NULL_TREE);
23067 tree v32qi_ftype_pcchar
23068 = build_function_type_list (V32QI_type_node,
23069 pcchar_type_node,
23070 NULL_TREE);
23071 tree void_ftype_pchar_v32qi
23072 = build_function_type_list (void_type_node,
23073 pchar_type_node, V32QI_type_node,
23074 NULL_TREE);
23075 tree v8si_ftype_v8si_v4si_int
23076 = build_function_type_list (V8SI_type_node,
23077 V8SI_type_node, V4SI_type_node,
23078 integer_type_node,
23079 NULL_TREE);
23080 tree pv4di_type_node = build_pointer_type (V4DI_type_node);
23081 tree void_ftype_pv4di_v4di
23082 = build_function_type_list (void_type_node,
23083 pv4di_type_node, V4DI_type_node,
23084 NULL_TREE);
23085 tree v8sf_ftype_v8sf_v4sf_int
23086 = build_function_type_list (V8SF_type_node,
23087 V8SF_type_node, V4SF_type_node,
23088 integer_type_node,
23089 NULL_TREE);
23090 tree v4df_ftype_v4df_v2df_int
23091 = build_function_type_list (V4DF_type_node,
23092 V4DF_type_node, V2DF_type_node,
23093 integer_type_node,
23094 NULL_TREE);
23095 tree void_ftype_pfloat_v8sf
23096 = build_function_type_list (void_type_node,
23097 pfloat_type_node, V8SF_type_node,
23098 NULL_TREE);
23099 tree void_ftype_pdouble_v4df
23100 = build_function_type_list (void_type_node,
23101 pdouble_type_node, V4DF_type_node,
23102 NULL_TREE);
23103 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
23104 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
23105 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
23106 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
23107 tree pcv8sf_type_node
23108 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
23109 tree pcv4df_type_node
23110 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
23111 tree v8sf_ftype_pcv8sf_v8sf
23112 = build_function_type_list (V8SF_type_node,
23113 pcv8sf_type_node, V8SF_type_node,
23114 NULL_TREE);
23115 tree v4df_ftype_pcv4df_v4df
23116 = build_function_type_list (V4DF_type_node,
23117 pcv4df_type_node, V4DF_type_node,
23118 NULL_TREE);
23119 tree v4sf_ftype_pcv4sf_v4sf
23120 = build_function_type_list (V4SF_type_node,
23121 pcv4sf_type_node, V4SF_type_node,
23122 NULL_TREE);
23123 tree v2df_ftype_pcv2df_v2df
23124 = build_function_type_list (V2DF_type_node,
23125 pcv2df_type_node, V2DF_type_node,
23126 NULL_TREE);
23127 tree void_ftype_pv8sf_v8sf_v8sf
23128 = build_function_type_list (void_type_node,
23129 pv8sf_type_node, V8SF_type_node,
23130 V8SF_type_node,
23131 NULL_TREE);
23132 tree void_ftype_pv4df_v4df_v4df
23133 = build_function_type_list (void_type_node,
23134 pv4df_type_node, V4DF_type_node,
23135 V4DF_type_node,
23136 NULL_TREE);
23137 tree void_ftype_pv4sf_v4sf_v4sf
23138 = build_function_type_list (void_type_node,
23139 pv4sf_type_node, V4SF_type_node,
23140 V4SF_type_node,
23141 NULL_TREE);
23142 tree void_ftype_pv2df_v2df_v2df
23143 = build_function_type_list (void_type_node,
23144 pv2df_type_node, V2DF_type_node,
23145 V2DF_type_node,
23146 NULL_TREE);
23147 tree v4df_ftype_v2df
23148 = build_function_type_list (V4DF_type_node,
23149 V2DF_type_node,
23150 NULL_TREE);
23151 tree v8sf_ftype_v4sf
23152 = build_function_type_list (V8SF_type_node,
23153 V4SF_type_node,
23154 NULL_TREE);
23155 tree v8si_ftype_v4si
23156 = build_function_type_list (V8SI_type_node,
23157 V4SI_type_node,
23158 NULL_TREE);
23159 tree v2df_ftype_v4df
23160 = build_function_type_list (V2DF_type_node,
23161 V4DF_type_node,
23162 NULL_TREE);
23163 tree v4sf_ftype_v8sf
23164 = build_function_type_list (V4SF_type_node,
23165 V8SF_type_node,
23166 NULL_TREE);
23167 tree v4si_ftype_v8si
23168 = build_function_type_list (V4SI_type_node,
23169 V8SI_type_node,
23170 NULL_TREE);
23171 tree int_ftype_v4df
23172 = build_function_type_list (integer_type_node,
23173 V4DF_type_node,
23174 NULL_TREE);
23175 tree int_ftype_v8sf
23176 = build_function_type_list (integer_type_node,
23177 V8SF_type_node,
23178 NULL_TREE);
23179 tree int_ftype_v8sf_v8sf
23180 = build_function_type_list (integer_type_node,
23181 V8SF_type_node, V8SF_type_node,
23182 NULL_TREE);
23183 tree int_ftype_v4di_v4di
23184 = build_function_type_list (integer_type_node,
23185 V4DI_type_node, V4DI_type_node,
23186 NULL_TREE);
23187 tree int_ftype_v4df_v4df
23188 = build_function_type_list (integer_type_node,
23189 V4DF_type_node, V4DF_type_node,
23190 NULL_TREE);
23191 tree v8sf_ftype_v8sf_v8si
23192 = build_function_type_list (V8SF_type_node,
23193 V8SF_type_node, V8SI_type_node,
23194 NULL_TREE);
23195 tree v4df_ftype_v4df_v4di
23196 = build_function_type_list (V4DF_type_node,
23197 V4DF_type_node, V4DI_type_node,
23198 NULL_TREE);
23199 tree v4sf_ftype_v4sf_v4si
23200 = build_function_type_list (V4SF_type_node,
23201 V4SF_type_node, V4SI_type_node, NULL_TREE);
23202 tree v2df_ftype_v2df_v2di
23203 = build_function_type_list (V2DF_type_node,
23204 V2DF_type_node, V2DI_type_node, NULL_TREE);
23206 /* Integer intrinsics. */
23207 tree uint64_ftype_void
23208 = build_function_type (long_long_unsigned_type_node,
23209 void_list_node);
23210 tree int_ftype_int
23211 = build_function_type_list (integer_type_node,
23212 integer_type_node, NULL_TREE);
23213 tree int64_ftype_int64
23214 = build_function_type_list (long_long_integer_type_node,
23215 long_long_integer_type_node,
23216 NULL_TREE);
23217 tree uint64_ftype_int
23218 = build_function_type_list (long_long_unsigned_type_node,
23219 integer_type_node, NULL_TREE);
23220 tree punsigned_type_node = build_pointer_type (unsigned_type_node);
23221 tree uint64_ftype_punsigned
23222 = build_function_type_list (long_long_unsigned_type_node,
23223 punsigned_type_node, NULL_TREE);
23224 tree ushort_ftype_ushort_int
23225 = build_function_type_list (short_unsigned_type_node,
23226 short_unsigned_type_node,
23227 integer_type_node,
23228 NULL_TREE);
23229 tree uchar_ftype_uchar_int
23230 = build_function_type_list (unsigned_char_type_node,
23231 unsigned_char_type_node,
23232 integer_type_node,
23233 NULL_TREE);
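/* Editorial note (hedged): these scalar shapes back the integer
   intrinsics registered through the UINT64_FTYPE_* and UINT*_FTYPE_*
   cases below, presumably the rdtsc/rdtscp builtins (rdtscp storing the
   processor id through the unsigned pointer) and the byte/word rotate
   helpers; the exact builtin names are not visible in this excerpt.  */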
23235 tree ftype;
23237 /* Add all special builtins with a variable number of operands. */
23238 for (i = 0, d = bdesc_special_args;
23239 i < ARRAY_SIZE (bdesc_special_args);
23240 i++, d++)
23241 {
23242 tree type;
23244 if (d->name == 0)
23245 continue;
23247 switch ((enum ix86_special_builtin_type) d->flag)
23248 {
23249 case VOID_FTYPE_VOID:
23250 type = void_ftype_void;
23251 break;
23252 case UINT64_FTYPE_VOID:
23253 type = uint64_ftype_void;
23254 break;
23255 case UINT64_FTYPE_PUNSIGNED:
23256 type = uint64_ftype_punsigned;
23257 break;
23258 case V32QI_FTYPE_PCCHAR:
23259 type = v32qi_ftype_pcchar;
23260 break;
23261 case V16QI_FTYPE_PCCHAR:
23262 type = v16qi_ftype_pcchar;
23263 break;
23264 case V8SF_FTYPE_PCV4SF:
23265 type = v8sf_ftype_pcv4sf;
23266 break;
23267 case V8SF_FTYPE_PCFLOAT:
23268 type = v8sf_ftype_pcfloat;
23269 break;
23270 case V4DF_FTYPE_PCV2DF:
23271 type = v4df_ftype_pcv2df;
23272 break;
23273 case V4DF_FTYPE_PCDOUBLE:
23274 type = v4df_ftype_pcdouble;
23275 break;
23276 case V4SF_FTYPE_PCFLOAT:
23277 type = v4sf_ftype_pcfloat;
23278 break;
23279 case V2DI_FTYPE_PV2DI:
23280 type = v2di_ftype_pv2di;
23281 break;
23282 case V2DF_FTYPE_PCDOUBLE:
23283 type = v2df_ftype_pcdouble;
23284 break;
23285 case V8SF_FTYPE_PCV8SF_V8SF:
23286 type = v8sf_ftype_pcv8sf_v8sf;
23287 break;
23288 case V4DF_FTYPE_PCV4DF_V4DF:
23289 type = v4df_ftype_pcv4df_v4df;
23290 break;
23291 case V4SF_FTYPE_V4SF_PCV2SF:
23292 type = v4sf_ftype_v4sf_pcv2sf;
23293 break;
23294 case V4SF_FTYPE_PCV4SF_V4SF:
23295 type = v4sf_ftype_pcv4sf_v4sf;
23296 break;
23297 case V2DF_FTYPE_V2DF_PCDOUBLE:
23298 type = v2df_ftype_v2df_pcdouble;
23299 break;
23300 case V2DF_FTYPE_PCV2DF_V2DF:
23301 type = v2df_ftype_pcv2df_v2df;
23302 break;
23303 case VOID_FTYPE_PV2SF_V4SF:
23304 type = void_ftype_pv2sf_v4sf;
23305 break;
23306 case VOID_FTYPE_PV4DI_V4DI:
23307 type = void_ftype_pv4di_v4di;
23308 break;
23309 case VOID_FTYPE_PV2DI_V2DI:
23310 type = void_ftype_pv2di_v2di;
23311 break;
23312 case VOID_FTYPE_PCHAR_V32QI:
23313 type = void_ftype_pchar_v32qi;
23314 break;
23315 case VOID_FTYPE_PCHAR_V16QI:
23316 type = void_ftype_pchar_v16qi;
23317 break;
23318 case VOID_FTYPE_PFLOAT_V8SF:
23319 type = void_ftype_pfloat_v8sf;
23320 break;
23321 case VOID_FTYPE_PFLOAT_V4SF:
23322 type = void_ftype_pfloat_v4sf;
23323 break;
23324 case VOID_FTYPE_PDOUBLE_V4DF:
23325 type = void_ftype_pdouble_v4df;
23326 break;
23327 case VOID_FTYPE_PDOUBLE_V2DF:
23328 type = void_ftype_pdouble_v2df;
23329 break;
23330 case VOID_FTYPE_PDI_DI:
23331 type = void_ftype_pdi_di;
23332 break;
23333 case VOID_FTYPE_PINT_INT:
23334 type = void_ftype_pint_int;
23335 break;
23336 case VOID_FTYPE_PV8SF_V8SF_V8SF:
23337 type = void_ftype_pv8sf_v8sf_v8sf;
23338 break;
23339 case VOID_FTYPE_PV4DF_V4DF_V4DF:
23340 type = void_ftype_pv4df_v4df_v4df;
23341 break;
23342 case VOID_FTYPE_PV4SF_V4SF_V4SF:
23343 type = void_ftype_pv4sf_v4sf_v4sf;
23344 break;
23345 case VOID_FTYPE_PV2DF_V2DF_V2DF:
23346 type = void_ftype_pv2df_v2df_v2df;
23347 break;
23348 default:
23349 gcc_unreachable ();
23350 }
23352 def_builtin (d->mask, d->name, type, d->code);
23353 }
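/* Editorial aside: the switch above only translates the flag enum into
   one of the prebuilt type trees; def_builtin then creates the decl
   under the given name and records d->mask, so the ISA requirement is
   checked when the builtin is expanded, not when it is declared (see the
   comment at the head of this function).  As a concrete pairing,
   VOID_FTYPE_PINT_INT is the shape of the non-temporal store builtin
   `void __builtin_ia32_movnti (int *, int)'.  */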
23355 /* Add all builtins with a variable number of operands. */
23356 for (i = 0, d = bdesc_args;
23357 i < ARRAY_SIZE (bdesc_args);
23358 i++, d++)
23359 {
23360 tree type;
23362 if (d->name == 0)
23363 continue;
23365 switch ((enum ix86_builtin_type) d->flag)
23366 {
23367 case FLOAT_FTYPE_FLOAT:
23368 type = float_ftype_float;
23369 break;
23370 case INT_FTYPE_V8SF_V8SF_PTEST:
23371 type = int_ftype_v8sf_v8sf;
23372 break;
23373 case INT_FTYPE_V4DI_V4DI_PTEST:
23374 type = int_ftype_v4di_v4di;
23375 break;
23376 case INT_FTYPE_V4DF_V4DF_PTEST:
23377 type = int_ftype_v4df_v4df;
23378 break;
23379 case INT_FTYPE_V4SF_V4SF_PTEST:
23380 type = int_ftype_v4sf_v4sf;
23381 break;
23382 case INT_FTYPE_V2DI_V2DI_PTEST:
23383 type = int_ftype_v2di_v2di;
23384 break;
23385 case INT_FTYPE_V2DF_V2DF_PTEST:
23386 type = int_ftype_v2df_v2df;
23387 break;
23388 case INT_FTYPE_INT:
23389 type = int_ftype_int;
23390 break;
23391 case UINT64_FTYPE_INT:
23392 type = uint64_ftype_int;
23393 break;
23394 case INT64_FTYPE_INT64:
23395 type = int64_ftype_int64;
23396 break;
23397 case INT64_FTYPE_V4SF:
23398 type = int64_ftype_v4sf;
23399 break;
23400 case INT64_FTYPE_V2DF:
23401 type = int64_ftype_v2df;
23402 break;
23403 case INT_FTYPE_V16QI:
23404 type = int_ftype_v16qi;
23405 break;
23406 case INT_FTYPE_V8QI:
23407 type = int_ftype_v8qi;
23408 break;
23409 case INT_FTYPE_V8SF:
23410 type = int_ftype_v8sf;
23411 break;
23412 case INT_FTYPE_V4DF:
23413 type = int_ftype_v4df;
23414 break;
23415 case INT_FTYPE_V4SF:
23416 type = int_ftype_v4sf;
23417 break;
23418 case INT_FTYPE_V2DF:
23419 type = int_ftype_v2df;
23420 break;
23421 case V16QI_FTYPE_V16QI:
23422 type = v16qi_ftype_v16qi;
23423 break;
23424 case V8SI_FTYPE_V8SF:
23425 type = v8si_ftype_v8sf;
23426 break;
23427 case V8SI_FTYPE_V4SI:
23428 type = v8si_ftype_v4si;
23429 break;
23430 case V8HI_FTYPE_V8HI:
23431 type = v8hi_ftype_v8hi;
23432 break;
23433 case V8HI_FTYPE_V16QI:
23434 type = v8hi_ftype_v16qi;
23435 break;
23436 case V8QI_FTYPE_V8QI:
23437 type = v8qi_ftype_v8qi;
23438 break;
23439 case V8SF_FTYPE_V8SF:
23440 type = v8sf_ftype_v8sf;
23441 break;
23442 case V8SF_FTYPE_V8SI:
23443 type = v8sf_ftype_v8si;
23444 break;
23445 case V8SF_FTYPE_V4SF:
23446 type = v8sf_ftype_v4sf;
23447 break;
23448 case V4SI_FTYPE_V4DF:
23449 type = v4si_ftype_v4df;
23450 break;
23451 case V4SI_FTYPE_V4SI:
23452 type = v4si_ftype_v4si;
23453 break;
23454 case V4SI_FTYPE_V16QI:
23455 type = v4si_ftype_v16qi;
23456 break;
23457 case V4SI_FTYPE_V8SI:
23458 type = v4si_ftype_v8si;
23459 break;
23460 case V4SI_FTYPE_V8HI:
23461 type = v4si_ftype_v8hi;
23462 break;
23463 case V4SI_FTYPE_V4SF:
23464 type = v4si_ftype_v4sf;
23465 break;
23466 case V4SI_FTYPE_V2DF:
23467 type = v4si_ftype_v2df;
23468 break;
23469 case V4HI_FTYPE_V4HI:
23470 type = v4hi_ftype_v4hi;
23471 break;
23472 case V4DF_FTYPE_V4DF:
23473 type = v4df_ftype_v4df;
23474 break;
23475 case V4DF_FTYPE_V4SI:
23476 type = v4df_ftype_v4si;
23477 break;
23478 case V4DF_FTYPE_V4SF:
23479 type = v4df_ftype_v4sf;
23480 break;
23481 case V4DF_FTYPE_V2DF:
23482 type = v4df_ftype_v2df;
23483 break;
23484 case V4SF_FTYPE_V4SF:
23485 case V4SF_FTYPE_V4SF_VEC_MERGE:
23486 type = v4sf_ftype_v4sf;
23487 break;
23488 case V4SF_FTYPE_V8SF:
23489 type = v4sf_ftype_v8sf;
23490 break;
23491 case V4SF_FTYPE_V4SI:
23492 type = v4sf_ftype_v4si;
23493 break;
23494 case V4SF_FTYPE_V4DF:
23495 type = v4sf_ftype_v4df;
23496 break;
23497 case V4SF_FTYPE_V2DF:
23498 type = v4sf_ftype_v2df;
23499 break;
23500 case V2DI_FTYPE_V2DI:
23501 type = v2di_ftype_v2di;
23502 break;
23503 case V2DI_FTYPE_V16QI:
23504 type = v2di_ftype_v16qi;
23505 break;
23506 case V2DI_FTYPE_V8HI:
23507 type = v2di_ftype_v8hi;
23508 break;
23509 case V2DI_FTYPE_V4SI:
23510 type = v2di_ftype_v4si;
23511 break;
23512 case V2SI_FTYPE_V2SI:
23513 type = v2si_ftype_v2si;
23514 break;
23515 case V2SI_FTYPE_V4SF:
23516 type = v2si_ftype_v4sf;
23517 break;
23518 case V2SI_FTYPE_V2DF:
23519 type = v2si_ftype_v2df;
23520 break;
23521 case V2SI_FTYPE_V2SF:
23522 type = v2si_ftype_v2sf;
23523 break;
23524 case V2DF_FTYPE_V4DF:
23525 type = v2df_ftype_v4df;
23526 break;
23527 case V2DF_FTYPE_V4SF:
23528 type = v2df_ftype_v4sf;
23529 break;
23530 case V2DF_FTYPE_V2DF:
23531 case V2DF_FTYPE_V2DF_VEC_MERGE:
23532 type = v2df_ftype_v2df;
23533 break;
23534 case V2DF_FTYPE_V2SI:
23535 type = v2df_ftype_v2si;
23536 break;
23537 case V2DF_FTYPE_V4SI:
23538 type = v2df_ftype_v4si;
23539 break;
23540 case V2SF_FTYPE_V2SF:
23541 type = v2sf_ftype_v2sf;
23542 break;
23543 case V2SF_FTYPE_V2SI:
23544 type = v2sf_ftype_v2si;
23545 break;
23546 case V16QI_FTYPE_V16QI_V16QI:
23547 type = v16qi_ftype_v16qi_v16qi;
23548 break;
23549 case V16QI_FTYPE_V8HI_V8HI:
23550 type = v16qi_ftype_v8hi_v8hi;
23551 break;
23552 case V8QI_FTYPE_V8QI_V8QI:
23553 type = v8qi_ftype_v8qi_v8qi;
23554 break;
23555 case V8QI_FTYPE_V4HI_V4HI:
23556 type = v8qi_ftype_v4hi_v4hi;
23557 break;
23558 case V8HI_FTYPE_V8HI_V8HI:
23559 case V8HI_FTYPE_V8HI_V8HI_COUNT:
23560 type = v8hi_ftype_v8hi_v8hi;
23561 break;
23562 case V8HI_FTYPE_V16QI_V16QI:
23563 type = v8hi_ftype_v16qi_v16qi;
23564 break;
23565 case V8HI_FTYPE_V4SI_V4SI:
23566 type = v8hi_ftype_v4si_v4si;
23567 break;
23568 case V8HI_FTYPE_V8HI_SI_COUNT:
23569 type = v8hi_ftype_v8hi_int;
23570 break;
23571 case V8SF_FTYPE_V8SF_V8SF:
23572 type = v8sf_ftype_v8sf_v8sf;
23573 break;
23574 case V8SF_FTYPE_V8SF_V8SI:
23575 type = v8sf_ftype_v8sf_v8si;
23576 break;
23577 case V4SI_FTYPE_V4SI_V4SI:
23578 case V4SI_FTYPE_V4SI_V4SI_COUNT:
23579 type = v4si_ftype_v4si_v4si;
23580 break;
23581 case V4SI_FTYPE_V8HI_V8HI:
23582 type = v4si_ftype_v8hi_v8hi;
23583 break;
23584 case V4SI_FTYPE_V4SF_V4SF:
23585 type = v4si_ftype_v4sf_v4sf;
23586 break;
23587 case V4SI_FTYPE_V2DF_V2DF:
23588 type = v4si_ftype_v2df_v2df;
23589 break;
23590 case V4SI_FTYPE_V4SI_SI_COUNT:
23591 type = v4si_ftype_v4si_int;
23592 break;
23593 case V4HI_FTYPE_V4HI_V4HI:
23594 case V4HI_FTYPE_V4HI_V4HI_COUNT:
23595 type = v4hi_ftype_v4hi_v4hi;
23596 break;
23597 case V4HI_FTYPE_V8QI_V8QI:
23598 type = v4hi_ftype_v8qi_v8qi;
23599 break;
23600 case V4HI_FTYPE_V2SI_V2SI:
23601 type = v4hi_ftype_v2si_v2si;
23602 break;
23603 case V4HI_FTYPE_V4HI_SI_COUNT:
23604 type = v4hi_ftype_v4hi_int;
23605 break;
23606 case V4DF_FTYPE_V4DF_V4DF:
23607 type = v4df_ftype_v4df_v4df;
23608 break;
23609 case V4DF_FTYPE_V4DF_V4DI:
23610 type = v4df_ftype_v4df_v4di;
23611 break;
23612 case V4SF_FTYPE_V4SF_V4SF:
23613 case V4SF_FTYPE_V4SF_V4SF_SWAP:
23614 type = v4sf_ftype_v4sf_v4sf;
23615 break;
23616 case V4SF_FTYPE_V4SF_V4SI:
23617 type = v4sf_ftype_v4sf_v4si;
23618 break;
23619 case V4SF_FTYPE_V4SF_V2SI:
23620 type = v4sf_ftype_v4sf_v2si;
23621 break;
23622 case V4SF_FTYPE_V4SF_V2DF:
23623 type = v4sf_ftype_v4sf_v2df;
23624 break;
23625 case V4SF_FTYPE_V4SF_DI:
23626 type = v4sf_ftype_v4sf_int64;
23627 break;
23628 case V4SF_FTYPE_V4SF_SI:
23629 type = v4sf_ftype_v4sf_int;
23630 break;
23631 case V2DI_FTYPE_V2DI_V2DI:
23632 case V2DI_FTYPE_V2DI_V2DI_COUNT:
23633 type = v2di_ftype_v2di_v2di;
23634 break;
23635 case V2DI_FTYPE_V16QI_V16QI:
23636 type = v2di_ftype_v16qi_v16qi;
23637 break;
23638 case V2DI_FTYPE_V4SI_V4SI:
23639 type = v2di_ftype_v4si_v4si;
23640 break;
23641 case V2DI_FTYPE_V2DI_V16QI:
23642 type = v2di_ftype_v2di_v16qi;
23643 break;
23644 case V2DI_FTYPE_V2DF_V2DF:
23645 type = v2di_ftype_v2df_v2df;
23646 break;
23647 case V2DI_FTYPE_V2DI_SI_COUNT:
23648 type = v2di_ftype_v2di_int;
23649 break;
23650 case V2SI_FTYPE_V2SI_V2SI:
23651 case V2SI_FTYPE_V2SI_V2SI_COUNT:
23652 type = v2si_ftype_v2si_v2si;
23653 break;
23654 case V2SI_FTYPE_V4HI_V4HI:
23655 type = v2si_ftype_v4hi_v4hi;
23656 break;
23657 case V2SI_FTYPE_V2SF_V2SF:
23658 type = v2si_ftype_v2sf_v2sf;
23659 break;
23660 case V2SI_FTYPE_V2SI_SI_COUNT:
23661 type = v2si_ftype_v2si_int;
23662 break;
23663 case V2DF_FTYPE_V2DF_V2DF:
23664 case V2DF_FTYPE_V2DF_V2DF_SWAP:
23665 type = v2df_ftype_v2df_v2df;
23666 break;
23667 case V2DF_FTYPE_V2DF_V4SF:
23668 type = v2df_ftype_v2df_v4sf;
23669 break;
23670 case V2DF_FTYPE_V2DF_V2DI:
23671 type = v2df_ftype_v2df_v2di;
23672 break;
23673 case V2DF_FTYPE_V2DF_DI:
23674 type = v2df_ftype_v2df_int64;
23675 break;
23676 case V2DF_FTYPE_V2DF_SI:
23677 type = v2df_ftype_v2df_int;
23678 break;
23679 case V2SF_FTYPE_V2SF_V2SF:
23680 type = v2sf_ftype_v2sf_v2sf;
23681 break;
23682 case V1DI_FTYPE_V1DI_V1DI:
23683 case V1DI_FTYPE_V1DI_V1DI_COUNT:
23684 type = v1di_ftype_v1di_v1di;
23685 break;
23686 case V1DI_FTYPE_V8QI_V8QI:
23687 type = v1di_ftype_v8qi_v8qi;
23688 break;
23689 case V1DI_FTYPE_V2SI_V2SI:
23690 type = v1di_ftype_v2si_v2si;
23691 break;
23692 case V1DI_FTYPE_V1DI_SI_COUNT:
23693 type = v1di_ftype_v1di_int;
23694 break;
23695 case UINT64_FTYPE_UINT64_UINT64:
23696 type = uint64_ftype_uint64_uint64;
23697 break;
23698 case UINT_FTYPE_UINT_UINT:
23699 type = unsigned_ftype_unsigned_unsigned;
23700 break;
23701 case UINT_FTYPE_UINT_USHORT:
23702 type = unsigned_ftype_unsigned_ushort;
23703 break;
23704 case UINT_FTYPE_UINT_UCHAR:
23705 type = unsigned_ftype_unsigned_uchar;
23706 break;
23707 case UINT16_FTYPE_UINT16_INT:
23708 type = ushort_ftype_ushort_int;
23709 break;
23710 case UINT8_FTYPE_UINT8_INT:
23711 type = uchar_ftype_uchar_int;
23712 break;
23713 case V8HI_FTYPE_V8HI_INT:
23714 type = v8hi_ftype_v8hi_int;
23715 break;
23716 case V8SF_FTYPE_V8SF_INT:
23717 type = v8sf_ftype_v8sf_int;
23718 break;
23719 case V4SI_FTYPE_V4SI_INT:
23720 type = v4si_ftype_v4si_int;
23721 break;
23722 case V4SI_FTYPE_V8SI_INT:
23723 type = v4si_ftype_v8si_int;
23724 break;
23725 case V4HI_FTYPE_V4HI_INT:
23726 type = v4hi_ftype_v4hi_int;
23727 break;
23728 case V4DF_FTYPE_V4DF_INT:
23729 type = v4df_ftype_v4df_int;
23730 break;
23731 case V4SF_FTYPE_V4SF_INT:
23732 type = v4sf_ftype_v4sf_int;
23733 break;
23734 case V4SF_FTYPE_V8SF_INT:
23735 type = v4sf_ftype_v8sf_int;
23736 break;
23737 case V2DI_FTYPE_V2DI_INT:
23738 case V2DI2TI_FTYPE_V2DI_INT:
23739 type = v2di_ftype_v2di_int;
23740 break;
23741 case V2DF_FTYPE_V2DF_INT:
23742 type = v2df_ftype_v2df_int;
23743 break;
23744 case V2DF_FTYPE_V4DF_INT:
23745 type = v2df_ftype_v4df_int;
23746 break;
23747 case V16QI_FTYPE_V16QI_V16QI_V16QI:
23748 type = v16qi_ftype_v16qi_v16qi_v16qi;
23749 break;
23750 case V8SF_FTYPE_V8SF_V8SF_V8SF:
23751 type = v8sf_ftype_v8sf_v8sf_v8sf;
23752 break;
23753 case V4DF_FTYPE_V4DF_V4DF_V4DF:
23754 type = v4df_ftype_v4df_v4df_v4df;
23755 break;
23756 case V4SF_FTYPE_V4SF_V4SF_V4SF:
23757 type = v4sf_ftype_v4sf_v4sf_v4sf;
23758 break;
23759 case V2DF_FTYPE_V2DF_V2DF_V2DF:
23760 type = v2df_ftype_v2df_v2df_v2df;
23761 break;
23762 case V16QI_FTYPE_V16QI_V16QI_INT:
23763 type = v16qi_ftype_v16qi_v16qi_int;
23764 break;
23765 case V8SI_FTYPE_V8SI_V8SI_INT:
23766 type = v8si_ftype_v8si_v8si_int;
23767 break;
23768 case V8SI_FTYPE_V8SI_V4SI_INT:
23769 type = v8si_ftype_v8si_v4si_int;
23770 break;
23771 case V8HI_FTYPE_V8HI_V8HI_INT:
23772 type = v8hi_ftype_v8hi_v8hi_int;
23773 break;
23774 case V8SF_FTYPE_V8SF_V8SF_INT:
23775 type = v8sf_ftype_v8sf_v8sf_int;
23776 break;
23777 case V8SF_FTYPE_V8SF_V4SF_INT:
23778 type = v8sf_ftype_v8sf_v4sf_int;
23779 break;
23780 case V4SI_FTYPE_V4SI_V4SI_INT:
23781 type = v4si_ftype_v4si_v4si_int;
23782 break;
23783 case V4DF_FTYPE_V4DF_V4DF_INT:
23784 type = v4df_ftype_v4df_v4df_int;
23785 break;
23786 case V4DF_FTYPE_V4DF_V2DF_INT:
23787 type = v4df_ftype_v4df_v2df_int;
23788 break;
23789 case V4SF_FTYPE_V4SF_V4SF_INT:
23790 type = v4sf_ftype_v4sf_v4sf_int;
23791 break;
23792 case V2DI_FTYPE_V2DI_V2DI_INT:
23793 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23794 type = v2di_ftype_v2di_v2di_int;
23795 break;
23796 case V2DF_FTYPE_V2DF_V2DF_INT:
23797 type = v2df_ftype_v2df_v2df_int;
23798 break;
23799 case V2DI_FTYPE_V2DI_UINT_UINT:
23800 type = v2di_ftype_v2di_unsigned_unsigned;
23801 break;
23802 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23803 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
23804 break;
23805 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23806 type = v1di_ftype_v1di_v1di_int;
23807 break;
23808 default:
23809 gcc_unreachable ();
23810 }
23812 def_builtin_const (d->mask, d->name, type, d->code);
23813 }
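/* Editorial aside: unlike the special builtins above, these go through
   def_builtin_const, which in this file additionally marks the decl
   TREE_READONLY; that is appropriate because none of these builtins
   touch memory or other state, so the optimizers may merge calls with
   identical arguments.  */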
23815 /* pcmpestr[im] insns. */
23816 for (i = 0, d = bdesc_pcmpestr;
23817 i < ARRAY_SIZE (bdesc_pcmpestr);
23818 i++, d++)
23819 {
23820 if (d->code == IX86_BUILTIN_PCMPESTRM128)
23821 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
23822 else
23823 ftype = int_ftype_v16qi_int_v16qi_int_int;
23824 def_builtin_const (d->mask, d->name, ftype, d->code);
23825 }
23827 /* pcmpistr[im] insns. */
23828 for (i = 0, d = bdesc_pcmpistr;
23829 i < ARRAY_SIZE (bdesc_pcmpistr);
23830 i++, d++)
23831 {
23832 if (d->code == IX86_BUILTIN_PCMPISTRM128)
23833 ftype = v16qi_ftype_v16qi_v16qi_int;
23834 else
23835 ftype = int_ftype_v16qi_v16qi_int;
23836 def_builtin_const (d->mask, d->name, ftype, d->code);
23837 }
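/* Editorial illustration: the string-compare builtins come in an index
   flavor and a mask flavor,

     int     __builtin_ia32_pcmpistri128 (__v16qi, __v16qi, const int);
     __v16qi __builtin_ia32_pcmpistrm128 (__v16qi, __v16qi, const int);

   and the pcmpestr pair adds an explicit length operand after each
   vector, matching the *_v16qi_int_v16qi_int_int types chosen above.  */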
23839 /* comi/ucomi insns. */
23840 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23841 if (d->mask == OPTION_MASK_ISA_SSE2)
23842 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
23843 else
23844 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
23846 /* SSE */
23847 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
23848 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
23850 /* SSE or 3DNow!A */
23851 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
23853 /* SSE2 */
23854 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
23856 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
23857 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
23859 /* SSE3. */
23860 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
23861 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
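/* Usage sketch (assuming the <pmmintrin.h> wrappers of this era):

     _mm_monitor (addr, 0, 0);   arm the monitor on the line of *addr
     _mm_mwait (0, 0);           wait for a store to the monitored line

   both of which lower to the two builtins just registered.  */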
23863 /* AES */
23864 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
23865 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
23866 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
23867 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
23868 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
23869 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
23871 /* PCLMUL */
23872 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
23874 /* AVX */
23875 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
23876 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
23878 /* Access to the vec_init patterns. */
23879 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
23880 integer_type_node, NULL_TREE);
23881 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
23883 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
23884 short_integer_type_node,
23885 short_integer_type_node,
23886 short_integer_type_node, NULL_TREE);
23887 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
23889 ftype = build_function_type_list (V8QI_type_node, char_type_node,
23890 char_type_node, char_type_node,
23891 char_type_node, char_type_node,
23892 char_type_node, char_type_node,
23893 char_type_node, NULL_TREE);
23894 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
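/* Editorial note: the vec_init builtins back the MMX set/setr
   intrinsics; assuming the usual <mmintrin.h> mapping, _mm_setr_pi32 is
   essentially `(__m64) __builtin_ia32_vec_init_v2si (i0, i1)'.  */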
23896 /* Access to the vec_extract patterns. */
23897 ftype = build_function_type_list (double_type_node, V2DF_type_node,
23898 integer_type_node, NULL_TREE);
23899 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
23901 ftype = build_function_type_list (long_long_integer_type_node,
23902 V2DI_type_node, integer_type_node,
23903 NULL_TREE);
23904 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
23906 ftype = build_function_type_list (float_type_node, V4SF_type_node,
23907 integer_type_node, NULL_TREE);
23908 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
23910 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
23911 integer_type_node, NULL_TREE);
23912 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
23914 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
23915 integer_type_node, NULL_TREE);
23916 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
23918 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
23919 integer_type_node, NULL_TREE);
23920 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
23922 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
23923 integer_type_node, NULL_TREE);
23924 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
23926 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
23927 integer_type_node, NULL_TREE);
23928 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
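/* Editorial note: these expose single-element extraction to the
   intrinsics headers; _mm_extract_epi16, for example, is expected to
   lower to `(int) __builtin_ia32_vec_ext_v8hi ((__v8hi) x, n)'.  */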
23930 /* Access to the vec_set patterns. */
23931 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
23932 intDI_type_node,
23933 integer_type_node, NULL_TREE);
23934 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
23936 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
23937 float_type_node,
23938 integer_type_node, NULL_TREE);
23939 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
23941 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
23942 intSI_type_node,
23943 integer_type_node, NULL_TREE);
23944 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
23946 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
23947 intHI_type_node,
23948 integer_type_node, NULL_TREE);
23949 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
23951 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
23952 intHI_type_node,
23953 integer_type_node, NULL_TREE);
23954 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
23956 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
23957 intQI_type_node,
23958 integer_type_node, NULL_TREE);
23959 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
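/* Editorial note: the vec_set builtins are the insert-side twins of the
   vec_ext builtins above; _mm_insert_epi16, for instance, is expected to
   lower to `__builtin_ia32_vec_set_v8hi ((__v8hi) x, v, n)'.  */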
23961 /* Add SSE5 multi-argument instructions.  */
23962 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23964 tree mtype = NULL_TREE;
23966 if (d->name == 0)
23967 continue;
23969 switch ((enum multi_arg_type)d->flag)
23971 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
23972 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
23973 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
23974 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
23975 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
23976 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
23977 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
23978 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
23979 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
23980 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
23981 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
23982 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
23983 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
23984 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
23985 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
23986 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
23987 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
23988 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
23989 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
23990 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
23991 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
23992 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
23993 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
23994 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
23995 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
23996 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
23997 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
23998 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
23999 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
24000 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
24001 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
24002 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
24003 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
24004 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
24005 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
24006 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
24007 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
24008 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
24009 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
24010 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
24011 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
24012 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
24013 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
24014 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
24015 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
24016 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
24017 case MULTI_ARG_UNKNOWN:
24018 default:
24019 gcc_unreachable ();
24022 if (mtype)
24023 def_builtin_const (d->mask, d->name, mtype, d->code);
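/* Illustrative note: each MULTI_ARG_* value above encodes a builtin
   signature; e.g. MULTI_ARG_3_PERMPS gives the SSE5 permps builtin
   the type v4sf (v4sf, v4sf, v16qi) -- two float vectors plus a
   byte-vector selector.  */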
24027 /* Internal method for ix86_init_builtins. */
24029 static void
24030 ix86_init_builtins_va_builtins_abi (void)
24032 tree ms_va_ref, sysv_va_ref;
24033 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
24034 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
24035 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
24036 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
24038 if (!TARGET_64BIT)
24039 return;
24040 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
24041 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
24042 ms_va_ref = build_reference_type (ms_va_list_type_node);
24043 sysv_va_ref =
24044 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
24046 fnvoid_va_end_ms =
24047 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
24048 fnvoid_va_start_ms =
24049 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
24050 fnvoid_va_end_sysv =
24051 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
24052 fnvoid_va_start_sysv =
24053 build_varargs_function_type_list (void_type_node, sysv_va_ref,
24054 NULL_TREE);
24055 fnvoid_va_copy_ms =
24056 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
24057 NULL_TREE);
24058 fnvoid_va_copy_sysv =
24059 build_function_type_list (void_type_node, sysv_va_ref,
24060 sysv_va_ref, NULL_TREE);
24062 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
24063 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
24064 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
24065 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
24066 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
24067 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
24068 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
24069 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24070 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
24071 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24072 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
24073 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
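/* Illustrative sketch, an assumption rather than something stated
   here (sum_ints is a made-up example): on a 64-bit target these
   builtins let code written for one calling convention walk the
   variadic arguments of the other, e.g.:

     __attribute__((ms_abi)) int
     sum_ints (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;
       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
         s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }
*/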
24076 static void
24077 ix86_init_builtins (void)
24079 tree float128_type_node = make_node (REAL_TYPE);
24080 tree ftype, decl;
24082 /* The __float80 type. */
24083 if (TYPE_MODE (long_double_type_node) == XFmode)
24084 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
24085 "__float80");
24086 else
24088 /* The __float80 type. */
24089 tree float80_type_node = make_node (REAL_TYPE);
24091 TYPE_PRECISION (float80_type_node) = 80;
24092 layout_type (float80_type_node);
24093 (*lang_hooks.types.register_builtin_type) (float80_type_node,
24094 "__float80");
24097 /* The __float128 type. */
24098 TYPE_PRECISION (float128_type_node) = 128;
24099 layout_type (float128_type_node);
24100 (*lang_hooks.types.register_builtin_type) (float128_type_node,
24101 "__float128");
24103 /* TFmode support builtins. */
24104 ftype = build_function_type (float128_type_node, void_list_node);
24105 decl = add_builtin_function ("__builtin_infq", ftype,
24106 IX86_BUILTIN_INFQ, BUILT_IN_MD,
24107 NULL, NULL_TREE);
24108 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
24110 decl = add_builtin_function ("__builtin_huge_valq", ftype,
24111 IX86_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
24112 NULL, NULL_TREE);
24113 ix86_builtins[(int) IX86_BUILTIN_HUGE_VALQ] = decl;
24115 /* We will expand them to a normal call if SSE2 isn't available, since
24116 they are used by libgcc.  */
24117 ftype = build_function_type_list (float128_type_node,
24118 float128_type_node,
24119 NULL_TREE);
24120 decl = add_builtin_function ("__builtin_fabsq", ftype,
24121 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
24122 "__fabstf2", NULL_TREE);
24123 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
24124 TREE_READONLY (decl) = 1;
24126 ftype = build_function_type_list (float128_type_node,
24127 float128_type_node,
24128 float128_type_node,
24129 NULL_TREE);
24130 decl = add_builtin_function ("__builtin_copysignq", ftype,
24131 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
24132 "__copysigntf3", NULL_TREE);
24133 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
24134 TREE_READONLY (decl) = 1;
24136 ix86_init_mmx_sse_builtins ();
24137 if (TARGET_64BIT)
24138 ix86_init_builtins_va_builtins_abi ();
24141 /* Errors in the source file can cause expand_expr to return const0_rtx
24142 where we expect a vector. To avoid crashing, use one of the vector
24143 clear instructions. */
24144 static rtx
24145 safe_vector_operand (rtx x, enum machine_mode mode)
24147 if (x == const0_rtx)
24148 x = CONST0_RTX (mode);
24149 return x;
24152 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
24154 static rtx
24155 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
24157 rtx pat;
24158 tree arg0 = CALL_EXPR_ARG (exp, 0);
24159 tree arg1 = CALL_EXPR_ARG (exp, 1);
24160 rtx op0 = expand_normal (arg0);
24161 rtx op1 = expand_normal (arg1);
24162 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24163 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24164 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24166 if (VECTOR_MODE_P (mode0))
24167 op0 = safe_vector_operand (op0, mode0);
24168 if (VECTOR_MODE_P (mode1))
24169 op1 = safe_vector_operand (op1, mode1);
24171 if (optimize || !target
24172 || GET_MODE (target) != tmode
24173 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24174 target = gen_reg_rtx (tmode);
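/* If the insn pattern wants a TImode operand but the builtin supplied
   an SImode value, load that value into the low element of an XMM
   register (sse2_loadd zeroes the upper elements) and reinterpret the
   register as TImode.  */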
24176 if (GET_MODE (op1) == SImode && mode1 == TImode)
24178 rtx x = gen_reg_rtx (V4SImode);
24179 emit_insn (gen_sse2_loadd (x, op1));
24180 op1 = gen_lowpart (TImode, x);
24183 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
24184 op0 = copy_to_mode_reg (mode0, op0);
24185 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
24186 op1 = copy_to_mode_reg (mode1, op1);
24188 pat = GEN_FCN (icode) (target, op0, op1);
24189 if (! pat)
24190 return 0;
24192 emit_insn (pat);
24194 return target;
24197 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
24199 static rtx
24200 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
24201 enum multi_arg_type m_type,
24202 enum rtx_code sub_code)
24204 rtx pat;
24205 int i;
24206 int nargs;
24207 bool comparison_p = false;
24208 bool tf_p = false;
24209 bool last_arg_constant = false;
24210 int num_memory = 0;
24211 struct {
24212 rtx op;
24213 enum machine_mode mode;
24214 } args[4];
24216 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24218 switch (m_type)
24220 case MULTI_ARG_3_SF:
24221 case MULTI_ARG_3_DF:
24222 case MULTI_ARG_3_DI:
24223 case MULTI_ARG_3_SI:
24224 case MULTI_ARG_3_SI_DI:
24225 case MULTI_ARG_3_HI:
24226 case MULTI_ARG_3_HI_SI:
24227 case MULTI_ARG_3_QI:
24228 case MULTI_ARG_3_PERMPS:
24229 case MULTI_ARG_3_PERMPD:
24230 nargs = 3;
24231 break;
24233 case MULTI_ARG_2_SF:
24234 case MULTI_ARG_2_DF:
24235 case MULTI_ARG_2_DI:
24236 case MULTI_ARG_2_SI:
24237 case MULTI_ARG_2_HI:
24238 case MULTI_ARG_2_QI:
24239 nargs = 2;
24240 break;
24242 case MULTI_ARG_2_DI_IMM:
24243 case MULTI_ARG_2_SI_IMM:
24244 case MULTI_ARG_2_HI_IMM:
24245 case MULTI_ARG_2_QI_IMM:
24246 nargs = 2;
24247 last_arg_constant = true;
24248 break;
24250 case MULTI_ARG_1_SF:
24251 case MULTI_ARG_1_DF:
24252 case MULTI_ARG_1_DI:
24253 case MULTI_ARG_1_SI:
24254 case MULTI_ARG_1_HI:
24255 case MULTI_ARG_1_QI:
24256 case MULTI_ARG_1_SI_DI:
24257 case MULTI_ARG_1_HI_DI:
24258 case MULTI_ARG_1_HI_SI:
24259 case MULTI_ARG_1_QI_DI:
24260 case MULTI_ARG_1_QI_SI:
24261 case MULTI_ARG_1_QI_HI:
24262 case MULTI_ARG_1_PH2PS:
24263 case MULTI_ARG_1_PS2PH:
24264 nargs = 1;
24265 break;
24267 case MULTI_ARG_2_SF_CMP:
24268 case MULTI_ARG_2_DF_CMP:
24269 case MULTI_ARG_2_DI_CMP:
24270 case MULTI_ARG_2_SI_CMP:
24271 case MULTI_ARG_2_HI_CMP:
24272 case MULTI_ARG_2_QI_CMP:
24273 nargs = 2;
24274 comparison_p = true;
24275 break;
24277 case MULTI_ARG_2_SF_TF:
24278 case MULTI_ARG_2_DF_TF:
24279 case MULTI_ARG_2_DI_TF:
24280 case MULTI_ARG_2_SI_TF:
24281 case MULTI_ARG_2_HI_TF:
24282 case MULTI_ARG_2_QI_TF:
24283 nargs = 2;
24284 tf_p = true;
24285 break;
24287 case MULTI_ARG_UNKNOWN:
24288 default:
24289 gcc_unreachable ();
24292 if (optimize || !target
24293 || GET_MODE (target) != tmode
24294 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24295 target = gen_reg_rtx (tmode);
24297 gcc_assert (nargs <= 4);
24299 for (i = 0; i < nargs; i++)
24301 tree arg = CALL_EXPR_ARG (exp, i);
24302 rtx op = expand_normal (arg);
24303 int adjust = (comparison_p) ? 1 : 0;
24304 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
24306 if (last_arg_constant && i == nargs-1)
24308 if (!CONST_INT_P (op))
24310 error ("last argument must be an immediate");
24311 return gen_reg_rtx (tmode);
24314 else
24316 if (VECTOR_MODE_P (mode))
24317 op = safe_vector_operand (op, mode);
24319 /* If we aren't optimizing, only allow one memory operand to be
24320 generated. */
24321 if (memory_operand (op, mode))
24322 num_memory++;
24324 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
24326 if (optimize
24327 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
24328 || num_memory > 1)
24329 op = force_reg (mode, op);
24332 args[i].op = op;
24333 args[i].mode = mode;
24336 switch (nargs)
24338 case 1:
24339 pat = GEN_FCN (icode) (target, args[0].op);
24340 break;
24342 case 2:
24343 if (tf_p)
24344 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
24345 GEN_INT ((int)sub_code));
24346 else if (! comparison_p)
24347 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24348 else
24350 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
24351 args[0].op,
24352 args[1].op);
24354 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
24356 break;
24358 case 3:
24359 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24360 break;
24362 default:
24363 gcc_unreachable ();
24366 if (! pat)
24367 return 0;
24369 emit_insn (pat);
24370 return target;
24373 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
24374 insns with vec_merge. */
24376 static rtx
24377 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
24378 rtx target)
24380 rtx pat;
24381 tree arg0 = CALL_EXPR_ARG (exp, 0);
24382 rtx op1, op0 = expand_normal (arg0);
24383 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24384 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24386 if (optimize || !target
24387 || GET_MODE (target) != tmode
24388 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24389 target = gen_reg_rtx (tmode);
24391 if (VECTOR_MODE_P (mode0))
24392 op0 = safe_vector_operand (op0, mode0);
24394 if ((optimize && !register_operand (op0, mode0))
24395 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24396 op0 = copy_to_mode_reg (mode0, op0);
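/* Scalar unop vec_merge patterns (e.g. sqrtss, rcpss, rsqrtss) take
   the source twice: once as the operand being transformed and once to
   supply the untouched upper vector elements, so duplicate op0.  */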
24398 op1 = op0;
24399 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
24400 op1 = copy_to_mode_reg (mode0, op1);
24402 pat = GEN_FCN (icode) (target, op0, op1);
24403 if (! pat)
24404 return 0;
24405 emit_insn (pat);
24406 return target;
24409 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
24411 static rtx
24412 ix86_expand_sse_compare (const struct builtin_description *d,
24413 tree exp, rtx target, bool swap)
24415 rtx pat;
24416 tree arg0 = CALL_EXPR_ARG (exp, 0);
24417 tree arg1 = CALL_EXPR_ARG (exp, 1);
24418 rtx op0 = expand_normal (arg0);
24419 rtx op1 = expand_normal (arg1);
24420 rtx op2;
24421 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
24422 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
24423 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
24424 enum rtx_code comparison = d->comparison;
24426 if (VECTOR_MODE_P (mode0))
24427 op0 = safe_vector_operand (op0, mode0);
24428 if (VECTOR_MODE_P (mode1))
24429 op1 = safe_vector_operand (op1, mode1);
24431 /* Swap operands if we have a comparison that isn't available in
24432 hardware. */
24433 if (swap)
24435 rtx tmp = gen_reg_rtx (mode1);
24436 emit_move_insn (tmp, op1);
24437 op1 = op0;
24438 op0 = tmp;
24441 if (optimize || !target
24442 || GET_MODE (target) != tmode
24443 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
24444 target = gen_reg_rtx (tmode);
24446 if ((optimize && !register_operand (op0, mode0))
24447 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
24448 op0 = copy_to_mode_reg (mode0, op0);
24449 if ((optimize && !register_operand (op1, mode1))
24450 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
24451 op1 = copy_to_mode_reg (mode1, op1);
24453 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
24454 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
24455 if (! pat)
24456 return 0;
24457 emit_insn (pat);
24458 return target;
24461 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
24463 static rtx
24464 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
24465 rtx target)
24467 rtx pat;
24468 tree arg0 = CALL_EXPR_ARG (exp, 0);
24469 tree arg1 = CALL_EXPR_ARG (exp, 1);
24470 rtx op0 = expand_normal (arg0);
24471 rtx op1 = expand_normal (arg1);
24472 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24473 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24474 enum rtx_code comparison = d->comparison;
24476 if (VECTOR_MODE_P (mode0))
24477 op0 = safe_vector_operand (op0, mode0);
24478 if (VECTOR_MODE_P (mode1))
24479 op1 = safe_vector_operand (op1, mode1);
24481 /* Swap operands if we have a comparison that isn't available in
24482 hardware. */
24483 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
24485 rtx tmp = op1;
24486 op1 = op0;
24487 op0 = tmp;
24490 target = gen_reg_rtx (SImode);
24491 emit_move_insn (target, const0_rtx);
24492 target = gen_rtx_SUBREG (QImode, target, 0);
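/* The flags result is materialized by zeroing a full SImode register
   and then setting only its low byte through a strict_low_part of the
   QImode subreg; the upper bits are thus known to be zero and
   SUBREG_REG (target) is a valid SImode value.  */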
24494 if ((optimize && !register_operand (op0, mode0))
24495 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24496 op0 = copy_to_mode_reg (mode0, op0);
24497 if ((optimize && !register_operand (op1, mode1))
24498 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24499 op1 = copy_to_mode_reg (mode1, op1);
24501 pat = GEN_FCN (d->icode) (op0, op1);
24502 if (! pat)
24503 return 0;
24504 emit_insn (pat);
24505 emit_insn (gen_rtx_SET (VOIDmode,
24506 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24507 gen_rtx_fmt_ee (comparison, QImode,
24508 SET_DEST (pat),
24509 const0_rtx)));
24511 return SUBREG_REG (target);
24514 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
24516 static rtx
24517 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24518 rtx target)
24520 rtx pat;
24521 tree arg0 = CALL_EXPR_ARG (exp, 0);
24522 tree arg1 = CALL_EXPR_ARG (exp, 1);
24523 rtx op0 = expand_normal (arg0);
24524 rtx op1 = expand_normal (arg1);
24525 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24526 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24527 enum rtx_code comparison = d->comparison;
24529 if (VECTOR_MODE_P (mode0))
24530 op0 = safe_vector_operand (op0, mode0);
24531 if (VECTOR_MODE_P (mode1))
24532 op1 = safe_vector_operand (op1, mode1);
24534 target = gen_reg_rtx (SImode);
24535 emit_move_insn (target, const0_rtx);
24536 target = gen_rtx_SUBREG (QImode, target, 0);
24538 if ((optimize && !register_operand (op0, mode0))
24539 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24540 op0 = copy_to_mode_reg (mode0, op0);
24541 if ((optimize && !register_operand (op1, mode1))
24542 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24543 op1 = copy_to_mode_reg (mode1, op1);
24545 pat = GEN_FCN (d->icode) (op0, op1);
24546 if (! pat)
24547 return 0;
24548 emit_insn (pat);
24549 emit_insn (gen_rtx_SET (VOIDmode,
24550 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24551 gen_rtx_fmt_ee (comparison, QImode,
24552 SET_DEST (pat),
24553 const0_rtx)));
24555 return SUBREG_REG (target);
24558 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
24560 static rtx
24561 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24562 tree exp, rtx target)
24564 rtx pat;
24565 tree arg0 = CALL_EXPR_ARG (exp, 0);
24566 tree arg1 = CALL_EXPR_ARG (exp, 1);
24567 tree arg2 = CALL_EXPR_ARG (exp, 2);
24568 tree arg3 = CALL_EXPR_ARG (exp, 3);
24569 tree arg4 = CALL_EXPR_ARG (exp, 4);
24570 rtx scratch0, scratch1;
24571 rtx op0 = expand_normal (arg0);
24572 rtx op1 = expand_normal (arg1);
24573 rtx op2 = expand_normal (arg2);
24574 rtx op3 = expand_normal (arg3);
24575 rtx op4 = expand_normal (arg4);
24576 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24578 tmode0 = insn_data[d->icode].operand[0].mode;
24579 tmode1 = insn_data[d->icode].operand[1].mode;
24580 modev2 = insn_data[d->icode].operand[2].mode;
24581 modei3 = insn_data[d->icode].operand[3].mode;
24582 modev4 = insn_data[d->icode].operand[4].mode;
24583 modei5 = insn_data[d->icode].operand[5].mode;
24584 modeimm = insn_data[d->icode].operand[6].mode;
24586 if (VECTOR_MODE_P (modev2))
24587 op0 = safe_vector_operand (op0, modev2);
24588 if (VECTOR_MODE_P (modev4))
24589 op2 = safe_vector_operand (op2, modev4);
24591 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24592 op0 = copy_to_mode_reg (modev2, op0);
24593 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
24594 op1 = copy_to_mode_reg (modei3, op1);
24595 if ((optimize && !register_operand (op2, modev4))
24596 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
24597 op2 = copy_to_mode_reg (modev4, op2);
24598 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
24599 op3 = copy_to_mode_reg (modei5, op3);
24601 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
24603 error ("the fifth argument must be a 8-bit immediate");
24604 return const0_rtx;
24607 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24609 if (optimize || !target
24610 || GET_MODE (target) != tmode0
24611 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24612 target = gen_reg_rtx (tmode0);
24614 scratch1 = gen_reg_rtx (tmode1);
24616 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
24618 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24620 if (optimize || !target
24621 || GET_MODE (target) != tmode1
24622 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24623 target = gen_reg_rtx (tmode1);
24625 scratch0 = gen_reg_rtx (tmode0);
24627 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
24629 else
24631 gcc_assert (d->flag);
24633 scratch0 = gen_reg_rtx (tmode0);
24634 scratch1 = gen_reg_rtx (tmode1);
24636 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
24639 if (! pat)
24640 return 0;
24642 emit_insn (pat);
24644 if (d->flag)
24646 target = gen_reg_rtx (SImode);
24647 emit_move_insn (target, const0_rtx);
24648 target = gen_rtx_SUBREG (QImode, target, 0);
24650 emit_insn
24651 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24652 gen_rtx_fmt_ee (EQ, QImode,
24653 gen_rtx_REG ((enum machine_mode) d->flag,
24654 FLAGS_REG),
24655 const0_rtx)));
24656 return SUBREG_REG (target);
24658 else
24659 return target;
24663 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
24665 static rtx
24666 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24667 tree exp, rtx target)
24669 rtx pat;
24670 tree arg0 = CALL_EXPR_ARG (exp, 0);
24671 tree arg1 = CALL_EXPR_ARG (exp, 1);
24672 tree arg2 = CALL_EXPR_ARG (exp, 2);
24673 rtx scratch0, scratch1;
24674 rtx op0 = expand_normal (arg0);
24675 rtx op1 = expand_normal (arg1);
24676 rtx op2 = expand_normal (arg2);
24677 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24679 tmode0 = insn_data[d->icode].operand[0].mode;
24680 tmode1 = insn_data[d->icode].operand[1].mode;
24681 modev2 = insn_data[d->icode].operand[2].mode;
24682 modev3 = insn_data[d->icode].operand[3].mode;
24683 modeimm = insn_data[d->icode].operand[4].mode;
24685 if (VECTOR_MODE_P (modev2))
24686 op0 = safe_vector_operand (op0, modev2);
24687 if (VECTOR_MODE_P (modev3))
24688 op1 = safe_vector_operand (op1, modev3);
24690 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24691 op0 = copy_to_mode_reg (modev2, op0);
24692 if ((optimize && !register_operand (op1, modev3))
24693 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
24694 op1 = copy_to_mode_reg (modev3, op1);
24696 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
24698 error ("the third argument must be a 8-bit immediate");
24699 return const0_rtx;
24702 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24704 if (optimize || !target
24705 || GET_MODE (target) != tmode0
24706 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24707 target = gen_reg_rtx (tmode0);
24709 scratch1 = gen_reg_rtx (tmode1);
24711 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
24713 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24715 if (optimize || !target
24716 || GET_MODE (target) != tmode1
24717 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24718 target = gen_reg_rtx (tmode1);
24720 scratch0 = gen_reg_rtx (tmode0);
24722 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
24724 else
24726 gcc_assert (d->flag);
24728 scratch0 = gen_reg_rtx (tmode0);
24729 scratch1 = gen_reg_rtx (tmode1);
24731 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
24734 if (! pat)
24735 return 0;
24737 emit_insn (pat);
24739 if (d->flag)
24741 target = gen_reg_rtx (SImode);
24742 emit_move_insn (target, const0_rtx);
24743 target = gen_rtx_SUBREG (QImode, target, 0);
24745 emit_insn
24746 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24747 gen_rtx_fmt_ee (EQ, QImode,
24748 gen_rtx_REG ((enum machine_mode) d->flag,
24749 FLAGS_REG),
24750 const0_rtx)));
24751 return SUBREG_REG (target);
24753 else
24754 return target;
24757 /* Subroutine of ix86_expand_builtin to take care of insns with
24758 variable number of operands. */
24760 static rtx
24761 ix86_expand_args_builtin (const struct builtin_description *d,
24762 tree exp, rtx target)
24764 rtx pat, real_target;
24765 unsigned int i, nargs;
24766 unsigned int nargs_constant = 0;
24767 int num_memory = 0;
24768 struct
24770 rtx op;
24771 enum machine_mode mode;
24772 } args[4];
24773 bool last_arg_count = false;
24774 enum insn_code icode = d->icode;
24775 const struct insn_data *insn_p = &insn_data[icode];
24776 enum machine_mode tmode = insn_p->operand[0].mode;
24777 enum machine_mode rmode = VOIDmode;
24778 bool swap = false;
24779 enum rtx_code comparison = d->comparison;
24781 switch ((enum ix86_builtin_type) d->flag)
24783 case INT_FTYPE_V8SF_V8SF_PTEST:
24784 case INT_FTYPE_V4DI_V4DI_PTEST:
24785 case INT_FTYPE_V4DF_V4DF_PTEST:
24786 case INT_FTYPE_V4SF_V4SF_PTEST:
24787 case INT_FTYPE_V2DI_V2DI_PTEST:
24788 case INT_FTYPE_V2DF_V2DF_PTEST:
24789 return ix86_expand_sse_ptest (d, exp, target);
24790 case FLOAT128_FTYPE_FLOAT128:
24791 case FLOAT_FTYPE_FLOAT:
24792 case INT_FTYPE_INT:
24793 case UINT64_FTYPE_INT:
24794 case INT64_FTYPE_INT64:
24795 case INT64_FTYPE_V4SF:
24796 case INT64_FTYPE_V2DF:
24797 case INT_FTYPE_V16QI:
24798 case INT_FTYPE_V8QI:
24799 case INT_FTYPE_V8SF:
24800 case INT_FTYPE_V4DF:
24801 case INT_FTYPE_V4SF:
24802 case INT_FTYPE_V2DF:
24803 case V16QI_FTYPE_V16QI:
24804 case V8SI_FTYPE_V8SF:
24805 case V8SI_FTYPE_V4SI:
24806 case V8HI_FTYPE_V8HI:
24807 case V8HI_FTYPE_V16QI:
24808 case V8QI_FTYPE_V8QI:
24809 case V8SF_FTYPE_V8SF:
24810 case V8SF_FTYPE_V8SI:
24811 case V8SF_FTYPE_V4SF:
24812 case V4SI_FTYPE_V4SI:
24813 case V4SI_FTYPE_V16QI:
24814 case V4SI_FTYPE_V4SF:
24815 case V4SI_FTYPE_V8SI:
24816 case V4SI_FTYPE_V8HI:
24817 case V4SI_FTYPE_V4DF:
24818 case V4SI_FTYPE_V2DF:
24819 case V4HI_FTYPE_V4HI:
24820 case V4DF_FTYPE_V4DF:
24821 case V4DF_FTYPE_V4SI:
24822 case V4DF_FTYPE_V4SF:
24823 case V4DF_FTYPE_V2DF:
24824 case V4SF_FTYPE_V4SF:
24825 case V4SF_FTYPE_V4SI:
24826 case V4SF_FTYPE_V8SF:
24827 case V4SF_FTYPE_V4DF:
24828 case V4SF_FTYPE_V2DF:
24829 case V2DI_FTYPE_V2DI:
24830 case V2DI_FTYPE_V16QI:
24831 case V2DI_FTYPE_V8HI:
24832 case V2DI_FTYPE_V4SI:
24833 case V2DF_FTYPE_V2DF:
24834 case V2DF_FTYPE_V4SI:
24835 case V2DF_FTYPE_V4DF:
24836 case V2DF_FTYPE_V4SF:
24837 case V2DF_FTYPE_V2SI:
24838 case V2SI_FTYPE_V2SI:
24839 case V2SI_FTYPE_V4SF:
24840 case V2SI_FTYPE_V2SF:
24841 case V2SI_FTYPE_V2DF:
24842 case V2SF_FTYPE_V2SF:
24843 case V2SF_FTYPE_V2SI:
24844 nargs = 1;
24845 break;
24846 case V4SF_FTYPE_V4SF_VEC_MERGE:
24847 case V2DF_FTYPE_V2DF_VEC_MERGE:
24848 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
24849 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24850 case V16QI_FTYPE_V16QI_V16QI:
24851 case V16QI_FTYPE_V8HI_V8HI:
24852 case V8QI_FTYPE_V8QI_V8QI:
24853 case V8QI_FTYPE_V4HI_V4HI:
24854 case V8HI_FTYPE_V8HI_V8HI:
24855 case V8HI_FTYPE_V16QI_V16QI:
24856 case V8HI_FTYPE_V4SI_V4SI:
24857 case V8SF_FTYPE_V8SF_V8SF:
24858 case V8SF_FTYPE_V8SF_V8SI:
24859 case V4SI_FTYPE_V4SI_V4SI:
24860 case V4SI_FTYPE_V8HI_V8HI:
24861 case V4SI_FTYPE_V4SF_V4SF:
24862 case V4SI_FTYPE_V2DF_V2DF:
24863 case V4HI_FTYPE_V4HI_V4HI:
24864 case V4HI_FTYPE_V8QI_V8QI:
24865 case V4HI_FTYPE_V2SI_V2SI:
24866 case V4DF_FTYPE_V4DF_V4DF:
24867 case V4DF_FTYPE_V4DF_V4DI:
24868 case V4SF_FTYPE_V4SF_V4SF:
24869 case V4SF_FTYPE_V4SF_V4SI:
24870 case V4SF_FTYPE_V4SF_V2SI:
24871 case V4SF_FTYPE_V4SF_V2DF:
24872 case V4SF_FTYPE_V4SF_DI:
24873 case V4SF_FTYPE_V4SF_SI:
24874 case V2DI_FTYPE_V2DI_V2DI:
24875 case V2DI_FTYPE_V16QI_V16QI:
24876 case V2DI_FTYPE_V4SI_V4SI:
24877 case V2DI_FTYPE_V2DI_V16QI:
24878 case V2DI_FTYPE_V2DF_V2DF:
24879 case V2SI_FTYPE_V2SI_V2SI:
24880 case V2SI_FTYPE_V4HI_V4HI:
24881 case V2SI_FTYPE_V2SF_V2SF:
24882 case V2DF_FTYPE_V2DF_V2DF:
24883 case V2DF_FTYPE_V2DF_V4SF:
24884 case V2DF_FTYPE_V2DF_V2DI:
24885 case V2DF_FTYPE_V2DF_DI:
24886 case V2DF_FTYPE_V2DF_SI:
24887 case V2SF_FTYPE_V2SF_V2SF:
24888 case V1DI_FTYPE_V1DI_V1DI:
24889 case V1DI_FTYPE_V8QI_V8QI:
24890 case V1DI_FTYPE_V2SI_V2SI:
24891 if (comparison == UNKNOWN)
24892 return ix86_expand_binop_builtin (icode, exp, target);
24893 nargs = 2;
24894 break;
24895 case V4SF_FTYPE_V4SF_V4SF_SWAP:
24896 case V2DF_FTYPE_V2DF_V2DF_SWAP:
24897 gcc_assert (comparison != UNKNOWN);
24898 nargs = 2;
24899 swap = true;
24900 break;
24901 case V8HI_FTYPE_V8HI_V8HI_COUNT:
24902 case V8HI_FTYPE_V8HI_SI_COUNT:
24903 case V4SI_FTYPE_V4SI_V4SI_COUNT:
24904 case V4SI_FTYPE_V4SI_SI_COUNT:
24905 case V4HI_FTYPE_V4HI_V4HI_COUNT:
24906 case V4HI_FTYPE_V4HI_SI_COUNT:
24907 case V2DI_FTYPE_V2DI_V2DI_COUNT:
24908 case V2DI_FTYPE_V2DI_SI_COUNT:
24909 case V2SI_FTYPE_V2SI_V2SI_COUNT:
24910 case V2SI_FTYPE_V2SI_SI_COUNT:
24911 case V1DI_FTYPE_V1DI_V1DI_COUNT:
24912 case V1DI_FTYPE_V1DI_SI_COUNT:
24913 nargs = 2;
24914 last_arg_count = true;
24915 break;
24916 case UINT64_FTYPE_UINT64_UINT64:
24917 case UINT_FTYPE_UINT_UINT:
24918 case UINT_FTYPE_UINT_USHORT:
24919 case UINT_FTYPE_UINT_UCHAR:
24920 case UINT16_FTYPE_UINT16_INT:
24921 case UINT8_FTYPE_UINT8_INT:
24922 nargs = 2;
24923 break;
24924 case V2DI2TI_FTYPE_V2DI_INT:
24925 nargs = 2;
24926 rmode = V2DImode;
24927 nargs_constant = 1;
24928 break;
24929 case V8HI_FTYPE_V8HI_INT:
24930 case V8SF_FTYPE_V8SF_INT:
24931 case V4SI_FTYPE_V4SI_INT:
24932 case V4SI_FTYPE_V8SI_INT:
24933 case V4HI_FTYPE_V4HI_INT:
24934 case V4DF_FTYPE_V4DF_INT:
24935 case V4SF_FTYPE_V4SF_INT:
24936 case V4SF_FTYPE_V8SF_INT:
24937 case V2DI_FTYPE_V2DI_INT:
24938 case V2DF_FTYPE_V2DF_INT:
24939 case V2DF_FTYPE_V4DF_INT:
24940 nargs = 2;
24941 nargs_constant = 1;
24942 break;
24943 case V16QI_FTYPE_V16QI_V16QI_V16QI:
24944 case V8SF_FTYPE_V8SF_V8SF_V8SF:
24945 case V4DF_FTYPE_V4DF_V4DF_V4DF:
24946 case V4SF_FTYPE_V4SF_V4SF_V4SF:
24947 case V2DF_FTYPE_V2DF_V2DF_V2DF:
24948 nargs = 3;
24949 break;
24950 case V16QI_FTYPE_V16QI_V16QI_INT:
24951 case V8HI_FTYPE_V8HI_V8HI_INT:
24952 case V8SI_FTYPE_V8SI_V8SI_INT:
24953 case V8SI_FTYPE_V8SI_V4SI_INT:
24954 case V8SF_FTYPE_V8SF_V8SF_INT:
24955 case V8SF_FTYPE_V8SF_V4SF_INT:
24956 case V4SI_FTYPE_V4SI_V4SI_INT:
24957 case V4DF_FTYPE_V4DF_V4DF_INT:
24958 case V4DF_FTYPE_V4DF_V2DF_INT:
24959 case V4SF_FTYPE_V4SF_V4SF_INT:
24960 case V2DI_FTYPE_V2DI_V2DI_INT:
24961 case V2DF_FTYPE_V2DF_V2DF_INT:
24962 nargs = 3;
24963 nargs_constant = 1;
24964 break;
24965 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
24966 nargs = 3;
24967 rmode = V2DImode;
24968 nargs_constant = 1;
24969 break;
24970 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
24971 nargs = 3;
24972 rmode = DImode;
24973 nargs_constant = 1;
24974 break;
24975 case V2DI_FTYPE_V2DI_UINT_UINT:
24976 nargs = 3;
24977 nargs_constant = 2;
24978 break;
24979 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24980 nargs = 4;
24981 nargs_constant = 2;
24982 break;
24983 default:
24984 gcc_unreachable ();
24987 gcc_assert (nargs <= ARRAY_SIZE (args));
24989 if (comparison != UNKNOWN)
24991 gcc_assert (nargs == 2);
24992 return ix86_expand_sse_compare (d, exp, target, swap);
24995 if (rmode == VOIDmode || rmode == tmode)
24997 if (optimize
24998 || target == 0
24999 || GET_MODE (target) != tmode
25000 || ! (*insn_p->operand[0].predicate) (target, tmode))
25001 target = gen_reg_rtx (tmode);
25002 real_target = target;
25004 else
25006 target = gen_reg_rtx (rmode);
25007 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
25010 for (i = 0; i < nargs; i++)
25012 tree arg = CALL_EXPR_ARG (exp, i);
25013 rtx op = expand_normal (arg);
25014 enum machine_mode mode = insn_p->operand[i + 1].mode;
25015 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
25017 if (last_arg_count && (i + 1) == nargs)
25019 /* SIMD shift insns take either an 8-bit immediate or a
25020 register as the count.  But builtin functions take an int as
25021 the count.  If the count doesn't match, we put it in a register.  */
25022 if (!match)
25024 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
25025 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
25026 op = copy_to_reg (op);
25029 else if ((nargs - i) <= nargs_constant)
25031 if (!match)
25032 switch (icode)
25034 case CODE_FOR_sse4_1_roundpd:
25035 case CODE_FOR_sse4_1_roundps:
25036 case CODE_FOR_sse4_1_roundsd:
25037 case CODE_FOR_sse4_1_roundss:
25038 case CODE_FOR_sse4_1_blendps:
25039 case CODE_FOR_avx_blendpd256:
25040 case CODE_FOR_avx_vpermilv4df:
25041 case CODE_FOR_avx_roundpd256:
25042 case CODE_FOR_avx_roundps256:
25043 error ("the last argument must be a 4-bit immediate");
25044 return const0_rtx;
25046 case CODE_FOR_sse4_1_blendpd:
25047 case CODE_FOR_avx_vpermilv2df:
25048 error ("the last argument must be a 2-bit immediate");
25049 return const0_rtx;
25051 case CODE_FOR_avx_vextractf128v4df:
25052 case CODE_FOR_avx_vextractf128v8sf:
25053 case CODE_FOR_avx_vextractf128v8si:
25054 case CODE_FOR_avx_vinsertf128v4df:
25055 case CODE_FOR_avx_vinsertf128v8sf:
25056 case CODE_FOR_avx_vinsertf128v8si:
25057 error ("the last argument must be a 1-bit immediate");
25058 return const0_rtx;
25060 case CODE_FOR_avx_cmpsdv2df3:
25061 case CODE_FOR_avx_cmpssv4sf3:
25062 case CODE_FOR_avx_cmppdv2df3:
25063 case CODE_FOR_avx_cmppsv4sf3:
25064 case CODE_FOR_avx_cmppdv4df3:
25065 case CODE_FOR_avx_cmppsv8sf3:
25066 error ("the last argument must be a 5-bit immediate");
25067 return const0_rtx;
25069 default:
25070 switch (nargs_constant)
25072 case 2:
25073 if ((nargs - i) == nargs_constant)
25075 error ("the next to last argument must be an 8-bit immediate");
25076 break;
25078 case 1:
25079 error ("the last argument must be an 8-bit immediate");
25080 break;
25081 default:
25082 gcc_unreachable ();
25084 return const0_rtx;
25087 else
25089 if (VECTOR_MODE_P (mode))
25090 op = safe_vector_operand (op, mode);
25092 /* If we aren't optimizing, only allow one memory operand to
25093 be generated. */
25094 if (memory_operand (op, mode))
25095 num_memory++;
25097 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
25099 if (optimize || !match || num_memory > 1)
25100 op = copy_to_mode_reg (mode, op);
25102 else
25104 op = copy_to_reg (op);
25105 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
25109 args[i].op = op;
25110 args[i].mode = mode;
25113 switch (nargs)
25115 case 1:
25116 pat = GEN_FCN (icode) (real_target, args[0].op);
25117 break;
25118 case 2:
25119 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
25120 break;
25121 case 3:
25122 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25123 args[2].op);
25124 break;
25125 case 4:
25126 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25127 args[2].op, args[3].op);
25128 break;
25129 default:
25130 gcc_unreachable ();
25133 if (! pat)
25134 return 0;
25136 emit_insn (pat);
25137 return target;
25140 /* Subroutine of ix86_expand_builtin to take care of special insns
25141 with variable number of operands. */
25143 static rtx
25144 ix86_expand_special_args_builtin (const struct builtin_description *d,
25145 tree exp, rtx target)
25147 tree arg;
25148 rtx pat, op;
25149 unsigned int i, nargs, arg_adjust, memory;
25150 struct
25152 rtx op;
25153 enum machine_mode mode;
25154 } args[2];
25155 enum insn_code icode = d->icode;
25156 bool last_arg_constant = false;
25157 const struct insn_data *insn_p = &insn_data[icode];
25158 enum machine_mode tmode = insn_p->operand[0].mode;
25159 enum { load, store } klass;
25161 switch ((enum ix86_special_builtin_type) d->flag)
25163 case VOID_FTYPE_VOID:
25164 emit_insn (GEN_FCN (icode) (target));
25165 return 0;
25166 case UINT64_FTYPE_VOID:
25167 nargs = 0;
25168 klass = load;
25169 memory = 0;
25170 break;
25171 case UINT64_FTYPE_PUNSIGNED:
25172 case V2DI_FTYPE_PV2DI:
25173 case V32QI_FTYPE_PCCHAR:
25174 case V16QI_FTYPE_PCCHAR:
25175 case V8SF_FTYPE_PCV4SF:
25176 case V8SF_FTYPE_PCFLOAT:
25177 case V4SF_FTYPE_PCFLOAT:
25178 case V4DF_FTYPE_PCV2DF:
25179 case V4DF_FTYPE_PCDOUBLE:
25180 case V2DF_FTYPE_PCDOUBLE:
25181 nargs = 1;
25182 klass = load;
25183 memory = 0;
25184 break;
25185 case VOID_FTYPE_PV2SF_V4SF:
25186 case VOID_FTYPE_PV4DI_V4DI:
25187 case VOID_FTYPE_PV2DI_V2DI:
25188 case VOID_FTYPE_PCHAR_V32QI:
25189 case VOID_FTYPE_PCHAR_V16QI:
25190 case VOID_FTYPE_PFLOAT_V8SF:
25191 case VOID_FTYPE_PFLOAT_V4SF:
25192 case VOID_FTYPE_PDOUBLE_V4DF:
25193 case VOID_FTYPE_PDOUBLE_V2DF:
25194 case VOID_FTYPE_PDI_DI:
25195 case VOID_FTYPE_PINT_INT:
25196 nargs = 1;
25197 klass = store;
25198 /* Reserve memory operand for target. */
25199 memory = ARRAY_SIZE (args);
25200 break;
25201 case V4SF_FTYPE_V4SF_PCV2SF:
25202 case V2DF_FTYPE_V2DF_PCDOUBLE:
25203 nargs = 2;
25204 klass = load;
25205 memory = 1;
25206 break;
25207 case V8SF_FTYPE_PCV8SF_V8SF:
25208 case V4DF_FTYPE_PCV4DF_V4DF:
25209 case V4SF_FTYPE_PCV4SF_V4SF:
25210 case V2DF_FTYPE_PCV2DF_V2DF:
25211 nargs = 2;
25212 klass = load;
25213 memory = 0;
25214 break;
25215 case VOID_FTYPE_PV8SF_V8SF_V8SF:
25216 case VOID_FTYPE_PV4DF_V4DF_V4DF:
25217 case VOID_FTYPE_PV4SF_V4SF_V4SF:
25218 case VOID_FTYPE_PV2DF_V2DF_V2DF:
25219 nargs = 2;
25220 klass = store;
25221 /* Reserve memory operand for target. */
25222 memory = ARRAY_SIZE (args);
25223 break;
25224 default:
25225 gcc_unreachable ();
25228 gcc_assert (nargs <= ARRAY_SIZE (args));
25230 if (klass == store)
25232 arg = CALL_EXPR_ARG (exp, 0);
25233 op = expand_normal (arg);
25234 gcc_assert (target == 0);
25235 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
25236 arg_adjust = 1;
25238 else
25240 arg_adjust = 0;
25241 if (optimize
25242 || target == 0
25243 || GET_MODE (target) != tmode
25244 || ! (*insn_p->operand[0].predicate) (target, tmode))
25245 target = gen_reg_rtx (tmode);
25248 for (i = 0; i < nargs; i++)
25250 enum machine_mode mode = insn_p->operand[i + 1].mode;
25251 bool match;
25253 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
25254 op = expand_normal (arg);
25255 match = (*insn_p->operand[i + 1].predicate) (op, mode);
25257 if (last_arg_constant && (i + 1) == nargs)
25259 if (!match)
25260 switch (icode)
25262 default:
25263 error ("the last argument must be an 8-bit immediate");
25264 return const0_rtx;
25267 else
25269 if (i == memory)
25271 /* This must be the memory operand. */
25272 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
25273 gcc_assert (GET_MODE (op) == mode
25274 || GET_MODE (op) == VOIDmode);
25276 else
25278 /* This must be a register.  */
25279 if (VECTOR_MODE_P (mode))
25280 op = safe_vector_operand (op, mode);
25282 gcc_assert (GET_MODE (op) == mode
25283 || GET_MODE (op) == VOIDmode);
25284 op = copy_to_mode_reg (mode, op);
25288 args[i].op = op;
25289 args[i].mode = mode;
25292 switch (nargs)
25294 case 0:
25295 pat = GEN_FCN (icode) (target);
25296 break;
25297 case 1:
25298 pat = GEN_FCN (icode) (target, args[0].op);
25299 break;
25300 case 2:
25301 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25302 break;
25303 default:
25304 gcc_unreachable ();
25307 if (! pat)
25308 return 0;
25309 emit_insn (pat);
25310 return klass == store ? 0 : target;
25313 /* Return the integer constant in ARG. Constrain it to be in the range
25314 of the subparts of VEC_TYPE; issue an error if not. */
25316 static int
25317 get_element_number (tree vec_type, tree arg)
25319 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
25321 if (!host_integerp (arg, 1)
25322 || (elt = tree_low_cst (arg, 1), elt > max))
25324 error ("selector must be an integer constant in the range 0..%wi", max);
25325 return 0;
25328 return elt;
25331 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25332 ix86_expand_vector_init. We DO have language-level syntax for this, in
25333 the form of (type){ init-list }. Except that since we can't place emms
25334 instructions from inside the compiler, we can't allow the use of MMX
25335 registers unless the user explicitly asks for it. So we do *not* define
25336 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25337 we have builtins invoked by mmintrin.h that give us license to emit
25338 these sorts of instructions. */
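/* Illustrative sketch, an assumption rather than something stated
   here: with -mmmx the mmintrin.h intrinsics reach these wrappers,
   e.g. _mm_set_pi32 boils down to a call of the form

     __builtin_ia32_vec_init_v2si (e0, e1);

   which ix86_expand_vec_init_builtin below turns into an
   ix86_expand_vector_init sequence.  */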
25340 static rtx
25341 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25343 enum machine_mode tmode = TYPE_MODE (type);
25344 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25345 int i, n_elt = GET_MODE_NUNITS (tmode);
25346 rtvec v = rtvec_alloc (n_elt);
25348 gcc_assert (VECTOR_MODE_P (tmode));
25349 gcc_assert (call_expr_nargs (exp) == n_elt);
25351 for (i = 0; i < n_elt; ++i)
25353 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25354 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
25357 if (!target || !register_operand (target, tmode))
25358 target = gen_reg_rtx (tmode);
25360 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
25361 return target;
25364 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25365 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25366 had a language-level syntax for referencing vector elements. */
25368 static rtx
25369 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25371 enum machine_mode tmode, mode0;
25372 tree arg0, arg1;
25373 int elt;
25374 rtx op0;
25376 arg0 = CALL_EXPR_ARG (exp, 0);
25377 arg1 = CALL_EXPR_ARG (exp, 1);
25379 op0 = expand_normal (arg0);
25380 elt = get_element_number (TREE_TYPE (arg0), arg1);
25382 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25383 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25384 gcc_assert (VECTOR_MODE_P (mode0));
25386 op0 = force_reg (mode0, op0);
25388 if (optimize || !target || !register_operand (target, tmode))
25389 target = gen_reg_rtx (tmode);
25391 ix86_expand_vector_extract (true, target, op0, elt);
25393 return target;
25396 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25397 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25398 a language-level syntax for referencing vector elements. */
25400 static rtx
25401 ix86_expand_vec_set_builtin (tree exp)
25403 enum machine_mode tmode, mode1;
25404 tree arg0, arg1, arg2;
25405 int elt;
25406 rtx op0, op1, target;
25408 arg0 = CALL_EXPR_ARG (exp, 0);
25409 arg1 = CALL_EXPR_ARG (exp, 1);
25410 arg2 = CALL_EXPR_ARG (exp, 2);
25412 tmode = TYPE_MODE (TREE_TYPE (arg0));
25413 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25414 gcc_assert (VECTOR_MODE_P (tmode));
25416 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25417 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
25418 elt = get_element_number (TREE_TYPE (arg0), arg2);
25420 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25421 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25423 op0 = force_reg (tmode, op0);
25424 op1 = force_reg (mode1, op1);
25426 /* OP0 is the source of these builtin functions and shouldn't be
25427 modified.  Create a copy, use it, and return it as the target.  */
25428 target = gen_reg_rtx (tmode);
25429 emit_move_insn (target, op0);
25430 ix86_expand_vector_set (true, target, op1, elt);
25432 return target;
25435 /* Expand an expression EXP that calls a built-in function,
25436 with result going to TARGET if that's convenient
25437 (and in mode MODE if that's convenient).
25438 SUBTARGET may be used as the target for computing one of EXP's operands.
25439 IGNORE is nonzero if the value is to be ignored. */
25441 static rtx
25442 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25443 enum machine_mode mode ATTRIBUTE_UNUSED,
25444 int ignore ATTRIBUTE_UNUSED)
25446 const struct builtin_description *d;
25447 size_t i;
25448 enum insn_code icode;
25449 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25450 tree arg0, arg1, arg2;
25451 rtx op0, op1, op2, pat;
25452 enum machine_mode mode0, mode1, mode2;
25453 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25455 /* Determine whether the builtin function is available under the current ISA.
25456 Originally the builtin was not created if it wasn't applicable to the
25457 current ISA based on the command-line switches.  With function-specific
25458 options, we need to check in the context of the function making the call
25459 whether it is supported. */
25460 if (ix86_builtins_isa[fcode].isa
25461 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
25463 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25464 NULL, NULL, false);
25466 if (!opts)
25467 error ("%qE needs unknown isa option", fndecl);
25468 else
25470 gcc_assert (opts != NULL);
25471 error ("%qE needs isa option %s", fndecl, opts);
25472 free (opts);
25474 return const0_rtx;
25477 switch (fcode)
25479 case IX86_BUILTIN_MASKMOVQ:
25480 case IX86_BUILTIN_MASKMOVDQU:
25481 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25482 ? CODE_FOR_mmx_maskmovq
25483 : CODE_FOR_sse2_maskmovdqu);
25484 /* Note the arg order is different from the operand order. */
25485 arg1 = CALL_EXPR_ARG (exp, 0);
25486 arg2 = CALL_EXPR_ARG (exp, 1);
25487 arg0 = CALL_EXPR_ARG (exp, 2);
25488 op0 = expand_normal (arg0);
25489 op1 = expand_normal (arg1);
25490 op2 = expand_normal (arg2);
25491 mode0 = insn_data[icode].operand[0].mode;
25492 mode1 = insn_data[icode].operand[1].mode;
25493 mode2 = insn_data[icode].operand[2].mode;
25495 op0 = force_reg (Pmode, op0);
25496 op0 = gen_rtx_MEM (mode1, op0);
25498 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
25499 op0 = copy_to_mode_reg (mode0, op0);
25500 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
25501 op1 = copy_to_mode_reg (mode1, op1);
25502 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
25503 op2 = copy_to_mode_reg (mode2, op2);
25504 pat = GEN_FCN (icode) (op0, op1, op2);
25505 if (! pat)
25506 return 0;
25507 emit_insn (pat);
25508 return 0;
25510 case IX86_BUILTIN_LDMXCSR:
25511 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25512 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25513 emit_move_insn (target, op0);
25514 emit_insn (gen_sse_ldmxcsr (target));
25515 return 0;
25517 case IX86_BUILTIN_STMXCSR:
25518 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25519 emit_insn (gen_sse_stmxcsr (target));
25520 return copy_to_mode_reg (SImode, target);
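/* Illustrative note (an assumption): these two cases back the
   _mm_setcsr and _mm_getcsr intrinsics, e.g.

     unsigned int csr = __builtin_ia32_stmxcsr ();
     __builtin_ia32_ldmxcsr (csr | 0x8040);    (set FTZ and DAZ)

   The value is staged through a stack slot because the ldmxcsr and
   stmxcsr instructions only take memory operands.  */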
25522 case IX86_BUILTIN_CLFLUSH:
25523 arg0 = CALL_EXPR_ARG (exp, 0);
25524 op0 = expand_normal (arg0);
25525 icode = CODE_FOR_sse2_clflush;
25526 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
25527 op0 = copy_to_mode_reg (Pmode, op0);
25529 emit_insn (gen_sse2_clflush (op0));
25530 return 0;
25532 case IX86_BUILTIN_MONITOR:
25533 arg0 = CALL_EXPR_ARG (exp, 0);
25534 arg1 = CALL_EXPR_ARG (exp, 1);
25535 arg2 = CALL_EXPR_ARG (exp, 2);
25536 op0 = expand_normal (arg0);
25537 op1 = expand_normal (arg1);
25538 op2 = expand_normal (arg2);
25539 if (!REG_P (op0))
25540 op0 = copy_to_mode_reg (Pmode, op0);
25541 if (!REG_P (op1))
25542 op1 = copy_to_mode_reg (SImode, op1);
25543 if (!REG_P (op2))
25544 op2 = copy_to_mode_reg (SImode, op2);
25545 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
25546 return 0;
25548 case IX86_BUILTIN_MWAIT:
25549 arg0 = CALL_EXPR_ARG (exp, 0);
25550 arg1 = CALL_EXPR_ARG (exp, 1);
25551 op0 = expand_normal (arg0);
25552 op1 = expand_normal (arg1);
25553 if (!REG_P (op0))
25554 op0 = copy_to_mode_reg (SImode, op0);
25555 if (!REG_P (op1))
25556 op1 = copy_to_mode_reg (SImode, op1);
25557 emit_insn (gen_sse3_mwait (op0, op1));
25558 return 0;
25560 case IX86_BUILTIN_VEC_INIT_V2SI:
25561 case IX86_BUILTIN_VEC_INIT_V4HI:
25562 case IX86_BUILTIN_VEC_INIT_V8QI:
25563 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25565 case IX86_BUILTIN_VEC_EXT_V2DF:
25566 case IX86_BUILTIN_VEC_EXT_V2DI:
25567 case IX86_BUILTIN_VEC_EXT_V4SF:
25568 case IX86_BUILTIN_VEC_EXT_V4SI:
25569 case IX86_BUILTIN_VEC_EXT_V8HI:
25570 case IX86_BUILTIN_VEC_EXT_V2SI:
25571 case IX86_BUILTIN_VEC_EXT_V4HI:
25572 case IX86_BUILTIN_VEC_EXT_V16QI:
25573 return ix86_expand_vec_ext_builtin (exp, target);
25575 case IX86_BUILTIN_VEC_SET_V2DI:
25576 case IX86_BUILTIN_VEC_SET_V4SF:
25577 case IX86_BUILTIN_VEC_SET_V4SI:
25578 case IX86_BUILTIN_VEC_SET_V8HI:
25579 case IX86_BUILTIN_VEC_SET_V4HI:
25580 case IX86_BUILTIN_VEC_SET_V16QI:
25581 return ix86_expand_vec_set_builtin (exp);
25583 case IX86_BUILTIN_INFQ:
25584 case IX86_BUILTIN_HUGE_VALQ:
25586 REAL_VALUE_TYPE inf;
25587 rtx tmp;
25589 real_inf (&inf);
25590 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25592 tmp = validize_mem (force_const_mem (mode, tmp));
25594 if (target == 0)
25595 target = gen_reg_rtx (mode);
25597 emit_move_insn (target, tmp);
25598 return target;
25601 default:
25602 break;
25605 for (i = 0, d = bdesc_special_args;
25606 i < ARRAY_SIZE (bdesc_special_args);
25607 i++, d++)
25608 if (d->code == fcode)
25609 return ix86_expand_special_args_builtin (d, exp, target);
25611 for (i = 0, d = bdesc_args;
25612 i < ARRAY_SIZE (bdesc_args);
25613 i++, d++)
25614 if (d->code == fcode)
25615 switch (fcode)
25617 case IX86_BUILTIN_FABSQ:
25618 case IX86_BUILTIN_COPYSIGNQ:
25619 if (!TARGET_SSE2)
25620 /* Emit a normal call if SSE2 isn't available. */
25621 return expand_call (exp, target, ignore);
25622 default:
25623 return ix86_expand_args_builtin (d, exp, target);
25626 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25627 if (d->code == fcode)
25628 return ix86_expand_sse_comi (d, exp, target);
25630 for (i = 0, d = bdesc_pcmpestr;
25631 i < ARRAY_SIZE (bdesc_pcmpestr);
25632 i++, d++)
25633 if (d->code == fcode)
25634 return ix86_expand_sse_pcmpestr (d, exp, target);
25636 for (i = 0, d = bdesc_pcmpistr;
25637 i < ARRAY_SIZE (bdesc_pcmpistr);
25638 i++, d++)
25639 if (d->code == fcode)
25640 return ix86_expand_sse_pcmpistr (d, exp, target);
25642 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25643 if (d->code == fcode)
25644 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25645 (enum multi_arg_type)d->flag,
25646 d->comparison);
25648 gcc_unreachable ();
25651 /* Returns a function decl for a vectorized version of the builtin function
25652 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25653 if it is not available. */
25655 static tree
25656 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
25657 tree type_in)
25659 enum machine_mode in_mode, out_mode;
25660 int in_n, out_n;
25662 if (TREE_CODE (type_out) != VECTOR_TYPE
25663 || TREE_CODE (type_in) != VECTOR_TYPE)
25664 return NULL_TREE;
25666 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25667 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25668 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25669 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25671 switch (fn)
25673 case BUILT_IN_SQRT:
25674 if (out_mode == DFmode && out_n == 2
25675 && in_mode == DFmode && in_n == 2)
25676 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25677 break;
25679 case BUILT_IN_SQRTF:
25680 if (out_mode == SFmode && out_n == 4
25681 && in_mode == SFmode && in_n == 4)
25682 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25683 break;
25685 case BUILT_IN_LRINT:
25686 if (out_mode == SImode && out_n == 4
25687 && in_mode == DFmode && in_n == 2)
25688 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25689 break;
25691 case BUILT_IN_LRINTF:
25692 if (out_mode == SImode && out_n == 4
25693 && in_mode == SFmode && in_n == 4)
25694 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25695 break;
25697 default:
25701 /* Dispatch to a handler for a vectorization library. */
25702 if (ix86_veclib_handler)
25703 return (*ix86_veclib_handler) ((enum built_in_function) fn, type_out,
25704 type_in);
25706 return NULL_TREE;
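/* Illustrative sketch, not from this file: the vectorizer consults
   the hook above when it finds a math call in a loop, so with SSE2 a
   loop over doubles such as

     for (i = 0; i < n; i++)
       out[i] = sqrt (in[i]);

   can be rewritten in terms of IX86_BUILTIN_SQRTPD, one sqrtpd per
   pair of elements.  */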
25709 /* Handler for an SVML-style interface to
25710 a library with vectorized intrinsics. */
25712 static tree
25713 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25715 char name[20];
25716 tree fntype, new_fndecl, args;
25717 unsigned arity;
25718 const char *bname;
25719 enum machine_mode el_mode, in_mode;
25720 int n, in_n;
25722 /* SVML is suitable only for unsafe math.  */
25723 if (!flag_unsafe_math_optimizations)
25724 return NULL_TREE;
25726 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25727 n = TYPE_VECTOR_SUBPARTS (type_out);
25728 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25729 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25730 if (el_mode != in_mode
25731 || n != in_n)
25732 return NULL_TREE;
25734 switch (fn)
25736 case BUILT_IN_EXP:
25737 case BUILT_IN_LOG:
25738 case BUILT_IN_LOG10:
25739 case BUILT_IN_POW:
25740 case BUILT_IN_TANH:
25741 case BUILT_IN_TAN:
25742 case BUILT_IN_ATAN:
25743 case BUILT_IN_ATAN2:
25744 case BUILT_IN_ATANH:
25745 case BUILT_IN_CBRT:
25746 case BUILT_IN_SINH:
25747 case BUILT_IN_SIN:
25748 case BUILT_IN_ASINH:
25749 case BUILT_IN_ASIN:
25750 case BUILT_IN_COSH:
25751 case BUILT_IN_COS:
25752 case BUILT_IN_ACOSH:
25753 case BUILT_IN_ACOS:
25754 if (el_mode != DFmode || n != 2)
25755 return NULL_TREE;
25756 break;
25758 case BUILT_IN_EXPF:
25759 case BUILT_IN_LOGF:
25760 case BUILT_IN_LOG10F:
25761 case BUILT_IN_POWF:
25762 case BUILT_IN_TANHF:
25763 case BUILT_IN_TANF:
25764 case BUILT_IN_ATANF:
25765 case BUILT_IN_ATAN2F:
25766 case BUILT_IN_ATANHF:
25767 case BUILT_IN_CBRTF:
25768 case BUILT_IN_SINHF:
25769 case BUILT_IN_SINF:
25770 case BUILT_IN_ASINHF:
25771 case BUILT_IN_ASINF:
25772 case BUILT_IN_COSHF:
25773 case BUILT_IN_COSF:
25774 case BUILT_IN_ACOSHF:
25775 case BUILT_IN_ACOSF:
25776 if (el_mode != SFmode || n != 4)
25777 return NULL_TREE;
25778 break;
25780 default:
25781 return NULL_TREE;
25784 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25786 if (fn == BUILT_IN_LOGF)
25787 strcpy (name, "vmlsLn4");
25788 else if (fn == BUILT_IN_LOG)
25789 strcpy (name, "vmldLn2");
25790 else if (n == 4)
25792 sprintf (name, "vmls%s", bname+10);
25793 name[strlen (name)-1] = '4';
25795 else
25796 sprintf (name, "vmld%s2", bname+10);
25798 /* Convert to uppercase. */
25799 name[4] &= ~0x20;
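/* For example (worked from the code above): BUILT_IN_SINF has bname
   "__builtin_sinf", so bname + 10 is "sinf"; the n == 4 branch forms
   "vmlssinf", the trailing character is overwritten to give
   "vmlssin4", and uppercasing name[4] yields "vmlsSin4".  */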
25801 arity = 0;
25802 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25803 args = TREE_CHAIN (args))
25804 arity++;
25806 if (arity == 1)
25807 fntype = build_function_type_list (type_out, type_in, NULL);
25808 else
25809 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25811 /* Build a function declaration for the vectorized function. */
25812 new_fndecl = build_decl (BUILTINS_LOCATION,
25813 FUNCTION_DECL, get_identifier (name), fntype);
25814 TREE_PUBLIC (new_fndecl) = 1;
25815 DECL_EXTERNAL (new_fndecl) = 1;
25816 DECL_IS_NOVOPS (new_fndecl) = 1;
25817 TREE_READONLY (new_fndecl) = 1;
25819 return new_fndecl;
25822 /* Handler for an ACML-style interface to
25823 a library with vectorized intrinsics. */
25825 static tree
25826 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
25828 char name[20] = "__vr.._";
25829 tree fntype, new_fndecl, args;
25830 unsigned arity;
25831 const char *bname;
25832 enum machine_mode el_mode, in_mode;
25833 int n, in_n;
25835 /* The ACML is 64-bit only and suitable only for unsafe math, as
25836 it does not correctly support parts of IEEE arithmetic, such as
25837 denormals, with the required precision.  */
25838 if (!TARGET_64BIT
25839 || !flag_unsafe_math_optimizations)
25840 return NULL_TREE;
25842 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25843 n = TYPE_VECTOR_SUBPARTS (type_out);
25844 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25845 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25846 if (el_mode != in_mode
25847 || n != in_n)
25848 return NULL_TREE;
25850 switch (fn)
25852 case BUILT_IN_SIN:
25853 case BUILT_IN_COS:
25854 case BUILT_IN_EXP:
25855 case BUILT_IN_LOG:
25856 case BUILT_IN_LOG2:
25857 case BUILT_IN_LOG10:
25858 name[4] = 'd';
25859 name[5] = '2';
25860 if (el_mode != DFmode
25861 || n != 2)
25862 return NULL_TREE;
25863 break;
25865 case BUILT_IN_SINF:
25866 case BUILT_IN_COSF:
25867 case BUILT_IN_EXPF:
25868 case BUILT_IN_POWF:
25869 case BUILT_IN_LOGF:
25870 case BUILT_IN_LOG2F:
25871 case BUILT_IN_LOG10F:
25872 name[4] = 's';
25873 name[5] = '4';
25874 if (el_mode != SFmode
25875 || n != 4)
25876 return NULL_TREE;
25877 break;
25879 default:
25880 return NULL_TREE;
25883 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25884 sprintf (name + 7, "%s", bname+10);
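/* For example, the "__vr.._" template was specialized above to "__vrd2_"
 or "__vrs4_", so BUILT_IN_SIN yields "__vrd2_sin" and BUILT_IN_SINF
 yields "__vrs4_sinf", matching the ACML vector routine naming scheme. */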
25886 arity = 0;
25887 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25888 args = TREE_CHAIN (args))
25889 arity++;
25891 if (arity == 1)
25892 fntype = build_function_type_list (type_out, type_in, NULL);
25893 else
25894 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25896 /* Build a function declaration for the vectorized function. */
25897 new_fndecl = build_decl (BUILTINS_LOCATION,
25898 FUNCTION_DECL, get_identifier (name), fntype);
25899 TREE_PUBLIC (new_fndecl) = 1;
25900 DECL_EXTERNAL (new_fndecl) = 1;
25901 DECL_IS_NOVOPS (new_fndecl) = 1;
25902 TREE_READONLY (new_fndecl) = 1;
25904 return new_fndecl;
25908 /* Returns a decl of a function that implements conversion of an integer vector
25909 into a floating-point vector, or vice versa. TYPE is the type of the integer
25910 side of the conversion.
25911 Return NULL_TREE if it is not available. */
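/* For example, per the cases below, a FLOAT_EXPR from V4SImode yields the
 decl for IX86_BUILTIN_CVTDQ2PS (the cvtdq2ps instruction), and a
 FIX_TRUNC_EXPR to V4SImode yields IX86_BUILTIN_CVTTPS2DQ (cvttps2dq). */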
25913 static tree
25914 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
25916 if (TREE_CODE (type) != VECTOR_TYPE
25917 /* There are only conversions from/to signed integers. */
25918 || TYPE_UNSIGNED (TREE_TYPE (type)))
25919 return NULL_TREE;
25921 switch (code)
25923 case FLOAT_EXPR:
25924 switch (TYPE_MODE (type))
25926 case V4SImode:
25927 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
25928 default:
25929 return NULL_TREE;
25932 case FIX_TRUNC_EXPR:
25933 switch (TYPE_MODE (type))
25935 case V4SImode:
25936 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
25937 default:
25938 return NULL_TREE;
25940 default:
25941 return NULL_TREE;
25946 /* Returns the decl of a target-specific builtin that implements the
25947 reciprocal of the function FN, or NULL_TREE if not available. */
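/* For example, under -mrecip together with unsafe, finite, non-trapping
 math (the flags tested below), the reciprocal of a scalar sqrtf maps to
 IX86_BUILTIN_RSQRTF and the vectorized IX86_BUILTIN_SQRTPS_NR maps to
 IX86_BUILTIN_RSQRTPS_NR, so the rsqrtss/rsqrtps approximation (with a
 Newton-Raphson refinement step, hence the _NR suffix) can be used. */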
25949 static tree
25950 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25951 bool sqrt ATTRIBUTE_UNUSED)
25953 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
25954 && flag_finite_math_only && !flag_trapping_math
25955 && flag_unsafe_math_optimizations))
25956 return NULL_TREE;
25958 if (md_fn)
25959 /* Machine dependent builtins. */
25960 switch (fn)
25962 /* Vectorized version of sqrt to rsqrt conversion. */
25963 case IX86_BUILTIN_SQRTPS_NR:
25964 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25966 default:
25967 return NULL_TREE;
25969 else
25970 /* Normal builtins. */
25971 switch (fn)
25973 /* Sqrt to rsqrt conversion. */
25974 case BUILT_IN_SQRTF:
25975 return ix86_builtins[IX86_BUILTIN_RSQRTF];
25977 default:
25978 return NULL_TREE;
25982 /* Store OPERAND to the memory after reload is completed. This means
25983 that we can't easily use assign_stack_local. */
25984 rtx
25985 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25987 rtx result;
25989 gcc_assert (reload_completed);
25990 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
25992 result = gen_rtx_MEM (mode,
25993 gen_rtx_PLUS (Pmode,
25994 stack_pointer_rtx,
25995 GEN_INT (-RED_ZONE_SIZE)));
25996 emit_move_insn (result, operand);
25998 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
26000 switch (mode)
26002 case HImode:
26003 case SImode:
26004 operand = gen_lowpart (DImode, operand);
26005 /* FALLTHRU */
26006 case DImode:
26007 emit_insn (
26008 gen_rtx_SET (VOIDmode,
26009 gen_rtx_MEM (DImode,
26010 gen_rtx_PRE_DEC (DImode,
26011 stack_pointer_rtx)),
26012 operand));
26013 break;
26014 default:
26015 gcc_unreachable ();
26017 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26019 else
26021 switch (mode)
26023 case DImode:
26025 rtx operands[2];
26026 split_di (&operand, 1, operands, operands + 1);
26027 emit_insn (
26028 gen_rtx_SET (VOIDmode,
26029 gen_rtx_MEM (SImode,
26030 gen_rtx_PRE_DEC (Pmode,
26031 stack_pointer_rtx)),
26032 operands[1]));
26033 emit_insn (
26034 gen_rtx_SET (VOIDmode,
26035 gen_rtx_MEM (SImode,
26036 gen_rtx_PRE_DEC (Pmode,
26037 stack_pointer_rtx)),
26038 operands[0]));
26040 break;
26041 case HImode:
26042 /* Store HImodes as SImodes. */
26043 operand = gen_lowpart (SImode, operand);
26044 /* FALLTHRU */
26045 case SImode:
26046 emit_insn (
26047 gen_rtx_SET (VOIDmode,
26048 gen_rtx_MEM (GET_MODE (operand),
26049 gen_rtx_PRE_DEC (SImode,
26050 stack_pointer_rtx)),
26051 operand));
26052 break;
26053 default:
26054 gcc_unreachable ();
26056 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26058 return result;
26061 /* Free operand from the memory. */
26062 void
26063 ix86_free_from_memory (enum machine_mode mode)
26065 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
26067 int size;
26069 if (mode == DImode || TARGET_64BIT)
26070 size = 8;
26071 else
26072 size = 4;
26073 /* Use LEA to deallocate stack space. In peephole2 it will be converted
26074 to pop or add instruction if registers are available. */
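/* For example, on 32-bit this emits the equivalent of leal 4(%esp), %esp,
 which peephole2 may later rewrite as a pop into a free scratch register
 or as addl $4, %esp. */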
26075 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26076 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
26077 GEN_INT (size))));
26081 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
26082 QImode must go into class Q_REGS.
26083 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
26084 movdf to do mem-to-mem moves through integer regs. */
26085 enum reg_class
26086 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
26088 enum machine_mode mode = GET_MODE (x);
26090 /* We're only allowed to return a subclass of REGCLASS. Many of the
26091 following checks fail for NO_REGS, so eliminate that early. */
26092 if (regclass == NO_REGS)
26093 return NO_REGS;
26095 /* All classes can load zeros. */
26096 if (x == CONST0_RTX (mode))
26097 return regclass;
26099 /* Force constants into memory if we are loading a (nonzero) constant into
26100 an MMX or SSE register. This is because there are no MMX/SSE instructions
26101 to load from a constant. */
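/* E.g. asking to load a nonzero (const_double 1.0) into SSE_REGS returns
 NO_REGS here, so the constant ends up being loaded from memory (the
 constant pool) instead. */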
26102 if (CONSTANT_P (x)
26103 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
26104 return NO_REGS;
26106 /* Prefer SSE regs only, if we can use them for math. */
26107 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
26108 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
26110 /* Floating-point constants need more complex checks. */
26111 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
26113 /* General regs can load everything. */
26114 if (reg_class_subset_p (regclass, GENERAL_REGS))
26115 return regclass;
26117 /* Floats can load 0 and 1 plus some others. Note that we eliminated
26118 zero above. We only want to wind up preferring 80387 registers if
26119 we plan on doing computation with them. */
26120 if (TARGET_80387
26121 && standard_80387_constant_p (x))
26123 /* Limit class to non-sse. */
26124 if (regclass == FLOAT_SSE_REGS)
26125 return FLOAT_REGS;
26126 if (regclass == FP_TOP_SSE_REGS)
26127 return FP_TOP_REG;
26128 if (regclass == FP_SECOND_SSE_REGS)
26129 return FP_SECOND_REG;
26130 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
26131 return regclass;
26134 return NO_REGS;
26137 /* Generally when we see PLUS here, it's the function invariant
26138 (plus soft-fp const_int), which can only be computed into general
26139 regs. */
26140 if (GET_CODE (x) == PLUS)
26141 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
26143 /* QImode constants are easy to load, but non-constant QImode data
26144 must go into Q_REGS. */
26145 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
26147 if (reg_class_subset_p (regclass, Q_REGS))
26148 return regclass;
26149 if (reg_class_subset_p (Q_REGS, regclass))
26150 return Q_REGS;
26151 return NO_REGS;
26154 return regclass;
26157 /* Discourage putting floating-point values in SSE registers unless
26158 SSE math is being used, and likewise for the 387 registers. */
26159 enum reg_class
26160 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
26162 enum machine_mode mode = GET_MODE (x);
26164 /* Restrict the output reload class to the register bank that we are doing
26165 math on. Rather than returning a class that is not a subset of REGCLASS,
26166 reject the alternative: if reload cannot do this, it will still use its choice. */
26168 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
26169 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
26171 if (X87_FLOAT_MODE_P (mode))
26173 if (regclass == FP_TOP_SSE_REGS)
26174 return FP_TOP_REG;
26175 else if (regclass == FP_SECOND_SSE_REGS)
26176 return FP_SECOND_REG;
26177 else
26178 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
26181 return regclass;
26184 static enum reg_class
26185 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
26186 enum machine_mode mode,
26187 secondary_reload_info *sri ATTRIBUTE_UNUSED)
26189 /* QImode spills from non-QI registers require an
26190 intermediate register on 32-bit targets. */
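/* For example, spilling (reg:QI si) must go through a Q_REGS register
 first, since %esi has no addressable low byte in 32-bit mode. */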
26191 if (!in_p && mode == QImode && !TARGET_64BIT
26192 && (rclass == GENERAL_REGS
26193 || rclass == LEGACY_REGS
26194 || rclass == INDEX_REGS))
26196 int regno;
26198 if (REG_P (x))
26199 regno = REGNO (x);
26200 else
26201 regno = -1;
26203 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
26204 regno = true_regnum (x);
26206 /* Return Q_REGS if the operand is in memory. */
26207 if (regno == -1)
26208 return Q_REGS;
26211 return NO_REGS;
26214 /* If we are copying between general and FP registers, we need a memory
26215 location. The same is true for SSE and MMX registers.
26217 To optimize register_move_cost performance, allow inline variant.
26219 The macro can't work reliably when one of the CLASSES is a class containing
26220 registers from multiple units (SSE, MMX, integer). We avoid this by never
26221 combining those units in single alternative in the machine description.
26222 Ensure that this constraint holds to avoid unexpected surprises.
26224 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
26225 enforce these sanity checks. */
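/* For example, on a 32-bit target with SSE2 and inter-unit moves enabled,
 a DImode move between SSE_REGS and GENERAL_REGS still answers true here:
 it is wider than the word size, so it must go through a memory temporary
 rather than direct inter-unit moves. */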
26227 static inline int
26228 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26229 enum machine_mode mode, int strict)
26231 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
26232 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
26233 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
26234 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
26235 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
26236 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
26238 gcc_assert (!strict);
26239 return true;
26242 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
26243 return true;
26245 /* ??? This is a lie. We do have moves between mmx/general and between
26246 mmx/sse2. But by saying we need secondary memory we discourage the
26247 register allocator from using the mmx registers unless needed. */
26248 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
26249 return true;
26251 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26253 /* SSE1 doesn't have any direct moves from other classes. */
26254 if (!TARGET_SSE2)
26255 return true;
26257 /* If the target says that inter-unit moves are more expensive
26258 than moving through memory, then don't generate them. */
26259 if (!TARGET_INTER_UNIT_MOVES)
26260 return true;
26262 /* Between SSE and general, we have moves no larger than word size. */
26263 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
26264 return true;
26267 return false;
26270 int
26271 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26272 enum machine_mode mode, int strict)
26274 return inline_secondary_memory_needed (class1, class2, mode, strict);
26277 /* Return true if the registers in CLASS cannot represent the change from
26278 modes FROM to TO. */
26280 bool
26281 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26282 enum reg_class regclass)
26284 if (from == to)
26285 return false;
26287 /* x87 registers can't do subreg at all, as all values are reformatted
26288 to extended precision. */
26289 if (MAYBE_FLOAT_CLASS_P (regclass))
26290 return true;
26292 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26294 /* Vector registers do not support QI or HImode loads. If we don't
26295 disallow a change to these modes, reload will assume it's ok to
26296 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26297 the vec_dupv4hi pattern. */
26298 if (GET_MODE_SIZE (from) < 4)
26299 return true;
26301 /* Vector registers do not support subreg with nonzero offsets, which
26302 are otherwise valid for integer registers. Since we can't see
26303 whether we have a nonzero offset from here, prohibit all
26304 nonparadoxical subregs changing size. */
26305 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26306 return true;
26309 return false;
26312 /* Return the cost of moving data of mode M between a
26313 register and memory. A value of 2 is the default; this cost is
26314 relative to those in `REGISTER_MOVE_COST'.
26316 This function is used extensively by register_move_cost, which is used to
26317 build tables at startup, so make it inline in this case.
26318 When IN is 2, return the maximum of the in and out move costs.
26320 If moving between registers and memory is more expensive than
26321 between two registers, you should define this macro to express the
26322 relative cost.
26324 Also model the increased cost of moving QImode registers in
26325 non-Q_REGS classes. */
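/* For example, on a 32-bit target a DImode value in GENERAL_REGS takes two
 word-sized moves, so the default case below returns int_load[2] (or
 int_store[2]) scaled by GET_MODE_SIZE (DImode) / UNITS_PER_WORD == 2. */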
26327 static inline int
26328 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
26329 int in)
26331 int cost;
26332 if (FLOAT_CLASS_P (regclass))
26334 int index;
26335 switch (mode)
26337 case SFmode:
26338 index = 0;
26339 break;
26340 case DFmode:
26341 index = 1;
26342 break;
26343 case XFmode:
26344 index = 2;
26345 break;
26346 default:
26347 return 100;
26349 if (in == 2)
26350 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26351 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
26353 if (SSE_CLASS_P (regclass))
26355 int index;
26356 switch (GET_MODE_SIZE (mode))
26358 case 4:
26359 index = 0;
26360 break;
26361 case 8:
26362 index = 1;
26363 break;
26364 case 16:
26365 index = 2;
26366 break;
26367 default:
26368 return 100;
26370 if (in == 2)
26371 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26372 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
26374 if (MMX_CLASS_P (regclass))
26376 int index;
26377 switch (GET_MODE_SIZE (mode))
26379 case 4:
26380 index = 0;
26381 break;
26382 case 8:
26383 index = 1;
26384 break;
26385 default:
26386 return 100;
26388 if (in == 2)
26389 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26390 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
26392 switch (GET_MODE_SIZE (mode))
26394 case 1:
26395 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26397 if (!in)
26398 return ix86_cost->int_store[0];
26399 if (TARGET_PARTIAL_REG_DEPENDENCY
26400 && optimize_function_for_speed_p (cfun))
26401 cost = ix86_cost->movzbl_load;
26402 else
26403 cost = ix86_cost->int_load[0];
26404 if (in == 2)
26405 return MAX (cost, ix86_cost->int_store[0]);
26406 return cost;
26408 else
26410 if (in == 2)
26411 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26412 if (in)
26413 return ix86_cost->movzbl_load;
26414 else
26415 return ix86_cost->int_store[0] + 4;
26417 break;
26418 case 2:
26419 if (in == 2)
26420 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26421 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26422 default:
26423 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
26424 if (mode == TFmode)
26425 mode = XFmode;
26426 if (in == 2)
26427 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26428 else if (in)
26429 cost = ix86_cost->int_load[2];
26430 else
26431 cost = ix86_cost->int_store[2];
26432 return (cost * (((int) GET_MODE_SIZE (mode)
26433 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
26437 int
26438 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
26440 return inline_memory_move_cost (mode, regclass, in);
26444 /* Return the cost of moving data from a register in class CLASS1 to
26445 one in class CLASS2.
26447 It is not required that the cost always equal 2 when FROM is the same as TO;
26448 on some machines it is expensive to move between registers if they are not
26449 general registers. */
26451 int
26452 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
26453 enum reg_class class2)
26455 /* In case we require secondary memory, compute cost of the store followed
26456 by load. In order to avoid bad register allocation choices, we need
26457 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26459 if (inline_secondary_memory_needed (class1, class2, mode, 0))
26461 int cost = 1;
26463 cost += inline_memory_move_cost (mode, class1, 2);
26464 cost += inline_memory_move_cost (mode, class2, 2);
26466 /* In case of copying from general_purpose_register we may emit multiple
26467 stores followed by single load causing memory size mismatch stall.
26468 Count this as arbitrarily high cost of 20. */
26469 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
26470 cost += 20;
26472 /* In the case of FP/MMX moves, the registers actually overlap, and we
26473 have to switch modes in order to treat them differently. */
26474 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
26475 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
26476 cost += 20;
26478 return cost;
26481 /* Moves between SSE/MMX and integer unit are expensive. */
26482 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
26483 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26485 /* ??? By keeping returned value relatively high, we limit the number
26486 of moves between integer and MMX/SSE registers for all targets.
26487 Additionally, a high value prevents problems with x86_modes_tieable_p (),
26488 where integer modes in MMX/SSE registers are not tieable
26489 because of missing QImode and HImode moves to, from or between
26490 MMX/SSE registers. */
26491 return MAX (8, ix86_cost->mmxsse_to_integer);
26493 if (MAYBE_FLOAT_CLASS_P (class1))
26494 return ix86_cost->fp_move;
26495 if (MAYBE_SSE_CLASS_P (class1))
26496 return ix86_cost->sse_move;
26497 if (MAYBE_MMX_CLASS_P (class1))
26498 return ix86_cost->mmx_move;
26499 return 2;
26502 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
26504 bool
26505 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26507 /* Flags and only flags can only hold CCmode values. */
26508 if (CC_REGNO_P (regno))
26509 return GET_MODE_CLASS (mode) == MODE_CC;
26510 if (GET_MODE_CLASS (mode) == MODE_CC
26511 || GET_MODE_CLASS (mode) == MODE_RANDOM
26512 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26513 return 0;
26514 if (FP_REGNO_P (regno))
26515 return VALID_FP_MODE_P (mode);
26516 if (SSE_REGNO_P (regno))
26518 /* We implement the move patterns for all vector modes into and
26519 out of SSE registers, even when no operation instructions
26520 are available. OImode move is available only when AVX is
26521 enabled. */
26522 return ((TARGET_AVX && mode == OImode)
26523 || VALID_AVX256_REG_MODE (mode)
26524 || VALID_SSE_REG_MODE (mode)
26525 || VALID_SSE2_REG_MODE (mode)
26526 || VALID_MMX_REG_MODE (mode)
26527 || VALID_MMX_REG_MODE_3DNOW (mode));
26529 if (MMX_REGNO_P (regno))
26531 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26532 so if the register is available at all, then we can move data of
26533 the given mode into or out of it. */
26534 return (VALID_MMX_REG_MODE (mode)
26535 || VALID_MMX_REG_MODE_3DNOW (mode));
26538 if (mode == QImode)
26540 /* Take care with QImode values - they can be in non-QI regs,
26541 but then they may cause partial register stalls. */
26542 if (regno <= BX_REG || TARGET_64BIT)
26543 return 1;
26544 if (!TARGET_PARTIAL_REG_STALL)
26545 return 1;
26546 return reload_in_progress || reload_completed;
26548 /* We handle both integer and floats in the general purpose registers. */
26549 else if (VALID_INT_MODE_P (mode))
26550 return 1;
26551 else if (VALID_FP_MODE_P (mode))
26552 return 1;
26553 else if (VALID_DFP_MODE_P (mode))
26554 return 1;
26555 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26556 on to use that value in smaller contexts, this can easily force a
26557 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26558 supporting DImode, allow it. */
26559 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26560 return 1;
26562 return 0;
26565 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26566 tieable integer mode. */
26568 static bool
26569 ix86_tieable_integer_mode_p (enum machine_mode mode)
26571 switch (mode)
26573 case HImode:
26574 case SImode:
26575 return true;
26577 case QImode:
26578 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
26580 case DImode:
26581 return TARGET_64BIT;
26583 default:
26584 return false;
26588 /* Return true if MODE1 is accessible in a register that can hold MODE2
26589 without copying. That is, all register classes that can hold MODE2
26590 can also hold MODE1. */
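/* For example, per the cases below, SFmode ties with DFmode and XFmode
 (any register bank that can hold the wider mode can hold SFmode), while
 V4SFmode only ties with other 16-byte modes valid in SSE registers. */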
26592 bool
26593 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26595 if (mode1 == mode2)
26596 return true;
26598 if (ix86_tieable_integer_mode_p (mode1)
26599 && ix86_tieable_integer_mode_p (mode2))
26600 return true;
26602 /* MODE2 being XFmode implies fp stack or general regs, which means we
26603 can tie any smaller floating point modes to it. Note that we do not
26604 tie this with TFmode. */
26605 if (mode2 == XFmode)
26606 return mode1 == SFmode || mode1 == DFmode;
26608 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26609 that we can tie it with SFmode. */
26610 if (mode2 == DFmode)
26611 return mode1 == SFmode;
26613 /* If MODE2 is only appropriate for an SSE register, then tie with
26614 any other mode acceptable to SSE registers. */
26615 if (GET_MODE_SIZE (mode2) == 16
26616 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26617 return (GET_MODE_SIZE (mode1) == 16
26618 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26620 /* If MODE2 is appropriate for an MMX register, then tie
26621 with any other mode acceptable to MMX registers. */
26622 if (GET_MODE_SIZE (mode2) == 8
26623 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26624 return (GET_MODE_SIZE (mode1) == 8
26625 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26627 return false;
26630 /* Compute a (partial) cost for rtx X. Return true if the complete
26631 cost has been computed, and false if subexpressions should be
26632 scanned. In either case, *TOTAL contains the cost result. */
26634 static bool
26635 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26637 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26638 enum machine_mode mode = GET_MODE (x);
26639 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
26641 switch (code)
26643 case CONST_INT:
26644 case CONST:
26645 case LABEL_REF:
26646 case SYMBOL_REF:
26647 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26648 *total = 3;
26649 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26650 *total = 2;
26651 else if (flag_pic && SYMBOLIC_CONST (x)
26652 && (!TARGET_64BIT
26653 || (GET_CODE (x) != LABEL_REF
26654 && (GET_CODE (x) != SYMBOL_REF
26655 || !SYMBOL_REF_LOCAL_P (x)))))
26656 *total = 1;
26657 else
26658 *total = 0;
26659 return true;
26661 case CONST_DOUBLE:
26662 if (mode == VOIDmode)
26663 *total = 0;
26664 else
26665 switch (standard_80387_constant_p (x))
26667 case 1: /* 0.0 */
26668 *total = 1;
26669 break;
26670 default: /* Other constants */
26671 *total = 2;
26672 break;
26673 case 0:
26674 case -1:
26675 /* Start with (MEM (SYMBOL_REF)), since that's where
26676 it'll probably end up. Add a penalty for size. */
26677 *total = (COSTS_N_INSNS (1)
26678 + (flag_pic != 0 && !TARGET_64BIT)
26679 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26680 break;
26682 return true;
26684 case ZERO_EXTEND:
26685 /* The zero extension is often completely free on x86_64, so make
26686 it as cheap as possible. */
26687 if (TARGET_64BIT && mode == DImode
26688 && GET_MODE (XEXP (x, 0)) == SImode)
26689 *total = 1;
26690 else if (TARGET_ZERO_EXTEND_WITH_AND)
26691 *total = cost->add;
26692 else
26693 *total = cost->movzx;
26694 return false;
26696 case SIGN_EXTEND:
26697 *total = cost->movsx;
26698 return false;
26700 case ASHIFT:
26701 if (CONST_INT_P (XEXP (x, 1))
26702 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26704 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26705 if (value == 1)
26707 *total = cost->add;
26708 return false;
26710 if ((value == 2 || value == 3)
26711 && cost->lea <= cost->shift_const)
26713 *total = cost->lea;
26714 return false;
26717 /* FALLTHRU */
26719 case ROTATE:
26720 case ASHIFTRT:
26721 case LSHIFTRT:
26722 case ROTATERT:
26723 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26725 if (CONST_INT_P (XEXP (x, 1)))
26727 if (INTVAL (XEXP (x, 1)) > 32)
26728 *total = cost->shift_const + COSTS_N_INSNS (2);
26729 else
26730 *total = cost->shift_const * 2;
26732 else
26734 if (GET_CODE (XEXP (x, 1)) == AND)
26735 *total = cost->shift_var * 2;
26736 else
26737 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26740 else
26742 if (CONST_INT_P (XEXP (x, 1)))
26743 *total = cost->shift_const;
26744 else
26745 *total = cost->shift_var;
26747 return false;
26749 case MULT:
26750 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26752 /* ??? SSE scalar cost should be used here. */
26753 *total = cost->fmul;
26754 return false;
26756 else if (X87_FLOAT_MODE_P (mode))
26758 *total = cost->fmul;
26759 return false;
26761 else if (FLOAT_MODE_P (mode))
26763 /* ??? SSE vector cost should be used here. */
26764 *total = cost->fmul;
26765 return false;
26767 else
26769 rtx op0 = XEXP (x, 0);
26770 rtx op1 = XEXP (x, 1);
26771 int nbits;
26772 if (CONST_INT_P (XEXP (x, 1)))
26774 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
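/* Count the set bits in VALUE: each 'value &= value - 1' step clears the
 lowest set bit, so e.g. 0x14 takes two iterations. More set bits mean
 more add/shift steps for the multiply. */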
26775 for (nbits = 0; value != 0; value &= value - 1)
26776 nbits++;
26778 else
26779 /* This is arbitrary. */
26780 nbits = 7;
26782 /* Compute costs correctly for widening multiplication. */
26783 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26784 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26785 == GET_MODE_SIZE (mode))
26787 int is_mulwiden = 0;
26788 enum machine_mode inner_mode = GET_MODE (op0);
26790 if (GET_CODE (op0) == GET_CODE (op1))
26791 is_mulwiden = 1, op1 = XEXP (op1, 0);
26792 else if (CONST_INT_P (op1))
26794 if (GET_CODE (op0) == SIGN_EXTEND)
26795 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26796 == INTVAL (op1);
26797 else
26798 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26801 if (is_mulwiden)
26802 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26805 *total = (cost->mult_init[MODE_INDEX (mode)]
26806 + nbits * cost->mult_bit
26807 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
26809 return true;
26812 case DIV:
26813 case UDIV:
26814 case MOD:
26815 case UMOD:
26816 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26817 /* ??? SSE cost should be used here. */
26818 *total = cost->fdiv;
26819 else if (X87_FLOAT_MODE_P (mode))
26820 *total = cost->fdiv;
26821 else if (FLOAT_MODE_P (mode))
26822 /* ??? SSE vector cost should be used here. */
26823 *total = cost->fdiv;
26824 else
26825 *total = cost->divide[MODE_INDEX (mode)];
26826 return false;
26828 case PLUS:
26829 if (GET_MODE_CLASS (mode) == MODE_INT
26830 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26832 if (GET_CODE (XEXP (x, 0)) == PLUS
26833 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26834 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26835 && CONSTANT_P (XEXP (x, 1)))
26837 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26838 if (val == 2 || val == 4 || val == 8)
26840 *total = cost->lea;
26841 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26842 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26843 outer_code, speed);
26844 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26845 return true;
26848 else if (GET_CODE (XEXP (x, 0)) == MULT
26849 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26851 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26852 if (val == 2 || val == 4 || val == 8)
26854 *total = cost->lea;
26855 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26856 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26857 return true;
26860 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26862 *total = cost->lea;
26863 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26864 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26865 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26866 return true;
26869 /* FALLTHRU */
26871 case MINUS:
26872 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26874 /* ??? SSE cost should be used here. */
26875 *total = cost->fadd;
26876 return false;
26878 else if (X87_FLOAT_MODE_P (mode))
26880 *total = cost->fadd;
26881 return false;
26883 else if (FLOAT_MODE_P (mode))
26885 /* ??? SSE vector cost should be used here. */
26886 *total = cost->fadd;
26887 return false;
26889 /* FALLTHRU */
26891 case AND:
26892 case IOR:
26893 case XOR:
26894 if (!TARGET_64BIT && mode == DImode)
26896 *total = (cost->add * 2
26897 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26898 << (GET_MODE (XEXP (x, 0)) != DImode))
26899 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26900 << (GET_MODE (XEXP (x, 1)) != DImode)));
26901 return true;
26903 /* FALLTHRU */
26905 case NEG:
26906 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26908 /* ??? SSE cost should be used here. */
26909 *total = cost->fchs;
26910 return false;
26912 else if (X87_FLOAT_MODE_P (mode))
26914 *total = cost->fchs;
26915 return false;
26917 else if (FLOAT_MODE_P (mode))
26919 /* ??? SSE vector cost should be used here. */
26920 *total = cost->fchs;
26921 return false;
26923 /* FALLTHRU */
26925 case NOT:
26926 if (!TARGET_64BIT && mode == DImode)
26927 *total = cost->add * 2;
26928 else
26929 *total = cost->add;
26930 return false;
26932 case COMPARE:
26933 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26934 && XEXP (XEXP (x, 0), 1) == const1_rtx
26935 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26936 && XEXP (x, 1) == const0_rtx)
26938 /* This kind of construct is implemented using test[bwl].
26939 Treat it as if we had an AND. */
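/* E.g. a condition testing (x >> 5) & 1 arrives here as
 (compare (zero_extract x 1 5) 0) and is emitted as testl $32, x
 (or testb on a narrower operand). */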
26940 *total = (cost->add
26941 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26942 + rtx_cost (const1_rtx, outer_code, speed));
26943 return true;
26945 return false;
26947 case FLOAT_EXTEND:
26948 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
26949 *total = 0;
26950 return false;
26952 case ABS:
26953 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26954 /* ??? SSE cost should be used here. */
26955 *total = cost->fabs;
26956 else if (X87_FLOAT_MODE_P (mode))
26957 *total = cost->fabs;
26958 else if (FLOAT_MODE_P (mode))
26959 /* ??? SSE vector cost should be used here. */
26960 *total = cost->fabs;
26961 return false;
26963 case SQRT:
26964 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26965 /* ??? SSE cost should be used here. */
26966 *total = cost->fsqrt;
26967 else if (X87_FLOAT_MODE_P (mode))
26968 *total = cost->fsqrt;
26969 else if (FLOAT_MODE_P (mode))
26970 /* ??? SSE vector cost should be used here. */
26971 *total = cost->fsqrt;
26972 return false;
26974 case UNSPEC:
26975 if (XINT (x, 1) == UNSPEC_TP)
26976 *total = 0;
26977 return false;
26979 default:
26980 return false;
26984 #if TARGET_MACHO
26986 static int current_machopic_label_num;
26988 /* Given a symbol name and its associated stub, write out the
26989 definition of the stub. */
26991 void
26992 machopic_output_stub (FILE *file, const char *symb, const char *stub)
26994 unsigned int length;
26995 char *binder_name, *symbol_name, lazy_ptr_name[32];
26996 int label = ++current_machopic_label_num;
26998 /* For 64-bit we shouldn't get here. */
26999 gcc_assert (!TARGET_64BIT);
27001 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
27002 symb = (*targetm.strip_name_encoding) (symb);
27004 length = strlen (stub);
27005 binder_name = XALLOCAVEC (char, length + 32);
27006 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
27008 length = strlen (symb);
27009 symbol_name = XALLOCAVEC (char, length + 32);
27010 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
27012 sprintf (lazy_ptr_name, "L%d$lz", label);
27014 if (MACHOPIC_PURE)
27015 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
27016 else
27017 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
27019 fprintf (file, "%s:\n", stub);
27020 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27022 if (MACHOPIC_PURE)
27024 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
27025 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
27026 fprintf (file, "\tjmp\t*%%edx\n");
27028 else
27029 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
27031 fprintf (file, "%s:\n", binder_name);
27033 if (MACHOPIC_PURE)
27035 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
27036 fprintf (file, "\tpushl\t%%eax\n");
27038 else
27039 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
27041 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
27043 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
27044 fprintf (file, "%s:\n", lazy_ptr_name);
27045 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27046 fprintf (file, "\t.long %s\n", binder_name);
27049 void
27050 darwin_x86_file_end (void)
27052 darwin_file_end ();
27053 ix86_file_end ();
27055 #endif /* TARGET_MACHO */
27057 /* Order the registers for register allocator. */
27059 void
27060 x86_order_regs_for_local_alloc (void)
27062 int pos = 0;
27063 int i;
27065 /* First allocate the local general purpose registers. */
27066 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27067 if (GENERAL_REGNO_P (i) && call_used_regs[i])
27068 reg_alloc_order [pos++] = i;
27070 /* Global general purpose registers. */
27071 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27072 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
27073 reg_alloc_order [pos++] = i;
27075 /* x87 registers come first in case we are doing FP math
27076 using them. */
27077 if (!TARGET_SSE_MATH)
27078 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27079 reg_alloc_order [pos++] = i;
27081 /* SSE registers. */
27082 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
27083 reg_alloc_order [pos++] = i;
27084 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
27085 reg_alloc_order [pos++] = i;
27087 /* x87 registers. */
27088 if (TARGET_SSE_MATH)
27089 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27090 reg_alloc_order [pos++] = i;
27092 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
27093 reg_alloc_order [pos++] = i;
27095 /* Initialize the rest of array as we do not allocate some registers
27096 at all. */
27097 while (pos < FIRST_PSEUDO_REGISTER)
27098 reg_alloc_order [pos++] = 0;
27101 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
27102 struct attribute_spec.handler. */
27103 static tree
27104 ix86_handle_abi_attribute (tree *node, tree name,
27105 tree args ATTRIBUTE_UNUSED,
27106 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27108 if (TREE_CODE (*node) != FUNCTION_TYPE
27109 && TREE_CODE (*node) != METHOD_TYPE
27110 && TREE_CODE (*node) != FIELD_DECL
27111 && TREE_CODE (*node) != TYPE_DECL)
27113 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27114 name);
27115 *no_add_attrs = true;
27116 return NULL_TREE;
27118 if (!TARGET_64BIT)
27120 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
27121 name);
27122 *no_add_attrs = true;
27123 return NULL_TREE;
27126 /* The ms_abi and sysv_abi attributes are mutually exclusive. */
27127 if (is_attribute_p ("ms_abi", name))
27129 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
27131 error ("ms_abi and sysv_abi attributes are not compatible");
27134 return NULL_TREE;
27136 else if (is_attribute_p ("sysv_abi", name))
27138 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
27140 error ("ms_abi and sysv_abi attributes are not compatible");
27143 return NULL_TREE;
27146 return NULL_TREE;
27149 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
27150 struct attribute_spec.handler. */
27151 static tree
27152 ix86_handle_struct_attribute (tree *node, tree name,
27153 tree args ATTRIBUTE_UNUSED,
27154 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27156 tree *type = NULL;
27157 if (DECL_P (*node))
27159 if (TREE_CODE (*node) == TYPE_DECL)
27160 type = &TREE_TYPE (*node);
27162 else
27163 type = node;
27165 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
27166 || TREE_CODE (*type) == UNION_TYPE)))
27168 warning (OPT_Wattributes, "%qE attribute ignored",
27169 name);
27170 *no_add_attrs = true;
27173 else if ((is_attribute_p ("ms_struct", name)
27174 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
27175 || ((is_attribute_p ("gcc_struct", name)
27176 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
27178 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
27179 name);
27180 *no_add_attrs = true;
27183 return NULL_TREE;
27186 static bool
27187 ix86_ms_bitfield_layout_p (const_tree record_type)
27189 return (TARGET_MS_BITFIELD_LAYOUT
27190 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27191 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
27194 /* Returns an expression indicating where the this parameter is
27195 located on entry to the FUNCTION. */
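/* For example, on 64-bit THIS arrives in %rdi (%rcx for MS_ABI), shifting
 to the next register when a hidden aggregate-return pointer takes the
 first slot; on 32-bit it is normally on the stack at 4(%esp), or in %ecx
 for fastcall (%eax with regparm). */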
27197 static rtx
27198 x86_this_parameter (tree function)
27200 tree type = TREE_TYPE (function);
27201 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
27202 int nregs;
27204 if (TARGET_64BIT)
27206 const int *parm_regs;
27208 if (ix86_function_type_abi (type) == MS_ABI)
27209 parm_regs = x86_64_ms_abi_int_parameter_registers;
27210 else
27211 parm_regs = x86_64_int_parameter_registers;
27212 return gen_rtx_REG (DImode, parm_regs[aggr]);
27215 nregs = ix86_function_regparm (type, function);
27217 if (nregs > 0 && !stdarg_p (type))
27219 int regno;
27221 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
27222 regno = aggr ? DX_REG : CX_REG;
27223 else
27225 regno = AX_REG;
27226 if (aggr)
27228 regno = DX_REG;
27229 if (nregs == 1)
27230 return gen_rtx_MEM (SImode,
27231 plus_constant (stack_pointer_rtx, 4));
27234 return gen_rtx_REG (SImode, regno);
27237 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
27240 /* Determine whether x86_output_mi_thunk can succeed. */
27242 static bool
27243 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
27244 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
27245 HOST_WIDE_INT vcall_offset, const_tree function)
27247 /* 64-bit can handle anything. */
27248 if (TARGET_64BIT)
27249 return true;
27251 /* For 32-bit, everything's fine if we have one free register. */
27252 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
27253 return true;
27255 /* Need a free register for vcall_offset. */
27256 if (vcall_offset)
27257 return false;
27259 /* Need a free register for GOT references. */
27260 if (flag_pic && !(*targetm.binds_local_p) (function))
27261 return false;
27263 /* Otherwise ok. */
27264 return true;
27267 /* Output the assembler code for a thunk function. THUNK_DECL is the
27268 declaration for the thunk function itself, FUNCTION is the decl for
27269 the target function. DELTA is an immediate constant offset to be
27270 added to THIS. If VCALL_OFFSET is nonzero, the word at
27271 *(*this + vcall_offset) should be added to THIS. */
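/* In C terms the thunk performs, roughly:

 this += delta;
 if (vcall_offset)
 this += *(ptrdiff_t *) (*(char **) this + vcall_offset);
 goto function;

 (a sketch only; the real adjustment is emitted as assembly below). */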
27273 static void
27274 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
27275 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27276 HOST_WIDE_INT vcall_offset, tree function)
27278 rtx xops[3];
27279 rtx this_param = x86_this_parameter (function);
27280 rtx this_reg, tmp;
27282 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27283 pull it in now and let DELTA benefit. */
27284 if (REG_P (this_param))
27285 this_reg = this_param;
27286 else if (vcall_offset)
27288 /* Put the this parameter into %eax. */
27289 xops[0] = this_param;
27290 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27291 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27293 else
27294 this_reg = NULL_RTX;
27296 /* Adjust the this parameter by a fixed constant. */
27297 if (delta)
27299 xops[0] = GEN_INT (delta);
27300 xops[1] = this_reg ? this_reg : this_param;
27301 if (TARGET_64BIT)
27303 if (!x86_64_general_operand (xops[0], DImode))
27305 tmp = gen_rtx_REG (DImode, R10_REG);
27306 xops[1] = tmp;
27307 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27308 xops[0] = tmp;
27309 xops[1] = this_param;
27311 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27313 else
27314 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27317 /* Adjust the this parameter by a value stored in the vtable. */
27318 if (vcall_offset)
27320 if (TARGET_64BIT)
27321 tmp = gen_rtx_REG (DImode, R10_REG);
27322 else
27324 int tmp_regno = CX_REG;
27325 if (lookup_attribute ("fastcall",
27326 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27327 tmp_regno = AX_REG;
27328 tmp = gen_rtx_REG (SImode, tmp_regno);
27331 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27332 xops[1] = tmp;
27333 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27335 /* Adjust the this parameter. */
27336 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27337 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27339 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27340 xops[0] = GEN_INT (vcall_offset);
27341 xops[1] = tmp2;
27342 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27343 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27345 xops[1] = this_reg;
27346 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27349 /* If necessary, drop THIS back to its stack slot. */
27350 if (this_reg && this_reg != this_param)
27352 xops[0] = this_reg;
27353 xops[1] = this_param;
27354 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27357 xops[0] = XEXP (DECL_RTL (function), 0);
27358 if (TARGET_64BIT)
27360 if (!flag_pic || (*targetm.binds_local_p) (function))
27361 output_asm_insn ("jmp\t%P0", xops);
27362 /* All thunks should be in the same object as their target,
27363 and thus binds_local_p should be true. */
27364 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27365 gcc_unreachable ();
27366 else
27368 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27369 tmp = gen_rtx_CONST (Pmode, tmp);
27370 tmp = gen_rtx_MEM (QImode, tmp);
27371 xops[0] = tmp;
27372 output_asm_insn ("jmp\t%A0", xops);
27375 else
27377 if (!flag_pic || (*targetm.binds_local_p) (function))
27378 output_asm_insn ("jmp\t%P0", xops);
27379 else
27380 #if TARGET_MACHO
27381 if (TARGET_MACHO)
27383 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27384 tmp = (gen_rtx_SYMBOL_REF
27385 (Pmode,
27386 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27387 tmp = gen_rtx_MEM (QImode, tmp);
27388 xops[0] = tmp;
27389 output_asm_insn ("jmp\t%0", xops);
27391 else
27392 #endif /* TARGET_MACHO */
27394 tmp = gen_rtx_REG (SImode, CX_REG);
27395 output_set_got (tmp, NULL_RTX);
27397 xops[1] = tmp;
27398 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27399 output_asm_insn ("jmp\t{*}%1", xops);
27404 static void
27405 x86_file_start (void)
27407 default_file_start ();
27408 #if TARGET_MACHO
27409 darwin_file_start ();
27410 #endif
27411 if (X86_FILE_START_VERSION_DIRECTIVE)
27412 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27413 if (X86_FILE_START_FLTUSED)
27414 fputs ("\t.global\t__fltused\n", asm_out_file);
27415 if (ix86_asm_dialect == ASM_INTEL)
27416 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
27419 int
27420 x86_field_alignment (tree field, int computed)
27422 enum machine_mode mode;
27423 tree type = TREE_TYPE (field);
27425 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
27426 return computed;
27427 mode = TYPE_MODE (strip_array_types (type));
27428 if (mode == DFmode || mode == DCmode
27429 || GET_MODE_CLASS (mode) == MODE_INT
27430 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
27431 return MIN (32, computed);
27432 return computed;
27435 /* Output assembler code to FILE to increment profiler label # LABELNO
27436 for profiling a function entry. */
27437 void
27438 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
27440 if (TARGET_64BIT)
27442 #ifndef NO_PROFILE_COUNTERS
27443 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
27444 #endif
27446 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
27447 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
27448 else
27449 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
27451 else if (flag_pic)
27453 #ifndef NO_PROFILE_COUNTERS
27454 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
27455 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
27456 #endif
27457 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
27459 else
27461 #ifndef NO_PROFILE_COUNTERS
27462 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
27463 PROFILE_COUNT_REGISTER);
27464 #endif
27465 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
27469 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27470 /* We don't have exact information about the insn sizes, but we may assume
27471 quite safely that we are informed about all 1-byte insns and memory
27472 address sizes. This is enough to eliminate unnecessary padding in
27473 99% of cases. */
27475 static int
27476 min_insn_size (rtx insn)
27478 int l = 0, len;
27480 if (!INSN_P (insn) || !active_insn_p (insn))
27481 return 0;
27483 /* Discard alignments we've emitted, and jump instructions. */
27484 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
27485 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
27486 return 0;
27487 if (JUMP_TABLE_DATA_P (insn))
27488 return 0;
27490 /* Important case - calls are always 5 bytes.
27491 It is common to have many calls in a row. */
27492 if (CALL_P (insn)
27493 && symbolic_reference_mentioned_p (PATTERN (insn))
27494 && !SIBLING_CALL_P (insn))
27495 return 5;
27496 len = get_attr_length (insn);
27497 if (len <= 1)
27498 return 1;
27500 /* For normal instructions we rely on get_attr_length being exact,
27501 with a few exceptions. */
27502 if (!JUMP_P (insn))
27504 enum attr_type type = get_attr_type (insn);
27506 switch (type)
27508 case TYPE_MULTI:
27509 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
27510 || asm_noperands (PATTERN (insn)) >= 0)
27511 return 0;
27512 break;
27513 case TYPE_OTHER:
27514 case TYPE_FCMP:
27515 break;
27516 default:
27517 /* Otherwise trust get_attr_length. */
27518 return len;
27521 l = get_attr_length_address (insn);
27522 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27523 l = 4;
27525 if (l)
27526 return 1+l;
27527 else
27528 return 2;
27531 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
27532 window. */
27534 static void
27535 ix86_avoid_jump_mispredicts (void)
27537 rtx insn, start = get_insns ();
27538 int nbytes = 0, njumps = 0;
27539 int isjump = 0;
27541 /* Look for all minimal intervals of instructions containing 4 jumps.
27542 The intervals are bounded by START and INSN. NBYTES is the total
27543 size of instructions in the interval including INSN and not including
27544 START. When NBYTES is smaller than 16 bytes, it is possible
27545 that the end of START and INSN ends up in the same 16-byte page.
27547 The smallest offset in the page at which INSN can start is the case where
27548 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
27549 We add p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN). */
27551 for (insn = start; insn; insn = NEXT_INSN (insn))
27553 int min_size;
27555 if (LABEL_P (insn))
27557 int align = label_to_alignment (insn);
27558 int max_skip = label_to_max_skip (insn);
27560 if (max_skip > 15)
27561 max_skip = 15;
27562 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
27563 already in the current 16-byte page, because otherwise
27564 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
27565 bytes to reach a 16-byte boundary. */
27566 if (align <= 0
27567 || (align <= 3 && max_skip != (1 << align) - 1))
27568 max_skip = 0;
27569 if (dump_file)
27570 fprintf (dump_file, "Label %i with max_skip %i\n",
27571 INSN_UID (insn), max_skip);
27572 if (max_skip)
27574 while (nbytes + max_skip >= 16)
27576 start = NEXT_INSN (start);
27577 if ((JUMP_P (start)
27578 && GET_CODE (PATTERN (start)) != ADDR_VEC
27579 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27580 || CALL_P (start))
27581 njumps--, isjump = 1;
27582 else
27583 isjump = 0;
27584 nbytes -= min_insn_size (start);
27587 continue;
27590 min_size = min_insn_size (insn);
27591 nbytes += min_size;
27592 if (dump_file)
27593 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
27594 INSN_UID (insn), min_size);
27595 if ((JUMP_P (insn)
27596 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27597 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
27598 || CALL_P (insn))
27599 njumps++;
27600 else
27601 continue;
27603 while (njumps > 3)
27605 start = NEXT_INSN (start);
27606 if ((JUMP_P (start)
27607 && GET_CODE (PATTERN (start)) != ADDR_VEC
27608 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27609 || CALL_P (start))
27610 njumps--, isjump = 1;
27611 else
27612 isjump = 0;
27613 nbytes -= min_insn_size (start);
27615 gcc_assert (njumps >= 0);
27616 if (dump_file)
27617 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27618 INSN_UID (start), INSN_UID (insn), nbytes);
27620 if (njumps == 3 && isjump && nbytes < 16)
27622 int padsize = 15 - nbytes + min_insn_size (insn);
27624 if (dump_file)
27625 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27626 INSN_UID (insn), padsize);
27627 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
27631 #endif
27633 /* AMD Athlon works faster
27634 when RET is not the destination of a conditional jump or directly preceded
27635 by another jump instruction. We avoid the penalty by inserting a NOP just
27636 before the RET instructions in such cases. */
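/* For example, a "ret" directly preceded by "call foo" is replaced with
 the longer return form below - typically the two-byte "rep ; ret" idiom
 on K8-family parts - so the return is no longer reached straight from a
 jump or call. */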
27637 static void
27638 ix86_pad_returns (void)
27640 edge e;
27641 edge_iterator ei;
27643 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27645 basic_block bb = e->src;
27646 rtx ret = BB_END (bb);
27647 rtx prev;
27648 bool replace = false;
27650 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27651 || optimize_bb_for_size_p (bb))
27652 continue;
27653 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27654 if (active_insn_p (prev) || LABEL_P (prev))
27655 break;
27656 if (prev && LABEL_P (prev))
27658 edge e;
27659 edge_iterator ei;
27661 FOR_EACH_EDGE (e, ei, bb->preds)
27662 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27663 && !(e->flags & EDGE_FALLTHRU))
27664 replace = true;
27666 if (!replace)
27668 prev = prev_active_insn (ret);
27669 if (prev
27670 && ((JUMP_P (prev) && any_condjump_p (prev))
27671 || CALL_P (prev)))
27672 replace = true;
27673 /* Empty functions get a branch mispredict even when the jump destination
27674 is not visible to us. */
27675 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
27676 replace = true;
27678 if (replace)
27680 emit_jump_insn_before (gen_return_internal_long (), ret);
27681 delete_insn (ret);
27686 /* Implement machine specific optimizations. We implement padding of returns
27687 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
27688 static void
27689 ix86_reorg (void)
27691 if (optimize && optimize_function_for_speed_p (cfun))
27693 if (TARGET_PAD_RETURNS)
27694 ix86_pad_returns ();
27695 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27696 if (TARGET_FOUR_JUMP_LIMIT)
27697 ix86_avoid_jump_mispredicts ();
27698 #endif
27702 /* Return nonzero when a QImode register that must be encoded via a REX prefix
27703 is used. */
27704 bool
27705 x86_extended_QIreg_mentioned_p (rtx insn)
27707 int i;
27708 extract_insn_cached (insn);
27709 for (i = 0; i < recog_data.n_operands; i++)
27710 if (REG_P (recog_data.operand[i])
27711 && REGNO (recog_data.operand[i]) > BX_REG)
27712 return true;
27713 return false;
27716 /* Return nonzero when P points to a register encoded via a REX prefix.
27717 Called via for_each_rtx. */
27718 static int
27719 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27721 unsigned int regno;
27722 if (!REG_P (*p))
27723 return 0;
27724 regno = REGNO (*p);
27725 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27728 /* Return true when INSN mentions register that must be encoded using REX
27729 prefix. */
27730 bool
27731 x86_extended_reg_mentioned_p (rtx insn)
27733 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27734 extended_reg_mentioned_1, NULL);
27737 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27738 optabs would emit if we didn't have TFmode patterns. */
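/* For the DImode case the emitted sequence computes, as a rough C sketch:

 double u64_to_fp (unsigned long long in)
 {
 if ((long long) in >= 0)
 return (double) (long long) in; // signed convert is exact
 unsigned long long i0 = (in >> 1) | (in & 1); // halve, keep sticky bit
 double f0 = (double) (long long) i0;
 return f0 + f0; // double it back
 }

 The (in & 1) "sticky" bit preserves correct rounding of the halved
 value. */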
27740 void
27741 x86_emit_floatuns (rtx operands[2])
27743 rtx neglab, donelab, i0, i1, f0, in, out;
27744 enum machine_mode mode, inmode;
27746 inmode = GET_MODE (operands[1]);
27747 gcc_assert (inmode == SImode || inmode == DImode);
27749 out = operands[0];
27750 in = force_reg (inmode, operands[1]);
27751 mode = GET_MODE (out);
27752 neglab = gen_label_rtx ();
27753 donelab = gen_label_rtx ();
27754 f0 = gen_reg_rtx (mode);
27756 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27758 expand_float (out, in, 0);
27760 emit_jump_insn (gen_jump (donelab));
27761 emit_barrier ();
27763 emit_label (neglab);
27765 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27766 1, OPTAB_DIRECT);
27767 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27768 1, OPTAB_DIRECT);
27769 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27771 expand_float (f0, i0, 0);
27773 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27775 emit_label (donelab);
27778 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27779 with all elements equal to VAR. Return true if successful. */
27781 static bool
27782 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27783 rtx target, rtx val)
27785 enum machine_mode hmode, smode, wsmode, wvmode;
27786 rtx x;
27788 switch (mode)
27790 case V2SImode:
27791 case V2SFmode:
27792 if (!mmx_ok)
27793 return false;
27794 /* FALLTHRU */
27796 case V2DFmode:
27797 case V2DImode:
27798 case V4SFmode:
27799 case V4SImode:
27800 val = force_reg (GET_MODE_INNER (mode), val);
27801 x = gen_rtx_VEC_DUPLICATE (mode, val);
27802 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27803 return true;
27805 case V4HImode:
27806 if (!mmx_ok)
27807 return false;
27808 if (TARGET_SSE || TARGET_3DNOW_A)
27810 val = gen_lowpart (SImode, val);
27811 x = gen_rtx_TRUNCATE (HImode, val);
27812 x = gen_rtx_VEC_DUPLICATE (mode, x);
27813 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27814 return true;
27816 else
27818 smode = HImode;
27819 wsmode = SImode;
27820 wvmode = V2SImode;
27821 goto widen;
27824 case V8QImode:
27825 if (!mmx_ok)
27826 return false;
27827 smode = QImode;
27828 wsmode = HImode;
27829 wvmode = V4HImode;
27830 goto widen;
27831 case V8HImode:
27832 if (TARGET_SSE2)
27834 rtx tmp1, tmp2;
27835 /* Extend HImode to SImode using a paradoxical SUBREG. */
27836 tmp1 = gen_reg_rtx (SImode);
27837 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27838 /* Insert the SImode value as low element of V4SImode vector. */
27839 tmp2 = gen_reg_rtx (V4SImode);
27840 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27841 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27842 CONST0_RTX (V4SImode),
27843 const1_rtx);
27844 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27845 /* Cast the V4SImode vector back to a V8HImode vector. */
27846 tmp1 = gen_reg_rtx (V8HImode);
27847 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
27848 /* Duplicate the low short through the whole low SImode word. */
27849 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
27850 /* Cast the V8HImode vector back to a V4SImode vector. */
27851 tmp2 = gen_reg_rtx (V4SImode);
27852 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27853 /* Replicate the low element of the V4SImode vector. */
27854 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27855 /* Cast the V4SImode vector back to V8HImode, and store in target. */
27856 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
27857 return true;
27859 smode = HImode;
27860 wsmode = SImode;
27861 wvmode = V4SImode;
27862 goto widen;
27863 case V16QImode:
27864 if (TARGET_SSE2)
27866 rtx tmp1, tmp2;
27867 /* Extend QImode to SImode using a paradoxical SUBREG. */
27868 tmp1 = gen_reg_rtx (SImode);
27869 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27870 /* Insert the SImode value as low element of V4SImode vector. */
27871 tmp2 = gen_reg_rtx (V4SImode);
27872 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27873 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27874 CONST0_RTX (V4SImode),
27875 const1_rtx);
27876 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27877 /* Cast the V4SImode vector back to a V16QImode vector. */
27878 tmp1 = gen_reg_rtx (V16QImode);
27879 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
27880 /* Duplicate the low byte through the whole low SImode word. */
27881 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27882 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27883 /* Cast the V16QImode vector back to a V4SImode vector. */
27884 tmp2 = gen_reg_rtx (V4SImode);
27885 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27886 /* Replicate the low element of the V4SImode vector. */
27887 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27888 /* Cast the V4SImode vector back to V16QImode, and store in target. */
27889 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
27890 return true;
27892 smode = QImode;
27893 wsmode = HImode;
27894 wvmode = V8HImode;
27895 goto widen;
27896 widen:
27897 /* Replicate the value once into the next wider mode and recurse. */
27898 val = convert_modes (wsmode, smode, val, true);
27899 x = expand_simple_binop (wsmode, ASHIFT, val,
27900 GEN_INT (GET_MODE_BITSIZE (smode)),
27901 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27902 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
27904 x = gen_reg_rtx (wvmode);
27905 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
27906 gcc_unreachable ();
27907 emit_move_insn (target, gen_lowpart (mode, x));
27908 return true;
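/* Example of the widening above: duplicating the QImode value 0xAB
   across V8QImode first forms the HImode value
   (0xAB << 8) | 0xAB = 0xABAB and recurses with V4HImode, which in
   turn widens to SImode 0xABABABAB and V2SImode. */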
27910 case V4DFmode:
27911 hmode = V2DFmode;
27912 goto half;
27913 case V4DImode:
27914 hmode = V2DImode;
27915 goto half;
27916 case V8SFmode:
27917 hmode = V4SFmode;
27918 goto half;
27919 case V8SImode:
27920 hmode = V4SImode;
27921 goto half;
27922 case V16HImode:
27923 hmode = V8HImode;
27924 goto half;
27925 case V32QImode:
27926 hmode = V16QImode;
27927 goto half;
27928 half:
27930 rtx tmp = gen_reg_rtx (hmode);
27931 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
27932 emit_insn (gen_rtx_SET (VOIDmode, target,
27933 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
27935 return true;
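/* The 256-bit AVX modes are handled by building the broadcast in a
   128-bit half and concatenating it with itself; e.g. a V8SFmode
   duplicate is a VEC_CONCAT of two identical V4SFmode duplicates. */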
27937 default:
27938 return false;
27942 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27943 whose ONE_VAR element is VAR, and other elements are zero. Return true
27944 if successful. */
27946 static bool
27947 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
27948 rtx target, rtx var, int one_var)
27950 enum machine_mode vsimode;
27951 rtx new_target;
27952 rtx x, tmp;
27953 bool use_vector_set = false;
27955 switch (mode)
27957 case V2DImode:
27958 /* For SSE4.1, we normally use vector set. But if the second
27959 element is zero and inter-unit moves are OK, we use movq
27960 instead. */
27961 use_vector_set = (TARGET_64BIT
27962 && TARGET_SSE4_1
27963 && !(TARGET_INTER_UNIT_MOVES
27964 && one_var == 0));
27965 break;
27966 case V16QImode:
27967 case V4SImode:
27968 case V4SFmode:
27969 use_vector_set = TARGET_SSE4_1;
27970 break;
27971 case V8HImode:
27972 use_vector_set = TARGET_SSE2;
27973 break;
27974 case V4HImode:
27975 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27976 break;
27977 case V32QImode:
27978 case V16HImode:
27979 case V8SImode:
27980 case V8SFmode:
27981 case V4DFmode:
27982 use_vector_set = TARGET_AVX;
27983 break;
27984 case V4DImode:
27985 /* Use ix86_expand_vector_set in 64bit mode only. */
27986 use_vector_set = TARGET_AVX && TARGET_64BIT;
27987 break;
27988 default:
27989 break;
27992 if (use_vector_set)
27994 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27995 var = force_reg (GET_MODE_INNER (mode), var);
27996 ix86_expand_vector_set (mmx_ok, target, var, one_var);
27997 return true;
28000 switch (mode)
28002 case V2SFmode:
28003 case V2SImode:
28004 if (!mmx_ok)
28005 return false;
28006 /* FALLTHRU */
28008 case V2DFmode:
28009 case V2DImode:
28010 if (one_var != 0)
28011 return false;
28012 var = force_reg (GET_MODE_INNER (mode), var);
28013 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
28014 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28015 return true;
28017 case V4SFmode:
28018 case V4SImode:
28019 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
28020 new_target = gen_reg_rtx (mode);
28021 else
28022 new_target = target;
28023 var = force_reg (GET_MODE_INNER (mode), var);
28024 x = gen_rtx_VEC_DUPLICATE (mode, var);
28025 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
28026 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
28027 if (one_var != 0)
28029 /* We need to shuffle the value to the correct position, so
28030 create a new pseudo to store the intermediate result. */
28032 /* With SSE2, we can use the integer shuffle insns. */
28033 if (mode != V4SFmode && TARGET_SSE2)
28035 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
28036 GEN_INT (1),
28037 GEN_INT (one_var == 1 ? 0 : 1),
28038 GEN_INT (one_var == 2 ? 0 : 1),
28039 GEN_INT (one_var == 3 ? 0 : 1)));
28040 if (target != new_target)
28041 emit_move_insn (target, new_target);
28042 return true;
28045 /* Otherwise convert the intermediate result to V4SFmode and
28046 use the SSE1 shuffle instructions. */
28047 if (mode != V4SFmode)
28049 tmp = gen_reg_rtx (V4SFmode);
28050 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
28052 else
28053 tmp = new_target;
28055 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
28056 GEN_INT (1),
28057 GEN_INT (one_var == 1 ? 0 : 1),
28058 GEN_INT (one_var == 2 ? 0+4 : 1+4),
28059 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
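/* In the shuffle expanders used here, selectors 0..3 pick elements
   from the first source operand and 4..7 (written N+4 above) pick
   from the second, matching the two-operand semantics of the shufps
   instruction. */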
28061 if (mode != V4SFmode)
28062 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
28063 else if (tmp != target)
28064 emit_move_insn (target, tmp);
28066 else if (target != new_target)
28067 emit_move_insn (target, new_target);
28068 return true;
28070 case V8HImode:
28071 case V16QImode:
28072 vsimode = V4SImode;
28073 goto widen;
28074 case V4HImode:
28075 case V8QImode:
28076 if (!mmx_ok)
28077 return false;
28078 vsimode = V2SImode;
28079 goto widen;
28080 widen:
28081 if (one_var != 0)
28082 return false;
28084 /* Zero extend the variable element to SImode and recurse. */
28085 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
28087 x = gen_reg_rtx (vsimode);
28088 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
28089 var, one_var))
28090 gcc_unreachable ();
28092 emit_move_insn (target, gen_lowpart (mode, x));
28093 return true;
28095 default:
28096 return false;
28100 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28101 consisting of the values in VALS. It is known that all elements
28102 except ONE_VAR are constants. Return true if successful. */
28104 static bool
28105 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
28106 rtx target, rtx vals, int one_var)
28108 rtx var = XVECEXP (vals, 0, one_var);
28109 enum machine_mode wmode;
28110 rtx const_vec, x;
28112 const_vec = copy_rtx (vals);
28113 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
28114 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
28116 switch (mode)
28118 case V2DFmode:
28119 case V2DImode:
28120 case V2SFmode:
28121 case V2SImode:
28122 /* For the two element vectors, it's just as easy to use
28123 the general case. */
28124 return false;
28126 case V4DImode:
28127 /* Use ix86_expand_vector_set in 64bit mode only. */
28128 if (!TARGET_64BIT)
28129 return false;
28130 case V4DFmode:
28131 case V8SFmode:
28132 case V8SImode:
28133 case V16HImode:
28134 case V32QImode:
28135 case V4SFmode:
28136 case V4SImode:
28137 case V8HImode:
28138 case V4HImode:
28139 break;
28141 case V16QImode:
28142 if (TARGET_SSE4_1)
28143 break;
28144 wmode = V8HImode;
28145 goto widen;
28146 case V8QImode:
28147 wmode = V4HImode;
28148 goto widen;
28149 widen:
28150 /* There's no way to set one QImode entry easily. Combine
28151 the variable value with its adjacent constant value, and
28152 promote to an HImode set. */
28153 x = XVECEXP (vals, 0, one_var ^ 1);
28154 if (one_var & 1)
28156 var = convert_modes (HImode, QImode, var, true);
28157 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
28158 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28159 x = GEN_INT (INTVAL (x) & 0xff);
28161 else
28163 var = convert_modes (HImode, QImode, var, true);
28164 x = gen_int_mode (INTVAL (x) << 8, HImode);
28166 if (x != const0_rtx)
28167 var = expand_simple_binop (HImode, IOR, var, x, var,
28168 1, OPTAB_LIB_WIDEN);
28170 x = gen_reg_rtx (wmode);
28171 emit_move_insn (x, gen_lowpart (wmode, const_vec));
28172 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
28174 emit_move_insn (target, gen_lowpart (mode, x));
28175 return true;
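/* Worked example for the combine above (illustrative): with
   one_var == 3 in V8QImode, the variable byte is shifted into the
   high half of an HImode word, the adjacent constant byte vals[2]
   fills the low half, and the pair is stored as element
   one_var >> 1 == 1 of the widened V4HImode vector. */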
28177 default:
28178 return false;
28181 emit_move_insn (target, const_vec);
28182 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28183 return true;
28186 /* A subroutine of ix86_expand_vector_init_general. Use vector
28187 concatenate to handle the most general case: all values variable,
28188 and none identical. */
28190 static void
28191 ix86_expand_vector_init_concat (enum machine_mode mode,
28192 rtx target, rtx *ops, int n)
28194 enum machine_mode cmode, hmode = VOIDmode;
28195 rtx first[8], second[4];
28196 rtvec v;
28197 int i, j;
28199 switch (n)
28201 case 2:
28202 switch (mode)
28204 case V8SImode:
28205 cmode = V4SImode;
28206 break;
28207 case V8SFmode:
28208 cmode = V4SFmode;
28209 break;
28210 case V4DImode:
28211 cmode = V2DImode;
28212 break;
28213 case V4DFmode:
28214 cmode = V2DFmode;
28215 break;
28216 case V4SImode:
28217 cmode = V2SImode;
28218 break;
28219 case V4SFmode:
28220 cmode = V2SFmode;
28221 break;
28222 case V2DImode:
28223 cmode = DImode;
28224 break;
28225 case V2SImode:
28226 cmode = SImode;
28227 break;
28228 case V2DFmode:
28229 cmode = DFmode;
28230 break;
28231 case V2SFmode:
28232 cmode = SFmode;
28233 break;
28234 default:
28235 gcc_unreachable ();
28238 if (!register_operand (ops[1], cmode))
28239 ops[1] = force_reg (cmode, ops[1]);
28240 if (!register_operand (ops[0], cmode))
28241 ops[0] = force_reg (cmode, ops[0]);
28242 emit_insn (gen_rtx_SET (VOIDmode, target,
28243 gen_rtx_VEC_CONCAT (mode, ops[0],
28244 ops[1])));
28245 break;
28247 case 4:
28248 switch (mode)
28250 case V4DImode:
28251 cmode = V2DImode;
28252 break;
28253 case V4DFmode:
28254 cmode = V2DFmode;
28255 break;
28256 case V4SImode:
28257 cmode = V2SImode;
28258 break;
28259 case V4SFmode:
28260 cmode = V2SFmode;
28261 break;
28262 default:
28263 gcc_unreachable ();
28265 goto half;
28267 case 8:
28268 switch (mode)
28270 case V8SImode:
28271 cmode = V2SImode;
28272 hmode = V4SImode;
28273 break;
28274 case V8SFmode:
28275 cmode = V2SFmode;
28276 hmode = V4SFmode;
28277 break;
28278 default:
28279 gcc_unreachable ();
28281 goto half;
28283 half:
28284 /* FIXME: We process inputs backward to help RA. PR 36222. */
28285 i = n - 1;
28286 j = (n >> 1) - 1;
28287 for (; i > 0; i -= 2, j--)
28289 first[j] = gen_reg_rtx (cmode);
28290 v = gen_rtvec (2, ops[i - 1], ops[i]);
28291 ix86_expand_vector_init (false, first[j],
28292 gen_rtx_PARALLEL (cmode, v));
28295 n >>= 1;
28296 if (n > 2)
28298 gcc_assert (hmode != VOIDmode);
28299 for (i = j = 0; i < n; i += 2, j++)
28301 second[j] = gen_reg_rtx (hmode);
28302 ix86_expand_vector_init_concat (hmode, second [j],
28303 &first [i], 2);
28305 n >>= 1;
28306 ix86_expand_vector_init_concat (mode, target, second, n);
28308 else
28309 ix86_expand_vector_init_concat (mode, target, first, n);
28310 break;
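/* E.g. building an 8-element V8SFmode vector first pairs the
   scalars into four V2SFmode registers, pairs those into two
   V4SFmode halves, and finally concatenates the halves into the
   full vector. */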
28312 default:
28313 gcc_unreachable ();
28317 /* A subroutine of ix86_expand_vector_init_general. Use vector
28318 interleave to handle the most general case: all values variable,
28319 and none identical. */
28321 static void
28322 ix86_expand_vector_init_interleave (enum machine_mode mode,
28323 rtx target, rtx *ops, int n)
28325 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
28326 int i, j;
28327 rtx op0, op1;
28328 rtx (*gen_load_even) (rtx, rtx, rtx);
28329 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
28330 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
28332 switch (mode)
28334 case V8HImode:
28335 gen_load_even = gen_vec_setv8hi;
28336 gen_interleave_first_low = gen_vec_interleave_lowv4si;
28337 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28338 inner_mode = HImode;
28339 first_imode = V4SImode;
28340 second_imode = V2DImode;
28341 third_imode = VOIDmode;
28342 break;
28343 case V16QImode:
28344 gen_load_even = gen_vec_setv16qi;
28345 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
28346 gen_interleave_second_low = gen_vec_interleave_lowv4si;
28347 inner_mode = QImode;
28348 first_imode = V8HImode;
28349 second_imode = V4SImode;
28350 third_imode = V2DImode;
28351 break;
28352 default:
28353 gcc_unreachable ();
28356 for (i = 0; i < n; i++)
28358 /* Extend the odd element to SImode using a paradoxical SUBREG. */
28359 op0 = gen_reg_rtx (SImode);
28360 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
28362 /* Insert the SImode value as low element of V4SImode vector. */
28363 op1 = gen_reg_rtx (V4SImode);
28364 op0 = gen_rtx_VEC_MERGE (V4SImode,
28365 gen_rtx_VEC_DUPLICATE (V4SImode,
28366 op0),
28367 CONST0_RTX (V4SImode),
28368 const1_rtx);
28369 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
28371 /* Cast the V4SImode vector back to a vector in original mode. */
28372 op0 = gen_reg_rtx (mode);
28373 emit_move_insn (op0, gen_lowpart (mode, op1));
28375 /* Load even elements into the second position. */
28376 emit_insn ((*gen_load_even) (op0,
28377 force_reg (inner_mode,
28378 ops [i + i + 1]),
28379 const1_rtx));
28381 /* Cast vector to FIRST_IMODE vector. */
28382 ops[i] = gen_reg_rtx (first_imode);
28383 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
28386 /* Interleave low FIRST_IMODE vectors. */
28387 for (i = j = 0; i < n; i += 2, j++)
28389 op0 = gen_reg_rtx (first_imode);
28390 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
28392 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
28393 ops[j] = gen_reg_rtx (second_imode);
28394 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
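/* At this point each ops[j] holds four adjacent input elements:
   they were loaded pairwise into vector registers above and
   interleaved once here, so e.g. for V16QImode the sixteen bytes
   now occupy four V4SImode registers. */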
28397 /* Interleave low SECOND_IMODE vectors. */
28398 switch (second_imode)
28400 case V4SImode:
28401 for (i = j = 0; i < n / 2; i += 2, j++)
28403 op0 = gen_reg_rtx (second_imode);
28404 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
28405 ops[i + 1]));
28407 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
28408 vector. */
28409 ops[j] = gen_reg_rtx (third_imode);
28410 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
28412 second_imode = V2DImode;
28413 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28414 /* FALLTHRU */
28416 case V2DImode:
28417 op0 = gen_reg_rtx (second_imode);
28418 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
28419 ops[1]));
28421 /* Cast the SECOND_IMODE vector back to a vector in the original
28422 mode. */
28423 emit_insn (gen_rtx_SET (VOIDmode, target,
28424 gen_lowpart (mode, op0)));
28425 break;
28427 default:
28428 gcc_unreachable ();
28432 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
28433 all values variable, and none identical. */
28435 static void
28436 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
28437 rtx target, rtx vals)
28439 rtx ops[32], op0, op1;
28440 enum machine_mode half_mode = VOIDmode;
28441 int n, i;
28443 switch (mode)
28445 case V2SFmode:
28446 case V2SImode:
28447 if (!mmx_ok && !TARGET_SSE)
28448 break;
28449 /* FALLTHRU */
28451 case V8SFmode:
28452 case V8SImode:
28453 case V4DFmode:
28454 case V4DImode:
28455 case V4SFmode:
28456 case V4SImode:
28457 case V2DFmode:
28458 case V2DImode:
28459 n = GET_MODE_NUNITS (mode);
28460 for (i = 0; i < n; i++)
28461 ops[i] = XVECEXP (vals, 0, i);
28462 ix86_expand_vector_init_concat (mode, target, ops, n);
28463 return;
28465 case V32QImode:
28466 half_mode = V16QImode;
28467 goto half;
28469 case V16HImode:
28470 half_mode = V8HImode;
28471 goto half;
28473 half:
28474 n = GET_MODE_NUNITS (mode);
28475 for (i = 0; i < n; i++)
28476 ops[i] = XVECEXP (vals, 0, i);
28477 op0 = gen_reg_rtx (half_mode);
28478 op1 = gen_reg_rtx (half_mode);
28479 ix86_expand_vector_init_interleave (half_mode, op0, ops,
28480 n >> 2);
28481 ix86_expand_vector_init_interleave (half_mode, op1,
28482 &ops [n >> 1], n >> 2);
28483 emit_insn (gen_rtx_SET (VOIDmode, target,
28484 gen_rtx_VEC_CONCAT (mode, op0, op1)));
28485 return;
28487 case V16QImode:
28488 if (!TARGET_SSE4_1)
28489 break;
28490 /* FALLTHRU */
28492 case V8HImode:
28493 if (!TARGET_SSE2)
28494 break;
28496 /* Don't use ix86_expand_vector_init_interleave if we can't
28497 move from GPR to SSE register directly. */
28498 if (!TARGET_INTER_UNIT_MOVES)
28499 break;
28501 n = GET_MODE_NUNITS (mode);
28502 for (i = 0; i < n; i++)
28503 ops[i] = XVECEXP (vals, 0, i);
28504 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
28505 return;
28507 case V4HImode:
28508 case V8QImode:
28509 break;
28511 default:
28512 gcc_unreachable ();
28516 int i, j, n_elts, n_words, n_elt_per_word;
28517 enum machine_mode inner_mode;
28518 rtx words[4], shift;
28520 inner_mode = GET_MODE_INNER (mode);
28521 n_elts = GET_MODE_NUNITS (mode);
28522 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
28523 n_elt_per_word = n_elts / n_words;
28524 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
28526 for (i = 0; i < n_words; ++i)
28528 rtx word = NULL_RTX;
28530 for (j = 0; j < n_elt_per_word; ++j)
28532 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
28533 elt = convert_modes (word_mode, inner_mode, elt, true);
28535 if (j == 0)
28536 word = elt;
28537 else
28539 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
28540 word, 1, OPTAB_LIB_WIDEN);
28541 word = expand_simple_binop (word_mode, IOR, word, elt,
28542 word, 1, OPTAB_LIB_WIDEN);
28546 words[i] = word;
28549 if (n_words == 1)
28550 emit_move_insn (target, gen_lowpart (mode, words[0]));
28551 else if (n_words == 2)
28553 rtx tmp = gen_reg_rtx (mode);
28554 emit_clobber (tmp);
28555 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
28556 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
28557 emit_move_insn (target, tmp);
28559 else if (n_words == 4)
28561 rtx tmp = gen_reg_rtx (V4SImode);
28562 gcc_assert (word_mode == SImode);
28563 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28564 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28565 emit_move_insn (target, gen_lowpart (mode, tmp));
28567 else
28568 gcc_unreachable ();
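/* This fallback assembles the vector in integer registers: each
   word-sized chunk is built by shifting previously placed elements
   up and IOR-ing the next one in, and the finished words are then
   transferred into the vector register directly, via low/high
   parts, or through a recursive V4SImode build. */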
28572 /* Initialize vector TARGET via VALS. Suppress the use of MMX
28573 instructions unless MMX_OK is true. */
28575 void
28576 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28578 enum machine_mode mode = GET_MODE (target);
28579 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28580 int n_elts = GET_MODE_NUNITS (mode);
28581 int n_var = 0, one_var = -1;
28582 bool all_same = true, all_const_zero = true;
28583 int i;
28584 rtx x;
28586 for (i = 0; i < n_elts; ++i)
28588 x = XVECEXP (vals, 0, i);
28589 if (!(CONST_INT_P (x)
28590 || GET_CODE (x) == CONST_DOUBLE
28591 || GET_CODE (x) == CONST_FIXED))
28592 n_var++, one_var = i;
28593 else if (x != CONST0_RTX (inner_mode))
28594 all_const_zero = false;
28595 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28596 all_same = false;
28599 /* Constants are best loaded from the constant pool. */
28600 if (n_var == 0)
28602 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28603 return;
28606 /* If all values are identical, broadcast the value. */
28607 if (all_same
28608 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28609 XVECEXP (vals, 0, 0)))
28610 return;
28612 /* Values where only one field is non-constant are best loaded from
28613 the pool and overwritten via move later. */
28614 if (n_var == 1)
28616 if (all_const_zero
28617 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28618 XVECEXP (vals, 0, one_var),
28619 one_var))
28620 return;
28622 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
28623 return;
28626 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
28629 void
28630 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28632 enum machine_mode mode = GET_MODE (target);
28633 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28634 enum machine_mode half_mode;
28635 bool use_vec_merge = false;
28636 rtx tmp;
28637 static rtx (*gen_extract[6][2]) (rtx, rtx)
28639 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28640 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28641 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28642 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28643 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28644 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28646 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28648 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28649 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28650 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28651 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28652 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28653 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
28655 int i, j, n;
28657 switch (mode)
28659 case V2SFmode:
28660 case V2SImode:
28661 if (mmx_ok)
28663 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28664 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28665 if (elt == 0)
28666 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28667 else
28668 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28669 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28670 return;
28672 break;
28674 case V2DImode:
28675 use_vec_merge = TARGET_SSE4_1;
28676 if (use_vec_merge)
28677 break;
28679 case V2DFmode:
28681 rtx op0, op1;
28683 /* For the two element vectors, we implement a VEC_CONCAT with
28684 the extraction of the other element. */
28686 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28687 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28689 if (elt == 0)
28690 op0 = val, op1 = tmp;
28691 else
28692 op0 = tmp, op1 = val;
28694 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28695 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28697 return;
28699 case V4SFmode:
28700 use_vec_merge = TARGET_SSE4_1;
28701 if (use_vec_merge)
28702 break;
28704 switch (elt)
28706 case 0:
28707 use_vec_merge = true;
28708 break;
28710 case 1:
28711 /* tmp = target = A B C D */
28712 tmp = copy_to_reg (target);
28713 /* target = A A B B */
28714 emit_insn (gen_sse_unpcklps (target, target, target));
28715 /* target = X A B B */
28716 ix86_expand_vector_set (false, target, val, 0);
28717 /* target = A X C D */
28718 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28719 GEN_INT (1), GEN_INT (0),
28720 GEN_INT (2+4), GEN_INT (3+4)));
28721 return;
28723 case 2:
28724 /* tmp = target = A B C D */
28725 tmp = copy_to_reg (target);
28726 /* tmp = X B C D */
28727 ix86_expand_vector_set (false, tmp, val, 0);
28728 /* target = A B X D */
28729 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28730 GEN_INT (0), GEN_INT (1),
28731 GEN_INT (0+4), GEN_INT (3+4)));
28732 return;
28734 case 3:
28735 /* tmp = target = A B C D */
28736 tmp = copy_to_reg (target);
28737 /* tmp = X B C D */
28738 ix86_expand_vector_set (false, tmp, val, 0);
28739 /* target = A B C X */
28740 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28741 GEN_INT (0), GEN_INT (1),
28742 GEN_INT (2+4), GEN_INT (0+4)));
28743 return;
28745 default:
28746 gcc_unreachable ();
28748 break;
28750 case V4SImode:
28751 use_vec_merge = TARGET_SSE4_1;
28752 if (use_vec_merge)
28753 break;
28755 /* Element 0 handled by vec_merge below. */
28756 if (elt == 0)
28758 use_vec_merge = true;
28759 break;
28762 if (TARGET_SSE2)
28764 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28765 store into element 0, then shuffle them back. */
28767 rtx order[4];
28769 order[0] = GEN_INT (elt);
28770 order[1] = const1_rtx;
28771 order[2] = const2_rtx;
28772 order[3] = GEN_INT (3);
28773 order[elt] = const0_rtx;
28775 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28776 order[1], order[2], order[3]));
28778 ix86_expand_vector_set (false, target, val, 0);
28780 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28781 order[1], order[2], order[3]));
28783 else
28785 /* For SSE1, we have to reuse the V4SF code. */
28786 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28787 gen_lowpart (SFmode, val), elt);
28789 return;
28791 case V8HImode:
28792 use_vec_merge = TARGET_SSE2;
28793 break;
28794 case V4HImode:
28795 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28796 break;
28798 case V16QImode:
28799 use_vec_merge = TARGET_SSE4_1;
28800 break;
28802 case V8QImode:
28803 break;
28805 case V32QImode:
28806 half_mode = V16QImode;
28807 j = 0;
28808 n = 16;
28809 goto half;
28811 case V16HImode:
28812 half_mode = V8HImode;
28813 j = 1;
28814 n = 8;
28815 goto half;
28817 case V8SImode:
28818 half_mode = V4SImode;
28819 j = 2;
28820 n = 4;
28821 goto half;
28823 case V4DImode:
28824 half_mode = V2DImode;
28825 j = 3;
28826 n = 2;
28827 goto half;
28829 case V8SFmode:
28830 half_mode = V4SFmode;
28831 j = 4;
28832 n = 4;
28833 goto half;
28835 case V4DFmode:
28836 half_mode = V2DFmode;
28837 j = 5;
28838 n = 2;
28839 goto half;
28841 half:
28842 /* Compute offset. */
28843 i = elt / n;
28844 elt %= n;
28846 gcc_assert (i <= 1);
28848 /* Extract the half. */
28849 tmp = gen_reg_rtx (half_mode);
28850 emit_insn ((*gen_extract[j][i]) (tmp, target));
28852 /* Put val in tmp at elt. */
28853 ix86_expand_vector_set (false, tmp, val, elt);
28855 /* Put it back. */
28856 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
28857 return;
28859 default:
28860 break;
28863 if (use_vec_merge)
28865 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28866 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28867 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28869 else
28871 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28873 emit_move_insn (mem, target);
28875 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28876 emit_move_insn (tmp, val);
28878 emit_move_insn (target, mem);
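/* This memory round trip is the fully general fallback: spill the
   vector to a stack temporary, store VAL over the ELT'th element in
   memory, and reload the whole vector. It is correct for every mode
   but slower than any of the shuffle-based paths above. */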
28882 void
28883 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28885 enum machine_mode mode = GET_MODE (vec);
28886 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28887 bool use_vec_extr = false;
28888 rtx tmp;
28890 switch (mode)
28892 case V2SImode:
28893 case V2SFmode:
28894 if (!mmx_ok)
28895 break;
28896 /* FALLTHRU */
28898 case V2DFmode:
28899 case V2DImode:
28900 use_vec_extr = true;
28901 break;
28903 case V4SFmode:
28904 use_vec_extr = TARGET_SSE4_1;
28905 if (use_vec_extr)
28906 break;
28908 switch (elt)
28910 case 0:
28911 tmp = vec;
28912 break;
28914 case 1:
28915 case 3:
28916 tmp = gen_reg_rtx (mode);
28917 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
28918 GEN_INT (elt), GEN_INT (elt),
28919 GEN_INT (elt+4), GEN_INT (elt+4)));
28920 break;
28922 case 2:
28923 tmp = gen_reg_rtx (mode);
28924 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
28925 break;
28927 default:
28928 gcc_unreachable ();
28930 vec = tmp;
28931 use_vec_extr = true;
28932 elt = 0;
28933 break;
28935 case V4SImode:
28936 use_vec_extr = TARGET_SSE4_1;
28937 if (use_vec_extr)
28938 break;
28940 if (TARGET_SSE2)
28942 switch (elt)
28944 case 0:
28945 tmp = vec;
28946 break;
28948 case 1:
28949 case 3:
28950 tmp = gen_reg_rtx (mode);
28951 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
28952 GEN_INT (elt), GEN_INT (elt),
28953 GEN_INT (elt), GEN_INT (elt)));
28954 break;
28956 case 2:
28957 tmp = gen_reg_rtx (mode);
28958 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
28959 break;
28961 default:
28962 gcc_unreachable ();
28964 vec = tmp;
28965 use_vec_extr = true;
28966 elt = 0;
28968 else
28970 /* For SSE1, we have to reuse the V4SF code. */
28971 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28972 gen_lowpart (V4SFmode, vec), elt);
28973 return;
28975 break;
28977 case V8HImode:
28978 use_vec_extr = TARGET_SSE2;
28979 break;
28980 case V4HImode:
28981 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28982 break;
28984 case V16QImode:
28985 use_vec_extr = TARGET_SSE4_1;
28986 break;
28988 case V8QImode:
28989 /* ??? Could extract the appropriate HImode element and shift. */
28990 default:
28991 break;
28994 if (use_vec_extr)
28996 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28997 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28999 /* Let the rtl optimizers know about the zero extension performed. */
29000 if (inner_mode == QImode || inner_mode == HImode)
29002 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
29003 target = gen_lowpart (SImode, target);
29006 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29008 else
29010 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29012 emit_move_insn (mem, vec);
29014 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29015 emit_move_insn (target, tmp);
29019 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
29020 pattern to reduce; DEST is the destination; IN is the input vector. */
29022 void
29023 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
29025 rtx tmp1, tmp2, tmp3;
29027 tmp1 = gen_reg_rtx (V4SFmode);
29028 tmp2 = gen_reg_rtx (V4SFmode);
29029 tmp3 = gen_reg_rtx (V4SFmode);
29031 emit_insn (gen_sse_movhlps (tmp1, in, in));
29032 emit_insn (fn (tmp2, tmp1, in));
29034 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
29035 GEN_INT (1), GEN_INT (1),
29036 GEN_INT (1+4), GEN_INT (1+4)));
29037 emit_insn (fn (dest, tmp2, tmp3));
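/* For example, if FN generates the V4SFmode addition, IN = {a,b,c,d}
   gives tmp1 = {c,d,c,d} (movhlps), tmp2 = {a+c,b+d,...}, tmp3 a
   broadcast of element 1 of tmp2, and hence a+b+c+d in element 0 of
   DEST. */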
29040 /* Target hook for scalar_mode_supported_p. */
29041 static bool
29042 ix86_scalar_mode_supported_p (enum machine_mode mode)
29044 if (DECIMAL_FLOAT_MODE_P (mode))
29045 return true;
29046 else if (mode == TFmode)
29047 return true;
29048 else
29049 return default_scalar_mode_supported_p (mode);
29052 /* Implements target hook vector_mode_supported_p. */
29053 static bool
29054 ix86_vector_mode_supported_p (enum machine_mode mode)
29056 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
29057 return true;
29058 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
29059 return true;
29060 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
29061 return true;
29062 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
29063 return true;
29064 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
29065 return true;
29066 return false;
29069 /* Target hook for c_mode_for_suffix. */
29070 static enum machine_mode
29071 ix86_c_mode_for_suffix (char suffix)
29073 if (suffix == 'q')
29074 return TFmode;
29075 if (suffix == 'w')
29076 return XFmode;
29078 return VOIDmode;
29081 /* Worker function for TARGET_MD_ASM_CLOBBERS.
29083 We do this in the new i386 backend to maintain source compatibility
29084 with the old cc0-based compiler. */
29086 static tree
29087 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
29088 tree inputs ATTRIBUTE_UNUSED,
29089 tree clobbers)
29091 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
29092 clobbers);
29093 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
29094 clobbers);
29095 return clobbers;
29098 /* Implements target vector targetm.asm.encode_section_info. This
29099 is not used by NetWare. */
29101 static void ATTRIBUTE_UNUSED
29102 ix86_encode_section_info (tree decl, rtx rtl, int first)
29104 default_encode_section_info (decl, rtl, first);
29106 if (TREE_CODE (decl) == VAR_DECL
29107 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
29108 && ix86_in_large_data_p (decl))
29109 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
29112 /* Worker function for REVERSE_CONDITION. */
29114 enum rtx_code
29115 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
29117 return (mode != CCFPmode && mode != CCFPUmode
29118 ? reverse_condition (code)
29119 : reverse_condition_maybe_unordered (code));
29122 /* Output code to perform an x87 FP register move, from OPERANDS[1]
29123 to OPERANDS[0]. */
29125 const char *
29126 output_387_reg_move (rtx insn, rtx *operands)
29128 if (REG_P (operands[0]))
29130 if (REG_P (operands[1])
29131 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29133 if (REGNO (operands[0]) == FIRST_STACK_REG)
29134 return output_387_ffreep (operands, 0);
29135 return "fstp\t%y0";
29137 if (STACK_TOP_P (operands[0]))
29138 return "fld%Z1\t%y1";
29139 return "fst\t%y0";
29141 else if (MEM_P (operands[0]))
29143 gcc_assert (REG_P (operands[1]));
29144 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29145 return "fstp%Z0\t%y0";
29146 else
29148 /* There is no non-popping store to memory for XFmode.
29149 So if we need one, follow the store with a load. */
29150 if (GET_MODE (operands[0]) == XFmode)
29151 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
29152 else
29153 return "fst%Z0\t%y0";
29156 else
29157 gcc_unreachable ();
29160 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
29161 the FP status register is set. */
29163 void
29164 ix86_emit_fp_unordered_jump (rtx label)
29166 rtx reg = gen_reg_rtx (HImode);
29167 rtx temp;
29169 emit_insn (gen_x86_fnstsw_1 (reg));
29171 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
29173 emit_insn (gen_x86_sahf_1 (reg));
29175 temp = gen_rtx_REG (CCmode, FLAGS_REG);
29176 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
29178 else
29180 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
29182 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
29183 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
29186 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
29187 gen_rtx_LABEL_REF (VOIDmode, label),
29188 pc_rtx);
29189 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
29191 emit_jump_insn (temp);
29192 predict_jump (REG_BR_PROB_BASE * 10 / 100);
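/* The C2 condition flag is bit 10 of the x87 status word. With SAHF
   the status flags are copied into EFLAGS and tested as "unordered";
   otherwise we test bit 2 (mask 0x04) of the status word's high byte
   directly. */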
29195 /* Output code to perform a log1p XFmode calculation. */
29197 void ix86_emit_i387_log1p (rtx op0, rtx op1)
29199 rtx label1 = gen_label_rtx ();
29200 rtx label2 = gen_label_rtx ();
29202 rtx tmp = gen_reg_rtx (XFmode);
29203 rtx tmp2 = gen_reg_rtx (XFmode);
29204 rtx test;
29206 emit_insn (gen_absxf2 (tmp, op1));
29207 test = gen_rtx_GE (VOIDmode, tmp,
29208 CONST_DOUBLE_FROM_REAL_VALUE (
29209 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
29210 XFmode));
29211 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
29213 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29214 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
29215 emit_jump (label2);
29217 emit_label (label1);
29218 emit_move_insn (tmp, CONST1_RTX (XFmode));
29219 emit_insn (gen_addxf3 (tmp, op1, tmp));
29220 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29221 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
29223 emit_label (label2);
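/* The threshold 0.29289... is 1 - sqrt(2)/2, the limit up to which
   the fyl2xp1 instruction is specified to be accurate. Below it we
   compute log1p (x) as ln (2) * fyl2xp1 (x); above it we fall back
   to fyl2x of 1 + x. */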
29226 /* Output code to perform a Newton-Raphson approximation of a single precision
29227 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
29229 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
29231 rtx x0, x1, e0, e1, two;
29233 x0 = gen_reg_rtx (mode);
29234 e0 = gen_reg_rtx (mode);
29235 e1 = gen_reg_rtx (mode);
29236 x1 = gen_reg_rtx (mode);
29238 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
29240 if (VECTOR_MODE_P (mode))
29241 two = ix86_build_const_vector (SFmode, true, two);
29243 two = force_reg (mode, two);
29245 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
29247 /* x0 = rcp(b) estimate */
29248 emit_insn (gen_rtx_SET (VOIDmode, x0,
29249 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
29250 UNSPEC_RCP)));
29251 /* e0 = x0 * b */
29252 emit_insn (gen_rtx_SET (VOIDmode, e0,
29253 gen_rtx_MULT (mode, x0, b)));
29254 /* e1 = 2. - e0 */
29255 emit_insn (gen_rtx_SET (VOIDmode, e1,
29256 gen_rtx_MINUS (mode, two, e0)));
29257 /* x1 = x0 * e1 */
29258 emit_insn (gen_rtx_SET (VOIDmode, x1,
29259 gen_rtx_MULT (mode, x0, e1)));
29260 /* res = a * x1 */
29261 emit_insn (gen_rtx_SET (VOIDmode, res,
29262 gen_rtx_MULT (mode, a, x1)));
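/* One Newton-Raphson step for 1/b refines the estimate x0 as
   x1 = x0 * (2 - b * x0), roughly doubling the ~12 significant bits
   delivered by the rcpss/rcpps estimate; the final multiplication by
   a forms the quotient. */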
29265 /* Output code to perform a Newton-Raphson approximation of a
29266 single precision floating point [reciprocal] square root. */
29268 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
29269 bool recip)
29271 rtx x0, e0, e1, e2, e3, mthree, mhalf;
29272 REAL_VALUE_TYPE r;
29274 x0 = gen_reg_rtx (mode);
29275 e0 = gen_reg_rtx (mode);
29276 e1 = gen_reg_rtx (mode);
29277 e2 = gen_reg_rtx (mode);
29278 e3 = gen_reg_rtx (mode);
29280 real_from_integer (&r, VOIDmode, -3, -1, 0);
29281 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29283 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
29284 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29286 if (VECTOR_MODE_P (mode))
29288 mthree = ix86_build_const_vector (SFmode, true, mthree);
29289 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
29292 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
29293 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
29295 /* x0 = rsqrt(a) estimate */
29296 emit_insn (gen_rtx_SET (VOIDmode, x0,
29297 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
29298 UNSPEC_RSQRT)));
29300 /* If a == 0.0, filter out the infinite estimate to prevent a NaN result for sqrt(0.0). */
29301 if (!recip)
29303 rtx zero, mask;
29305 zero = gen_reg_rtx (mode);
29306 mask = gen_reg_rtx (mode);
29308 zero = force_reg (mode, CONST0_RTX(mode));
29309 emit_insn (gen_rtx_SET (VOIDmode, mask,
29310 gen_rtx_NE (mode, zero, a)));
29312 emit_insn (gen_rtx_SET (VOIDmode, x0,
29313 gen_rtx_AND (mode, x0, mask)));
29316 /* e0 = x0 * a */
29317 emit_insn (gen_rtx_SET (VOIDmode, e0,
29318 gen_rtx_MULT (mode, x0, a)));
29319 /* e1 = e0 * x0 */
29320 emit_insn (gen_rtx_SET (VOIDmode, e1,
29321 gen_rtx_MULT (mode, e0, x0)));
29323 /* e2 = e1 - 3. */
29324 mthree = force_reg (mode, mthree);
29325 emit_insn (gen_rtx_SET (VOIDmode, e2,
29326 gen_rtx_PLUS (mode, e1, mthree)));
29328 mhalf = force_reg (mode, mhalf);
29329 if (recip)
29330 /* e3 = -.5 * x0 */
29331 emit_insn (gen_rtx_SET (VOIDmode, e3,
29332 gen_rtx_MULT (mode, x0, mhalf)));
29333 else
29334 /* e3 = -.5 * e0 */
29335 emit_insn (gen_rtx_SET (VOIDmode, e3,
29336 gen_rtx_MULT (mode, e0, mhalf)));
29337 /* ret = e2 * e3 */
29338 emit_insn (gen_rtx_SET (VOIDmode, res,
29339 gen_rtx_MULT (mode, e2, e3)));
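/* The Newton-Raphson step for 1/sqrt(a) used above is
   x1 = -0.5 * x0 * (a * x0 * x0 - 3.0), computed as e2 * e3. In the
   !recip case the extra factor of a needed for sqrt(a) = a * rsqrt(a)
   is already folded into e0 = a * x0. */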
29342 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
29344 static void ATTRIBUTE_UNUSED
29345 i386_solaris_elf_named_section (const char *name, unsigned int flags,
29346 tree decl)
29348 /* With Binutils 2.15, the "@unwind" marker must be specified on
29349 every occurrence of the ".eh_frame" section, not just the first
29350 one. */
29351 if (TARGET_64BIT
29352 && strcmp (name, ".eh_frame") == 0)
29354 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
29355 flags & SECTION_WRITE ? "aw" : "a");
29356 return;
29358 default_elf_asm_named_section (name, flags, decl);
29361 /* Return the mangling of TYPE if it is an extended fundamental type. */
29363 static const char *
29364 ix86_mangle_type (const_tree type)
29366 type = TYPE_MAIN_VARIANT (type);
29368 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
29369 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
29370 return NULL;
29372 switch (TYPE_MODE (type))
29374 case TFmode:
29375 /* __float128 is "g". */
29376 return "g";
29377 case XFmode:
29378 /* "long double" or __float80 is "e". */
29379 return "e";
29380 default:
29381 return NULL;
29385 /* For 32-bit code we can save PIC register setup by using
29386 the __stack_chk_fail_local hidden function instead of calling
29387 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
29388 register, so it is better to call __stack_chk_fail directly. */
29390 static tree
29391 ix86_stack_protect_fail (void)
29393 return TARGET_64BIT
29394 ? default_external_stack_protect_fail ()
29395 : default_hidden_stack_protect_fail ();
29398 /* Select a format to encode pointers in exception handling data. CODE
29399 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
29400 true if the symbol may be affected by dynamic relocations.
29402 ??? All x86 object file formats are capable of representing this.
29403 After all, the relocation needed is the same as for the call insn.
29404 Whether or not a particular assembler allows us to enter such, I
29405 guess we'll have to see. */
29406 int
29407 asm_preferred_eh_data_format (int code, int global)
29409 if (flag_pic)
29411 int type = DW_EH_PE_sdata8;
29412 if (!TARGET_64BIT
29413 || ix86_cmodel == CM_SMALL_PIC
29414 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
29415 type = DW_EH_PE_sdata4;
29416 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
29418 if (ix86_cmodel == CM_SMALL
29419 || (ix86_cmodel == CM_MEDIUM && code))
29420 return DW_EH_PE_udata4;
29421 return DW_EH_PE_absptr;
29424 /* Expand copysign from SIGN to the positive value ABS_VALUE,
29425 storing the result in RESULT. If MASK is non-null, it is the mask
29426 used to mask out the sign-bit. */
29427 static void
29428 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
29430 enum machine_mode mode = GET_MODE (sign);
29431 rtx sgn = gen_reg_rtx (mode);
29432 if (mask == NULL_RTX)
29434 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
29435 if (!VECTOR_MODE_P (mode))
29437 /* We need to generate a scalar mode mask in this case. */
29438 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29439 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29440 mask = gen_reg_rtx (mode);
29441 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29444 else
29445 mask = gen_rtx_NOT (mode, mask);
29446 emit_insn (gen_rtx_SET (VOIDmode, sgn,
29447 gen_rtx_AND (mode, mask, sign)));
29448 emit_insn (gen_rtx_SET (VOIDmode, result,
29449 gen_rtx_IOR (mode, abs_value, sgn)));
29452 /* Expand fabs (OP0) and return a new rtx that holds the result. The
29453 mask for masking out the sign-bit is stored in *SMASK, if that is
29454 non-null. */
29455 static rtx
29456 ix86_expand_sse_fabs (rtx op0, rtx *smask)
29458 enum machine_mode mode = GET_MODE (op0);
29459 rtx xa, mask;
29461 xa = gen_reg_rtx (mode);
29462 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
29463 if (!VECTOR_MODE_P (mode))
29465 /* We need to generate a scalar mode mask in this case. */
29466 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29467 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29468 mask = gen_reg_rtx (mode);
29469 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29471 emit_insn (gen_rtx_SET (VOIDmode, xa,
29472 gen_rtx_AND (mode, op0, mask)));
29474 if (smask)
29475 *smask = mask;
29477 return xa;
29480 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
29481 swapping the operands if SWAP_OPERANDS is true. The expanded
29482 code is a forward jump to a newly created label in case the
29483 comparison is true. The generated label rtx is returned. */
29484 static rtx
29485 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
29486 bool swap_operands)
29488 rtx label, tmp;
29490 if (swap_operands)
29492 tmp = op0;
29493 op0 = op1;
29494 op1 = tmp;
29497 label = gen_label_rtx ();
29498 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
29499 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29500 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
29501 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
29502 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
29503 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
29504 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
29505 JUMP_LABEL (tmp) = label;
29507 return label;
29510 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
29511 using comparison code CODE. Operands are swapped for the comparison if
29512 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
29513 static rtx
29514 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
29515 bool swap_operands)
29517 enum machine_mode mode = GET_MODE (op0);
29518 rtx mask = gen_reg_rtx (mode);
29520 if (swap_operands)
29522 rtx tmp = op0;
29523 op0 = op1;
29524 op1 = tmp;
29527 if (mode == DFmode)
29528 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
29529 gen_rtx_fmt_ee (code, mode, op0, op1)));
29530 else
29531 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
29532 gen_rtx_fmt_ee (code, mode, op0, op1)));
29534 return mask;
29537 /* Generate and return a rtx of mode MODE for 2**n where n is the number
29538 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
29539 static rtx
29540 ix86_gen_TWO52 (enum machine_mode mode)
29542 REAL_VALUE_TYPE TWO52r;
29543 rtx TWO52;
29545 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
29546 TWO52 = const_double_from_real_value (TWO52r, mode);
29547 TWO52 = force_reg (mode, TWO52);
29549 return TWO52;
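/* Once |x| < 2**52 (2**23 for SFmode), the sum x + TWO52 has no
   fraction bits left in its significand, so adding and then
   subtracting TWO52 rounds x to an integer in the current rounding
   mode. This is the basis of the rounding expansions below. */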
29552 /* Expand SSE sequence for computing lround from OP1 storing
29553 into OP0. */
29554 void
29555 ix86_expand_lround (rtx op0, rtx op1)
29557 /* C code for the stuff we're doing below:
29558 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
29559 return (long)tmp;
29561 enum machine_mode mode = GET_MODE (op1);
29562 const struct real_format *fmt;
29563 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29564 rtx adj;
29566 /* load nextafter (0.5, 0.0) */
29567 fmt = REAL_MODE_FORMAT (mode);
29568 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29569 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29571 /* adj = copysign (0.5, op1) */
29572 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29573 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29575 /* adj = op1 + adj */
29576 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29578 /* op0 = (imode)adj */
29579 expand_fix (op0, adj, 0);
29582 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
29583 into OPERAND0. */
29584 void
29585 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29587 /* C code for the stuff we're doing below (for do_floor):
29588 xi = (long)op1;
29589 xi -= (double)xi > op1 ? 1 : 0;
29590 return xi;
29592 enum machine_mode fmode = GET_MODE (op1);
29593 enum machine_mode imode = GET_MODE (op0);
29594 rtx ireg, freg, label, tmp;
29596 /* reg = (long)op1 */
29597 ireg = gen_reg_rtx (imode);
29598 expand_fix (ireg, op1, 0);
29600 /* freg = (double)reg */
29601 freg = gen_reg_rtx (fmode);
29602 expand_float (freg, ireg, 0);
29604 /* ireg = (freg > op1) ? ireg - 1 : ireg */
29605 label = ix86_expand_sse_compare_and_jump (UNLE,
29606 freg, op1, !do_floor);
29607 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29608 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29609 emit_move_insn (ireg, tmp);
29611 emit_label (label);
29612 LABEL_NUSES (label) = 1;
29614 emit_move_insn (op0, ireg);
29617 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29618 result in OPERAND0. */
29619 void
29620 ix86_expand_rint (rtx operand0, rtx operand1)
29622 /* C code for the stuff we're doing below:
29623 xa = fabs (operand1);
29624 if (!isless (xa, 2**52))
29625 return operand1;
29626 xa = xa + 2**52 - 2**52;
29627 return copysign (xa, operand1);
29629 enum machine_mode mode = GET_MODE (operand0);
29630 rtx res, xa, label, TWO52, mask;
29632 res = gen_reg_rtx (mode);
29633 emit_move_insn (res, operand1);
29635 /* xa = abs (operand1) */
29636 xa = ix86_expand_sse_fabs (res, &mask);
29638 /* if (!isless (xa, TWO52)) goto label; */
29639 TWO52 = ix86_gen_TWO52 (mode);
29640 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29642 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29643 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29645 ix86_sse_copysign_to_positive (res, xa, res, mask);
29647 emit_label (label);
29648 LABEL_NUSES (label) = 1;
29650 emit_move_insn (operand0, res);
29653 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29654 into OPERAND0. */
29655 void
29656 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29658 /* C code for the stuff we expand below.
29659 double xa = fabs (x), x2;
29660 if (!isless (xa, TWO52))
29661 return x;
29662 xa = xa + TWO52 - TWO52;
29663 x2 = copysign (xa, x);
29664 Compensate. Floor:
29665 if (x2 > x)
29666 x2 -= 1;
29667 Compensate. Ceil:
29668 if (x2 < x)
29669 x2 -= -1;
29670 return x2;
29672 enum machine_mode mode = GET_MODE (operand0);
29673 rtx xa, TWO52, tmp, label, one, res, mask;
29675 TWO52 = ix86_gen_TWO52 (mode);
29677 /* Temporary for holding the result, initialized to the input
29678 operand to ease control flow. */
29679 res = gen_reg_rtx (mode);
29680 emit_move_insn (res, operand1);
29682 /* xa = abs (operand1) */
29683 xa = ix86_expand_sse_fabs (res, &mask);
29685 /* if (!isless (xa, TWO52)) goto label; */
29686 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29688 /* xa = xa + TWO52 - TWO52; */
29689 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29690 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29692 /* xa = copysign (xa, operand1) */
29693 ix86_sse_copysign_to_positive (xa, xa, res, mask);
29695 /* generate 1.0 or -1.0 */
29696 one = force_reg (mode,
29697 const_double_from_real_value (do_floor
29698 ? dconst1 : dconstm1, mode));
29700 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29701 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29702 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29703 gen_rtx_AND (mode, one, tmp)));
29704 /* We always need to subtract here to preserve signed zero. */
29705 tmp = expand_simple_binop (mode, MINUS,
29706 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29707 emit_move_insn (res, tmp);
29709 emit_label (label);
29710 LABEL_NUSES (label) = 1;
29712 emit_move_insn (operand0, res);
29715 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29716 into OPERAND0. */
29717 void
29718 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29720 /* C code for the stuff we expand below.
29721 double xa = fabs (x), x2;
29722 if (!isless (xa, TWO52))
29723 return x;
29724 x2 = (double)(long)x;
29725 Compensate. Floor:
29726 if (x2 > x)
29727 x2 -= 1;
29728 Compensate. Ceil:
29729 if (x2 < x)
29730 x2 += 1;
29731 if (HONOR_SIGNED_ZEROS (mode))
29732 return copysign (x2, x);
29733 return x2;
29735 enum machine_mode mode = GET_MODE (operand0);
29736 rtx xa, xi, TWO52, tmp, label, one, res, mask;
29738 TWO52 = ix86_gen_TWO52 (mode);
29740 /* Temporary for holding the result, initialized to the input
29741 operand to ease control flow. */
29742 res = gen_reg_rtx (mode);
29743 emit_move_insn (res, operand1);
29745 /* xa = abs (operand1) */
29746 xa = ix86_expand_sse_fabs (res, &mask);
29748 /* if (!isless (xa, TWO52)) goto label; */
29749 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29751 /* xa = (double)(long)x */
29752 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29753 expand_fix (xi, res, 0);
29754 expand_float (xa, xi, 0);
29756 /* generate 1.0 */
29757 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29759 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29760 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29761 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29762 gen_rtx_AND (mode, one, tmp)));
29763 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29764 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29765 emit_move_insn (res, tmp);
29767 if (HONOR_SIGNED_ZEROS (mode))
29768 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29770 emit_label (label);
29771 LABEL_NUSES (label) = 1;
29773 emit_move_insn (operand0, res);
29776 /* Expand SSE sequence for computing round from OPERAND1 storing
29777 into OPERAND0. Sequence that works without relying on DImode truncation
29778 via cvttsd2siq that is only available on 64bit targets. */
29779 void
29780 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29782 /* C code for the stuff we expand below.
29783 double xa = fabs (x), xa2, x2;
29784 if (!isless (xa, TWO52))
29785 return x;
29786 Using the absolute value and copying back sign makes
29787 -0.0 -> -0.0 correct.
29788 xa2 = xa + TWO52 - TWO52;
29789 Compensate.
29790 dxa = xa2 - xa;
29791 if (dxa <= -0.5)
29792 xa2 += 1;
29793 else if (dxa > 0.5)
29794 xa2 -= 1;
29795 x2 = copysign (xa2, x);
29796 return x2;
29798 enum machine_mode mode = GET_MODE (operand0);
29799 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29801 TWO52 = ix86_gen_TWO52 (mode);
29803 /* Temporary for holding the result, initialized to the input
29804 operand to ease control flow. */
29805 res = gen_reg_rtx (mode);
29806 emit_move_insn (res, operand1);
29808 /* xa = abs (operand1) */
29809 xa = ix86_expand_sse_fabs (res, &mask);
29811 /* if (!isless (xa, TWO52)) goto label; */
29812 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29814 /* xa2 = xa + TWO52 - TWO52; */
29815 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29816 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29818 /* dxa = xa2 - xa; */
29819 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29821 /* generate 0.5, 1.0 and -0.5 */
29822 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29823 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29824 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
29825 0, OPTAB_DIRECT);
29827 /* Compensate. */
29828 tmp = gen_reg_rtx (mode);
29829 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29830 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29831 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29832 gen_rtx_AND (mode, one, tmp)));
29833 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29834 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29835 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29836 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29837 gen_rtx_AND (mode, one, tmp)));
29838 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29840 /* res = copysign (xa2, operand1) */
29841 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29843 emit_label (label);
29844 LABEL_NUSES (label) = 1;
29846 emit_move_insn (operand0, res);
29849 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29850 into OPERAND0. */
29851 void
29852 ix86_expand_trunc (rtx operand0, rtx operand1)
29854 /* C code for SSE variant we expand below.
29855 double xa = fabs (x), x2;
29856 if (!isless (xa, TWO52))
29857 return x;
29858 x2 = (double)(long)x;
29859 if (HONOR_SIGNED_ZEROS (mode))
29860 return copysign (x2, x);
29861 return x2;
29863 enum machine_mode mode = GET_MODE (operand0);
29864 rtx xa, xi, TWO52, label, res, mask;
29866 TWO52 = ix86_gen_TWO52 (mode);
29868 /* Temporary for holding the result, initialized to the input
29869 operand to ease control flow. */
29870 res = gen_reg_rtx (mode);
29871 emit_move_insn (res, operand1);
29873 /* xa = abs (operand1) */
29874 xa = ix86_expand_sse_fabs (res, &mask);
29876 /* if (!isless (xa, TWO52)) goto label; */
29877 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29879 /* x = (double)(long)x */
29880 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29881 expand_fix (xi, res, 0);
29882 expand_float (res, xi, 0);
29884 if (HONOR_SIGNED_ZEROS (mode))
29885 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29887 emit_label (label);
29888 LABEL_NUSES (label) = 1;
29890 emit_move_insn (operand0, res);
29893 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29894 into OPERAND0. */
29895 void
29896 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29898 enum machine_mode mode = GET_MODE (operand0);
29899 rtx xa, mask, TWO52, label, one, res, smask, tmp;
29901 /* C code for SSE variant we expand below.
29902 double xa = fabs (x), xa2, x2;
29903 if (!isless (xa, TWO52))
29904 return x;
29905 xa2 = xa + TWO52 - TWO52;
29906 Compensate:
29907 if (xa2 > xa)
29908 xa2 -= 1.0;
29909 x2 = copysign (xa2, x);
29910 return x2;
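/* Editorial note: for nonnegative xa, the nearest integer produced by the
   TWO52 trick differs from trunc (xa) by at most one, and only when the
   addition rounded up, so a single conditional subtract restores
   truncation:

     xa2 = xa + TWO52 - TWO52;    -- nearest integer
     if (xa2 > xa)                -- rounded up?
       xa2 -= 1.0;                -- step back down
*/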
29913 TWO52 = ix86_gen_TWO52 (mode);
29915 /* Temporary for holding the result, initialized to the input
29916 operand to ease control flow. */
29917 res = gen_reg_rtx (mode);
29918 emit_move_insn (res, operand1);
29920 /* xa = abs (operand1) */
29921 xa = ix86_expand_sse_fabs (res, &smask);
29923 /* if (!isless (xa, TWO52)) goto label; */
29924 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29926 /* res = xa + TWO52 - TWO52; */
29927 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29928 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
29929 emit_move_insn (res, tmp);
29931 /* generate 1.0 */
29932 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29934 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
29935 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
29936 emit_insn (gen_rtx_SET (VOIDmode, mask,
29937 gen_rtx_AND (mode, mask, one)));
29938 tmp = expand_simple_binop (mode, MINUS,
29939 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
29940 emit_move_insn (res, tmp);
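/* Editorial illustration (standalone intrinsics, not GCC internals): the
   compare-mask-and-AND sequence above is the branchless SSE form of
   "if (res > xa) res -= 1.0".  With SSE2 intrinsics it would read:

     __m128d mask = _mm_cmpgt_sd (res, xa);  -- all-ones bits if res > xa
     __m128d adj  = _mm_and_pd (mask, one);  -- selects 1.0 or +0.0
     res = _mm_sub_sd (res, adj);
*/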
29942 /* res = copysign (res, operand1) */
29943 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
29945 emit_label (label);
29946 LABEL_NUSES (label) = 1;
29948 emit_move_insn (operand0, res);
29951 /* Expand SSE sequence for computing round from OPERAND1 storing
29952 into OPERAND0. */
29953 void
29954 ix86_expand_round (rtx operand0, rtx operand1)
29956 /* C code for the stuff we're doing below:
29957 double xa = fabs (x);
29958 if (!isless (xa, TWO52))
29959 return x;
29960 xa = (double)(long)(xa + nextafter (0.5, 0.0));
29961 return copysign (xa, x);
29963 enum machine_mode mode = GET_MODE (operand0);
29964 rtx res, TWO52, xa, label, xi, half, mask;
29965 const struct real_format *fmt;
29966 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29968 /* Temporary for holding the result, initialized to the input
29969 operand to ease control flow. */
29970 res = gen_reg_rtx (mode);
29971 emit_move_insn (res, operand1);
29973 TWO52 = ix86_gen_TWO52 (mode);
29974 xa = ix86_expand_sse_fabs (res, &mask);
29975 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29977 /* load nextafter (0.5, 0.0) */
29978 fmt = REAL_MODE_FORMAT (mode);
29979 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29980 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
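/* Editorial worked example: pred_half is nextafter (0.5, 0.0), i.e. the
   largest representable value below 0.5, computed as 0.5 - 2**(-P-1) from
   the format's precision P (53 for DFmode).  For the double just below
   0.5, 0.49999999999999994 + pred_half == 0.9999999999999999, which
   truncates to 0 as required, whereas adding exactly 0.5 would round the
   sum up to 1.0 and give the wrong result. */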
29982 /* xa = xa + 0.5 */
29983 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29984 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
29986 /* xa = (double)(int64_t)xa */
29987 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29988 expand_fix (xi, xa, 0);
29989 expand_float (xa, xi, 0);
29991 /* res = copysign (xa, operand1) */
29992 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29994 emit_label (label);
29995 LABEL_NUSES (label) = 1;
29997 emit_move_insn (operand0, res);
30001 /* Check whether an SSE5 instruction is valid or not.
30002 OPERANDS is the array of operands.
30003 NUM is the number of operands.
30004 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
30005 NUM_MEMORY is the maximum number of memory operands to accept.
30006 When COMMUTATIVE is set, operands 1 and 2 can be swapped. */
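/* Editorial worked example: mem_mask below gets bit I set when
   operands[I] is a memory reference.  For a four-operand fmaddss whose
   last input comes from memory, mem_mask == (1 << 3) and mem_count == 1,
   which the uses_oc0 case accepts. */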
30008 bool
30009 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
30010 bool uses_oc0, int num_memory, bool commutative)
30012 int mem_mask;
30013 int mem_count;
30014 int i;
30016 /* Count the number of memory arguments */
30017 mem_mask = 0;
30018 mem_count = 0;
30019 for (i = 0; i < num; i++)
30021 enum machine_mode mode = GET_MODE (operands[i]);
30022 if (register_operand (operands[i], mode))
30025 else if (memory_operand (operands[i], mode))
30027 mem_mask |= (1 << i);
30028 mem_count++;
30031 else
30033 rtx pattern = PATTERN (insn);
30035 /* allow 0 for pcmov */
30036 if (GET_CODE (pattern) != SET
30037 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
30038 || i < 2
30039 || operands[i] != CONST0_RTX (mode))
30040 return false;
30044 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
30045 a memory operation. */
30046 if (num_memory < 0)
30048 num_memory = -num_memory;
30049 if ((mem_mask & (1 << (num-1))) != 0)
30051 mem_mask &= ~(1 << (num-1));
30052 mem_count--;
30056 /* If there were no memory operations, allow the insn */
30057 if (mem_mask == 0)
30058 return true;
30060 /* Do not allow the destination register to be a memory operand. */
30061 else if (mem_mask & (1 << 0))
30062 return false;
30064 /* If there are too many memory operations, disallow the instruction.  While
30065 the hardware only allows one memory reference, before register allocation
30066 we sometimes allow two memory operations for some insns so that code
30067 like the following can be optimized:
30069 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
30071 or similar cases that are vectorized to use the fmaddss
30072 instruction. */
30073 else if (mem_count > num_memory)
30074 return false;
30076 /* Don't allow more than one memory operation if not optimizing. */
30077 else if (mem_count > 1 && !optimize)
30078 return false;
30080 else if (num == 4 && mem_count == 1)
30082 /* formats (destination is the first argument), example fmaddss:
30083 xmm1, xmm1, xmm2, xmm3/mem
30084 xmm1, xmm1, xmm2/mem, xmm3
30085 xmm1, xmm2, xmm3/mem, xmm1
30086 xmm1, xmm2/mem, xmm3, xmm1 */
30087 if (uses_oc0)
30088 return ((mem_mask == (1 << 1))
30089 || (mem_mask == (1 << 2))
30090 || (mem_mask == (1 << 3)));
30092 /* format, example pmacsdd:
30093 xmm1, xmm2, xmm3/mem, xmm1 */
30094 if (commutative)
30095 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
30096 else
30097 return (mem_mask == (1 << 2));
30100 else if (num == 4 && num_memory == 2)
30102 /* If there are two memory operations, we can load one of the memory ops
30103 into the destination register.  This is for optimizing the
30104 multiply/add ops, where the combiner has given both the multiply
30105 and the add insns a memory operand.  We have to be careful
30106 that the destination doesn't overlap with the inputs. */
30107 rtx op0 = operands[0];
30109 if (reg_mentioned_p (op0, operands[1])
30110 || reg_mentioned_p (op0, operands[2])
30111 || reg_mentioned_p (op0, operands[3]))
30112 return false;
30114 /* formats (destination is the first argument), example fmaddss:
30115 xmm1, xmm1, xmm2, xmm3/mem
30116 xmm1, xmm1, xmm2/mem, xmm3
30117 xmm1, xmm2, xmm3/mem, xmm1
30118 xmm1, xmm2/mem, xmm3, xmm1
30120 For the oc0 case, we will load either operands[1] or operands[3] into
30121 operands[0], so any combination of 2 memory operands is ok. */
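/* Editorial worked example: with operands[1] and operands[3] both in
   memory, mem_mask == ((1 << 1) | (1 << 3)); the uses_oc0 case accepts
   any such pair because ix86_expand_sse5_multiple_memory below can first
   move one of them into operands[0]. */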
30122 if (uses_oc0)
30123 return true;
30125 /* format, example pmacsdd:
30126 xmm1, xmm2, xmm3/mem, xmm1
30128 For the integer multiply/add instructions we are more restrictive and
30129 require operands[2] and operands[3] to be the memory operands. */
30130 if (commutative)
30131 return (mem_mask == ((1 << 1) | (1 << 3)) || mem_mask == ((1 << 2) | (1 << 3)));
30132 else
30133 return (mem_mask == ((1 << 2) | (1 << 3)));
30136 else if (num == 3 && num_memory == 1)
30138 /* formats, example protb:
30139 xmm1, xmm2, xmm3/mem
30140 xmm1, xmm2/mem, xmm3 */
30141 if (uses_oc0)
30142 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
30144 /* format, example comeq:
30145 xmm1, xmm2, xmm3/mem */
30146 else
30147 return (mem_mask == (1 << 2));
30150 else
30151 gcc_unreachable ();
30153 return false;
30157 /* Fix up an SSE5 instruction that has two memory input references into a
30158 form the hardware will allow, by using the destination register to load
30159 one of the memory operands.  Presently this is used by the multiply/add
30160 routines to allow two memory references. */
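/* Editorial sketch of a hypothetical caller (not from this file): a
   splitter for a two-memory multiply/add could run

     ix86_expand_sse5_multiple_memory (operands, 4, V4SFmode);

   before emitting the real insn, after which at most one operand still
   references memory. */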
30162 void
30163 ix86_expand_sse5_multiple_memory (rtx operands[],
30164 int num,
30165 enum machine_mode mode)
30167 rtx op0 = operands[0];
30168 if (num != 4
30169 || memory_operand (op0, mode)
30170 || reg_mentioned_p (op0, operands[1])
30171 || reg_mentioned_p (op0, operands[2])
30172 || reg_mentioned_p (op0, operands[3]))
30173 gcc_unreachable ();
30175 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
30176 the destination register. */
30177 if (memory_operand (operands[1], mode))
30179 emit_move_insn (op0, operands[1]);
30180 operands[1] = op0;
30182 else if (memory_operand (operands[3], mode))
30184 emit_move_insn (op0, operands[3]);
30185 operands[3] = op0;
30187 else
30188 gcc_unreachable ();
30190 return;
30194 /* Table of valid machine attributes. */
30195 static const struct attribute_spec ix86_attribute_table[] =
30197 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
30198 /* Stdcall attribute says callee is responsible for popping arguments
30199 if they are not variable. */
30200 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30201 /* Fastcall attribute says callee is responsible for popping arguments
30202 if they are not variable. */
30203 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30204 /* Cdecl attribute says the callee is a normal C declaration */
30205 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30206 /* Regparm attribute specifies how many integer arguments are to be
30207 passed in registers. */
30208 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
30209 /* Sseregparm attribute says we are using x86_64 calling conventions
30210 for FP arguments. */
30211 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30212 /* force_align_arg_pointer says this function realigns the stack at entry. */
30213 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
30214 false, true, true, ix86_handle_cconv_attribute },
30215 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30216 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
30217 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
30218 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
30219 #endif
30220 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30221 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30222 #ifdef SUBTARGET_ATTRIBUTE_TABLE
30223 SUBTARGET_ATTRIBUTE_TABLE,
30224 #endif
30225 /* ms_abi and sysv_abi calling convention function attributes. */
30226 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30227 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30228 /* End element. */
30229 { NULL, 0, 0, false, false, false, NULL }
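/* Editorial examples (user-level C, not part of this table): the
   attributes above attach to declarations, e.g.

     int __attribute__ ((fastcall)) f (int a, int b);
     int __attribute__ ((regparm (3))) g (int a, int b, int c);
     struct __attribute__ ((ms_struct)) s { char c; int i; };

   ix86_handle_cconv_attribute diagnoses the calling-convention forms
   where they cannot apply, e.g. ignoring regparm on 64-bit targets. */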
30232 /* Implement targetm.vectorize.builtin_vectorization_cost. */
30233 static int
30234 x86_builtin_vectorization_cost (bool runtime_test)
30236 /* If the branch of the runtime test is taken - i.e. the vectorized
30237 version is skipped - this incurs a misprediction cost (because the
30238 vectorized version is expected to be the fall-through).  So we subtract
30239 the latency of a mispredicted branch from the costs that are incurred
30240 when the vectorized version is executed.
30242 TODO: The values in individual target tables have to be tuned or new
30243 fields may be needed.  E.g., on K8, the default branch path is the
30244 not-taken path.  If the taken path is predicted correctly, the minimum
30245 penalty of going down the taken path is 1 cycle.  If the taken path is
30246 not predicted correctly, then the minimum penalty is 10 cycles. */
30248 if (runtime_test)
30250 return (-(ix86_cost->cond_taken_branch_cost));
30252 else
30253 return 0;
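/* Editorial worked example: with ix86_cost->cond_taken_branch_cost equal
   to COSTS_N_INSNS (3), a runtime-test query returns that value negated,
   crediting the scalar fallback with the misprediction penalty described
   above; all other queries are costed as free here. */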
30256 /* This function returns the calling-ABI-specific va_list type node,
30257 i.e. the va_list type specific to FNDECL. */
30259 tree
30260 ix86_fn_abi_va_list (tree fndecl)
30262 if (!TARGET_64BIT)
30263 return va_list_type_node;
30264 gcc_assert (fndecl != NULL_TREE);
30266 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
30267 return ms_va_list_type_node;
30268 else
30269 return sysv_va_list_type_node;
30272 /* Returns the canonical va_list type specified by TYPE. If there
30273 is no valid TYPE provided, it returns NULL_TREE. */
30275 tree
30276 ix86_canonical_va_list_type (tree type)
30278 tree wtype, htype;
30280 /* Resolve references and pointers to va_list type. */
30281 if (INDIRECT_REF_P (type))
30282 type = TREE_TYPE (type);
30283 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
30284 type = TREE_TYPE (type);
30286 if (TARGET_64BIT)
30288 wtype = va_list_type_node;
30289 gcc_assert (wtype != NULL_TREE);
30290 htype = type;
30291 if (TREE_CODE (wtype) == ARRAY_TYPE)
30293 /* If va_list is an array type, the argument may have decayed
30294 to a pointer type, e.g. by being passed to another function.
30295 In that case, unwrap both types so that we can compare the
30296 underlying records. */
30297 if (TREE_CODE (htype) == ARRAY_TYPE
30298 || POINTER_TYPE_P (htype))
30300 wtype = TREE_TYPE (wtype);
30301 htype = TREE_TYPE (htype);
30304 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30305 return va_list_type_node;
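/* Editorial example of the decay handled above: on x86-64 the SYSV
   va_list is an array of __va_list_tag, so in

     void g (va_list ap);
     void f (va_list ap) { g (ap); }

   the parameter ap has already adjusted to pointer type, and both sides
   must be unwrapped with TREE_TYPE before comparing main variants. */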
30306 wtype = sysv_va_list_type_node;
30307 gcc_assert (wtype != NULL_TREE);
30308 htype = type;
30309 if (TREE_CODE (wtype) == ARRAY_TYPE)
30311 /* If va_list is an array type, the argument may have decayed
30312 to a pointer type, e.g. by being passed to another function.
30313 In that case, unwrap both types so that we can compare the
30314 underlying records. */
30315 if (TREE_CODE (htype) == ARRAY_TYPE
30316 || POINTER_TYPE_P (htype))
30318 wtype = TREE_TYPE (wtype);
30319 htype = TREE_TYPE (htype);
30322 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30323 return sysv_va_list_type_node;
30324 wtype = ms_va_list_type_node;
30325 gcc_assert (wtype != NULL_TREE);
30326 htype = type;
30327 if (TREE_CODE (wtype) == ARRAY_TYPE)
30329 /* If va_list is an array type, the argument may have decayed
30330 to a pointer type, e.g. by being passed to another function.
30331 In that case, unwrap both types so that we can compare the
30332 underlying records. */
30333 if (TREE_CODE (htype) == ARRAY_TYPE
30334 || POINTER_TYPE_P (htype))
30336 wtype = TREE_TYPE (wtype);
30337 htype = TREE_TYPE (htype);
30340 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30341 return ms_va_list_type_node;
30342 return NULL_TREE;
30344 return std_canonical_va_list_type (type);
30347 /* Iterate through the target-specific builtin types for va_list.
30348 IDX denotes the iterator, *PTREE is set to the result type of
30349 the va_list builtin, and *PNAME to its internal type.
30350 Returns zero if there is no element for this index, otherwise
30351 IDX should be increased upon the next call.
30352 Note, do not iterate a base builtin's name like __builtin_va_list.
30353 Used from c_common_nodes_and_builtins. */
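/* Editorial usage sketch (caller side; hook and helper names assumed):

     const char *name;
     tree type;
     int i;
     for (i = 0; targetm.enum_va_list (i, &name, &type); i++)
       record_va_list_builtin (name, type);   -- hypothetical helper

   On 64-bit this yields __builtin_ms_va_list and __builtin_sysv_va_list. */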
30356 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
30358 if (!TARGET_64BIT)
30359 return 0;
30360 switch (idx) {
30361 case 0:
30362 *ptree = ms_va_list_type_node;
30363 *pname = "__builtin_ms_va_list";
30364 break;
30365 case 1:
30366 *ptree = sysv_va_list_type_node;
30367 *pname = "__builtin_sysv_va_list";
30368 break;
30369 default:
30370 return 0;
30372 return 1;
30375 /* Initialize the GCC target structure. */
30376 #undef TARGET_RETURN_IN_MEMORY
30377 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
30379 #undef TARGET_LEGITIMIZE_ADDRESS
30380 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
30382 #undef TARGET_ATTRIBUTE_TABLE
30383 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
30384 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30385 # undef TARGET_MERGE_DECL_ATTRIBUTES
30386 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
30387 #endif
30389 #undef TARGET_COMP_TYPE_ATTRIBUTES
30390 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
30392 #undef TARGET_INIT_BUILTINS
30393 #define TARGET_INIT_BUILTINS ix86_init_builtins
30394 #undef TARGET_EXPAND_BUILTIN
30395 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
30397 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
30398 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
30399 ix86_builtin_vectorized_function
30401 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
30402 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
30404 #undef TARGET_BUILTIN_RECIPROCAL
30405 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
30407 #undef TARGET_ASM_FUNCTION_EPILOGUE
30408 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
30410 #undef TARGET_ENCODE_SECTION_INFO
30411 #ifndef SUBTARGET_ENCODE_SECTION_INFO
30412 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
30413 #else
30414 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
30415 #endif
30417 #undef TARGET_ASM_OPEN_PAREN
30418 #define TARGET_ASM_OPEN_PAREN ""
30419 #undef TARGET_ASM_CLOSE_PAREN
30420 #define TARGET_ASM_CLOSE_PAREN ""
30422 #undef TARGET_ASM_ALIGNED_HI_OP
30423 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
30424 #undef TARGET_ASM_ALIGNED_SI_OP
30425 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
30426 #ifdef ASM_QUAD
30427 #undef TARGET_ASM_ALIGNED_DI_OP
30428 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
30429 #endif
30431 #undef TARGET_ASM_UNALIGNED_HI_OP
30432 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
30433 #undef TARGET_ASM_UNALIGNED_SI_OP
30434 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
30435 #undef TARGET_ASM_UNALIGNED_DI_OP
30436 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
30438 #undef TARGET_SCHED_ADJUST_COST
30439 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
30440 #undef TARGET_SCHED_ISSUE_RATE
30441 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
30442 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
30443 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
30444 ia32_multipass_dfa_lookahead
30446 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
30447 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
30449 #ifdef HAVE_AS_TLS
30450 #undef TARGET_HAVE_TLS
30451 #define TARGET_HAVE_TLS true
30452 #endif
30453 #undef TARGET_CANNOT_FORCE_CONST_MEM
30454 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
30455 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
30456 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
30458 #undef TARGET_DELEGITIMIZE_ADDRESS
30459 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
30461 #undef TARGET_MS_BITFIELD_LAYOUT_P
30462 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
30464 #if TARGET_MACHO
30465 #undef TARGET_BINDS_LOCAL_P
30466 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
30467 #endif
30468 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30469 #undef TARGET_BINDS_LOCAL_P
30470 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
30471 #endif
30473 #undef TARGET_ASM_OUTPUT_MI_THUNK
30474 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
30475 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
30476 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
30478 #undef TARGET_ASM_FILE_START
30479 #define TARGET_ASM_FILE_START x86_file_start
30481 #undef TARGET_DEFAULT_TARGET_FLAGS
30482 #define TARGET_DEFAULT_TARGET_FLAGS \
30483 (TARGET_DEFAULT \
30484 | TARGET_SUBTARGET_DEFAULT \
30485 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
30487 #undef TARGET_HANDLE_OPTION
30488 #define TARGET_HANDLE_OPTION ix86_handle_option
30490 #undef TARGET_RTX_COSTS
30491 #define TARGET_RTX_COSTS ix86_rtx_costs
30492 #undef TARGET_ADDRESS_COST
30493 #define TARGET_ADDRESS_COST ix86_address_cost
30495 #undef TARGET_FIXED_CONDITION_CODE_REGS
30496 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
30497 #undef TARGET_CC_MODES_COMPATIBLE
30498 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
30500 #undef TARGET_MACHINE_DEPENDENT_REORG
30501 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
30503 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
30504 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
30506 #undef TARGET_BUILD_BUILTIN_VA_LIST
30507 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
30509 #undef TARGET_FN_ABI_VA_LIST
30510 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
30512 #undef TARGET_CANONICAL_VA_LIST_TYPE
30513 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
30515 #undef TARGET_EXPAND_BUILTIN_VA_START
30516 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
30518 #undef TARGET_MD_ASM_CLOBBERS
30519 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
30521 #undef TARGET_PROMOTE_PROTOTYPES
30522 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
30523 #undef TARGET_STRUCT_VALUE_RTX
30524 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
30525 #undef TARGET_SETUP_INCOMING_VARARGS
30526 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
30527 #undef TARGET_MUST_PASS_IN_STACK
30528 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
30529 #undef TARGET_PASS_BY_REFERENCE
30530 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
30531 #undef TARGET_INTERNAL_ARG_POINTER
30532 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
30533 #undef TARGET_UPDATE_STACK_BOUNDARY
30534 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
30535 #undef TARGET_GET_DRAP_RTX
30536 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
30537 #undef TARGET_STRICT_ARGUMENT_NAMING
30538 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
30540 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
30541 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
30543 #undef TARGET_SCALAR_MODE_SUPPORTED_P
30544 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
30546 #undef TARGET_VECTOR_MODE_SUPPORTED_P
30547 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
30549 #undef TARGET_C_MODE_FOR_SUFFIX
30550 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
30552 #ifdef HAVE_AS_TLS
30553 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
30554 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
30555 #endif
30557 #ifdef SUBTARGET_INSERT_ATTRIBUTES
30558 #undef TARGET_INSERT_ATTRIBUTES
30559 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
30560 #endif
30562 #undef TARGET_MANGLE_TYPE
30563 #define TARGET_MANGLE_TYPE ix86_mangle_type
30565 #undef TARGET_STACK_PROTECT_FAIL
30566 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
30568 #undef TARGET_FUNCTION_VALUE
30569 #define TARGET_FUNCTION_VALUE ix86_function_value
30571 #undef TARGET_SECONDARY_RELOAD
30572 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
30574 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
30575 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
30577 #undef TARGET_SET_CURRENT_FUNCTION
30578 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
30580 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
30581 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
30583 #undef TARGET_OPTION_SAVE
30584 #define TARGET_OPTION_SAVE ix86_function_specific_save
30586 #undef TARGET_OPTION_RESTORE
30587 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
30589 #undef TARGET_OPTION_PRINT
30590 #define TARGET_OPTION_PRINT ix86_function_specific_print
30592 #undef TARGET_OPTION_CAN_INLINE_P
30593 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
30595 #undef TARGET_EXPAND_TO_RTL_HOOK
30596 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
30598 #undef TARGET_LEGITIMATE_ADDRESS_P
30599 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
30601 #undef TARGET_FRAME_POINTER_REQUIRED
30602 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
30604 struct gcc_target targetm = TARGET_INITIALIZER;
30606 #include "gt-i386.h"