/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "gimple.h"
51 #include "dwarf2.h"
52 #include "df.h"
53 #include "tm-constrs.h"
54 #include "params.h"
55 #include "cselib.h"

static int x86_builtin_vectorization_cost (bool);
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)        \
  ((mode) == QImode ? 0         \
   : (mode) == HImode ? 1       \
   : (mode) == SImode ? 2       \
   : (mode) == DImode ? 3       \
   : 4)
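
/* For illustration (a sketch, not a quote of the callers): the cost tables
   below are typically indexed through this macro, e.g.

     ix86_cost->mult_init[MODE_INDEX (mode)]

   so QImode..DImode select the per-mode entry and any wider mode falls
   through to the trailing "other" slot at index 4.  */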

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
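
/* Worked example of that scale: with COSTS_N_INSNS (N) == (N) * 4, a
   2-byte addition costs COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so
   size costs and speed costs stay directly comparable.  */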

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
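
/* How the stringop entries below read (a sketch of struct stringop_algs as
   declared in i386.h): the first member picks the algorithm for blocks of
   unknown size, and each following {max, alg} pair picks one for known
   sizes up to MAX, with max == -1 terminating the list.  For example,

     {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}

   reads: unknown size -> libcall, size <= 256 -> rep movsl, else libcall.
   Each cost table carries a pair of such entries apiece for memcpy and
   memset; the first entry of a pair appears to serve 32-bit code and the
   second 64-bit code, with DUMMY_STRINGOP_ALGS filling the unused slot.  */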

const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),        /* cost of an add instruction */
  COSTS_N_BYTES (3),        /* cost of a lea instruction */
  COSTS_N_BYTES (2),        /* variable shift costs */
  COSTS_N_BYTES (3),        /* constant shift costs */
  {COSTS_N_BYTES (3),       /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),       /* HI */
   COSTS_N_BYTES (3),       /* SI */
   COSTS_N_BYTES (3),       /* DI */
   COSTS_N_BYTES (5)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),       /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),       /* HI */
   COSTS_N_BYTES (3),       /* SI */
   COSTS_N_BYTES (3),       /* DI */
   COSTS_N_BYTES (5)},      /* other */
  COSTS_N_BYTES (3),        /* cost of movsx */
  COSTS_N_BYTES (3),        /* cost of movzx */
  0,                        /* "large" insn */
  2,                        /* MOVE_RATIO */
  2,                        /* cost for loading QImode using movzbl */
  {2, 2, 2},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 2, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {2, 2, 2},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {2, 2, 2},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  3,                        /* cost of moving MMX register */
  {3, 3},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {3, 3},                   /* cost of storing MMX registers
                               in SImode and DImode */
  3,                        /* cost of moving SSE register */
  {3, 3, 3},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {3, 3, 3},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  0,                        /* size of l1 cache */
  0,                        /* size of l2 cache */
  0,                        /* size of prefetch block */
  0,                        /* number of parallel prefetches */
  2,                        /* Branch cost */
  COSTS_N_BYTES (2),        /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),        /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),        /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),        /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),        /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),        /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  1,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  1,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {    /* 386 specific costs */
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1),        /* cost of a lea instruction */
  COSTS_N_INSNS (3),        /* variable shift costs */
  COSTS_N_INSNS (2),        /* constant shift costs */
  {COSTS_N_INSNS (6),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),       /* HI */
   COSTS_N_INSNS (6),       /* SI */
   COSTS_N_INSNS (6),       /* DI */
   COSTS_N_INSNS (6)},      /* other */
  COSTS_N_INSNS (1),        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),      /* HI */
   COSTS_N_INSNS (23),      /* SI */
   COSTS_N_INSNS (23),      /* DI */
   COSTS_N_INSNS (23)},     /* other */
  COSTS_N_INSNS (3),        /* cost of movsx */
  COSTS_N_INSNS (2),        /* cost of movzx */
  15,                       /* "large" insn */
  3,                        /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {2, 4, 2},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 4, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {8, 8, 8},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {8, 8, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {4, 8},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 8},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 8, 16},               /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 8, 16},               /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  0,                        /* size of l1 cache */
  0,                        /* size of l2 cache */
  0,                        /* size of prefetch block */
  0,                        /* number of parallel prefetches */
  1,                        /* Branch cost */
  COSTS_N_INSNS (23),       /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),       /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),       /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),       /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),      /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs i486_cost = {    /* 486 specific costs */
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1),        /* cost of a lea instruction */
  COSTS_N_INSNS (3),        /* variable shift costs */
  COSTS_N_INSNS (2),        /* constant shift costs */
  {COSTS_N_INSNS (12),      /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),      /* HI */
   COSTS_N_INSNS (12),      /* SI */
   COSTS_N_INSNS (12),      /* DI */
   COSTS_N_INSNS (12)},     /* other */
  1,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),      /* HI */
   COSTS_N_INSNS (40),      /* SI */
   COSTS_N_INSNS (40),      /* DI */
   COSTS_N_INSNS (40)},     /* other */
  COSTS_N_INSNS (3),        /* cost of movsx */
  COSTS_N_INSNS (2),        /* cost of movzx */
  15,                       /* "large" insn */
  3,                        /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {2, 4, 2},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 4, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {8, 8, 8},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {8, 8, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {4, 8},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 8},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 8, 16},               /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 8, 16},               /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  4,                        /* size of l1 cache.  486 has 8kB cache
                               shared for code and data, so 4kB is
                               not really precise.  */
  4,                        /* size of l2 cache */
  0,                        /* size of prefetch block */
  0,                        /* number of parallel prefetches */
  1,                        /* Branch cost */
  COSTS_N_INSNS (8),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),       /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),       /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1),        /* cost of a lea instruction */
  COSTS_N_INSNS (4),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (11),      /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),      /* HI */
   COSTS_N_INSNS (11),      /* SI */
   COSTS_N_INSNS (11),      /* DI */
   COSTS_N_INSNS (11)},     /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),      /* HI */
   COSTS_N_INSNS (25),      /* SI */
   COSTS_N_INSNS (25),      /* DI */
   COSTS_N_INSNS (25)},     /* other */
  COSTS_N_INSNS (3),        /* cost of movsx */
  COSTS_N_INSNS (2),        /* cost of movzx */
  8,                        /* "large" insn */
  6,                        /* MOVE_RATIO */
  6,                        /* cost for loading QImode using movzbl */
  {2, 4, 2},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 4, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {2, 2, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 6},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  8,                        /* cost of moving MMX register */
  {8, 8},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {8, 8},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 8, 16},               /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 8, 16},               /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  8,                        /* size of l1 cache.  */
  8,                        /* size of l2 cache */
  0,                        /* size of prefetch block */
  0,                        /* number of parallel prefetches */
  2,                        /* Branch cost */
  COSTS_N_INSNS (3),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),       /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (4),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /* HI */
   COSTS_N_INSNS (4),       /* SI */
   COSTS_N_INSNS (4),       /* DI */
   COSTS_N_INSNS (4)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),      /* HI */
   COSTS_N_INSNS (17),      /* SI */
   COSTS_N_INSNS (17),      /* DI */
   COSTS_N_INSNS (17)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  6,                        /* MOVE_RATIO */
  2,                        /* cost for loading QImode using movzbl */
  {4, 4, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 2, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {2, 2, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 6},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {2, 2},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {2, 2},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {2, 2, 8},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {2, 2, 8},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  8,                        /* size of l1 cache.  */
  256,                      /* size of l2 cache */
  32,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  2,                        /* Branch cost */
  COSTS_N_INSNS (3),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),       /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks an inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in
     the CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1),        /* cost of a lea instruction */
  COSTS_N_INSNS (2),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /* HI */
   COSTS_N_INSNS (7),       /* SI */
   COSTS_N_INSNS (7),       /* DI */
   COSTS_N_INSNS (7)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),      /* HI */
   COSTS_N_INSNS (39),      /* SI */
   COSTS_N_INSNS (39),      /* DI */
   COSTS_N_INSNS (39)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  4,                        /* MOVE_RATIO */
  1,                        /* cost for loading QImode using movzbl */
  {1, 1, 1},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {1, 1, 1},                /* cost of storing integer registers */
  1,                        /* cost of reg,reg fld/fst */
  {1, 1, 1},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 6, 6},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */

  1,                        /* cost of moving MMX register */
  {1, 1},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {1, 1},                   /* cost of storing MMX registers
                               in SImode and DImode */
  1,                        /* cost of moving SSE register */
  {1, 1, 1},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {1, 1, 1},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  1,                        /* MMX or SSE register to integer */
  64,                       /* size of l1 cache.  */
  128,                      /* size of l2 cache.  */
  32,                       /* size of prefetch block */
  1,                        /* number of parallel prefetches */
  1,                        /* Branch cost */
  COSTS_N_INSNS (6),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),       /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),       /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (2),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),       /* HI */
   COSTS_N_INSNS (3),       /* SI */
   COSTS_N_INSNS (3),       /* DI */
   COSTS_N_INSNS (3)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),      /* HI */
   COSTS_N_INSNS (18),      /* SI */
   COSTS_N_INSNS (18),      /* DI */
   COSTS_N_INSNS (18)},     /* other */
  COSTS_N_INSNS (2),        /* cost of movsx */
  COSTS_N_INSNS (2),        /* cost of movzx */
  8,                        /* "large" insn */
  4,                        /* MOVE_RATIO */
  3,                        /* cost for loading QImode using movzbl */
  {4, 5, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 3, 2},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {6, 6, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 4},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {2, 2},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {2, 2},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {2, 2, 8},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {2, 2, 8},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  6,                        /* MMX or SSE register to integer */
  32,                       /* size of l1 cache.  */
  32,                       /* size of l2 cache.  Some models
                               have integrated l2 cache, but
                               optimizing for k6 is not important
                               enough to worry about that.  */
  32,                       /* size of prefetch block */
  1,                        /* number of parallel prefetches */
  1,                        /* Branch cost */
  COSTS_N_INSNS (2),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),       /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (2),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (5),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),       /* HI */
   COSTS_N_INSNS (5),       /* SI */
   COSTS_N_INSNS (5),       /* DI */
   COSTS_N_INSNS (5)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),      /* HI */
   COSTS_N_INSNS (42),      /* SI */
   COSTS_N_INSNS (74),      /* DI */
   COSTS_N_INSNS (74)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  9,                        /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {3, 4, 3},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {3, 4, 3},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {4, 4, 12},               /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {4, 4},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 4, 6},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 5},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  5,                        /* MMX or SSE register to integer */
  64,                       /* size of l1 cache.  */
  256,                      /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  5,                        /* Branch cost */
  COSTS_N_INSNS (4),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),       /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) than K8 does.  Alignment becomes important after 8 bytes for
     memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (2),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /* HI */
   COSTS_N_INSNS (3),       /* SI */
   COSTS_N_INSNS (4),       /* DI */
   COSTS_N_INSNS (5)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),      /* HI */
   COSTS_N_INSNS (42),      /* SI */
   COSTS_N_INSNS (74),      /* DI */
   COSTS_N_INSNS (74)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  9,                        /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {3, 4, 3},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {3, 4, 3},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {4, 4, 12},               /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {3, 3},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 3, 6},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 5},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  5,                        /* MMX or SSE register to integer */
  64,                       /* size of l1 cache.  */
  512,                      /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea to leave the number of prefetches entirely unlimited, as
     their execution also takes some time).  */
  100,                      /* number of parallel prefetches */
  3,                        /* Branch cost */
  COSTS_N_INSNS (4),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),       /* cost of FSQRT instruction.  */
  /* K8 has an optimized REP instruction for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                        /* scalar_stmt_cost.  */
  2,                        /* scalar load_cost.  */
  2,                        /* scalar_store_cost.  */
  5,                        /* vec_stmt_cost.  */
  0,                        /* vec_to_scalar_cost.  */
  2,                        /* scalar_to_vec_cost.  */
  2,                        /* vec_align_load_cost.  */
  3,                        /* vec_unalign_load_cost.  */
  3,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  2,                        /* cond_not_taken_branch_cost.  */
};

struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (2),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /* HI */
   COSTS_N_INSNS (3),       /* SI */
   COSTS_N_INSNS (4),       /* DI */
   COSTS_N_INSNS (5)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),      /* HI */
   COSTS_N_INSNS (51),      /* SI */
   COSTS_N_INSNS (83),      /* DI */
   COSTS_N_INSNS (83)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  9,                        /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {3, 4, 3},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {3, 4, 3},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {4, 4, 12},               /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {3, 3},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 4, 3},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 5},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  /* On K8:
       MOVD reg64, xmmreg   Double  FSTORE 4
       MOVD reg32, xmmreg   Double  FSTORE 4
     On AMDFAM10:
       MOVD reg64, xmmreg   Double  FADD 3
                                    1/1  1/1
       MOVD reg32, xmmreg   Double  FADD 3
                                    1/1  1/1 */
  64,                       /* size of l1 cache.  */
  512,                      /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea to leave the number of prefetches entirely unlimited, as
     their execution also takes some time).  */
  100,                      /* number of parallel prefetches */
  2,                        /* Branch cost */
  COSTS_N_INSNS (4),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),       /* cost of FSQRT instruction.  */

  /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                        /* scalar_stmt_cost.  */
  2,                        /* scalar load_cost.  */
  2,                        /* scalar_store_cost.  */
  6,                        /* vec_stmt_cost.  */
  0,                        /* vec_to_scalar_cost.  */
  2,                        /* scalar_to_vec_cost.  */
  2,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  2,                        /* vec_store_cost.  */
  2,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (3),        /* cost of a lea instruction */
  COSTS_N_INSNS (4),        /* variable shift costs */
  COSTS_N_INSNS (4),        /* constant shift costs */
  {COSTS_N_INSNS (15),      /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),      /* HI */
   COSTS_N_INSNS (15),      /* SI */
   COSTS_N_INSNS (15),      /* DI */
   COSTS_N_INSNS (15)},     /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),      /* HI */
   COSTS_N_INSNS (56),      /* SI */
   COSTS_N_INSNS (56),      /* DI */
   COSTS_N_INSNS (56)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  16,                       /* "large" insn */
  6,                        /* MOVE_RATIO */
  2,                        /* cost for loading QImode using movzbl */
  {4, 5, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 3, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {2, 2, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 6},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {2, 2},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {2, 2},                   /* cost of storing MMX registers
                               in SImode and DImode */
  12,                       /* cost of moving SSE register */
  {12, 12, 12},             /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {2, 2, 8},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  10,                       /* MMX or SSE register to integer */
  8,                        /* size of l1 cache.  */
  256,                      /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  2,                        /* Branch cost */
  COSTS_N_INSNS (5),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),       /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (10),      /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),      /* HI */
   COSTS_N_INSNS (10),      /* SI */
   COSTS_N_INSNS (10),      /* DI */
   COSTS_N_INSNS (10)},     /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),      /* HI */
   COSTS_N_INSNS (66),      /* SI */
   COSTS_N_INSNS (66),      /* DI */
   COSTS_N_INSNS (66)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  16,                       /* "large" insn */
  17,                       /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {4, 4, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {4, 4, 4},                /* cost of storing integer registers */
  3,                        /* cost of reg,reg fld/fst */
  {12, 12, 12},             /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 4},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  6,                        /* cost of moving MMX register */
  {12, 12},                 /* cost of loading MMX registers
                               in SImode and DImode */
  {12, 12},                 /* cost of storing MMX registers
                               in SImode and DImode */
  6,                        /* cost of moving SSE register */
  {12, 12, 12},             /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {12, 12, 12},             /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  8,                        /* MMX or SSE register to integer */
  8,                        /* size of l1 cache.  */
  1024,                     /* size of l2 cache.  */
  128,                      /* size of prefetch block */
  8,                        /* number of parallel prefetches */
  1,                        /* Branch cost */
  COSTS_N_INSNS (6),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),       /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,    /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),       /* HI */
   COSTS_N_INSNS (3),       /* SI */
   COSTS_N_INSNS (3),       /* DI */
   COSTS_N_INSNS (3)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),      /* HI */
   COSTS_N_INSNS (22),      /* SI */
   COSTS_N_INSNS (22),      /* DI */
   COSTS_N_INSNS (22)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  16,                       /* MOVE_RATIO */
  2,                        /* cost for loading QImode using movzbl */
  {6, 6, 6},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {4, 4, 4},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {6, 6, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 4},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {6, 6},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {6, 6, 6},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 4},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  2,                        /* MMX or SSE register to integer */
  32,                       /* size of l1 cache.  */
  2048,                     /* size of l2 cache.  */
  128,                      /* size of prefetch block */
  8,                        /* number of parallel prefetches */
  3,                        /* Branch cost */
  COSTS_N_INSNS (3),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),       /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,    /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /* HI */
   COSTS_N_INSNS (3),       /* SI */
   COSTS_N_INSNS (4),       /* DI */
   COSTS_N_INSNS (2)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),      /* HI */
   COSTS_N_INSNS (42),      /* SI */
   COSTS_N_INSNS (74),      /* DI */
   COSTS_N_INSNS (74)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  17,                       /* MOVE_RATIO */
  2,                        /* cost for loading QImode using movzbl */
  {4, 4, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {4, 4, 4},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {12, 12, 12},             /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {8, 8},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {8, 8},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {8, 8, 8},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {8, 8, 8},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  5,                        /* MMX or SSE register to integer */
  32,                       /* size of l1 cache.  */
  256,                      /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  3,                        /* Branch cost */
  COSTS_N_INSNS (8),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),       /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};

/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  /* On all chips taken into consideration, lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results in
     the use of unnecessary temporary registers, causing regressions on
     several SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,    /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /* HI */
   COSTS_N_INSNS (3),       /* SI */
   COSTS_N_INSNS (4),       /* DI */
   COSTS_N_INSNS (2)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),      /* HI */
   COSTS_N_INSNS (42),      /* SI */
   COSTS_N_INSNS (74),      /* DI */
   COSTS_N_INSNS (74)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  17,                       /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {4, 4, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {4, 4, 4},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {12, 12, 12},             /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {8, 8},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {8, 8},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {8, 8, 8},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {8, 8, 8},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  5,                        /* MMX or SSE register to integer */
  32,                       /* size of l1 cache.  */
  512,                      /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
  3,                        /* Branch cost */
  COSTS_N_INSNS (8),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),       /* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};

/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona
   and K8.  */
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,    /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /* HI */
   COSTS_N_INSNS (3),       /* SI */
   COSTS_N_INSNS (4),       /* DI */
   COSTS_N_INSNS (2)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),      /* HI */
   COSTS_N_INSNS (42),      /* SI */
   COSTS_N_INSNS (74),      /* DI */
   COSTS_N_INSNS (74)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  17,                       /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {4, 4, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {4, 4, 4},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {12, 12, 12},             /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {8, 8},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {8, 8},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {8, 8, 8},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {8, 8, 8},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  5,                        /* MMX or SSE register to integer */
  32,                       /* size of l1 cache.  */
  256,                      /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  3,                        /* Branch cost */
  COSTS_N_INSNS (8),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),       /* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;
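
/* Roughly how this default gets replaced (a sketch of the option handling
   elsewhere in this file, which is authoritative): once -mtune is resolved,
   the pointer is redirected at the matching table, e.g.

     ix86_cost = processor_target_table[ix86_tune].cost;

   so every query made through ix86_cost-> picks up the active tuning.  */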

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be a common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
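
/* These masks compose by bitwise OR: each m_* constant is one bit per
   processor, so a tuning entry such as m_K6_GEODE | m_CORE2 is simply the
   set of processors the tuning applies to, and m_GENERIC covers both
   generic variants at once.  */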

/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];

/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1293 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1294 negatively, so enabling for Generic64 seems like good code size
1295 tradeoff. We can't enable it for 32bit generic because it does not
1296 work well with PPro base chips. */
1297 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1299 /* X86_TUNE_PUSH_MEMORY */
1300 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1301 | m_NOCONA | m_CORE2 | m_GENERIC,
1303 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1304 m_486 | m_PENT,
1306 /* X86_TUNE_UNROLL_STRLEN */
1307 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1308 | m_CORE2 | m_GENERIC,
1310 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1311 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1313 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1314 on simulation result. But after P4 was made, no performance benefit
1315 was observed with branch hints. It also increases the code size.
1316 As a result, icc never generates branch hints. */
1319 /* X86_TUNE_DOUBLE_WITH_ADD */
1320 ~m_386,
1322 /* X86_TUNE_USE_SAHF */
1323 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1324 | m_NOCONA | m_CORE2 | m_GENERIC,
1326 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1327 partial dependencies. */
1328 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1329 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1331 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1332 register stalls on Generic32 compilation setting as well. However
1333 in current implementation the partial register stalls are not eliminated
1334 very well - they can be introduced via subregs synthesized by combine
1335 and can happen in caller/callee saving sequences. Because this option
1336 pays back little on PPro based chips and is in conflict with partial reg
1337 dependencies used by Athlon/P4 based chips, it is better to leave it off
1338 for generic32 for now. */
1339 m_PPRO,
1341 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1342 m_CORE2 | m_GENERIC,
1344 /* X86_TUNE_USE_HIMODE_FIOP */
1345 m_386 | m_486 | m_K6_GEODE,
1347 /* X86_TUNE_USE_SIMODE_FIOP */
1348 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1350 /* X86_TUNE_USE_MOV0 */
1351 m_K6,
1353 /* X86_TUNE_USE_CLTD */
1354 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1356 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1357 m_PENT4,
1359 /* X86_TUNE_SPLIT_LONG_MOVES */
1360 m_PPRO,
1362 /* X86_TUNE_READ_MODIFY_WRITE */
1363 ~m_PENT,
1365 /* X86_TUNE_READ_MODIFY */
1366 ~(m_PENT | m_PPRO),
1368 /* X86_TUNE_PROMOTE_QIMODE */
1369 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1370 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1372 /* X86_TUNE_FAST_PREFIX */
1373 ~(m_PENT | m_486 | m_386),
1375 /* X86_TUNE_SINGLE_STRINGOP */
1376 m_386 | m_PENT4 | m_NOCONA,
1378 /* X86_TUNE_QIMODE_MATH */
1381 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1382 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1383 might be considered for Generic32 if our scheme for avoiding partial
1384 stalls was more effective. */
1385 ~m_PPRO,
1387 /* X86_TUNE_PROMOTE_QI_REGS */
1390 /* X86_TUNE_PROMOTE_HI_REGS */
1391 m_PPRO,
1393 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1394 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1395 | m_CORE2 | m_GENERIC,
1397 /* X86_TUNE_ADD_ESP_8 */
1398 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1399 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1401 /* X86_TUNE_SUB_ESP_4 */
1402 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1403 | m_GENERIC,
1405 /* X86_TUNE_SUB_ESP_8 */
1406 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1407 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1409 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1410 for DFmode copies */
1411 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1412 | m_GENERIC | m_GEODE),
1414 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1415 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1417 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1418 conflict here in between PPro/Pentium4 based chips that thread 128bit
1419 SSE registers as single units versus K8 based chips that divide SSE
1420 registers to two 64bit halves. This knob promotes all store destinations
1421 to be 128bit to allow register renaming on 128bit SSE units, but usually
1422 results in one extra microop on 64bit SSE units. Experimental results
1423 shows that disabling this option on P4 brings over 20% SPECfp regression,
1424 while enabling it on K8 brings roughly 2.4% regression that can be partly
1425 masked by careful scheduling of moves. */
1426 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1427 | m_AMDFAM10,
1429 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1430 m_AMDFAM10,
1432 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where types and dependencies
1433 are resolved on SSE register parts instead of whole registers, so we may
1434 maintain just the lower part of scalar values in the proper format,
1435 leaving the upper part undefined. */
1436 m_ATHLON_K8,
1438 /* X86_TUNE_SSE_TYPELESS_STORES */
1439 m_AMD_MULTIPLE,
1441 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1442 m_PPRO | m_PENT4 | m_NOCONA,
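/* (I.e. on these chips an SSE register is zeroed with the integer
"pxor %xmm0, %xmm0" even in floating-point contexts, rather than an
FP-typed zeroing idiom; illustrative asm only.) */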
1444 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1445 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1447 /* X86_TUNE_PROLOGUE_USING_MOVE */
1448 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1450 /* X86_TUNE_EPILOGUE_USING_MOVE */
1451 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1453 /* X86_TUNE_SHIFT1 */
1454 ~m_486,
1456 /* X86_TUNE_USE_FFREEP */
1457 m_AMD_MULTIPLE,
1459 /* X86_TUNE_INTER_UNIT_MOVES */
1460 ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
1462 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1463 ~(m_AMDFAM10),
1465 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1466 than 4 branch instructions in the 16 byte window. */
1467 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1468 | m_GENERIC,
1470 /* X86_TUNE_SCHEDULE */
1471 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1472 | m_GENERIC,
1474 /* X86_TUNE_USE_BT */
1475 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
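/* (E.g. test a variable bit with "btl %ecx, %eax; jc ..." instead of a
shift-and-mask sequence on these processors.) */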
1477 /* X86_TUNE_USE_INCDEC */
1478 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
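/* (inc/dec leave the carry flag untouched, which creates a partial
flags dependence; the chips excluded here handle that poorly and
prefer "addl $1" / "subl $1".) */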
1480 /* X86_TUNE_PAD_RETURNS */
1481 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1483 /* X86_TUNE_EXT_80387_CONSTANTS */
1484 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1485 | m_CORE2 | m_GENERIC,
1487 /* X86_TUNE_SHORTEN_X87_SSE */
1488 ~m_K8,
1490 /* X86_TUNE_AVOID_VECTOR_DECODE */
1491 m_K8 | m_GENERIC64,
1493 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
1494 HImode and SImode multiply, but 386 and 486 do HImode multiply faster. */
1495 ~(m_386 | m_486),
1497 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: IMUL of a 32-bit constant and memory is
1498 a vector path on AMD machines. */
1499 m_K8 | m_GENERIC64 | m_AMDFAM10,
1501 /* X86_TUNE_SLOW_IMUL_IMM8: IMUL of an 8-bit constant is a vector path on
1502 AMD machines. */
1503 m_K8 | m_GENERIC64 | m_AMDFAM10,
1505 /* X86_TUNE_MOVE_M1_VIA_OR: On Pentiums, it is faster to load -1 via OR
1506 than a MOV. */
1507 m_PENT,
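/* (I.e. emit the 3-byte, pairable "orl $-1, %eax" instead of the
5-byte "movl $-1, %eax"; illustrative encodings.) */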
1509 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1510 but one byte longer. */
1511 m_PENT,
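/* (E.g. "xorl $-1, %eax" can pair in the Pentium U/V pipes where
"notl %eax" cannot, at the cost of the extra immediate byte.) */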
1513 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a
1514 memory operand that cannot be represented using a modRM byte. The XOR
1515 replacement is long decoded, so this split helps here as well. */
1516 m_K6,
1518 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1519 from FP to FP. */
1520 m_AMDFAM10 | m_GENERIC,
1522 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1523 from integer to FP. */
1524 m_AMDFAM10,
1526 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1527 with a subsequent conditional jump instruction into a single
1528 compare-and-branch uop. */
1529 m_CORE2,
1531 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1532 will impact LEA instruction selection. */
1533 m_ATOM,
1536 /* Feature tests against the various architecture variations. */
1537 unsigned char ix86_arch_features[X86_ARCH_LAST];
1539 /* Feature tests against the various architecture variations, used to create
1540 ix86_arch_features based on the processor mask. */
1541 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1542 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1543 ~(m_386 | m_486 | m_PENT | m_K6),
1545 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1546 ~m_386,
1548 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1549 ~(m_386 | m_486),
1551 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1552 ~m_386,
1554 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1555 ~m_386,
1558 static const unsigned int x86_accumulate_outgoing_args
1559 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1560 | m_GENERIC;
1562 static const unsigned int x86_arch_always_fancy_math_387
1563 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1564 | m_NOCONA | m_CORE2 | m_GENERIC;
1566 static enum stringop_alg stringop_alg = no_stringop;
1568 /* In case the average insn count for a single function invocation is
1569 lower than this constant, emit fast (but longer) prologue and
1570 epilogue code. */
1571 #define FAST_PROLOGUE_INSN_COUNT 20
1573 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
1574 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1575 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1576 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1578 /* Array of the smallest class containing reg number REGNO, indexed by
1579 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1581 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1583 /* ax, dx, cx, bx */
1584 AREG, DREG, CREG, BREG,
1585 /* si, di, bp, sp */
1586 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1587 /* FP registers */
1588 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1589 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1590 /* arg pointer */
1591 NON_Q_REGS,
1592 /* flags, fpsr, fpcr, frame */
1593 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1594 /* SSE registers */
1595 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1596 SSE_REGS, SSE_REGS,
1597 /* MMX registers */
1598 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1599 MMX_REGS, MMX_REGS,
1600 /* REX registers */
1601 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1602 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1603 /* SSE REX registers */
1604 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1605 SSE_REGS, SSE_REGS,
1608 /* The "default" register map used in 32bit mode. */
1610 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1612 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1613 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1614 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1615 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1616 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1617 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1618 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1621 /* The "default" register map used in 64bit mode. */
1623 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1625 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1626 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1627 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1628 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1629 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1630 8,9,10,11,12,13,14,15, /* extended integer registers */
1631 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1634 /* Define the register numbers to be used in Dwarf debugging information.
1635 The SVR4 reference port C compiler uses the following register numbers
1636 in its Dwarf output code:
1637 0 for %eax (gcc regno = 0)
1638 1 for %ecx (gcc regno = 2)
1639 2 for %edx (gcc regno = 1)
1640 3 for %ebx (gcc regno = 3)
1641 4 for %esp (gcc regno = 7)
1642 5 for %ebp (gcc regno = 6)
1643 6 for %esi (gcc regno = 4)
1644 7 for %edi (gcc regno = 5)
1645 The following three DWARF register numbers are never generated by
1646 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1647 believes these numbers have these meanings.
1648 8 for %eip (no gcc equivalent)
1649 9 for %eflags (gcc regno = 17)
1650 10 for %trapno (no gcc equivalent)
1651 It is not at all clear how we should number the FP stack registers
1652 for the x86 architecture. If the version of SDB on x86/svr4 were
1653 a bit less brain dead with respect to floating-point then we would
1654 have a precedent to follow with respect to DWARF register numbers
1655 for x86 FP registers, but the SDB on x86/svr4 is so completely
1656 broken with respect to FP registers that it is hardly worth thinking
1657 of it as something to strive for compatibility with.
1658 The version of x86/svr4 SDB I have at the moment does (partially)
1659 seem to believe that DWARF register number 11 is associated with
1660 the x86 register %st(0), but that's about all. Higher DWARF
1661 register numbers don't seem to be associated with anything in
1662 particular, and even for DWARF regno 11, SDB only seems to under-
1663 stand that it should say that a variable lives in %st(0) (when
1664 asked via an `=' command) if we said it was in DWARF regno 11,
1665 but SDB still prints garbage when asked for the value of the
1666 variable in question (via a `/' command).
1667 (Also note that the labels SDB prints for various FP stack regs
1668 when doing an `x' command are all wrong.)
1669 Note that these problems generally don't affect the native SVR4
1670 C compiler because it doesn't allow the use of -O with -g and
1671 because when it is *not* optimizing, it allocates a memory
1672 location for each floating-point variable, and the memory
1673 location is what gets described in the DWARF AT_location
1674 attribute for the variable in question.
1675 Regardless of the severe mental illness of the x86/svr4 SDB, we
1676 do something sensible here and we use the following DWARF
1677 register numbers. Note that these are all stack-top-relative
1678 numbers.
1679 11 for %st(0) (gcc regno = 8)
1680 12 for %st(1) (gcc regno = 9)
1681 13 for %st(2) (gcc regno = 10)
1682 14 for %st(3) (gcc regno = 11)
1683 15 for %st(4) (gcc regno = 12)
1684 16 for %st(5) (gcc regno = 13)
1685 17 for %st(6) (gcc regno = 14)
1686 18 for %st(7) (gcc regno = 15)
1688 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1690 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1691 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1692 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1693 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1694 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1695 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1696 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1699 /* Test and compare insns in i386.md store the information needed to
1700 generate branch and scc insns here. */
1702 rtx ix86_compare_op0 = NULL_RTX;
1703 rtx ix86_compare_op1 = NULL_RTX;
1705 /* Define parameter passing and return registers. */
1707 static int const x86_64_int_parameter_registers[6] =
1709 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1712 static int const x86_64_ms_abi_int_parameter_registers[4] =
1714 CX_REG, DX_REG, R8_REG, R9_REG
1717 static int const x86_64_int_return_registers[4] =
1719 AX_REG, DX_REG, DI_REG, SI_REG
1722 /* Define the structure for the machine field in struct function. */
1724 struct GTY(()) stack_local_entry {
1725 unsigned short mode;
1726 unsigned short n;
1727 rtx rtl;
1728 struct stack_local_entry *next;
1731 /* Structure describing stack frame layout.
1732 Stack grows downward:
1734 [arguments]
1735 <- ARG_POINTER
1736 saved pc
1738 saved frame pointer if frame_pointer_needed
1739 <- HARD_FRAME_POINTER
1740 [saved regs]
1742 [padding0]
1744 [saved SSE regs]
1746 [padding1] \
1748 [va_arg registers] |
1749 > to_allocate <- FRAME_POINTER
1750 [frame] |
1752 [padding2] /
1754 struct ix86_frame
1756 int padding0;
1757 int nsseregs;
1758 int nregs;
1759 int padding1;
1760 int va_arg_size;
1761 HOST_WIDE_INT frame;
1762 int padding2;
1763 int outgoing_arguments_size;
1764 int red_zone_size;
1766 HOST_WIDE_INT to_allocate;
1767 /* The offsets relative to ARG_POINTER. */
1768 HOST_WIDE_INT frame_pointer_offset;
1769 HOST_WIDE_INT hard_frame_pointer_offset;
1770 HOST_WIDE_INT stack_pointer_offset;
1772 /* When save_regs_using_mov is set, emit prologue using
1773 move instead of push instructions. */
1774 bool save_regs_using_mov;
1777 /* Code model option. */
1778 enum cmodel ix86_cmodel;
1779 /* Asm dialect. */
1780 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1781 /* TLS dialects. */
1782 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1784 /* Which unit we are generating floating point math for. */
1785 enum fpmath_unit ix86_fpmath;
1787 /* Which cpu are we scheduling for. */
1788 enum attr_cpu ix86_schedule;
1790 /* Which cpu are we optimizing for. */
1791 enum processor_type ix86_tune;
1793 /* Which instruction set architecture to use. */
1794 enum processor_type ix86_arch;
1796 /* True if the SSE prefetch instruction is not a NOOP. */
1797 int x86_prefetch_sse;
1799 /* ix86_regparm_string as a number */
1800 static int ix86_regparm;
1802 /* -mstackrealign option */
1803 extern int ix86_force_align_arg_pointer;
1804 static const char ix86_force_align_arg_pointer_string[]
1805 = "force_align_arg_pointer";
1807 static rtx (*ix86_gen_leave) (void);
1808 static rtx (*ix86_gen_pop1) (rtx);
1809 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1810 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1811 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1812 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1813 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1814 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1816 /* Preferred alignment for stack boundary in bits. */
1817 unsigned int ix86_preferred_stack_boundary;
1819 /* Alignment for incoming stack boundary in bits specified at
1820 command line. */
1821 static unsigned int ix86_user_incoming_stack_boundary;
1823 /* Default alignment for incoming stack boundary in bits. */
1824 static unsigned int ix86_default_incoming_stack_boundary;
1826 /* Alignment for incoming stack boundary in bits. */
1827 unsigned int ix86_incoming_stack_boundary;
1829 /* The ABI used by the target. */
1830 enum calling_abi ix86_abi;
1832 /* Values 1-5: see jump.c */
1833 int ix86_branch_cost;
1835 /* Calling abi specific va_list type nodes. */
1836 static GTY(()) tree sysv_va_list_type_node;
1837 static GTY(()) tree ms_va_list_type_node;
1839 /* Variables which are this size or smaller are put in the data/bss
1840 or ldata/lbss sections. */
1842 int ix86_section_threshold = 65536;
1844 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1845 char internal_label_prefix[16];
1846 int internal_label_prefix_len;
1848 /* Fence to use after loop using movnt. */
1849 tree x86_mfence;
1851 /* Register class used for passing a given 64-bit part of the argument.
1852 These represent classes as documented by the psABI, with the exception
1853 of the SSESF and SSEDF classes, which are basically the SSE class; GCC
1854 just uses SFmode or DFmode moves instead of DImode to avoid reformatting
1856 penalties. Similarly we play games with INTEGERSI_CLASS to use cheaper
1857 SImode moves whenever possible (the upper half contains only padding). */
1858 enum x86_64_reg_class
1860 X86_64_NO_CLASS,
1861 X86_64_INTEGER_CLASS,
1862 X86_64_INTEGERSI_CLASS,
1863 X86_64_SSE_CLASS,
1864 X86_64_SSESF_CLASS,
1865 X86_64_SSEDF_CLASS,
1866 X86_64_SSEUP_CLASS,
1867 X86_64_X87_CLASS,
1868 X86_64_X87UP_CLASS,
1869 X86_64_COMPLEX_X87_CLASS,
1870 X86_64_MEMORY_CLASS
1873 #define MAX_CLASSES 4
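/* Illustrative classification (per the x86-64 psABI, not taken from this
file): struct { double d; long l; } occupies two eightbytes classified
{ X86_64_SSEDF_CLASS, X86_64_INTEGER_CLASS }, so it travels in one SSE
and one integer register, while an aggregate larger than two eightbytes
with no SSEUP parts degenerates to X86_64_MEMORY_CLASS and is passed on
the stack. */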
1875 /* Table of constants used by fldpi, fldln2, etc.... */
1876 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1877 static bool ext_80387_constants_init = 0;
1880 static struct machine_function * ix86_init_machine_status (void);
1881 static rtx ix86_function_value (const_tree, const_tree, bool);
1882 static int ix86_function_regparm (const_tree, const_tree);
1883 static void ix86_compute_frame_layout (struct ix86_frame *);
1884 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1885 rtx, rtx, int);
1886 static void ix86_add_new_builtins (int);
1888 enum ix86_function_specific_strings
1890 IX86_FUNCTION_SPECIFIC_ARCH,
1891 IX86_FUNCTION_SPECIFIC_TUNE,
1892 IX86_FUNCTION_SPECIFIC_FPMATH,
1893 IX86_FUNCTION_SPECIFIC_MAX
1896 static char *ix86_target_string (int, int, const char *, const char *,
1897 const char *, bool);
1898 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1899 static void ix86_function_specific_save (struct cl_target_option *);
1900 static void ix86_function_specific_restore (struct cl_target_option *);
1901 static void ix86_function_specific_print (FILE *, int,
1902 struct cl_target_option *);
1903 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1904 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1905 static bool ix86_can_inline_p (tree, tree);
1906 static void ix86_set_current_function (tree);
1908 static enum calling_abi ix86_function_abi (const_tree);
1911 /* The svr4 ABI for the i386 says that records and unions are returned
1912 in memory. */
1913 #ifndef DEFAULT_PCC_STRUCT_RETURN
1914 #define DEFAULT_PCC_STRUCT_RETURN 1
1915 #endif
1917 /* Whether -mtune= or -march= were specified */
1918 static int ix86_tune_defaulted;
1919 static int ix86_arch_specified;
1921 /* Bit flags that specify the ISA we are compiling for. */
1922 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1924 /* A mask of ix86_isa_flags that includes bit X if X
1925 was set or cleared on the command line. */
1926 static int ix86_isa_flags_explicit;
1928 /* Define a set of ISAs which are available when a given ISA is
1929 enabled. MMX and SSE ISAs are handled separately. */
1931 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1932 #define OPTION_MASK_ISA_3DNOW_SET \
1933 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1935 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1936 #define OPTION_MASK_ISA_SSE2_SET \
1937 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1938 #define OPTION_MASK_ISA_SSE3_SET \
1939 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1940 #define OPTION_MASK_ISA_SSSE3_SET \
1941 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1942 #define OPTION_MASK_ISA_SSE4_1_SET \
1943 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1944 #define OPTION_MASK_ISA_SSE4_2_SET \
1945 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1946 #define OPTION_MASK_ISA_AVX_SET \
1947 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1948 #define OPTION_MASK_ISA_FMA_SET \
1949 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
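/* For instance, OPTION_MASK_ISA_SSE3_SET expands transitively to
OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE,
so -msse3 on the command line also turns on SSE2 and SSE. */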
1951 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1952 as -msse4.2. */
1953 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1955 #define OPTION_MASK_ISA_SSE4A_SET \
1956 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1957 #define OPTION_MASK_ISA_SSE5_SET \
1958 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1960 /* AES and PCLMUL need SSE2 because they use xmm registers. */
1961 #define OPTION_MASK_ISA_AES_SET \
1962 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1963 #define OPTION_MASK_ISA_PCLMUL_SET \
1964 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1966 #define OPTION_MASK_ISA_ABM_SET \
1967 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1969 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1970 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1971 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1972 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
1973 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
1975 /* Define a set of ISAs which aren't available when a given ISA is
1976 disabled. MMX and SSE ISAs are handled separately. */
1978 #define OPTION_MASK_ISA_MMX_UNSET \
1979 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1980 #define OPTION_MASK_ISA_3DNOW_UNSET \
1981 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1982 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1984 #define OPTION_MASK_ISA_SSE_UNSET \
1985 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1986 #define OPTION_MASK_ISA_SSE2_UNSET \
1987 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1988 #define OPTION_MASK_ISA_SSE3_UNSET \
1989 (OPTION_MASK_ISA_SSE3 \
1990 | OPTION_MASK_ISA_SSSE3_UNSET \
1991 | OPTION_MASK_ISA_SSE4A_UNSET )
1992 #define OPTION_MASK_ISA_SSSE3_UNSET \
1993 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1994 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1995 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1996 #define OPTION_MASK_ISA_SSE4_2_UNSET \
1997 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
1998 #define OPTION_MASK_ISA_AVX_UNSET \
1999 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
2000 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2002 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
2003 as -mno-sse4.1. */
2004 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2006 #define OPTION_MASK_ISA_SSE4A_UNSET \
2007 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
2008 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
2009 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2010 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2011 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2012 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2013 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2014 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2015 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2016 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
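/* These cascade in the opposite direction: OPTION_MASK_ISA_SSE2_UNSET,
for example, transitively clears SSE3, SSSE3, SSE4.1, SSE4.2, SSE4A,
SSE5, AVX and FMA as well, since all of them depend on SSE2. */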
2018 /* Vectorization library interface and handlers. */
2019 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
2020 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2021 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2023 /* Processor target table, indexed by processor number */
2024 struct ptt
2026 const struct processor_costs *cost; /* Processor costs */
2027 const int align_loop; /* Default alignments. */
2028 const int align_loop_max_skip;
2029 const int align_jump;
2030 const int align_jump_max_skip;
2031 const int align_func;
2034 static const struct ptt processor_target_table[PROCESSOR_max] =
2036 {&i386_cost, 4, 3, 4, 3, 4},
2037 {&i486_cost, 16, 15, 16, 15, 16},
2038 {&pentium_cost, 16, 7, 16, 7, 16},
2039 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2040 {&geode_cost, 0, 0, 0, 0, 0},
2041 {&k6_cost, 32, 7, 32, 7, 32},
2042 {&athlon_cost, 16, 7, 16, 7, 16},
2043 {&pentium4_cost, 0, 0, 0, 0, 0},
2044 {&k8_cost, 16, 7, 16, 7, 16},
2045 {&nocona_cost, 0, 0, 0, 0, 0},
2046 {&core2_cost, 16, 10, 16, 10, 16},
2047 {&generic32_cost, 16, 7, 16, 7, 16},
2048 {&generic64_cost, 16, 10, 16, 10, 16},
2049 {&amdfam10_cost, 32, 24, 32, 7, 32},
2050 {&atom_cost, 16, 7, 16, 7, 16}
2053 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2055 "generic",
2056 "i386",
2057 "i486",
2058 "pentium",
2059 "pentium-mmx",
2060 "pentiumpro",
2061 "pentium2",
2062 "pentium3",
2063 "pentium4",
2064 "pentium-m",
2065 "prescott",
2066 "nocona",
2067 "core2",
2068 "atom",
2069 "geode",
2070 "k6",
2071 "k6-2",
2072 "k6-3",
2073 "athlon",
2074 "athlon-4",
2075 "k8",
2076 "amdfam10"
2079 /* Implement TARGET_HANDLE_OPTION. */
2081 static bool
2082 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2084 switch (code)
2086 case OPT_mmmx:
2087 if (value)
2089 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2090 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2092 else
2094 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2095 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2097 return true;
2099 case OPT_m3dnow:
2100 if (value)
2102 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2103 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2105 else
2107 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2108 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2110 return true;
2112 case OPT_m3dnowa:
2113 return false;
2115 case OPT_msse:
2116 if (value)
2118 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2119 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2121 else
2123 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2124 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2126 return true;
2128 case OPT_msse2:
2129 if (value)
2131 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2132 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2134 else
2136 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2137 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2139 return true;
2141 case OPT_msse3:
2142 if (value)
2144 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2145 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2147 else
2149 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2150 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2152 return true;
2154 case OPT_mssse3:
2155 if (value)
2157 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2158 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2160 else
2162 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2163 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2165 return true;
2167 case OPT_msse4_1:
2168 if (value)
2170 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2171 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2173 else
2175 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2176 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2178 return true;
2180 case OPT_msse4_2:
2181 if (value)
2183 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2184 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2186 else
2188 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2189 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2191 return true;
2193 case OPT_mavx:
2194 if (value)
2196 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2197 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2199 else
2201 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2202 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2204 return true;
2206 case OPT_mfma:
2207 if (value)
2209 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2210 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2212 else
2214 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2215 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2217 return true;
2219 case OPT_msse4:
2220 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2221 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2222 return true;
2224 case OPT_mno_sse4:
2225 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2226 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2227 return true;
2229 case OPT_msse4a:
2230 if (value)
2232 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2233 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2235 else
2237 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2238 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2240 return true;
2242 case OPT_msse5:
2243 if (value)
2245 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2246 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2248 else
2250 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2251 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
2253 return true;
2255 case OPT_mabm:
2256 if (value)
2258 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2259 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2261 else
2263 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2264 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2266 return true;
2268 case OPT_mpopcnt:
2269 if (value)
2271 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2272 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2274 else
2276 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2277 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2279 return true;
2281 case OPT_msahf:
2282 if (value)
2284 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2285 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2287 else
2289 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2290 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2292 return true;
2294 case OPT_mcx16:
2295 if (value)
2297 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2298 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2300 else
2302 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2303 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2305 return true;
2307 case OPT_mmovbe:
2308 if (value)
2310 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2311 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2313 else
2315 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2316 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2318 return true;
2320 case OPT_mcrc32:
2321 if (value)
2323 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2324 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2326 else
2328 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2329 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2331 return true;
2333 case OPT_maes:
2334 if (value)
2336 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2337 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2339 else
2341 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2342 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2344 return true;
2346 case OPT_mpclmul:
2347 if (value)
2349 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2350 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2352 else
2354 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2355 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2357 return true;
2359 default:
2360 return true;
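/* Note that these handlers run in command-line order, so e.g.
"-msse4.1 -mno-ssse3" leaves SSE4.1 disabled again:
OPTION_MASK_ISA_SSSE3_UNSET includes the SSE4.x, AVX and FMA bits. */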
2364 /* Return a string that documents the current -m options. The caller is
2365 responsible for freeing the string. */
2367 static char *
2368 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2369 const char *fpmath, bool add_nl_p)
2371 struct ix86_target_opts
2373 const char *option; /* option string */
2374 int mask; /* isa mask options */
2377 /* This table is ordered so that options like -msse5 or -msse4.2, which
2378 imply the preceding options, match first. */
2379 static struct ix86_target_opts isa_opts[] =
2381 { "-m64", OPTION_MASK_ISA_64BIT },
2382 { "-msse5", OPTION_MASK_ISA_SSE5 },
2383 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2384 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2385 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2386 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2387 { "-msse3", OPTION_MASK_ISA_SSE3 },
2388 { "-msse2", OPTION_MASK_ISA_SSE2 },
2389 { "-msse", OPTION_MASK_ISA_SSE },
2390 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2391 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2392 { "-mmmx", OPTION_MASK_ISA_MMX },
2393 { "-mabm", OPTION_MASK_ISA_ABM },
2394 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2395 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2396 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2397 { "-maes", OPTION_MASK_ISA_AES },
2398 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2401 /* Flag options. */
2402 static struct ix86_target_opts flag_opts[] =
2404 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2405 { "-m80387", MASK_80387 },
2406 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2407 { "-malign-double", MASK_ALIGN_DOUBLE },
2408 { "-mcld", MASK_CLD },
2409 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2410 { "-mieee-fp", MASK_IEEE_FP },
2411 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2412 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2413 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2414 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2415 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2416 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2417 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2418 { "-mno-red-zone", MASK_NO_RED_ZONE },
2419 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2420 { "-mrecip", MASK_RECIP },
2421 { "-mrtd", MASK_RTD },
2422 { "-msseregparm", MASK_SSEREGPARM },
2423 { "-mstack-arg-probe", MASK_STACK_PROBE },
2424 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2427 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2429 char isa_other[40];
2430 char target_other[40];
2431 unsigned num = 0;
2432 unsigned i, j;
2433 char *ret;
2434 char *ptr;
2435 size_t len;
2436 size_t line_len;
2437 size_t sep_len;
2439 memset (opts, '\0', sizeof (opts));
2441 /* Add -march= option. */
2442 if (arch)
2444 opts[num][0] = "-march=";
2445 opts[num++][1] = arch;
2448 /* Add -mtune= option. */
2449 if (tune)
2451 opts[num][0] = "-mtune=";
2452 opts[num++][1] = tune;
2455 /* Pick out the options in isa options. */
2456 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2458 if ((isa & isa_opts[i].mask) != 0)
2460 opts[num++][0] = isa_opts[i].option;
2461 isa &= ~ isa_opts[i].mask;
2465 if (isa && add_nl_p)
2467 opts[num++][0] = isa_other;
2468 sprintf (isa_other, "(other isa: 0x%x)", isa);
2471 /* Add flag options. */
2472 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2474 if ((flags & flag_opts[i].mask) != 0)
2476 opts[num++][0] = flag_opts[i].option;
2477 flags &= ~ flag_opts[i].mask;
2481 if (flags && add_nl_p)
2483 opts[num++][0] = target_other;
2484 sprintf (target_other, "(other flags: 0x%x)", flags);
2487 /* Add -fpmath= option. */
2488 if (fpmath)
2490 opts[num][0] = "-mfpmath=";
2491 opts[num++][1] = fpmath;
2494 /* Any options? */
2495 if (num == 0)
2496 return NULL;
2498 gcc_assert (num < ARRAY_SIZE (opts));
2500 /* Size the string. */
2501 len = 0;
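/* Each separator is a single space, or at most " \\\n" (three
characters) when continuation-line wrapping is enabled below. */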
2502 sep_len = (add_nl_p) ? 3 : 1;
2503 for (i = 0; i < num; i++)
2505 len += sep_len;
2506 for (j = 0; j < 2; j++)
2507 if (opts[i][j])
2508 len += strlen (opts[i][j]);
2511 /* Build the string. */
2512 ret = ptr = (char *) xmalloc (len);
2513 line_len = 0;
2515 for (i = 0; i < num; i++)
2517 size_t len2[2];
2519 for (j = 0; j < 2; j++)
2520 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2522 if (i != 0)
2524 *ptr++ = ' ';
2525 line_len++;
2527 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2529 *ptr++ = '\\';
2530 *ptr++ = '\n';
2531 line_len = 0;
2535 for (j = 0; j < 2; j++)
2536 if (opts[i][j])
2538 memcpy (ptr, opts[i][j], len2[j]);
2539 ptr += len2[j];
2540 line_len += len2[j];
2544 *ptr = '\0';
2545 gcc_assert (ret + len >= ptr);
2547 return ret;
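/* As an illustration (hypothetical arguments): called with the 64-bit and
SSE2 ISA bits, MASK_80387, arch "k8", tune "generic" and fpmath "sse",
this returns the xmalloc'ed string
"-march=k8 -mtune=generic -m64 -msse2 -m80387 -mfpmath=sse". */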
2550 /* Function that is callable from the debugger to print the current
2551 options. */
2552 void
2553 ix86_debug_options (void)
2555 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2556 ix86_arch_string, ix86_tune_string,
2557 ix86_fpmath_string, true);
2559 if (opts)
2561 fprintf (stderr, "%s\n\n", opts);
2562 free (opts);
2564 else
2565 fprintf (stderr, "<no options>\n\n");
2567 return;
2570 /* Sometimes certain combinations of command options do not make
2571 sense on a particular target machine. You can define a macro
2572 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2573 defined, is executed once just after all the command options have
2574 been parsed.
2576 Don't use this macro to turn on various extra optimizations for
2577 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2579 void
2580 override_options (bool main_args_p)
2582 int i;
2583 unsigned int ix86_arch_mask, ix86_tune_mask;
2584 const char *prefix;
2585 const char *suffix;
2586 const char *sw;
2588 /* Comes from final.c -- no real reason to change it. */
2589 #define MAX_CODE_ALIGN 16
2591 enum pta_flags
2593 PTA_SSE = 1 << 0,
2594 PTA_SSE2 = 1 << 1,
2595 PTA_SSE3 = 1 << 2,
2596 PTA_MMX = 1 << 3,
2597 PTA_PREFETCH_SSE = 1 << 4,
2598 PTA_3DNOW = 1 << 5,
2599 PTA_3DNOW_A = 1 << 6,
2600 PTA_64BIT = 1 << 7,
2601 PTA_SSSE3 = 1 << 8,
2602 PTA_CX16 = 1 << 9,
2603 PTA_POPCNT = 1 << 10,
2604 PTA_ABM = 1 << 11,
2605 PTA_SSE4A = 1 << 12,
2606 PTA_NO_SAHF = 1 << 13,
2607 PTA_SSE4_1 = 1 << 14,
2608 PTA_SSE4_2 = 1 << 15,
2609 PTA_SSE5 = 1 << 16,
2610 PTA_AES = 1 << 17,
2611 PTA_PCLMUL = 1 << 18,
2612 PTA_AVX = 1 << 19,
2613 PTA_FMA = 1 << 20,
2614 PTA_MOVBE = 1 << 21
2617 static struct pta
2619 const char *const name; /* processor name or nickname. */
2620 const enum processor_type processor;
2621 const enum attr_cpu schedule;
2622 const unsigned /*enum pta_flags*/ flags;
2624 const processor_alias_table[] =
2626 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2627 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2628 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2629 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2630 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2631 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2632 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2633 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2634 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2635 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2636 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2637 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2638 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2639 PTA_MMX | PTA_SSE},
2640 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2641 PTA_MMX | PTA_SSE},
2642 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2643 PTA_MMX | PTA_SSE | PTA_SSE2},
2644 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2645 PTA_MMX |PTA_SSE | PTA_SSE2},
2646 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2647 PTA_MMX | PTA_SSE | PTA_SSE2},
2648 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2649 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2650 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2651 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2652 | PTA_CX16 | PTA_NO_SAHF},
2653 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2654 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2655 | PTA_SSSE3 | PTA_CX16},
2656 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2657 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2658 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2659 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2660 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2661 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2662 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2663 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2664 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2665 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2666 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2667 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2668 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2669 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2670 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2671 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2672 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2673 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2674 {"x86-64", PROCESSOR_K8, CPU_K8,
2675 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2676 {"k8", PROCESSOR_K8, CPU_K8,
2677 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2678 | PTA_SSE2 | PTA_NO_SAHF},
2679 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2680 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2681 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2682 {"opteron", PROCESSOR_K8, CPU_K8,
2683 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2684 | PTA_SSE2 | PTA_NO_SAHF},
2685 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2686 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2687 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2688 {"athlon64", PROCESSOR_K8, CPU_K8,
2689 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2690 | PTA_SSE2 | PTA_NO_SAHF},
2691 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2692 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2693 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2694 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2695 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2696 | PTA_SSE2 | PTA_NO_SAHF},
2697 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2698 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2699 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2700 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2701 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2702 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2703 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2704 0 /* flags are only used for -march switch. */ },
2705 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2706 PTA_64BIT /* flags are only used for -march switch. */ },
2709 int const pta_size = ARRAY_SIZE (processor_alias_table);
2711 /* Set up prefix/suffix so the error messages refer to either the command
2712 line argument, or the attribute(target). */
2713 if (main_args_p)
2715 prefix = "-m";
2716 suffix = "";
2717 sw = "switch";
2719 else
2721 prefix = "option(\"";
2722 suffix = "\")";
2723 sw = "attribute";
2726 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2727 SUBTARGET_OVERRIDE_OPTIONS;
2728 #endif
2730 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2731 SUBSUBTARGET_OVERRIDE_OPTIONS;
2732 #endif
2734 /* -fPIC is the default for 64-bit Mach-O (Darwin). */
2735 if (TARGET_MACHO && TARGET_64BIT)
2736 flag_pic = 2;
2738 /* Set the default values for switches whose default depends on TARGET_64BIT
2739 in case they weren't overwritten by command line options. */
2740 if (TARGET_64BIT)
2742 /* Mach-O doesn't support omitting the frame pointer for now. */
2743 if (flag_omit_frame_pointer == 2)
2744 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2745 if (flag_asynchronous_unwind_tables == 2)
2746 flag_asynchronous_unwind_tables = 1;
2747 if (flag_pcc_struct_return == 2)
2748 flag_pcc_struct_return = 0;
2750 else
2752 if (flag_omit_frame_pointer == 2)
2753 flag_omit_frame_pointer = 0;
2754 if (flag_asynchronous_unwind_tables == 2)
2755 flag_asynchronous_unwind_tables = 0;
2756 if (flag_pcc_struct_return == 2)
2757 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2760 /* Need to check -mtune=generic first. */
2761 if (ix86_tune_string)
2763 if (!strcmp (ix86_tune_string, "generic")
2764 || !strcmp (ix86_tune_string, "i686")
2765 /* As special support for cross compilers we read -mtune=native
2766 as -mtune=generic. With native compilers we won't see the
2767 -mtune=native, as it was changed by the driver. */
2768 || !strcmp (ix86_tune_string, "native"))
2770 if (TARGET_64BIT)
2771 ix86_tune_string = "generic64";
2772 else
2773 ix86_tune_string = "generic32";
2775 /* If this call is for setting the option attribute, allow the
2776 generic32/generic64 that was previously set. */
2777 else if (!main_args_p
2778 && (!strcmp (ix86_tune_string, "generic32")
2779 || !strcmp (ix86_tune_string, "generic64")))
2781 else if (!strncmp (ix86_tune_string, "generic", 7))
2782 error ("bad value (%s) for %stune=%s %s",
2783 ix86_tune_string, prefix, suffix, sw);
2785 else
2787 if (ix86_arch_string)
2788 ix86_tune_string = ix86_arch_string;
2789 if (!ix86_tune_string)
2791 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2792 ix86_tune_defaulted = 1;
2795 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2796 need to use a sensible tune option. */
2797 if (!strcmp (ix86_tune_string, "generic")
2798 || !strcmp (ix86_tune_string, "x86-64")
2799 || !strcmp (ix86_tune_string, "i686"))
2801 if (TARGET_64BIT)
2802 ix86_tune_string = "generic64";
2803 else
2804 ix86_tune_string = "generic32";
2807 if (ix86_stringop_string)
2809 if (!strcmp (ix86_stringop_string, "rep_byte"))
2810 stringop_alg = rep_prefix_1_byte;
2811 else if (!strcmp (ix86_stringop_string, "libcall"))
2812 stringop_alg = libcall;
2813 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2814 stringop_alg = rep_prefix_4_byte;
2815 else if (!strcmp (ix86_stringop_string, "rep_8byte")
2816 && TARGET_64BIT)
2817 /* rep; movq isn't available in 32-bit code. */
2818 stringop_alg = rep_prefix_8_byte;
2819 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2820 stringop_alg = loop_1_byte;
2821 else if (!strcmp (ix86_stringop_string, "loop"))
2822 stringop_alg = loop;
2823 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2824 stringop_alg = unrolled_loop;
2825 else
2826 error ("bad value (%s) for %sstringop-strategy=%s %s",
2827 ix86_stringop_string, prefix, suffix, sw);
2829 if (!strcmp (ix86_tune_string, "x86-64"))
2830 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2831 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2832 prefix, suffix, prefix, suffix, prefix, suffix);
2834 if (!ix86_arch_string)
2835 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2836 else
2837 ix86_arch_specified = 1;
2839 if (!strcmp (ix86_arch_string, "generic"))
2840 error ("generic CPU can be used only for %stune=%s %s",
2841 prefix, suffix, sw);
2842 if (!strncmp (ix86_arch_string, "generic", 7))
2843 error ("bad value (%s) for %sarch=%s %s",
2844 ix86_arch_string, prefix, suffix, sw);
2846 /* Validate -mabi= value. */
2847 if (ix86_abi_string)
2849 if (strcmp (ix86_abi_string, "sysv") == 0)
2850 ix86_abi = SYSV_ABI;
2851 else if (strcmp (ix86_abi_string, "ms") == 0)
2852 ix86_abi = MS_ABI;
2853 else
2854 error ("unknown ABI (%s) for %sabi=%s %s",
2855 ix86_abi_string, prefix, suffix, sw);
2857 else
2858 ix86_abi = DEFAULT_ABI;
2860 if (ix86_cmodel_string != 0)
2862 if (!strcmp (ix86_cmodel_string, "small"))
2863 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2864 else if (!strcmp (ix86_cmodel_string, "medium"))
2865 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2866 else if (!strcmp (ix86_cmodel_string, "large"))
2867 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2868 else if (flag_pic)
2869 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2870 else if (!strcmp (ix86_cmodel_string, "32"))
2871 ix86_cmodel = CM_32;
2872 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2873 ix86_cmodel = CM_KERNEL;
2874 else
2875 error ("bad value (%s) for %scmodel=%s %s",
2876 ix86_cmodel_string, prefix, suffix, sw);
2878 else
2880 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2881 use of rip-relative addressing. This eliminates fixups that
2882 would otherwise be needed if this object is to be placed in a
2883 DLL, and is essentially just as efficient as direct addressing. */
2884 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2885 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2886 else if (TARGET_64BIT)
2887 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2888 else
2889 ix86_cmodel = CM_32;
2891 if (ix86_asm_string != 0)
2893 if (! TARGET_MACHO
2894 && !strcmp (ix86_asm_string, "intel"))
2895 ix86_asm_dialect = ASM_INTEL;
2896 else if (!strcmp (ix86_asm_string, "att"))
2897 ix86_asm_dialect = ASM_ATT;
2898 else
2899 error ("bad value (%s) for %sasm=%s %s",
2900 ix86_asm_string, prefix, suffix, sw);
2902 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2903 error ("code model %qs not supported in the %s bit mode",
2904 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2905 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2906 sorry ("%i-bit mode not compiled in",
2907 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2909 for (i = 0; i < pta_size; i++)
2910 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2912 ix86_schedule = processor_alias_table[i].schedule;
2913 ix86_arch = processor_alias_table[i].processor;
2914 /* Default cpu tuning to the architecture. */
2915 ix86_tune = ix86_arch;
2917 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2918 error ("CPU you selected does not support x86-64 "
2919 "instruction set");
2921 if (processor_alias_table[i].flags & PTA_MMX
2922 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2923 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2924 if (processor_alias_table[i].flags & PTA_3DNOW
2925 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2926 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2927 if (processor_alias_table[i].flags & PTA_3DNOW_A
2928 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2929 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2930 if (processor_alias_table[i].flags & PTA_SSE
2931 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2932 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2933 if (processor_alias_table[i].flags & PTA_SSE2
2934 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2935 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2936 if (processor_alias_table[i].flags & PTA_SSE3
2937 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2938 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2939 if (processor_alias_table[i].flags & PTA_SSSE3
2940 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2941 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2942 if (processor_alias_table[i].flags & PTA_SSE4_1
2943 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2944 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2945 if (processor_alias_table[i].flags & PTA_SSE4_2
2946 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2947 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2948 if (processor_alias_table[i].flags & PTA_AVX
2949 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2950 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2951 if (processor_alias_table[i].flags & PTA_FMA
2952 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2953 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2954 if (processor_alias_table[i].flags & PTA_SSE4A
2955 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2956 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2957 if (processor_alias_table[i].flags & PTA_SSE5
2958 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2959 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2960 if (processor_alias_table[i].flags & PTA_ABM
2961 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2962 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2963 if (processor_alias_table[i].flags & PTA_CX16
2964 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2965 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2966 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2967 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2968 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2969 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2970 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2971 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2972 if (processor_alias_table[i].flags & PTA_MOVBE
2973 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
2974 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
2975 if (processor_alias_table[i].flags & PTA_AES
2976 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2977 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2978 if (processor_alias_table[i].flags & PTA_PCLMUL
2979 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2980 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2981 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2982 x86_prefetch_sse = true;
2984 break;
2987 if (i == pta_size)
2988 error ("bad value (%s) for %sarch=%s %s",
2989 ix86_arch_string, prefix, suffix, sw);
2991 ix86_arch_mask = 1u << ix86_arch;
2992 for (i = 0; i < X86_ARCH_LAST; ++i)
2993 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
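/* I.e. each entry of initial_ix86_arch_features is a bitmask over
processor types; a feature is enabled exactly when the bit for the
selected -march processor is set in that mask. */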
2995 for (i = 0; i < pta_size; i++)
2996 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2998 ix86_schedule = processor_alias_table[i].schedule;
2999 ix86_tune = processor_alias_table[i].processor;
3000 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3002 if (ix86_tune_defaulted)
3004 ix86_tune_string = "x86-64";
3005 for (i = 0; i < pta_size; i++)
3006 if (! strcmp (ix86_tune_string,
3007 processor_alias_table[i].name))
3008 break;
3009 ix86_schedule = processor_alias_table[i].schedule;
3010 ix86_tune = processor_alias_table[i].processor;
3012 else
3013 error ("CPU you selected does not support x86-64 "
3014 "instruction set");
3016 /* Intel CPUs have always interpreted SSE prefetch instructions as
3017 NOPs; so, we can enable SSE prefetch instructions even when
3018 -mtune (rather than -march) points us to a processor that has them.
3019 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3020 higher processors. */
3021 if (TARGET_CMOVE
3022 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3023 x86_prefetch_sse = true;
3024 break;
3026 if (i == pta_size)
3027 error ("bad value (%s) for %stune=%s %s",
3028 ix86_tune_string, prefix, suffix, sw);
3030 ix86_tune_mask = 1u << ix86_tune;
3031 for (i = 0; i < X86_TUNE_LAST; ++i)
3032 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3034 if (optimize_size)
3035 ix86_cost = &ix86_size_cost;
3036 else
3037 ix86_cost = processor_target_table[ix86_tune].cost;
3039 /* Arrange to set up i386_stack_locals for all functions. */
3040 init_machine_status = ix86_init_machine_status;
3042 /* Validate -mregparm= value. */
3043 if (ix86_regparm_string)
3045 if (TARGET_64BIT)
3046 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3047 i = atoi (ix86_regparm_string);
3048 if (i < 0 || i > REGPARM_MAX)
3049 error ("%sregparm=%d%s is not between 0 and %d",
3050 prefix, i, suffix, REGPARM_MAX);
3051 else
3052 ix86_regparm = i;
3054 if (TARGET_64BIT)
3055 ix86_regparm = REGPARM_MAX;
3057 /* If the user has provided any of the -malign-* options,
3058 warn and use that value only if -falign-* is not set.
3059 Remove this code in GCC 3.2 or later. */
3060 if (ix86_align_loops_string)
3062 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3063 prefix, suffix, suffix);
3064 if (align_loops == 0)
3066 i = atoi (ix86_align_loops_string);
3067 if (i < 0 || i > MAX_CODE_ALIGN)
3068 error ("%salign-loops=%d%s is not between 0 and %d",
3069 prefix, i, suffix, MAX_CODE_ALIGN);
3070 else
3071 align_loops = 1 << i;
3075 if (ix86_align_jumps_string)
3077 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3078 prefix, suffix, suffix);
3079 if (align_jumps == 0)
3081 i = atoi (ix86_align_jumps_string);
3082 if (i < 0 || i > MAX_CODE_ALIGN)
3083 error ("%salign-loops=%d%s is not between 0 and %d",
3084 prefix, i, suffix, MAX_CODE_ALIGN);
3085 else
3086 align_jumps = 1 << i;
3090 if (ix86_align_funcs_string)
3092 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3093 prefix, suffix, suffix);
3094 if (align_functions == 0)
3096 i = atoi (ix86_align_funcs_string);
3097 if (i < 0 || i > MAX_CODE_ALIGN)
3098 error ("%salign-loops=%d%s is not between 0 and %d",
3099 prefix, i, suffix, MAX_CODE_ALIGN);
3100 else
3101 align_functions = 1 << i;
3105 /* Default align_* from the processor table. */
3106 if (align_loops == 0)
3108 align_loops = processor_target_table[ix86_tune].align_loop;
3109 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3111 if (align_jumps == 0)
3113 align_jumps = processor_target_table[ix86_tune].align_jump;
3114 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3116 if (align_functions == 0)
3118 align_functions = processor_target_table[ix86_tune].align_func;
3121 /* Validate -mbranch-cost= value, or provide default. */
3122 ix86_branch_cost = ix86_cost->branch_cost;
3123 if (ix86_branch_cost_string)
3125 i = atoi (ix86_branch_cost_string);
3126 if (i < 0 || i > 5)
3127 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3128 else
3129 ix86_branch_cost = i;
3131 if (ix86_section_threshold_string)
3133 i = atoi (ix86_section_threshold_string);
3134 if (i < 0)
3135 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3136 else
3137 ix86_section_threshold = i;
3140 if (ix86_tls_dialect_string)
3142 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3143 ix86_tls_dialect = TLS_DIALECT_GNU;
3144 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3145 ix86_tls_dialect = TLS_DIALECT_GNU2;
3146 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3147 ix86_tls_dialect = TLS_DIALECT_SUN;
3148 else
3149 error ("bad value (%s) for %stls-dialect=%s %s",
3150 ix86_tls_dialect_string, prefix, suffix, sw);
3153 if (ix87_precision_string)
3155 i = atoi (ix87_precision_string);
3156 if (i != 32 && i != 64 && i != 80)
3157 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3160 if (TARGET_64BIT)
3162 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3164 /* Enable by default the SSE and MMX builtins. Do allow the user to
3165 explicitly disable any of these. In particular, disabling SSE and
3166 MMX for kernel code is extremely useful. */
3167 if (!ix86_arch_specified)
3168 ix86_isa_flags
3169 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3170 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3172 if (TARGET_RTD)
3173 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3175 else
3177 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3179 if (!ix86_arch_specified)
3180 ix86_isa_flags
3181 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3183 /* The i386 ABI does not specify a red zone. It still makes sense to use
3184 one when the programmer takes care to keep the stack from being destroyed. */
3185 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3186 target_flags |= MASK_NO_RED_ZONE;
3189 /* Keep nonleaf frame pointers. */
3190 if (flag_omit_frame_pointer)
3191 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3192 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3193 flag_omit_frame_pointer = 1;
3195 /* If we're doing fast math, we don't care about comparison order
3196 wrt NaNs. This lets us use a shorter comparison sequence. */
3197 if (flag_finite_math_only)
3198 target_flags &= ~MASK_IEEE_FP;
3200 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3201 since the insns won't need emulation. */
3202 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3203 target_flags &= ~MASK_NO_FANCY_MATH_387;
3205 /* Likewise, if the target doesn't have a 387, or we've specified
3206 software floating point, don't use 387 inline intrinsics. */
3207 if (!TARGET_80387)
3208 target_flags |= MASK_NO_FANCY_MATH_387;
3210 /* Turn on MMX builtins for -msse. */
3211 if (TARGET_SSE)
3213 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3214 x86_prefetch_sse = true;
3217 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3218 if (TARGET_SSE4_2 || TARGET_ABM)
3219 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3221 /* Validate -mpreferred-stack-boundary= value or default it to
3222 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3223 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3224 if (ix86_preferred_stack_boundary_string)
3226 i = atoi (ix86_preferred_stack_boundary_string);
3227 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3228 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3229 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3230 else
3231 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
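/* A minimal worked example (not from the source): the value is again a
   log2 quantity, measured in bytes but stored in bits, so

     gcc -mpreferred-stack-boundary=4 file.c

   yields ix86_preferred_stack_boundary = (1 << 4) * 8 = 128 bits,
   i.e. the usual 16-byte stack alignment.  */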
3234 /* Set the default value for -mstackrealign. */
3235 if (ix86_force_align_arg_pointer == -1)
3236 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3238 /* Validate -mincoming-stack-boundary= value or default it to
3239 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3240 if (ix86_force_align_arg_pointer)
3241 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3242 else
3243 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3244 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3245 if (ix86_incoming_stack_boundary_string)
3247 i = atoi (ix86_incoming_stack_boundary_string);
3248 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3249 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3250 i, TARGET_64BIT ? 4 : 2);
3251 else
3253 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3254 ix86_incoming_stack_boundary
3255 = ix86_user_incoming_stack_boundary;
3259 /* Accept -msseregparm only if at least SSE support is enabled. */
3260 if (TARGET_SSEREGPARM
3261 && ! TARGET_SSE)
3262 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3264 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3265 if (ix86_fpmath_string != 0)
3267 if (! strcmp (ix86_fpmath_string, "387"))
3268 ix86_fpmath = FPMATH_387;
3269 else if (! strcmp (ix86_fpmath_string, "sse"))
3271 if (!TARGET_SSE)
3273 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3274 ix86_fpmath = FPMATH_387;
3276 else
3277 ix86_fpmath = FPMATH_SSE;
3279 else if (! strcmp (ix86_fpmath_string, "387,sse")
3280 || ! strcmp (ix86_fpmath_string, "387+sse")
3281 || ! strcmp (ix86_fpmath_string, "sse,387")
3282 || ! strcmp (ix86_fpmath_string, "sse+387")
3283 || ! strcmp (ix86_fpmath_string, "both"))
3285 if (!TARGET_SSE)
3287 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3288 ix86_fpmath = FPMATH_387;
3290 else if (!TARGET_80387)
3292 warning (0, "387 instruction set disabled, using SSE arithmetic");
3293 ix86_fpmath = FPMATH_SSE;
3295 else
3296 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3298 else
3299 error ("bad value (%s) for %sfpmath=%s %s",
3300 ix86_fpmath_string, prefix, suffix, sw);
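/* Sketch of the spellings accepted above (hypothetical invocations):

     gcc -m32 -msse2 -mfpmath=sse file.c       scalar FP in SSE registers
     gcc -m32 -mfpmath=387 file.c              scalar FP on the x87 stack
     gcc -m32 -msse2 -mfpmath=sse,387 file.c   both units available

   Requesting -mfpmath=sse without SSE enabled falls back to 387 with the
   warning issued above.  */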
3303 /* If the i387 is disabled, then do not return values in it. */
3304 if (!TARGET_80387)
3305 target_flags &= ~MASK_FLOAT_RETURNS;
3307 /* Use an external vectorized library when vectorizing intrinsics. */
3308 if (ix86_veclibabi_string)
3310 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3311 ix86_veclib_handler = ix86_veclibabi_svml;
3312 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3313 ix86_veclib_handler = ix86_veclibabi_acml;
3314 else
3315 error ("unknown vectorization library ABI type (%s) for "
3316 "%sveclibabi=%s %s", ix86_veclibabi_string,
3317 prefix, suffix, sw);
3320 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3321 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3322 && !optimize_size)
3323 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3325 /* ??? Unwind info is not correct around the CFG unless either a frame
3326 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3327 unwind info generation to be aware of the CFG and propagating states
3328 around edges. */
3329 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3330 || flag_exceptions || flag_non_call_exceptions)
3331 && flag_omit_frame_pointer
3332 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3334 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3335 warning (0, "unwind tables currently require either a frame pointer "
3336 "or %saccumulate-outgoing-args%s for correctness",
3337 prefix, suffix);
3338 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3341 /* If stack probes are required, the space used for large function
3342 arguments on the stack must also be probed, so enable
3343 -maccumulate-outgoing-args so this happens in the prologue. */
3344 if (TARGET_STACK_PROBE
3345 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3347 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3348 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3349 "for correctness", prefix, suffix);
3350 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3353 /* For sane SSE instruction set generation we need the fcomi instruction.
3354 It is safe to enable all CMOVE instructions. */
3355 if (TARGET_SSE)
3356 TARGET_CMOVE = 1;
3358 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3360 char *p;
3361 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3362 p = strchr (internal_label_prefix, 'X');
3363 internal_label_prefix_len = p - internal_label_prefix;
3364 *p = '\0';
3367 /* When the scheduling description is not available, disable the scheduler
3368 pass so that it won't slow down compilation and make x87 code slower. */
3369 if (!TARGET_SCHEDULE)
3370 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3372 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3373 set_param_value ("simultaneous-prefetches",
3374 ix86_cost->simultaneous_prefetches);
3375 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3376 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3377 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3378 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3379 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3380 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
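/* These defaults can still be overridden from the command line; a
   hypothetical invocation tuned for a 64-byte cache line and a 512K L2:

     gcc -O2 --param l1-cache-line-size=64 --param l2-cache-size=512 file.c  */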
3382 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3383 can be optimized to ap = __builtin_next_arg (0). */
3384 if (!TARGET_64BIT)
3385 targetm.expand_builtin_va_start = NULL;
3387 if (TARGET_64BIT)
3389 ix86_gen_leave = gen_leave_rex64;
3390 ix86_gen_pop1 = gen_popdi1;
3391 ix86_gen_add3 = gen_adddi3;
3392 ix86_gen_sub3 = gen_subdi3;
3393 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3394 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3395 ix86_gen_monitor = gen_sse3_monitor64;
3396 ix86_gen_andsp = gen_anddi3;
3398 else
3400 ix86_gen_leave = gen_leave;
3401 ix86_gen_pop1 = gen_popsi1;
3402 ix86_gen_add3 = gen_addsi3;
3403 ix86_gen_sub3 = gen_subsi3;
3404 ix86_gen_sub3_carry = gen_subsi3_carry;
3405 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3406 ix86_gen_monitor = gen_sse3_monitor;
3407 ix86_gen_andsp = gen_andsi3;
3410 #ifdef USE_IX86_CLD
3411 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3412 if (!TARGET_64BIT)
3413 target_flags |= MASK_CLD & ~target_flags_explicit;
3414 #endif
3416 /* Save the initial options in case the user uses function-specific options. */
3417 if (main_args_p)
3418 target_option_default_node = target_option_current_node
3419 = build_target_option_node ();
3422 /* Save the current options */
3424 static void
3425 ix86_function_specific_save (struct cl_target_option *ptr)
3427 ptr->arch = ix86_arch;
3428 ptr->schedule = ix86_schedule;
3429 ptr->tune = ix86_tune;
3430 ptr->fpmath = ix86_fpmath;
3431 ptr->branch_cost = ix86_branch_cost;
3432 ptr->tune_defaulted = ix86_tune_defaulted;
3433 ptr->arch_specified = ix86_arch_specified;
3434 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3435 ptr->target_flags_explicit = target_flags_explicit;
3437 /* The fields are char but the variables are not; make sure the
3438 values fit in the fields. */
3439 gcc_assert (ptr->arch == ix86_arch);
3440 gcc_assert (ptr->schedule == ix86_schedule);
3441 gcc_assert (ptr->tune == ix86_tune);
3442 gcc_assert (ptr->fpmath == ix86_fpmath);
3443 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3446 /* Restore the current options */
3448 static void
3449 ix86_function_specific_restore (struct cl_target_option *ptr)
3451 enum processor_type old_tune = ix86_tune;
3452 enum processor_type old_arch = ix86_arch;
3453 unsigned int ix86_arch_mask, ix86_tune_mask;
3454 int i;
3456 ix86_arch = (enum processor_type) ptr->arch;
3457 ix86_schedule = (enum attr_cpu) ptr->schedule;
3458 ix86_tune = (enum processor_type) ptr->tune;
3459 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3460 ix86_branch_cost = ptr->branch_cost;
3461 ix86_tune_defaulted = ptr->tune_defaulted;
3462 ix86_arch_specified = ptr->arch_specified;
3463 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3464 target_flags_explicit = ptr->target_flags_explicit;
3466 /* Recreate the arch feature tests if the arch changed */
3467 if (old_arch != ix86_arch)
3469 ix86_arch_mask = 1u << ix86_arch;
3470 for (i = 0; i < X86_ARCH_LAST; ++i)
3471 ix86_arch_features[i]
3472 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3475 /* Recreate the tune optimization tests */
3476 if (old_tune != ix86_tune)
3478 ix86_tune_mask = 1u << ix86_tune;
3479 for (i = 0; i < X86_TUNE_LAST; ++i)
3480 ix86_tune_features[i]
3481 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3485 /* Print the current options */
3487 static void
3488 ix86_function_specific_print (FILE *file, int indent,
3489 struct cl_target_option *ptr)
3491 char *target_string
3492 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3493 NULL, NULL, NULL, false);
3495 fprintf (file, "%*sarch = %d (%s)\n",
3496 indent, "",
3497 ptr->arch,
3498 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3499 ? cpu_names[ptr->arch]
3500 : "<unknown>"));
3502 fprintf (file, "%*stune = %d (%s)\n",
3503 indent, "",
3504 ptr->tune,
3505 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3506 ? cpu_names[ptr->tune]
3507 : "<unknown>"));
3509 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3510 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3511 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3512 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3514 if (target_string)
3516 fprintf (file, "%*s%s\n", indent, "", target_string);
3517 free (target_string);
3522 /* Inner function to process the attribute((target(...))): take an argument
3523 and set the current options from it. If we have a list, recursively
3524 go over the list. */
3526 static bool
3527 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3529 char *next_optstr;
3530 bool ret = true;
3532 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3533 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3534 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3535 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3537 enum ix86_opt_type
3539 ix86_opt_unknown,
3540 ix86_opt_yes,
3541 ix86_opt_no,
3542 ix86_opt_str,
3543 ix86_opt_isa
3546 static const struct
3548 const char *string;
3549 size_t len;
3550 enum ix86_opt_type type;
3551 int opt;
3552 int mask;
3553 } attrs[] = {
3554 /* isa options */
3555 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3556 IX86_ATTR_ISA ("abm", OPT_mabm),
3557 IX86_ATTR_ISA ("aes", OPT_maes),
3558 IX86_ATTR_ISA ("avx", OPT_mavx),
3559 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3560 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3561 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3562 IX86_ATTR_ISA ("sse", OPT_msse),
3563 IX86_ATTR_ISA ("sse2", OPT_msse2),
3564 IX86_ATTR_ISA ("sse3", OPT_msse3),
3565 IX86_ATTR_ISA ("sse4", OPT_msse4),
3566 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3567 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3568 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3569 IX86_ATTR_ISA ("sse5", OPT_msse5),
3570 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3572 /* string options */
3573 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3574 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3575 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3577 /* flag options */
3578 IX86_ATTR_YES ("cld",
3579 OPT_mcld,
3580 MASK_CLD),
3582 IX86_ATTR_NO ("fancy-math-387",
3583 OPT_mfancy_math_387,
3584 MASK_NO_FANCY_MATH_387),
3586 IX86_ATTR_NO ("fused-madd",
3587 OPT_mfused_madd,
3588 MASK_NO_FUSED_MADD),
3590 IX86_ATTR_YES ("ieee-fp",
3591 OPT_mieee_fp,
3592 MASK_IEEE_FP),
3594 IX86_ATTR_YES ("inline-all-stringops",
3595 OPT_minline_all_stringops,
3596 MASK_INLINE_ALL_STRINGOPS),
3598 IX86_ATTR_YES ("inline-stringops-dynamically",
3599 OPT_minline_stringops_dynamically,
3600 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3602 IX86_ATTR_NO ("align-stringops",
3603 OPT_mno_align_stringops,
3604 MASK_NO_ALIGN_STRINGOPS),
3606 IX86_ATTR_YES ("recip",
3607 OPT_mrecip,
3608 MASK_RECIP),
3612 /* If this is a list, recurse to get the options. */
3613 if (TREE_CODE (args) == TREE_LIST)
3615 bool ret = true;
3617 for (; args; args = TREE_CHAIN (args))
3618 if (TREE_VALUE (args)
3619 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3620 ret = false;
3622 return ret;
3625 else if (TREE_CODE (args) != STRING_CST)
3626 gcc_unreachable ();
3628 /* Handle multiple arguments separated by commas. */
3629 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3631 while (next_optstr && *next_optstr != '\0')
3633 char *p = next_optstr;
3634 char *orig_p = p;
3635 char *comma = strchr (next_optstr, ',');
3636 const char *opt_string;
3637 size_t len, opt_len;
3638 int opt;
3639 bool opt_set_p;
3640 char ch;
3641 unsigned i;
3642 enum ix86_opt_type type = ix86_opt_unknown;
3643 int mask = 0;
3645 if (comma)
3647 *comma = '\0';
3648 len = comma - next_optstr;
3649 next_optstr = comma + 1;
3651 else
3653 len = strlen (p);
3654 next_optstr = NULL;
3657 /* Recognize no-xxx. */
3658 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3660 opt_set_p = false;
3661 p += 3;
3662 len -= 3;
3664 else
3665 opt_set_p = true;
3667 /* Find the option. */
3668 ch = *p;
3669 opt = N_OPTS;
3670 for (i = 0; i < ARRAY_SIZE (attrs); i++)
3672 type = attrs[i].type;
3673 opt_len = attrs[i].len;
3674 if (ch == attrs[i].string[0]
3675 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3676 && memcmp (p, attrs[i].string, opt_len) == 0)
3678 opt = attrs[i].opt;
3679 mask = attrs[i].mask;
3680 opt_string = attrs[i].string;
3681 break;
3685 /* Process the option. */
3686 if (opt == N_OPTS)
3688 error ("attribute(target(\"%s\")) is unknown", orig_p);
3689 ret = false;
3692 else if (type == ix86_opt_isa)
3693 ix86_handle_option (opt, p, opt_set_p);
3695 else if (type == ix86_opt_yes || type == ix86_opt_no)
3697 if (type == ix86_opt_no)
3698 opt_set_p = !opt_set_p;
3700 if (opt_set_p)
3701 target_flags |= mask;
3702 else
3703 target_flags &= ~mask;
3706 else if (type == ix86_opt_str)
3708 if (p_strings[opt])
3710 error ("option(\"%s\") was already specified", opt_string);
3711 ret = false;
3713 else
3714 p_strings[opt] = xstrdup (p + opt_len);
3717 else
3718 gcc_unreachable ();
3721 return ret;
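/* Sketch of input this parser accepts (hypothetical user code):

     __attribute__ ((target ("sse4.2,no-fancy-math-387")))
     int
     count_bits (unsigned int v)
     {
       return __builtin_popcount (v);
     }

   The string is split at commas; "sse4.2" is found in the ISA table and
   routed through ix86_handle_option, while the "no-" prefix inverts the
   sense of the fancy-math-387 mask bit.  */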
3724 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
3726 tree
3727 ix86_valid_target_attribute_tree (tree args)
3729 const char *orig_arch_string = ix86_arch_string;
3730 const char *orig_tune_string = ix86_tune_string;
3731 const char *orig_fpmath_string = ix86_fpmath_string;
3732 int orig_tune_defaulted = ix86_tune_defaulted;
3733 int orig_arch_specified = ix86_arch_specified;
3734 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3735 tree t = NULL_TREE;
3736 int i;
3737 struct cl_target_option *def
3738 = TREE_TARGET_OPTION (target_option_default_node);
3740 /* Process each of the options on the chain. */
3741 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3742 return NULL_TREE;
3744 /* If the changed options are different from the default, rerun override_options,
3745 and then save the options away. The string options are attribute options,
3746 and will be undone when we copy the save structure. */
3747 if (ix86_isa_flags != def->ix86_isa_flags
3748 || target_flags != def->target_flags
3749 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3750 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3751 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3753 /* If we are using the default tune= or arch=, undo the string assigned,
3754 and use the default. */
3755 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3756 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3757 else if (!orig_arch_specified)
3758 ix86_arch_string = NULL;
3760 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3761 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3762 else if (orig_tune_defaulted)
3763 ix86_tune_string = NULL;
3765 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
3766 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3767 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3768 else if (!TARGET_64BIT && TARGET_SSE)
3769 ix86_fpmath_string = "sse,387";
3771 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
3772 override_options (false);
3774 /* Add any builtin functions with the new isa if any. */
3775 ix86_add_new_builtins (ix86_isa_flags);
3777 /* Save the current options unless we are validating options for
3778 #pragma. */
3779 t = build_target_option_node ();
3781 ix86_arch_string = orig_arch_string;
3782 ix86_tune_string = orig_tune_string;
3783 ix86_fpmath_string = orig_fpmath_string;
3785 /* Free up memory allocated to hold the strings */
3786 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3787 if (option_strings[i])
3788 free (option_strings[i]);
3791 return t;
3794 /* Hook to validate attribute((target("string"))). */
3796 static bool
3797 ix86_valid_target_attribute_p (tree fndecl,
3798 tree ARG_UNUSED (name),
3799 tree args,
3800 int ARG_UNUSED (flags))
3802 struct cl_target_option cur_target;
3803 bool ret = true;
3804 tree old_optimize = build_optimization_node ();
3805 tree new_target, new_optimize;
3806 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3808 /* If the function changed the optimization levels as well as setting target
3809 options, start with the optimizations specified. */
3810 if (func_optimize && func_optimize != old_optimize)
3811 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3813 /* The target attributes may also change some optimization flags, so update
3814 the optimization options if necessary. */
3815 cl_target_option_save (&cur_target);
3816 new_target = ix86_valid_target_attribute_tree (args);
3817 new_optimize = build_optimization_node ();
3819 if (!new_target)
3820 ret = false;
3822 else if (fndecl)
3824 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3826 if (old_optimize != new_optimize)
3827 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
3830 cl_target_option_restore (&cur_target);
3832 if (old_optimize != new_optimize)
3833 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3835 return ret;
3839 /* Hook to determine if one function can safely inline another. */
3841 static bool
3842 ix86_can_inline_p (tree caller, tree callee)
3844 bool ret = false;
3845 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3846 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3848 /* If callee has no option attributes, then it is ok to inline. */
3849 if (!callee_tree)
3850 ret = true;
3852 /* If caller has no option attributes, but callee does, then it is not ok to
3853 inline. */
3854 else if (!caller_tree)
3855 ret = false;
3857 else
3859 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3860 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3862 /* Callee's isa options should be a subset of the caller's, i.e. an SSE5
3863 function can inline an SSE2 function but an SSE2 function can't
3864 inline an SSE5 function. */
3865 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3866 != callee_opts->ix86_isa_flags)
3867 ret = false;
3869 /* See if we have the same non-isa options. */
3870 else if (caller_opts->target_flags != callee_opts->target_flags)
3871 ret = false;
3873 /* See if arch, tune, etc. are the same. */
3874 else if (caller_opts->arch != callee_opts->arch)
3875 ret = false;
3877 else if (caller_opts->tune != callee_opts->tune)
3878 ret = false;
3880 else if (caller_opts->fpmath != callee_opts->fpmath)
3881 ret = false;
3883 else if (caller_opts->branch_cost != callee_opts->branch_cost)
3884 ret = false;
3886 else
3887 ret = true;
3890 return ret;
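/* Sketch of the subset rule above (hypothetical user code):

     __attribute__ ((target ("sse4.2")))
     static int f (int x) { return x + 1; }

     int g (int x) { return f (x); }

   f cannot be inlined into g, because f's ISA flags contain SSE4.2 bits
   that g's do not (unless the whole unit is compiled with -msse4.2).  */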
3894 /* Remember the last target of ix86_set_current_function. */
3895 static GTY(()) tree ix86_previous_fndecl;
3897 /* Establish appropriate back-end context for processing the function
3898 FNDECL. The argument might be NULL to indicate processing at top
3899 level, outside of any function scope. */
3900 static void
3901 ix86_set_current_function (tree fndecl)
3903 /* Only change the context if the function changes. This hook is called
3904 several times in the course of compiling a function, and we don't want to
3905 slow things down too much or call target_reinit when it isn't safe. */
3906 if (fndecl && fndecl != ix86_previous_fndecl)
3908 tree old_tree = (ix86_previous_fndecl
3909 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3910 : NULL_TREE);
3912 tree new_tree = (fndecl
3913 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3914 : NULL_TREE);
3916 ix86_previous_fndecl = fndecl;
3917 if (old_tree == new_tree)
3920 else if (new_tree)
3922 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
3923 target_reinit ();
3926 else if (old_tree)
3928 struct cl_target_option *def
3929 = TREE_TARGET_OPTION (target_option_current_node);
3931 cl_target_option_restore (def);
3932 target_reinit ();
3938 /* Return true if this goes in large data/bss. */
3940 static bool
3941 ix86_in_large_data_p (tree exp)
3943 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3944 return false;
3946 /* Functions are never large data. */
3947 if (TREE_CODE (exp) == FUNCTION_DECL)
3948 return false;
3950 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3952 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3953 if (strcmp (section, ".ldata") == 0
3954 || strcmp (section, ".lbss") == 0)
3955 return true;
3956 return false;
3958 else
3960 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3962 /* If this is an incomplete type with size 0, then we can't put it
3963 in data because it might be too big when completed. */
3964 if (!size || size > ix86_section_threshold)
3965 return true;
3968 return false;
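/* Sketch (hypothetical invocation): with

     gcc -mcmodel=medium -mlarge-data-threshold=65536 file.c

   an object such as  static char buf[1 << 20];  exceeds the threshold and
   is placed in .lbss, while small globals stay in the regular .bss.  */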
3971 /* Switch to the appropriate section for output of DECL.
3972 DECL is either a `VAR_DECL' node or a constant of some sort.
3973 RELOC indicates whether forming the initial value of DECL requires
3974 link-time relocations. */
3976 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
3977 ATTRIBUTE_UNUSED;
3979 static section *
3980 x86_64_elf_select_section (tree decl, int reloc,
3981 unsigned HOST_WIDE_INT align)
3983 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3984 && ix86_in_large_data_p (decl))
3986 const char *sname = NULL;
3987 unsigned int flags = SECTION_WRITE;
3988 switch (categorize_decl_for_section (decl, reloc))
3990 case SECCAT_DATA:
3991 sname = ".ldata";
3992 break;
3993 case SECCAT_DATA_REL:
3994 sname = ".ldata.rel";
3995 break;
3996 case SECCAT_DATA_REL_LOCAL:
3997 sname = ".ldata.rel.local";
3998 break;
3999 case SECCAT_DATA_REL_RO:
4000 sname = ".ldata.rel.ro";
4001 break;
4002 case SECCAT_DATA_REL_RO_LOCAL:
4003 sname = ".ldata.rel.ro.local";
4004 break;
4005 case SECCAT_BSS:
4006 sname = ".lbss";
4007 flags |= SECTION_BSS;
4008 break;
4009 case SECCAT_RODATA:
4010 case SECCAT_RODATA_MERGE_STR:
4011 case SECCAT_RODATA_MERGE_STR_INIT:
4012 case SECCAT_RODATA_MERGE_CONST:
4013 sname = ".lrodata";
4014 flags = 0;
4015 break;
4016 case SECCAT_SRODATA:
4017 case SECCAT_SDATA:
4018 case SECCAT_SBSS:
4019 gcc_unreachable ();
4020 case SECCAT_TEXT:
4021 case SECCAT_TDATA:
4022 case SECCAT_TBSS:
4023 /* We don't split these for the medium model. Place them into
4024 default sections and hope for the best. */
4025 break;
4026 case SECCAT_EMUTLS_VAR:
4027 case SECCAT_EMUTLS_TMPL:
4028 gcc_unreachable ();
4030 if (sname)
4032 /* We might get called with string constants, but get_named_section
4033 doesn't like them as they are not DECLs. Also, we need to set
4034 flags in that case. */
4035 if (!DECL_P (decl))
4036 return get_section (sname, flags, NULL);
4037 return get_named_section (decl, sname, reloc);
4040 return default_elf_select_section (decl, reloc, align);
4043 /* Build up a unique section name, expressed as a
4044 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4045 RELOC indicates whether the initial value of EXP requires
4046 link-time relocations. */
4048 static void ATTRIBUTE_UNUSED
4049 x86_64_elf_unique_section (tree decl, int reloc)
4051 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4052 && ix86_in_large_data_p (decl))
4054 const char *prefix = NULL;
4055 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4056 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4058 switch (categorize_decl_for_section (decl, reloc))
4060 case SECCAT_DATA:
4061 case SECCAT_DATA_REL:
4062 case SECCAT_DATA_REL_LOCAL:
4063 case SECCAT_DATA_REL_RO:
4064 case SECCAT_DATA_REL_RO_LOCAL:
4065 prefix = one_only ? ".ld" : ".ldata";
4066 break;
4067 case SECCAT_BSS:
4068 prefix = one_only ? ".lb" : ".lbss";
4069 break;
4070 case SECCAT_RODATA:
4071 case SECCAT_RODATA_MERGE_STR:
4072 case SECCAT_RODATA_MERGE_STR_INIT:
4073 case SECCAT_RODATA_MERGE_CONST:
4074 prefix = one_only ? ".lr" : ".lrodata";
4075 break;
4076 case SECCAT_SRODATA:
4077 case SECCAT_SDATA:
4078 case SECCAT_SBSS:
4079 gcc_unreachable ();
4080 case SECCAT_TEXT:
4081 case SECCAT_TDATA:
4082 case SECCAT_TBSS:
4083 /* We don't split these for the medium model. Place them into
4084 default sections and hope for the best. */
4085 break;
4086 case SECCAT_EMUTLS_VAR:
4087 prefix = targetm.emutls.var_section;
4088 break;
4089 case SECCAT_EMUTLS_TMPL:
4090 prefix = targetm.emutls.tmpl_section;
4091 break;
4093 if (prefix)
4095 const char *name, *linkonce;
4096 char *string;
4098 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4099 name = targetm.strip_name_encoding (name);
4101 /* If we're using one_only, then there needs to be a .gnu.linkonce
4102 prefix to the section name. */
4103 linkonce = one_only ? ".gnu.linkonce" : "";
4105 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4107 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4108 return;
4111 default_unique_section (decl, reloc);
4114 #ifdef COMMON_ASM_OP
4115 /* This says how to output assembler code to declare an
4116 uninitialized external linkage data object.
4118 For the x86-64 medium model we need to use the .largecomm directive
4119 for large objects. */
4120 void
4121 x86_elf_aligned_common (FILE *file,
4122 const char *name, unsigned HOST_WIDE_INT size,
4123 int align)
4125 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4126 && size > (unsigned int)ix86_section_threshold)
4127 fprintf (file, ".largecomm\t");
4128 else
4129 fprintf (file, "%s", COMMON_ASM_OP);
4130 assemble_name (file, name);
4131 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
4132 size, align / BITS_PER_UNIT);
4134 #endif
4136 /* Utility function for targets to use in implementing
4137 ASM_OUTPUT_ALIGNED_BSS. */
4139 void
4140 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4141 const char *name, unsigned HOST_WIDE_INT size,
4142 int align)
4144 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4145 && size > (unsigned int)ix86_section_threshold)
4146 switch_to_section (get_named_section (decl, ".lbss", 0));
4147 else
4148 switch_to_section (bss_section);
4149 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4150 #ifdef ASM_DECLARE_OBJECT_NAME
4151 last_assemble_variable_decl = decl;
4152 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4153 #else
4154 /* The standard thing is to just output a label for the object. */
4155 ASM_OUTPUT_LABEL (file, name);
4156 #endif /* ASM_DECLARE_OBJECT_NAME */
4157 ASM_OUTPUT_SKIP (file, size ? size : 1);
4160 void
4161 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4163 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4164 make the problem with not enough registers even worse. */
4165 #ifdef INSN_SCHEDULING
4166 if (level > 1)
4167 flag_schedule_insns = 0;
4168 #endif
4170 if (TARGET_MACHO)
4171 /* The Darwin libraries never set errno, so we might as well
4172 avoid calling them when that's the only reason we would. */
4173 flag_errno_math = 0;
4175 /* The default values of these switches depend on TARGET_64BIT,
4176 which is not known at this moment. Mark these values with 2 and
4177 let the user override them. In case there is no command line option
4178 specifying them, we will set the defaults in override_options. */
4179 if (optimize >= 1)
4180 flag_omit_frame_pointer = 2;
4181 flag_pcc_struct_return = 2;
4182 flag_asynchronous_unwind_tables = 2;
4183 flag_vect_cost_model = 1;
4184 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4185 SUBTARGET_OPTIMIZATION_OPTIONS;
4186 #endif
4189 /* Decide whether we can make a sibling call to a function. DECL is the
4190 declaration of the function being targeted by the call and EXP is the
4191 CALL_EXPR representing the call. */
4193 static bool
4194 ix86_function_ok_for_sibcall (tree decl, tree exp)
4196 tree func;
4197 rtx a, b;
4199 /* If we are generating position-independent code, we cannot sibcall
4200 optimize any indirect call, or a direct call to a global function,
4201 as the PLT requires %ebx be live. */
4202 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4203 return false;
4205 if (decl)
4206 func = decl;
4207 else
4209 func = TREE_TYPE (CALL_EXPR_FN (exp));
4210 if (POINTER_TYPE_P (func))
4211 func = TREE_TYPE (func);
4214 /* Check that the return value locations are the same. For example,
4215 if we are returning floats on the 80387 register stack, we cannot
4216 make a sibcall from a function that doesn't return a float to a
4217 function that does or, conversely, from a function that does return
4218 a float to a function that doesn't; the necessary stack adjustment
4219 would not be executed. This is also the place we notice
4220 differences in the return value ABI. Note that it is ok for one
4221 of the functions to have void return type as long as the return
4222 value of the other is passed in a register. */
4223 a = ix86_function_value (TREE_TYPE (exp), func, false);
4224 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4225 cfun->decl, false);
4226 if (STACK_REG_P (a) || STACK_REG_P (b))
4228 if (!rtx_equal_p (a, b))
4229 return false;
4231 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4233 else if (!rtx_equal_p (a, b))
4234 return false;
4236 /* If this call is indirect, we'll need to be able to use a call-clobbered
4237 register for the address of the target function. Make sure that all
4238 such registers are not used for passing parameters. */
4239 if (!decl && !TARGET_64BIT)
4241 tree type;
4243 /* We're looking at the CALL_EXPR, we need the type of the function. */
4244 type = CALL_EXPR_FN (exp); /* pointer expression */
4245 type = TREE_TYPE (type); /* pointer type */
4246 type = TREE_TYPE (type); /* function type */
4248 if (ix86_function_regparm (type, NULL) >= 3)
4250 /* ??? Need to count the actual number of registers to be used,
4251 not the possible number of registers. Fix later. */
4252 return false;
4256 /* Dllimport'd functions are also called indirectly. */
4257 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
4258 && !TARGET_64BIT
4259 && decl && DECL_DLLIMPORT_P (decl)
4260 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
4261 return false;
4263 /* If we need to align the outgoing stack, then sibcalling would
4264 unalign the stack, which may break the called function. */
4265 if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
4266 return false;
4268 /* Otherwise okay. That also includes certain types of indirect calls. */
4269 return true;
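/* Sketch of the PIC restriction above (hypothetical user code):

     extern int helper (int);
     int wrapper (int x) { return helper (x); }

   Compiled with -m32 -fPIC this is not sibcall-optimized, since calling
   the global helper through the PLT requires %ebx to be live.  */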
4272 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4273 calling convention attributes;
4274 arguments as in struct attribute_spec.handler. */
4276 static tree
4277 ix86_handle_cconv_attribute (tree *node, tree name,
4278 tree args,
4279 int flags ATTRIBUTE_UNUSED,
4280 bool *no_add_attrs)
4282 if (TREE_CODE (*node) != FUNCTION_TYPE
4283 && TREE_CODE (*node) != METHOD_TYPE
4284 && TREE_CODE (*node) != FIELD_DECL
4285 && TREE_CODE (*node) != TYPE_DECL)
4287 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4288 name);
4289 *no_add_attrs = true;
4290 return NULL_TREE;
4293 /* Can combine regparm with all attributes but fastcall. */
4294 if (is_attribute_p ("regparm", name))
4296 tree cst;
4298 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4300 error ("fastcall and regparm attributes are not compatible");
4303 cst = TREE_VALUE (args);
4304 if (TREE_CODE (cst) != INTEGER_CST)
4306 warning (OPT_Wattributes,
4307 "%qE attribute requires an integer constant argument",
4308 name);
4309 *no_add_attrs = true;
4311 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4313 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4314 name, REGPARM_MAX);
4315 *no_add_attrs = true;
4318 return NULL_TREE;
4321 if (TARGET_64BIT)
4323 /* Do not warn when emulating the MS ABI. */
4324 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4325 warning (OPT_Wattributes, "%qE attribute ignored",
4326 name);
4327 *no_add_attrs = true;
4328 return NULL_TREE;
4331 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4332 if (is_attribute_p ("fastcall", name))
4334 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4336 error ("fastcall and cdecl attributes are not compatible");
4338 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4340 error ("fastcall and stdcall attributes are not compatible");
4342 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4344 error ("fastcall and regparm attributes are not compatible");
4348 /* Can combine stdcall with fastcall (redundant), regparm and
4349 sseregparm. */
4350 else if (is_attribute_p ("stdcall", name))
4352 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4354 error ("stdcall and cdecl attributes are not compatible");
4356 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4358 error ("stdcall and fastcall attributes are not compatible");
4362 /* Can combine cdecl with regparm and sseregparm. */
4363 else if (is_attribute_p ("cdecl", name))
4365 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4367 error ("stdcall and cdecl attributes are not compatible");
4369 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4371 error ("fastcall and cdecl attributes are not compatible");
4375 /* Can combine sseregparm with all attributes. */
4377 return NULL_TREE;
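/* Sketch of a rejected combination (hypothetical user code):

     void __attribute__ ((fastcall, regparm (2))) f (int a, int b);

   triggers the "fastcall and regparm attributes are not compatible"
   error above, whereas stdcall combined with sseregparm is accepted.  */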
4380 /* Return 0 if the attributes for two types are incompatible, 1 if they
4381 are compatible, and 2 if they are nearly compatible (which causes a
4382 warning to be generated). */
4384 static int
4385 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4387 /* Check for mismatch of non-default calling convention. */
4388 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4390 if (TREE_CODE (type1) != FUNCTION_TYPE
4391 && TREE_CODE (type1) != METHOD_TYPE)
4392 return 1;
4394 /* Check for mismatched fastcall/regparm types. */
4395 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4396 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4397 || (ix86_function_regparm (type1, NULL)
4398 != ix86_function_regparm (type2, NULL)))
4399 return 0;
4401 /* Check for mismatched sseregparm types. */
4402 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4403 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4404 return 0;
4406 /* Check for mismatched return types (cdecl vs stdcall). */
4407 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4408 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4409 return 0;
4411 return 1;
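/* Sketch (hypothetical user code): this predicate is what flags the
   assignment below as a calling-convention mismatch on ia32:

     void __attribute__ ((stdcall)) f (int);
     void (*p) (int) = f;      a plain (cdecl) pointer vs. stdcall f

   The stdcall/cdecl return-convention check returns 0 (incompatible).  */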
4414 /* Return the regparm value for a function with the indicated TYPE and DECL.
4415 DECL may be NULL when calling function indirectly
4416 or considering a libcall. */
4418 static int
4419 ix86_function_regparm (const_tree type, const_tree decl)
4421 tree attr;
4422 int regparm;
4424 static bool error_issued;
4426 if (TARGET_64BIT)
4427 return (ix86_function_type_abi (type) == SYSV_ABI
4428 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4430 regparm = ix86_regparm;
4431 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4432 if (attr)
4434 regparm
4435 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4437 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4439 /* We can't use regparm(3) for nested functions because
4440 these pass the static chain pointer in the %ecx register. */
4441 if (!error_issued && regparm == 3
4442 && decl_function_context (decl)
4443 && !DECL_NO_STATIC_CHAIN (decl))
4445 error ("nested functions are limited to 2 register parameters");
4446 error_issued = true;
4447 return 0;
4451 return regparm;
4454 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4455 return 2;
4457 /* Use register calling convention for local functions when possible. */
4458 if (decl
4459 && TREE_CODE (decl) == FUNCTION_DECL
4460 && optimize
4461 && !profile_flag)
4463 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4464 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4465 if (i && i->local)
4467 int local_regparm, globals = 0, regno;
4468 struct function *f;
4470 /* Make sure no regparm register is taken by a
4471 fixed register variable. */
4472 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4473 if (fixed_regs[local_regparm])
4474 break;
4476 /* We can't use regparm(3) for nested functions as these pass the
4477 static chain pointer in the third argument register. */
4478 if (local_regparm == 3
4479 && decl_function_context (decl)
4480 && !DECL_NO_STATIC_CHAIN (decl))
4481 local_regparm = 2;
4483 /* If the function realigns its stack pointer, the prologue will
4484 clobber %ecx. If we've already generated code for the callee,
4485 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4486 scanning the attributes for the self-realigning property. */
4487 f = DECL_STRUCT_FUNCTION (decl);
4488 /* Since the current internal arg pointer won't conflict with
4489 parameter-passing regs, there is no need to change stack
4490 realignment or adjust the regparm number.
4492 Each fixed register usage increases register pressure,
4493 so fewer registers should be used for argument passing.
4494 This functionality can be overridden by an explicit
4495 regparm value. */
4496 for (regno = 0; regno <= DI_REG; regno++)
4497 if (fixed_regs[regno])
4498 globals++;
4500 local_regparm
4501 = globals < local_regparm ? local_regparm - globals : 0;
4503 if (local_regparm > regparm)
4504 regparm = local_regparm;
4508 return regparm;
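/* Sketch (hypothetical user code): with

     int __attribute__ ((regparm (3))) add3 (int a, int b, int c);

   the ia32 arguments travel in %eax, %edx and %ecx instead of on the
   stack, which is also why nested functions, whose static chain occupies
   %ecx, are limited to regparm(2) above.  */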
4511 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4512 DFmode (2) arguments in SSE registers for a function with the
4513 indicated TYPE and DECL. DECL may be NULL when calling function
4514 indirectly or considering a libcall. Otherwise return 0. */
4516 static int
4517 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4519 gcc_assert (!TARGET_64BIT);
4521 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4522 by the sseregparm attribute. */
4523 if (TARGET_SSEREGPARM
4524 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4526 if (!TARGET_SSE)
4528 if (warn)
4530 if (decl)
4531 error ("Calling %qD with attribute sseregparm without "
4532 "SSE/SSE2 enabled", decl);
4533 else
4534 error ("Calling %qT with attribute sseregparm without "
4535 "SSE/SSE2 enabled", type);
4537 return 0;
4540 return 2;
4543 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4544 (and DFmode for SSE2) arguments in SSE registers. */
4545 if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
4547 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4548 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4549 if (i && i->local)
4550 return TARGET_SSE2 ? 2 : 1;
4553 return 0;
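/* Sketch (hypothetical user code):

     double __attribute__ ((sseregparm)) scale (double x, double y);

   compiled with -m32 -msse2 is expected to receive X and Y in %xmm0 and
   %xmm1 rather than on the x87 stack; without SSE enabled the error
   above is issued instead.  */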
4556 /* Return true if EAX is live at the start of the function. Used by
4557 ix86_expand_prologue to determine if we need special help before
4558 calling allocate_stack_worker. */
4560 static bool
4561 ix86_eax_live_at_start_p (void)
4563 /* Cheat. Don't bother working forward from ix86_function_regparm
4564 to the function type to whether an actual argument is located in
4565 eax. Instead just look at cfg info, which is still close enough
4566 to correct at this point. This gives false positives for broken
4567 functions that might use uninitialized data that happens to be
4568 allocated in eax, but who cares? */
4569 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4572 /* Value is the number of bytes of arguments automatically
4573 popped when returning from a subroutine call.
4574 FUNDECL is the declaration node of the function (as a tree),
4575 FUNTYPE is the data type of the function (as a tree),
4576 or for a library call it is an identifier node for the subroutine name.
4577 SIZE is the number of bytes of arguments passed on the stack.
4579 On the 80386, the RTD insn may be used to pop them if the number
4580 of args is fixed, but if the number is variable then the caller
4581 must pop them all. RTD can't be used for library calls now
4582 because the library is compiled with the Unix compiler.
4583 Use of RTD is a selectable option, since it is incompatible with
4584 standard Unix calling sequences. If the option is not selected,
4585 the caller must always pop the args.
4587 The attribute stdcall is equivalent to RTD on a per module basis. */
4590 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4592 int rtd;
4594 /* None of the 64-bit ABIs pop arguments. */
4595 if (TARGET_64BIT)
4596 return 0;
4598 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4600 /* Cdecl functions override -mrtd, and never pop the stack. */
4601 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4603 /* Stdcall and fastcall functions will pop the stack if they do not
4604 take variable arguments. */
4605 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4606 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
4607 rtd = 1;
4609 if (rtd && ! stdarg_p (funtype))
4610 return size;
4613 /* Lose any fake structure return argument if it is passed on the stack. */
4614 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4615 && !KEEP_AGGREGATE_RETURN_POINTER)
4617 int nregs = ix86_function_regparm (funtype, fundecl);
4618 if (nregs == 0)
4619 return GET_MODE_SIZE (Pmode);
4622 return 0;
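/* Sketch (hypothetical user code): for

     void __attribute__ ((stdcall)) cb (int a, int b);

   the callee pops its 8 bytes of arguments (ret $8), so this function
   returns 8; a variadic or plain cdecl function returns 0 and leaves the
   stack cleanup to the caller.  */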
4625 /* Argument support functions. */
4627 /* Return true when register may be used to pass function parameters. */
4628 bool
4629 ix86_function_arg_regno_p (int regno)
4631 int i;
4632 const int *parm_regs;
4634 if (!TARGET_64BIT)
4636 if (TARGET_MACHO)
4637 return (regno < REGPARM_MAX
4638 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4639 else
4640 return (regno < REGPARM_MAX
4641 || (TARGET_MMX && MMX_REGNO_P (regno)
4642 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4643 || (TARGET_SSE && SSE_REGNO_P (regno)
4644 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4647 if (TARGET_MACHO)
4649 if (SSE_REGNO_P (regno) && TARGET_SSE)
4650 return true;
4652 else
4654 if (TARGET_SSE && SSE_REGNO_P (regno)
4655 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4656 return true;
4659 /* TODO: The function should depend on the current function ABI, but
4660 builtins.c would need updating then. Therefore we use the
4661 default ABI. */
4663 /* RAX is used as hidden argument to va_arg functions. */
4664 if (ix86_abi == SYSV_ABI && regno == AX_REG)
4665 return true;
4667 if (ix86_abi == MS_ABI)
4668 parm_regs = x86_64_ms_abi_int_parameter_registers;
4669 else
4670 parm_regs = x86_64_int_parameter_registers;
4671 for (i = 0; i < (ix86_abi == MS_ABI
4672 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
4673 if (regno == parm_regs[i])
4674 return true;
4675 return false;
4678 /* Return true if we do not know how to pass TYPE solely in registers. */
4680 static bool
4681 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4683 if (must_pass_in_stack_var_size_or_pad (mode, type))
4684 return true;
4686 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4687 The layout_type routine is crafty and tries to trick us into passing
4688 currently unsupported vector types on the stack by using TImode. */
4689 return (!TARGET_64BIT && mode == TImode
4690 && type && TREE_CODE (type) != VECTOR_TYPE);
4693 /* Return the size, in bytes, of the area reserved for arguments passed
4694 in registers for the function represented by FNDECL, which depends on
4695 the ABI in use. */
4697 ix86_reg_parm_stack_space (const_tree fndecl)
4699 enum calling_abi call_abi = SYSV_ABI;
4700 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4701 call_abi = ix86_function_abi (fndecl);
4702 else
4703 call_abi = ix86_function_type_abi (fndecl);
4704 if (call_abi == MS_ABI)
4705 return 32;
4706 return 0;
4709 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
4710 calling ABI used. */
4711 enum calling_abi
4712 ix86_function_type_abi (const_tree fntype)
4714 if (TARGET_64BIT && fntype != NULL)
4716 enum calling_abi abi = ix86_abi;
4717 if (abi == SYSV_ABI)
4719 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
4720 abi = MS_ABI;
4722 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
4723 abi = SYSV_ABI;
4724 return abi;
4726 return ix86_abi;
4729 static enum calling_abi
4730 ix86_function_abi (const_tree fndecl)
4732 if (! fndecl)
4733 return ix86_abi;
4734 return ix86_function_type_abi (TREE_TYPE (fndecl));
4737 /* Return SYSV_ABI or MS_ABI, depending on CFUN, specifying the
4738 calling ABI used. */
4739 enum calling_abi
4740 ix86_cfun_abi (void)
4742 if (! cfun || ! TARGET_64BIT)
4743 return ix86_abi;
4744 return cfun->machine->call_abi;
4747 /* regclass.c */
4748 extern void init_regs (void);
4750 /* Implementation of the call ABI switching target hook. The call register
4751 sets specific to FNDECL are set up here. See also CONDITIONAL_REGISTER_USAGE
4752 for more details. */
4753 void
4754 ix86_call_abi_override (const_tree fndecl)
4756 if (fndecl == NULL_TREE)
4757 cfun->machine->call_abi = ix86_abi;
4758 else
4759 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4762 /* The MS and SYSV ABIs have different sets of call-used registers. Avoid
4763 expensive re-initialization of init_regs each time we switch function
4764 context, since it is needed only during RTL expansion. */
4765 static void
4766 ix86_maybe_switch_abi (void)
4768 if (TARGET_64BIT &&
4769 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
4770 reinit_regs ();
4773 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4774 for a call to a function whose data type is FNTYPE.
4775 For a library call, FNTYPE is 0. */
4777 void
4778 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4779 tree fntype, /* tree ptr for function decl */
4780 rtx libname, /* SYMBOL_REF of library name or 0 */
4781 tree fndecl)
4783 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4784 memset (cum, 0, sizeof (*cum));
4786 if (fndecl)
4787 cum->call_abi = ix86_function_abi (fndecl);
4788 else
4789 cum->call_abi = ix86_function_type_abi (fntype);
4790 /* Set up the number of registers to use for passing arguments. */
4792 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4793 sorry ("ms_abi attribute requires -maccumulate-outgoing-args or a subtarget optimization implying it");
4794 cum->nregs = ix86_regparm;
4795 if (TARGET_64BIT)
4797 if (cum->call_abi != ix86_abi)
4798 cum->nregs = (ix86_abi != SYSV_ABI
4799 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4801 if (TARGET_SSE)
4803 cum->sse_nregs = SSE_REGPARM_MAX;
4804 if (TARGET_64BIT)
4806 if (cum->call_abi != ix86_abi)
4807 cum->sse_nregs = (ix86_abi != SYSV_ABI
4808 ? X86_64_SSE_REGPARM_MAX
4809 : X86_64_MS_SSE_REGPARM_MAX);
4812 if (TARGET_MMX)
4813 cum->mmx_nregs = MMX_REGPARM_MAX;
4814 cum->warn_avx = true;
4815 cum->warn_sse = true;
4816 cum->warn_mmx = true;
4818 /* Because the type might mismatch between caller and callee, we need to
4819 use the actual type of the function for local calls.
4820 FIXME: cgraph_analyze can be told to actually record whether a function
4821 uses va_start, so for local functions maybe_vaarg can be made more
4822 aggressive, helping K&R code.
4823 FIXME: once the type system is fixed, we won't need this code anymore. */
4824 if (i && i->local)
4825 fntype = TREE_TYPE (fndecl);
4826 cum->maybe_vaarg = (fntype
4827 ? (!prototype_p (fntype) || stdarg_p (fntype))
4828 : !libname);
4830 if (!TARGET_64BIT)
4832 /* If there are variable arguments, then we won't pass anything
4833 in registers in 32-bit mode. */
4834 if (stdarg_p (fntype))
4836 cum->nregs = 0;
4837 cum->sse_nregs = 0;
4838 cum->mmx_nregs = 0;
4839 cum->warn_avx = 0;
4840 cum->warn_sse = 0;
4841 cum->warn_mmx = 0;
4842 return;
4845 /* Use the ecx and edx registers if the function has the fastcall
4846 attribute, else look for regparm information. */
4847 if (fntype)
4849 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4851 cum->nregs = 2;
4852 cum->fastcall = 1;
4854 else
4855 cum->nregs = ix86_function_regparm (fntype, fndecl);
4858 /* Set up the number of SSE registers used for passing SFmode
4859 and DFmode arguments. Warn for mismatching ABI. */
4860 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4864 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4865 But in the case of vector types, it is some vector mode.
4867 When we have only some of our vector isa extensions enabled, then there
4868 are some modes for which vector_mode_supported_p is false. For these
4869 modes, the generic vector support in gcc will choose some non-vector mode
4870 in order to implement the type. By computing the natural mode, we'll
4871 select the proper ABI location for the operand and not depend on whatever
4872 the middle-end decides to do with these vector types.
4874 The middle-end can't deal with vector types > 16 bytes. In this
4875 case, we return the original mode and warn about the ABI change if
4876 CUM isn't NULL. */
4878 static enum machine_mode
4879 type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
4881 enum machine_mode mode = TYPE_MODE (type);
4883 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4885 HOST_WIDE_INT size = int_size_in_bytes (type);
4886 if ((size == 8 || size == 16 || size == 32)
4887 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4888 && TYPE_VECTOR_SUBPARTS (type) > 1)
4890 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
4892 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4893 mode = MIN_MODE_VECTOR_FLOAT;
4894 else
4895 mode = MIN_MODE_VECTOR_INT;
4897 /* Get the mode which has this inner mode and number of units. */
4898 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4899 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4900 && GET_MODE_INNER (mode) == innermode)
4902 if (size == 32 && !TARGET_AVX)
4904 static bool warnedavx;
4906 if (cum
4907 && !warnedavx
4908 && cum->warn_avx)
4910 warnedavx = true;
4911 warning (0, "AVX vector argument without AVX "
4912 "enabled changes the ABI");
4914 return TYPE_MODE (type);
4916 else
4917 return mode;
4920 gcc_unreachable ();
4924 return mode;
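/* Sketch (hypothetical user code): for

     typedef int v4si __attribute__ ((vector_size (16)));

   the natural mode is V4SImode even when SSE is not enabled and the
   middle-end would otherwise fall back to a non-vector mode, so the ABI
   location of such arguments stays stable across ISA settings.  */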
4927 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4928 this may not agree with the mode that the type system has chosen for the
4929 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4930 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
4932 static rtx
4933 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
4934 unsigned int regno)
4936 rtx tmp;
4938 if (orig_mode != BLKmode)
4939 tmp = gen_rtx_REG (orig_mode, regno);
4940 else
4942 tmp = gen_rtx_REG (mode, regno);
4943 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4944 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4947 return tmp;
4950 /* x86-64 register passing implementation. See the x86-64 ABI for details.
4951 The goal of this code is to classify each 8-byte chunk of the incoming
4952 argument by register class and assign registers accordingly. */
4954 /* Return the union class of CLASS1 and CLASS2.
4955 See the x86-64 PS ABI for details. */
4957 static enum x86_64_reg_class
4958 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4960 /* Rule #1: If both classes are equal, this is the resulting class. */
4961 if (class1 == class2)
4962 return class1;
4964 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4965 the other class. */
4966 if (class1 == X86_64_NO_CLASS)
4967 return class2;
4968 if (class2 == X86_64_NO_CLASS)
4969 return class1;
4971 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4972 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4973 return X86_64_MEMORY_CLASS;
4975 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
4976 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4977 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4978 return X86_64_INTEGERSI_CLASS;
4979 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4980 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4981 return X86_64_INTEGER_CLASS;
4983 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4984 MEMORY is used. */
4985 if (class1 == X86_64_X87_CLASS
4986 || class1 == X86_64_X87UP_CLASS
4987 || class1 == X86_64_COMPLEX_X87_CLASS
4988 || class2 == X86_64_X87_CLASS
4989 || class2 == X86_64_X87UP_CLASS
4990 || class2 == X86_64_COMPLEX_X87_CLASS)
4991 return X86_64_MEMORY_CLASS;
4993 /* Rule #6: Otherwise class SSE is used. */
4994 return X86_64_SSE_CLASS;
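/* A worked example of the rules above (hypothetical user code):

     struct s { double d; int i; };

   classifies the first eightbyte as SSE (passed in %xmm0) and the second
   as INTEGER (passed in %rdi), while a union such as

     union u { double d; long l; };

   merges INTEGER with SSE into INTEGER by rule #4, so the whole union
   travels in a general-purpose register.  */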
4997 /* Classify the argument of type TYPE and mode MODE.
4998 CLASSES will be filled with the register classes used to pass each word
4999 of the operand. The number of words is returned. In case the parameter
5000 should be passed in memory, 0 is returned. As a special case for
5001 zero-sized containers, classes[0] will be NO_CLASS and 1 is returned.
5003 BIT_OFFSET is used internally for handling records and specifies the
5004 offset in bits modulo 256 to avoid overflow cases.
5006 See the x86-64 PS ABI for details.
5009 static int
5010 classify_argument (enum machine_mode mode, const_tree type,
5011 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5013 HOST_WIDE_INT bytes =
5014 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5015 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5017 /* Variable sized entities are always passed/returned in memory. */
5018 if (bytes < 0)
5019 return 0;
5021 if (mode != VOIDmode
5022 && targetm.calls.must_pass_in_stack (mode, type))
5023 return 0;
5025 if (type && AGGREGATE_TYPE_P (type))
5027 int i;
5028 tree field;
5029 enum x86_64_reg_class subclasses[MAX_CLASSES];
5031 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5032 if (bytes > 32)
5033 return 0;
5035 for (i = 0; i < words; i++)
5036 classes[i] = X86_64_NO_CLASS;
5038 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
5039 signal the memory class, so handle this as a special case. */
5040 if (!words)
5042 classes[0] = X86_64_NO_CLASS;
5043 return 1;
5046 /* Classify each field of record and merge classes. */
5047 switch (TREE_CODE (type))
5049 case RECORD_TYPE:
5050 /* And now merge the fields of the structure. */
5051 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5053 if (TREE_CODE (field) == FIELD_DECL)
5055 int num;
5057 if (TREE_TYPE (field) == error_mark_node)
5058 continue;
5060 /* Bitfields are always classified as integer. Handle them
5061 early, since later code would consider them to be
5062 misaligned integers. */
5063 if (DECL_BIT_FIELD (field))
5065 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5066 i < ((int_bit_position (field) + (bit_offset % 64))
5067 + tree_low_cst (DECL_SIZE (field), 0)
5068 + 63) / 8 / 8; i++)
5069 classes[i] =
5070 merge_classes (X86_64_INTEGER_CLASS,
5071 classes[i]);
5073 else
5075 int pos;
5077 type = TREE_TYPE (field);
5079 /* Flexible array member is ignored. */
5080 if (TYPE_MODE (type) == BLKmode
5081 && TREE_CODE (type) == ARRAY_TYPE
5082 && TYPE_SIZE (type) == NULL_TREE
5083 && TYPE_DOMAIN (type) != NULL_TREE
5084 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5085 == NULL_TREE))
5087 static bool warned;
5089 if (!warned && warn_psabi)
5091 warned = true;
5092 inform (input_location,
5093 "The ABI of passing struct with"
5094 " a flexible array member has"
5095 " changed in GCC 4.4");
5097 continue;
5099 num = classify_argument (TYPE_MODE (type), type,
5100 subclasses,
5101 (int_bit_position (field)
5102 + bit_offset) % 256);
5103 if (!num)
5104 return 0;
5105 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5106 for (i = 0; i < num && (i + pos) < words; i++)
5107 classes[i + pos] =
5108 merge_classes (subclasses[i], classes[i + pos]);
5112 break;
5114 case ARRAY_TYPE:
5115 /* Arrays are handled as small records. */
5117 int num;
5118 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5119 TREE_TYPE (type), subclasses, bit_offset);
5120 if (!num)
5121 return 0;
5123 /* The partial classes are now full classes. */
5124 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5125 subclasses[0] = X86_64_SSE_CLASS;
5126 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5127 && !((bit_offset % 64) == 0 && bytes == 4))
5128 subclasses[0] = X86_64_INTEGER_CLASS;
5130 for (i = 0; i < words; i++)
5131 classes[i] = subclasses[i % num];
5133 break;
5135 case UNION_TYPE:
5136 case QUAL_UNION_TYPE:
5137 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
5139 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5141 if (TREE_CODE (field) == FIELD_DECL)
5143 int num;
5145 if (TREE_TYPE (field) == error_mark_node)
5146 continue;
5148 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5149 TREE_TYPE (field), subclasses,
5150 bit_offset);
5151 if (!num)
5152 return 0;
5153 for (i = 0; i < num; i++)
5154 classes[i] = merge_classes (subclasses[i], classes[i]);
5157 break;
5159 default:
5160 gcc_unreachable ();
5163 if (words > 2)
5165 /* When the size exceeds 16 bytes, everything is passed in
5166 memory unless the first class is X86_64_SSE_CLASS and
5167 all the remaining classes are X86_64_SSEUP_CLASS. */
5169 if (classes[0] != X86_64_SSE_CLASS)
5170 return 0;
5172 for (i = 1; i < words; i++)
5173 if (classes[i] != X86_64_SSEUP_CLASS)
5174 return 0;
5177 /* Final merger cleanup. */
5178 for (i = 0; i < words; i++)
5180 /* If one class is MEMORY, everything should be passed in
5181 memory. */
5182 if (classes[i] == X86_64_MEMORY_CLASS)
5183 return 0;
5185 /* X86_64_SSEUP_CLASS should always be preceded by
5186 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5187 if (classes[i] == X86_64_SSEUP_CLASS
5188 && classes[i - 1] != X86_64_SSE_CLASS
5189 && classes[i - 1] != X86_64_SSEUP_CLASS)
5191 /* The first one should never be X86_64_SSEUP_CLASS. */
5192 gcc_assert (i != 0);
5193 classes[i] = X86_64_SSE_CLASS;
5196 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5197 everything should be passed in memory. */
5198 if (classes[i] == X86_64_X87UP_CLASS
5199 && (classes[i - 1] != X86_64_X87_CLASS))
5201 static bool warned;
5203 /* The first one should never be X86_64_X87UP_CLASS. */
5204 gcc_assert (i != 0);
5205 if (!warned && warn_psabi)
5207 warned = true;
5208 inform (input_location,
5209 "The ABI of passing union with long double"
5210 " has changed in GCC 4.4");
5212 return 0;
5215 return words;
5218 /* Compute the alignment needed. We align all types to their natural
5219 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
5220 if (mode != VOIDmode && mode != BLKmode)
5222 int mode_alignment = GET_MODE_BITSIZE (mode);
5224 if (mode == XFmode)
5225 mode_alignment = 128;
5226 else if (mode == XCmode)
5227 mode_alignment = 256;
5228 if (COMPLEX_MODE_P (mode))
5229 mode_alignment /= 2;
5230 /* Misaligned fields are always returned in memory. */
5231 if (bit_offset % mode_alignment)
5232 return 0;
5235 /* For V1xx modes, just use the base mode. */
5236 if (VECTOR_MODE_P (mode) && mode != V1DImode
5237 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5238 mode = GET_MODE_INNER (mode);
5240 /* Classification of atomic types. */
5241 switch (mode)
5243 case SDmode:
5244 case DDmode:
5245 classes[0] = X86_64_SSE_CLASS;
5246 return 1;
5247 case TDmode:
5248 classes[0] = X86_64_SSE_CLASS;
5249 classes[1] = X86_64_SSEUP_CLASS;
5250 return 2;
5251 case DImode:
5252 case SImode:
5253 case HImode:
5254 case QImode:
5255 case CSImode:
5256 case CHImode:
5257 case CQImode:
5259 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5261 if (size <= 32)
5263 classes[0] = X86_64_INTEGERSI_CLASS;
5264 return 1;
5266 else if (size <= 64)
5268 classes[0] = X86_64_INTEGER_CLASS;
5269 return 1;
5271 else if (size <= 64+32)
5273 classes[0] = X86_64_INTEGER_CLASS;
5274 classes[1] = X86_64_INTEGERSI_CLASS;
5275 return 2;
5277 else if (size <= 64+64)
5279 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5280 return 2;
5282 else
5283 gcc_unreachable ();
5285 case CDImode:
5286 case TImode:
5287 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5288 return 2;
5289 case COImode:
5290 case OImode:
5291 /* OImode shouldn't be used directly. */
5292 gcc_unreachable ();
5293 case CTImode:
5294 return 0;
5295 case SFmode:
5296 if (!(bit_offset % 64))
5297 classes[0] = X86_64_SSESF_CLASS;
5298 else
5299 classes[0] = X86_64_SSE_CLASS;
5300 return 1;
5301 case DFmode:
5302 classes[0] = X86_64_SSEDF_CLASS;
5303 return 1;
5304 case XFmode:
5305 classes[0] = X86_64_X87_CLASS;
5306 classes[1] = X86_64_X87UP_CLASS;
5307 return 2;
5308 case TFmode:
5309 classes[0] = X86_64_SSE_CLASS;
5310 classes[1] = X86_64_SSEUP_CLASS;
5311 return 2;
5312 case SCmode:
5313 classes[0] = X86_64_SSE_CLASS;
5314 if (!(bit_offset % 64))
5315 return 1;
5316 else
5318 static bool warned;
5320 if (!warned && warn_psabi)
5322 warned = true;
5323 inform (input_location,
5324 "The ABI of passing structure with complex float"
5325 " member has changed in GCC 4.4");
5327 classes[1] = X86_64_SSESF_CLASS;
5328 return 2;
5330 case DCmode:
5331 classes[0] = X86_64_SSEDF_CLASS;
5332 classes[1] = X86_64_SSEDF_CLASS;
5333 return 2;
5334 case XCmode:
5335 classes[0] = X86_64_COMPLEX_X87_CLASS;
5336 return 1;
5337 case TCmode:
5338 /* This mode is larger than 16 bytes. */
5339 return 0;
5340 case V8SFmode:
5341 case V8SImode:
5342 case V32QImode:
5343 case V16HImode:
5344 case V4DFmode:
5345 case V4DImode:
5346 classes[0] = X86_64_SSE_CLASS;
5347 classes[1] = X86_64_SSEUP_CLASS;
5348 classes[2] = X86_64_SSEUP_CLASS;
5349 classes[3] = X86_64_SSEUP_CLASS;
5350 return 4;
5351 case V4SFmode:
5352 case V4SImode:
5353 case V16QImode:
5354 case V8HImode:
5355 case V2DFmode:
5356 case V2DImode:
5357 classes[0] = X86_64_SSE_CLASS;
5358 classes[1] = X86_64_SSEUP_CLASS;
5359 return 2;
5360 case V1DImode:
5361 case V2SFmode:
5362 case V2SImode:
5363 case V4HImode:
5364 case V8QImode:
5365 classes[0] = X86_64_SSE_CLASS;
5366 return 1;
5367 case BLKmode:
5368 case VOIDmode:
5369 return 0;
5370 default:
5371 gcc_assert (VECTOR_MODE_P (mode));
5373 if (bytes > 16)
5374 return 0;
5376 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5378 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5379 classes[0] = X86_64_INTEGERSI_CLASS;
5380 else
5381 classes[0] = X86_64_INTEGER_CLASS;
5382 classes[1] = X86_64_INTEGER_CLASS;
5383 return 1 + (bytes > 8);
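/* For illustration, classify_argument splits the 16 byte aggregate

     struct s { int i; double d; };

   into two 8 byte words: word 0 contains only the int (plus padding),
   so it is classified X86_64_INTEGERSI_CLASS; word 1 contains the
   double and is classified X86_64_SSEDF_CLASS. The function returns 2,
   and the struct travels in one integer and one SSE register. */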
5387 /* Examine the argument and set the number of registers required in each
5388 class. Return 0 iff the parameter should be passed in memory. */
5389 static int
5390 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5391 int *int_nregs, int *sse_nregs)
5393 enum x86_64_reg_class regclass[MAX_CLASSES];
5394 int n = classify_argument (mode, type, regclass, 0);
5396 *int_nregs = 0;
5397 *sse_nregs = 0;
5398 if (!n)
5399 return 0;
5400 for (n--; n >= 0; n--)
5401 switch (regclass[n])
5403 case X86_64_INTEGER_CLASS:
5404 case X86_64_INTEGERSI_CLASS:
5405 (*int_nregs)++;
5406 break;
5407 case X86_64_SSE_CLASS:
5408 case X86_64_SSESF_CLASS:
5409 case X86_64_SSEDF_CLASS:
5410 (*sse_nregs)++;
5411 break;
5412 case X86_64_NO_CLASS:
5413 case X86_64_SSEUP_CLASS:
5414 break;
5415 case X86_64_X87_CLASS:
5416 case X86_64_X87UP_CLASS:
5417 if (!in_return)
5418 return 0;
5419 break;
5420 case X86_64_COMPLEX_X87_CLASS:
5421 return in_return ? 2 : 0;
5422 case X86_64_MEMORY_CLASS:
5423 gcc_unreachable ();
5425 return 1;
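/* For illustration, given the classification above:

     int int_nregs, sse_nregs;
     examine_argument (TImode, NULL_TREE, 0, &int_nregs, &sse_nregs);
       returns 1 with int_nregs == 2, sse_nregs == 0;
     examine_argument (DCmode, NULL_TREE, 0, &int_nregs, &sse_nregs);
       returns 1 with int_nregs == 0, sse_nregs == 2. */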
5428 /* Construct a container for the argument as used by the GCC interface.
5429 See FUNCTION_ARG for the detailed description. */
5431 static rtx
5432 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5433 const_tree type, int in_return, int nintregs, int nsseregs,
5434 const int *intreg, int sse_regno)
5436 /* The following variables hold the static issued_error state. */
5437 static bool issued_sse_arg_error;
5438 static bool issued_sse_ret_error;
5439 static bool issued_x87_ret_error;
5441 enum machine_mode tmpmode;
5442 int bytes =
5443 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5444 enum x86_64_reg_class regclass[MAX_CLASSES];
5445 int n;
5446 int i;
5447 int nexps = 0;
5448 int needed_sseregs, needed_intregs;
5449 rtx exp[MAX_CLASSES];
5450 rtx ret;
5452 n = classify_argument (mode, type, regclass, 0);
5453 if (!n)
5454 return NULL;
5455 if (!examine_argument (mode, type, in_return, &needed_intregs,
5456 &needed_sseregs))
5457 return NULL;
5458 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5459 return NULL;
5461 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5462 some less clueful developer tries to use floating-point anyway. */
5463 if (needed_sseregs && !TARGET_SSE)
5465 if (in_return)
5467 if (!issued_sse_ret_error)
5469 error ("SSE register return with SSE disabled");
5470 issued_sse_ret_error = true;
5473 else if (!issued_sse_arg_error)
5475 error ("SSE register argument with SSE disabled");
5476 issued_sse_arg_error = true;
5478 return NULL;
5481 /* Likewise, error if the ABI requires us to return values in the
5482 x87 registers and the user specified -mno-80387. */
5483 if (!TARGET_80387 && in_return)
5484 for (i = 0; i < n; i++)
5485 if (regclass[i] == X86_64_X87_CLASS
5486 || regclass[i] == X86_64_X87UP_CLASS
5487 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5489 if (!issued_x87_ret_error)
5491 error ("x87 register return with x87 disabled");
5492 issued_x87_ret_error = true;
5494 return NULL;
5497 /* First construct the simple cases. Avoid SCmode, since we want to use
5498 a single register to pass this type. */
5499 if (n == 1 && mode != SCmode)
5500 switch (regclass[0])
5502 case X86_64_INTEGER_CLASS:
5503 case X86_64_INTEGERSI_CLASS:
5504 return gen_rtx_REG (mode, intreg[0]);
5505 case X86_64_SSE_CLASS:
5506 case X86_64_SSESF_CLASS:
5507 case X86_64_SSEDF_CLASS:
5508 if (mode != BLKmode)
5509 return gen_reg_or_parallel (mode, orig_mode,
5510 SSE_REGNO (sse_regno));
5511 break;
5512 case X86_64_X87_CLASS:
5513 case X86_64_COMPLEX_X87_CLASS:
5514 return gen_rtx_REG (mode, FIRST_STACK_REG);
5515 case X86_64_NO_CLASS:
5516 /* Zero sized array, struct or class. */
5517 return NULL;
5518 default:
5519 gcc_unreachable ();
5521 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5522 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5523 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5524 if (n == 4
5525 && regclass[0] == X86_64_SSE_CLASS
5526 && regclass[1] == X86_64_SSEUP_CLASS
5527 && regclass[2] == X86_64_SSEUP_CLASS
5528 && regclass[3] == X86_64_SSEUP_CLASS
5529 && mode != BLKmode)
5530 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5532 if (n == 2
5533 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5534 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5535 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5536 && regclass[1] == X86_64_INTEGER_CLASS
5537 && (mode == CDImode || mode == TImode || mode == TFmode)
5538 && intreg[0] + 1 == intreg[1])
5539 return gen_rtx_REG (mode, intreg[0]);
5541 /* Otherwise figure out the entries of the PARALLEL. */
5542 for (i = 0; i < n; i++)
5544 int pos;
5546 switch (regclass[i])
5548 case X86_64_NO_CLASS:
5549 break;
5550 case X86_64_INTEGER_CLASS:
5551 case X86_64_INTEGERSI_CLASS:
5552 /* Merge TImodes on aligned occasions here too. */
5553 if (i * 8 + 8 > bytes)
5554 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5555 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5556 tmpmode = SImode;
5557 else
5558 tmpmode = DImode;
5559 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
5560 if (tmpmode == BLKmode)
5561 tmpmode = DImode;
5562 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5563 gen_rtx_REG (tmpmode, *intreg),
5564 GEN_INT (i*8));
5565 intreg++;
5566 break;
5567 case X86_64_SSESF_CLASS:
5568 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5569 gen_rtx_REG (SFmode,
5570 SSE_REGNO (sse_regno)),
5571 GEN_INT (i*8));
5572 sse_regno++;
5573 break;
5574 case X86_64_SSEDF_CLASS:
5575 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5576 gen_rtx_REG (DFmode,
5577 SSE_REGNO (sse_regno)),
5578 GEN_INT (i*8));
5579 sse_regno++;
5580 break;
5581 case X86_64_SSE_CLASS:
5582 pos = i;
5583 switch (n)
5585 case 1:
5586 tmpmode = DImode;
5587 break;
5588 case 2:
5589 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
5591 tmpmode = TImode;
5592 i++;
5594 else
5595 tmpmode = DImode;
5596 break;
5597 case 4:
5598 gcc_assert (i == 0
5599 && regclass[1] == X86_64_SSEUP_CLASS
5600 && regclass[2] == X86_64_SSEUP_CLASS
5601 && regclass[3] == X86_64_SSEUP_CLASS);
5602 tmpmode = OImode;
5603 i += 3;
5604 break;
5605 default:
5606 gcc_unreachable ();
5608 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5609 gen_rtx_REG (tmpmode,
5610 SSE_REGNO (sse_regno)),
5611 GEN_INT (pos*8));
5612 sse_regno++;
5613 break;
5614 default:
5615 gcc_unreachable ();
5619 /* Empty aligned struct, union or class. */
5620 if (nexps == 0)
5621 return NULL;
5623 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5624 for (i = 0; i < nexps; i++)
5625 XVECEXP (ret, 0, i) = exp [i];
5626 return ret;
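/* For illustration, for struct s { int i; double d; }, classified
   above as {X86_64_INTEGERSI_CLASS, X86_64_SSEDF_CLASS}, the container
   built here is roughly

     (parallel [(expr_list (reg:SI di) (const_int 0))
                (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. the first word in the next free integer register and the
   second word in the next free SSE register. */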
5629 /* Update the data in CUM to advance over an argument of mode MODE
5630 and data type TYPE. (TYPE is null for libcalls where that information
5631 may not be available.) */
5633 static void
5634 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5635 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5637 switch (mode)
5639 default:
5640 break;
5642 case BLKmode:
5643 if (bytes < 0)
5644 break;
5645 /* FALLTHRU */
5647 case DImode:
5648 case SImode:
5649 case HImode:
5650 case QImode:
5651 cum->words += words;
5652 cum->nregs -= words;
5653 cum->regno += words;
5655 if (cum->nregs <= 0)
5657 cum->nregs = 0;
5658 cum->regno = 0;
5660 break;
5662 case OImode:
5663 /* OImode shouldn't be used directly. */
5664 gcc_unreachable ();
5666 case DFmode:
5667 if (cum->float_in_sse < 2)
5668 break;
5669 case SFmode:
5670 if (cum->float_in_sse < 1)
5671 break;
5672 /* FALLTHRU */
5674 case V8SFmode:
5675 case V8SImode:
5676 case V32QImode:
5677 case V16HImode:
5678 case V4DFmode:
5679 case V4DImode:
5680 case TImode:
5681 case V16QImode:
5682 case V8HImode:
5683 case V4SImode:
5684 case V2DImode:
5685 case V4SFmode:
5686 case V2DFmode:
5687 if (!type || !AGGREGATE_TYPE_P (type))
5689 cum->sse_words += words;
5690 cum->sse_nregs -= 1;
5691 cum->sse_regno += 1;
5692 if (cum->sse_nregs <= 0)
5694 cum->sse_nregs = 0;
5695 cum->sse_regno = 0;
5698 break;
5700 case V8QImode:
5701 case V4HImode:
5702 case V2SImode:
5703 case V2SFmode:
5704 case V1DImode:
5705 if (!type || !AGGREGATE_TYPE_P (type))
5707 cum->mmx_words += words;
5708 cum->mmx_nregs -= 1;
5709 cum->mmx_regno += 1;
5710 if (cum->mmx_nregs <= 0)
5712 cum->mmx_nregs = 0;
5713 cum->mmx_regno = 0;
5716 break;
5720 static void
5721 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5722 tree type, HOST_WIDE_INT words, int named)
5724 int int_nregs, sse_nregs;
5726 /* Unnamed 256bit vector mode parameters are passed on stack. */
5727 if (!named && VALID_AVX256_REG_MODE (mode))
5728 return;
5730 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5731 cum->words += words;
5732 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5734 cum->nregs -= int_nregs;
5735 cum->sse_nregs -= sse_nregs;
5736 cum->regno += int_nregs;
5737 cum->sse_regno += sse_nregs;
5739 else
5740 cum->words += words;
5743 static void
5744 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5745 HOST_WIDE_INT words)
5747 /* Otherwise, this should be passed indirectly. */
5748 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5750 cum->words += words;
5751 if (cum->nregs > 0)
5753 cum->nregs -= 1;
5754 cum->regno += 1;
5758 void
5759 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5760 tree type, int named)
5762 HOST_WIDE_INT bytes, words;
5764 if (mode == BLKmode)
5765 bytes = int_size_in_bytes (type);
5766 else
5767 bytes = GET_MODE_SIZE (mode);
5768 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5770 if (type)
5771 mode = type_natural_mode (type, NULL);
5773 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5774 function_arg_advance_ms_64 (cum, bytes, words);
5775 else if (TARGET_64BIT)
5776 function_arg_advance_64 (cum, mode, type, words, named);
5777 else
5778 function_arg_advance_32 (cum, mode, type, bytes, words);
5781 /* Define where to put the arguments to a function.
5782 Value is zero to push the argument on the stack,
5783 or a hard register in which to store the argument.
5785 MODE is the argument's machine mode.
5786 TYPE is the data type of the argument (as a tree).
5787 This is null for libcalls where that information may
5788 not be available.
5789 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5790 the preceding args and about the function being called.
5791 NAMED is nonzero if this argument is a named parameter
5792 (otherwise it is an extra parameter matching an ellipsis). */
5794 static rtx
5795 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5796 enum machine_mode orig_mode, tree type,
5797 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5799 static bool warnedsse, warnedmmx;
5801 /* Avoid the AL settings for the Unix64 ABI. */
5802 if (mode == VOIDmode)
5803 return constm1_rtx;
5805 switch (mode)
5807 default:
5808 break;
5810 case BLKmode:
5811 if (bytes < 0)
5812 break;
5813 /* FALLTHRU */
5814 case DImode:
5815 case SImode:
5816 case HImode:
5817 case QImode:
5818 if (words <= cum->nregs)
5820 int regno = cum->regno;
5822 /* Fastcall allocates the first two DWORD (SImode) or
5823 smaller arguments to ECX and EDX if they aren't of an
5824 aggregate type. */
5825 if (cum->fastcall)
5827 if (mode == BLKmode
5828 || mode == DImode
5829 || (type && AGGREGATE_TYPE_P (type)))
5830 break;
5832 /* ECX, not EAX, is the first allocated register. */
5833 if (regno == AX_REG)
5834 regno = CX_REG;
5836 return gen_rtx_REG (mode, regno);
5838 break;
5840 case DFmode:
5841 if (cum->float_in_sse < 2)
5842 break;
5843 case SFmode:
5844 if (cum->float_in_sse < 1)
5845 break;
5846 /* FALLTHRU */
5847 case TImode:
5848 /* In 32bit, we pass TImode in xmm registers. */
5849 case V16QImode:
5850 case V8HImode:
5851 case V4SImode:
5852 case V2DImode:
5853 case V4SFmode:
5854 case V2DFmode:
5855 if (!type || !AGGREGATE_TYPE_P (type))
5857 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5859 warnedsse = true;
5860 warning (0, "SSE vector argument without SSE enabled "
5861 "changes the ABI");
5863 if (cum->sse_nregs)
5864 return gen_reg_or_parallel (mode, orig_mode,
5865 cum->sse_regno + FIRST_SSE_REG);
5867 break;
5869 case OImode:
5870 /* OImode shouldn't be used directly. */
5871 gcc_unreachable ();
5873 case V8SFmode:
5874 case V8SImode:
5875 case V32QImode:
5876 case V16HImode:
5877 case V4DFmode:
5878 case V4DImode:
5879 if (!type || !AGGREGATE_TYPE_P (type))
5881 if (cum->sse_nregs)
5882 return gen_reg_or_parallel (mode, orig_mode,
5883 cum->sse_regno + FIRST_SSE_REG);
5885 break;
5887 case V8QImode:
5888 case V4HImode:
5889 case V2SImode:
5890 case V2SFmode:
5891 case V1DImode:
5892 if (!type || !AGGREGATE_TYPE_P (type))
5894 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5896 warnedmmx = true;
5897 warning (0, "MMX vector argument without MMX enabled "
5898 "changes the ABI");
5900 if (cum->mmx_nregs)
5901 return gen_reg_or_parallel (mode, orig_mode,
5902 cum->mmx_regno + FIRST_MMX_REG);
5904 break;
5907 return NULL_RTX;
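/* For illustration: with -mregparm=3 the first three SImode arguments
   of int f (int, int, int) arrive in EAX, EDX and ECX, while for
   fastcall the swap above makes ECX the first allocated register, so
   the first two DWORD arguments arrive in ECX and EDX. */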
5910 static rtx
5911 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5912 enum machine_mode orig_mode, tree type, int named)
5914 /* Handle a hidden AL argument containing the number of SSE registers
5915 used by varargs x86-64 functions. */
5916 if (mode == VOIDmode)
5917 return GEN_INT (cum->maybe_vaarg
5918 ? (cum->sse_nregs < 0
5919 ? (cum->call_abi == ix86_abi
5920 ? SSE_REGPARM_MAX
5921 : (ix86_abi != SYSV_ABI
5922 ? X86_64_SSE_REGPARM_MAX
5923 : X86_64_MS_SSE_REGPARM_MAX))
5924 : cum->sse_regno)
5925 : -1);
5927 switch (mode)
5929 default:
5930 break;
5932 case V8SFmode:
5933 case V8SImode:
5934 case V32QImode:
5935 case V16HImode:
5936 case V4DFmode:
5937 case V4DImode:
5938 /* Unnamed 256bit vector mode parameters are passed on stack. */
5939 if (!named)
5940 return NULL;
5941 break;
5944 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5945 cum->sse_nregs,
5946 &x86_64_int_parameter_registers [cum->regno],
5947 cum->sse_regno);
5950 static rtx
5951 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5952 enum machine_mode orig_mode, int named,
5953 HOST_WIDE_INT bytes)
5955 unsigned int regno;
5957 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
5958 We use the value -2 to specify that the current function call is MS ABI. */
5959 if (mode == VOIDmode)
5960 return GEN_INT (-2);
5962 /* If we've run out of registers, it goes on the stack. */
5963 if (cum->nregs == 0)
5964 return NULL_RTX;
5966 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5968 /* Only floating point modes are passed in anything but integer regs. */
5969 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5971 if (named)
5972 regno = cum->regno + FIRST_SSE_REG;
5973 else
5975 rtx t1, t2;
5977 /* Unnamed floating parameters are passed in both the
5978 SSE and integer registers. */
5979 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5980 t2 = gen_rtx_REG (mode, regno);
5981 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5982 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5983 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5986 /* Handle aggregate types passed in a register. */
5987 if (orig_mode == BLKmode)
5989 if (bytes > 0 && bytes <= 8)
5990 mode = (bytes > 4 ? DImode : SImode);
5991 if (mode == BLKmode)
5992 mode = DImode;
5995 return gen_reg_or_parallel (mode, orig_mode, regno);
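/* For illustration: under the MS ABI an unnamed DFmode argument in the
   second slot yields a parallel naming both candidate registers,
   roughly

     (parallel [(expr_list (reg:DF xmm1) (const_int 0))
                (expr_list (reg:DF dx) (const_int 0))])

   so a varargs callee may fetch it from either the SSE or the integer
   register. */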
5998 rtx
5999 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
6000 tree type, int named)
6002 enum machine_mode mode = omode;
6003 HOST_WIDE_INT bytes, words;
6005 if (mode == BLKmode)
6006 bytes = int_size_in_bytes (type);
6007 else
6008 bytes = GET_MODE_SIZE (mode);
6009 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6011 /* To simplify the code below, represent vector types with a vector mode
6012 even if MMX/SSE are not active. */
6013 if (type && TREE_CODE (type) == VECTOR_TYPE)
6014 mode = type_natural_mode (type, cum);
6016 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6017 return function_arg_ms_64 (cum, mode, omode, named, bytes);
6018 else if (TARGET_64BIT)
6019 return function_arg_64 (cum, mode, omode, type, named);
6020 else
6021 return function_arg_32 (cum, mode, omode, type, bytes, words);
6024 /* A C expression that indicates when an argument must be passed by
6025 reference. If nonzero for an argument, a copy of that argument is
6026 made in memory and a pointer to the argument is passed instead of
6027 the argument itself. The pointer is passed in whatever way is
6028 appropriate for passing a pointer to that type. */
6030 static bool
6031 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6032 enum machine_mode mode ATTRIBUTE_UNUSED,
6033 const_tree type, bool named ATTRIBUTE_UNUSED)
6035 /* See Windows x64 Software Convention. */
6036 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6038 int msize = (int) GET_MODE_SIZE (mode);
6039 if (type)
6041 /* Arrays are passed by reference. */
6042 if (TREE_CODE (type) == ARRAY_TYPE)
6043 return true;
6045 if (AGGREGATE_TYPE_P (type))
6047 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6048 are passed by reference. */
6049 msize = int_size_in_bytes (type);
6053 /* __m128 is passed by reference. */
6054 switch (msize) {
6055 case 1: case 2: case 4: case 8:
6056 break;
6057 default:
6058 return true;
6061 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6062 return 1;
6064 return 0;
6067 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
6068 ABI. */
6069 static bool
6070 contains_aligned_value_p (tree type)
6072 enum machine_mode mode = TYPE_MODE (type);
6073 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6074 || mode == TDmode
6075 || mode == TFmode
6076 || mode == TCmode)
6077 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6078 return true;
6079 if (TYPE_ALIGN (type) < 128)
6080 return false;
6082 if (AGGREGATE_TYPE_P (type))
6084 /* Walk the aggregates recursively. */
6085 switch (TREE_CODE (type))
6087 case RECORD_TYPE:
6088 case UNION_TYPE:
6089 case QUAL_UNION_TYPE:
6091 tree field;
6093 /* Walk all the structure fields. */
6094 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6096 if (TREE_CODE (field) == FIELD_DECL
6097 && contains_aligned_value_p (TREE_TYPE (field)))
6098 return true;
6100 break;
6103 case ARRAY_TYPE:
6104 /* Just in case some language passes arrays by value. */
6105 if (contains_aligned_value_p (TREE_TYPE (type)))
6106 return true;
6107 break;
6109 default:
6110 gcc_unreachable ();
6113 return false;
6116 /* Gives the alignment boundary, in bits, of an argument with the
6117 specified mode and type. */
6119 int
6120 ix86_function_arg_boundary (enum machine_mode mode, tree type)
6122 int align;
6123 if (type)
6125 /* Since the canonical type is used for the call, convert to the
6126 canonical type if needed. */
6127 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
6128 type = TYPE_CANONICAL (type);
6129 align = TYPE_ALIGN (type);
6131 else
6132 align = GET_MODE_ALIGNMENT (mode);
6133 if (align < PARM_BOUNDARY)
6134 align = PARM_BOUNDARY;
6135 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6136 natural boundaries. */
6137 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6139 /* The i386 ABI defines all arguments to be 4 byte aligned. We have to
6140 make an exception for SSE modes, since these require 128bit
6141 alignment.
6143 The handling here differs from field_alignment. ICC aligns MMX
6144 arguments to 4 byte boundaries, while structure fields are aligned
6145 to 8 byte boundaries. */
6146 if (!type)
6148 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6149 align = PARM_BOUNDARY;
6151 else
6153 if (!contains_aligned_value_p (type))
6154 align = PARM_BOUNDARY;
6157 if (align > BIGGEST_ALIGNMENT)
6158 align = BIGGEST_ALIGNMENT;
6159 return align;
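/* For illustration: in 32-bit mode a double argument therefore stays
   at the 4 byte PARM_BOUNDARY, while an __m128 or __float128 argument,
   which satisfies contains_aligned_value_p above, keeps its 128 bit
   alignment. */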
6162 /* Return true if N is a possible register number of function value. */
6164 bool
6165 ix86_function_value_regno_p (int regno)
6167 switch (regno)
6169 case 0:
6170 return true;
6172 case FIRST_FLOAT_REG:
6173 /* TODO: The function should depend on current function ABI but
6174 builtins.c would need updating then. Therefore we use the
6175 default ABI. */
6176 if (TARGET_64BIT && ix86_abi == MS_ABI)
6177 return false;
6178 return TARGET_FLOAT_RETURNS_IN_80387;
6180 case FIRST_SSE_REG:
6181 return TARGET_SSE;
6183 case FIRST_MMX_REG:
6184 if (TARGET_MACHO || TARGET_64BIT)
6185 return false;
6186 return TARGET_MMX;
6189 return false;
6192 /* Define how to find the value returned by a function.
6193 VALTYPE is the data type of the value (as a tree).
6194 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6195 otherwise, FUNC is 0. */
6197 static rtx
6198 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6199 const_tree fntype, const_tree fn)
6201 unsigned int regno;
6203 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6204 we normally prevent this case when MMX is not available. However,
6205 some ABIs may require the result to be returned like DImode. */
6206 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6207 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6209 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6210 we prevent this case when SSE is not available. However, some ABIs
6211 may require the result to be returned like integer TImode. */
6212 else if (mode == TImode
6213 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6214 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6216 /* 32-byte vector modes in %ymm0. */
6217 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6218 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6220 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6221 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6222 regno = FIRST_FLOAT_REG;
6223 else
6224 /* Most things go in %eax. */
6225 regno = AX_REG;
6227 /* Override FP return register with %xmm0 for local functions when
6228 SSE math is enabled or for functions with sseregparm attribute. */
6229 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6231 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6232 if ((sse_level >= 1 && mode == SFmode)
6233 || (sse_level == 2 && mode == DFmode))
6234 regno = FIRST_SSE_REG;
6237 /* OImode shouldn't be used directly. */
6238 gcc_assert (mode != OImode);
6240 return gen_rtx_REG (orig_mode, regno);
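/* For illustration: by default a 32-bit function returning float or
   double uses %st(0); when ix86_function_sseregparm reports SSE math
   for the callee (e.g. a local function built with -mfpmath=sse), the
   override above redirects the SFmode/DFmode return value to %xmm0. */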
6243 static rtx
6244 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6245 const_tree valtype)
6247 rtx ret;
6249 /* Handle libcalls, which don't provide a type node. */
6250 if (valtype == NULL)
6252 switch (mode)
6254 case SFmode:
6255 case SCmode:
6256 case DFmode:
6257 case DCmode:
6258 case TFmode:
6259 case SDmode:
6260 case DDmode:
6261 case TDmode:
6262 return gen_rtx_REG (mode, FIRST_SSE_REG);
6263 case XFmode:
6264 case XCmode:
6265 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6266 case TCmode:
6267 return NULL;
6268 default:
6269 return gen_rtx_REG (mode, AX_REG);
6273 ret = construct_container (mode, orig_mode, valtype, 1,
6274 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6275 x86_64_int_return_registers, 0);
6277 /* For zero sized structures, construct_container returns NULL, but we
6278 need to keep the rest of the compiler happy by returning a meaningful value. */
6279 if (!ret)
6280 ret = gen_rtx_REG (orig_mode, AX_REG);
6282 return ret;
6285 static rtx
6286 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6288 unsigned int regno = AX_REG;
6290 if (TARGET_SSE)
6292 switch (GET_MODE_SIZE (mode))
6294 case 16:
6295 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6296 && !COMPLEX_MODE_P (mode))
6297 regno = FIRST_SSE_REG;
6298 break;
6299 case 8:
6300 case 4:
6301 if (mode == SFmode || mode == DFmode)
6302 regno = FIRST_SSE_REG;
6303 break;
6304 default:
6305 break;
6308 return gen_rtx_REG (orig_mode, regno);
6311 static rtx
6312 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6313 enum machine_mode orig_mode, enum machine_mode mode)
6315 const_tree fn, fntype;
6317 fn = NULL_TREE;
6318 if (fntype_or_decl && DECL_P (fntype_or_decl))
6319 fn = fntype_or_decl;
6320 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6322 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6323 return function_value_ms_64 (orig_mode, mode);
6324 else if (TARGET_64BIT)
6325 return function_value_64 (orig_mode, mode, valtype);
6326 else
6327 return function_value_32 (orig_mode, mode, fntype, fn);
6330 static rtx
6331 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6332 bool outgoing ATTRIBUTE_UNUSED)
6334 enum machine_mode mode, orig_mode;
6336 orig_mode = TYPE_MODE (valtype);
6337 mode = type_natural_mode (valtype, NULL);
6338 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6341 rtx
6342 ix86_libcall_value (enum machine_mode mode)
6344 return ix86_function_value_1 (NULL, NULL, mode, mode);
6347 /* Return true iff type is returned in memory. */
6349 static int ATTRIBUTE_UNUSED
6350 return_in_memory_32 (const_tree type, enum machine_mode mode)
6352 HOST_WIDE_INT size;
6354 if (mode == BLKmode)
6355 return 1;
6357 size = int_size_in_bytes (type);
6359 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6360 return 0;
6362 if (VECTOR_MODE_P (mode) || mode == TImode)
6364 /* User-created vectors small enough to fit in EAX. */
6365 if (size < 8)
6366 return 0;
6368 /* MMX/3dNow values are returned in MM0,
6369 except when it doesn't exist. */
6370 if (size == 8)
6371 return (TARGET_MMX ? 0 : 1);
6373 /* SSE values are returned in XMM0, except when it doesn't exist. */
6374 if (size == 16)
6375 return (TARGET_SSE ? 0 : 1);
6377 /* AVX values are returned in YMM0, except when it doesn't exist. */
6378 if (size == 32)
6379 return TARGET_AVX ? 0 : 1;
6382 if (mode == XFmode)
6383 return 0;
6385 if (size > 12)
6386 return 1;
6388 /* OImode shouldn't be used directly. */
6389 gcc_assert (mode != OImode);
6391 return 0;
6394 static int ATTRIBUTE_UNUSED
6395 return_in_memory_64 (const_tree type, enum machine_mode mode)
6397 int needed_intregs, needed_sseregs;
6398 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6401 static int ATTRIBUTE_UNUSED
6402 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6404 HOST_WIDE_INT size = int_size_in_bytes (type);
6406 /* __m128 is returned in xmm0. */
6407 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6408 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6409 return 0;
6411 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
6412 return (size != 1 && size != 2 && size != 4 && size != 8);
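/* For illustration, under the MS ABI rules above a 16 byte __m128 is
   returned in XMM0, an aggregate of exactly 1, 2, 4 or 8 bytes in RAX,
   and anything else (e.g. a 12 byte struct) in memory through a hidden
   pointer. */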
6415 static bool
6416 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6418 #ifdef SUBTARGET_RETURN_IN_MEMORY
6419 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6420 #else
6421 const enum machine_mode mode = type_natural_mode (type, NULL);
6423 if (TARGET_64BIT)
6425 if (ix86_function_type_abi (fntype) == MS_ABI)
6426 return return_in_memory_ms_64 (type, mode);
6427 else
6428 return return_in_memory_64 (type, mode);
6430 else
6431 return return_in_memory_32 (type, mode);
6432 #endif
6435 /* Return true iff TYPE is returned in memory. This version is used
6436 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6437 but differs notably in that when MMX is available, 8-byte vectors
6438 are returned in memory, rather than in MMX registers. */
6440 bool
6441 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6443 int size;
6444 enum machine_mode mode = type_natural_mode (type, NULL);
6446 if (TARGET_64BIT)
6447 return return_in_memory_64 (type, mode);
6449 if (mode == BLKmode)
6450 return 1;
6452 size = int_size_in_bytes (type);
6454 if (VECTOR_MODE_P (mode))
6456 /* Return in memory only if MMX registers *are* available. This
6457 seems backwards, but it is consistent with the existing
6458 Solaris x86 ABI. */
6459 if (size == 8)
6460 return TARGET_MMX;
6461 if (size == 16)
6462 return !TARGET_SSE;
6464 else if (mode == TImode)
6465 return !TARGET_SSE;
6466 else if (mode == XFmode)
6467 return 0;
6469 return size > 12;
6472 /* When returning SSE vector types, we have a choice of either
6473 (1) being ABI incompatible with a -march switch, or
6474 (2) generating an error.
6475 Given no good solution, I think the safest thing is one warning.
6476 The user won't be able to use -Werror, but....
6478 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6479 called in response to actually generating a caller or callee that
6480 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6481 via aggregate_value_p for general type probing from tree-ssa. */
6483 static rtx
6484 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6486 static bool warnedsse, warnedmmx;
6488 if (!TARGET_64BIT && type)
6490 /* Look at the return type of the function, not the function type. */
6491 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6493 if (!TARGET_SSE && !warnedsse)
6495 if (mode == TImode
6496 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6498 warnedsse = true;
6499 warning (0, "SSE vector return without SSE enabled "
6500 "changes the ABI");
6504 if (!TARGET_MMX && !warnedmmx)
6506 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6508 warnedmmx = true;
6509 warning (0, "MMX vector return without MMX enabled "
6510 "changes the ABI");
6515 return NULL;
6519 /* Create the va_list data type. */
6521 /* Returns the calling convention specific va_list data type.
6522 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6524 static tree
6525 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6527 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6529 /* For i386 we use a plain pointer to the argument area. */
6530 if (!TARGET_64BIT || abi == MS_ABI)
6531 return build_pointer_type (char_type_node);
6533 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6534 type_decl = build_decl (BUILTINS_LOCATION,
6535 TYPE_DECL, get_identifier ("__va_list_tag"), record);
6537 f_gpr = build_decl (BUILTINS_LOCATION,
6538 FIELD_DECL, get_identifier ("gp_offset"),
6539 unsigned_type_node);
6540 f_fpr = build_decl (BUILTINS_LOCATION,
6541 FIELD_DECL, get_identifier ("fp_offset"),
6542 unsigned_type_node);
6543 f_ovf = build_decl (BUILTINS_LOCATION,
6544 FIELD_DECL, get_identifier ("overflow_arg_area"),
6545 ptr_type_node);
6546 f_sav = build_decl (BUILTINS_LOCATION,
6547 FIELD_DECL, get_identifier ("reg_save_area"),
6548 ptr_type_node);
6550 va_list_gpr_counter_field = f_gpr;
6551 va_list_fpr_counter_field = f_fpr;
6553 DECL_FIELD_CONTEXT (f_gpr) = record;
6554 DECL_FIELD_CONTEXT (f_fpr) = record;
6555 DECL_FIELD_CONTEXT (f_ovf) = record;
6556 DECL_FIELD_CONTEXT (f_sav) = record;
6558 TREE_CHAIN (record) = type_decl;
6559 TYPE_NAME (record) = type_decl;
6560 TYPE_FIELDS (record) = f_gpr;
6561 TREE_CHAIN (f_gpr) = f_fpr;
6562 TREE_CHAIN (f_fpr) = f_ovf;
6563 TREE_CHAIN (f_ovf) = f_sav;
6565 layout_type (record);
6567 /* The correct type is an array type of one element. */
6568 return build_array_type (record, build_index_type (size_zero_node));
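/* For reference, the record built above corresponds to the va_list
   declaration given by the x86-64 psABI:

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];  */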
6571 /* Set up the builtin va_list data type and, for 64-bit, the additional
6572 calling convention specific va_list data types. */
6574 static tree
6575 ix86_build_builtin_va_list (void)
6577 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
6579 /* Initialize the ABI specific va_list builtin types. */
6580 if (TARGET_64BIT)
6582 tree t;
6583 if (ix86_abi == MS_ABI)
6585 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6586 if (TREE_CODE (t) != RECORD_TYPE)
6587 t = build_variant_type_copy (t);
6588 sysv_va_list_type_node = t;
6590 else
6592 t = ret;
6593 if (TREE_CODE (t) != RECORD_TYPE)
6594 t = build_variant_type_copy (t);
6595 sysv_va_list_type_node = t;
6597 if (ix86_abi != MS_ABI)
6599 t = ix86_build_builtin_va_list_abi (MS_ABI);
6600 if (TREE_CODE (t) != RECORD_TYPE)
6601 t = build_variant_type_copy (t);
6602 ms_va_list_type_node = t;
6604 else
6606 t = ret;
6607 if (TREE_CODE (t) != RECORD_TYPE)
6608 t = build_variant_type_copy (t);
6609 ms_va_list_type_node = t;
6613 return ret;
6616 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
6618 static void
6619 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6621 rtx save_area, mem;
6622 rtx label;
6623 rtx label_ref;
6624 rtx tmp_reg;
6625 rtx nsse_reg;
6626 alias_set_type set;
6627 int i;
6628 int regparm = ix86_regparm;
6630 if (cum->call_abi != ix86_abi)
6631 regparm = (ix86_abi != SYSV_ABI
6632 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
6634 /* GPR size of varargs save area. */
6635 if (cfun->va_list_gpr_size)
6636 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6637 else
6638 ix86_varargs_gpr_size = 0;
6640 /* FPR size of varargs save area. We don't need it if we don't pass
6641 anything in SSE registers. */
6642 if (cum->sse_nregs && cfun->va_list_fpr_size)
6643 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6644 else
6645 ix86_varargs_fpr_size = 0;
6647 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6648 return;
6650 save_area = frame_pointer_rtx;
6651 set = get_varargs_alias_set ();
6653 for (i = cum->regno;
6654 i < regparm
6655 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6656 i++)
6658 mem = gen_rtx_MEM (Pmode,
6659 plus_constant (save_area, i * UNITS_PER_WORD));
6660 MEM_NOTRAP_P (mem) = 1;
6661 set_mem_alias_set (mem, set);
6662 emit_move_insn (mem, gen_rtx_REG (Pmode,
6663 x86_64_int_parameter_registers[i]));
6666 if (ix86_varargs_fpr_size)
6668 /* Now emit code to save SSE registers. The AX parameter contains the
6669 number of SSE parameter registers used to call this function. We use
6670 the sse_prologue_save insn template, which produces a computed jump
6671 across the SSE saves. We need some preparation work to get this working. */
6673 label = gen_label_rtx ();
6674 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6676 /* Compute the address to jump to:
6677 label - eax*4 + nnamed_sse_arguments*4, or
6678 label - eax*5 + nnamed_sse_arguments*5 for AVX. */
6679 tmp_reg = gen_reg_rtx (Pmode);
6680 nsse_reg = gen_reg_rtx (Pmode);
6681 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6682 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6683 gen_rtx_MULT (Pmode, nsse_reg,
6684 GEN_INT (4))));
6686 /* vmovaps is one byte longer than movaps. */
6687 if (TARGET_AVX)
6688 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6689 gen_rtx_PLUS (Pmode, tmp_reg,
6690 nsse_reg)));
6692 if (cum->sse_regno)
6693 emit_move_insn
6694 (nsse_reg,
6695 gen_rtx_CONST (DImode,
6696 gen_rtx_PLUS (DImode,
6697 label_ref,
6698 GEN_INT (cum->sse_regno
6699 * (TARGET_AVX ? 5 : 4)))));
6700 else
6701 emit_move_insn (nsse_reg, label_ref);
6702 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6704 /* Compute the address of the memory block we save into. We always use
6705 a pointer pointing 127 bytes past the first byte to store; this is
6706 needed to keep the instruction size limited to 4 bytes (5 bytes for
6707 AVX) with a one byte displacement. */
6708 tmp_reg = gen_reg_rtx (Pmode);
6709 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6710 plus_constant (save_area,
6711 ix86_varargs_gpr_size + 127)));
6712 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6713 MEM_NOTRAP_P (mem) = 1;
6714 set_mem_alias_set (mem, set);
6715 set_mem_align (mem, BITS_PER_WORD);
6717 /* And finally do the dirty job! */
6718 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6719 GEN_INT (cum->sse_regno), label));
6723 static void
6724 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6726 alias_set_type set = get_varargs_alias_set ();
6727 int i;
6729 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
6731 rtx reg, mem;
6733 mem = gen_rtx_MEM (Pmode,
6734 plus_constant (virtual_incoming_args_rtx,
6735 i * UNITS_PER_WORD));
6736 MEM_NOTRAP_P (mem) = 1;
6737 set_mem_alias_set (mem, set);
6739 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6740 emit_move_insn (mem, reg);
6744 static void
6745 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6746 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6747 int no_rtl)
6749 CUMULATIVE_ARGS next_cum;
6750 tree fntype;
6752 /* This argument doesn't appear to be used anymore, which is good,
6753 because the old code here didn't suppress rtl generation. */
6754 gcc_assert (!no_rtl);
6756 if (!TARGET_64BIT)
6757 return;
6759 fntype = TREE_TYPE (current_function_decl);
6761 /* For varargs, we do not want to skip the dummy va_dcl argument.
6762 For stdargs, we do want to skip the last named argument. */
6763 next_cum = *cum;
6764 if (stdarg_p (fntype))
6765 function_arg_advance (&next_cum, mode, type, 1);
6767 if (cum->call_abi == MS_ABI)
6768 setup_incoming_varargs_ms_64 (&next_cum);
6769 else
6770 setup_incoming_varargs_64 (&next_cum);
6773 /* Check whether TYPE is a char * kind of va_list. */
6775 static bool
6776 is_va_list_char_pointer (tree type)
6778 tree canonic;
6780 /* For 32-bit it is always true. */
6781 if (!TARGET_64BIT)
6782 return true;
6783 canonic = ix86_canonical_va_list_type (type);
6784 return (canonic == ms_va_list_type_node
6785 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
6788 /* Implement va_start. */
6790 static void
6791 ix86_va_start (tree valist, rtx nextarg)
6793 HOST_WIDE_INT words, n_gpr, n_fpr;
6794 tree f_gpr, f_fpr, f_ovf, f_sav;
6795 tree gpr, fpr, ovf, sav, t;
6796 tree type;
6798 /* Only the 64bit target needs something special. */
6799 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6801 std_expand_builtin_va_start (valist, nextarg);
6802 return;
6805 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6806 f_fpr = TREE_CHAIN (f_gpr);
6807 f_ovf = TREE_CHAIN (f_fpr);
6808 f_sav = TREE_CHAIN (f_ovf);
6810 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6811 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6812 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6813 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6814 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6816 /* Count the number of gp and fp argument registers used. */
6817 words = crtl->args.info.words;
6818 n_gpr = crtl->args.info.regno;
6819 n_fpr = crtl->args.info.sse_regno;
6821 if (cfun->va_list_gpr_size)
6823 type = TREE_TYPE (gpr);
6824 t = build2 (MODIFY_EXPR, type,
6825 gpr, build_int_cst (type, n_gpr * 8));
6826 TREE_SIDE_EFFECTS (t) = 1;
6827 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6830 if (TARGET_SSE && cfun->va_list_fpr_size)
6832 type = TREE_TYPE (fpr);
6833 t = build2 (MODIFY_EXPR, type, fpr,
6834 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6835 TREE_SIDE_EFFECTS (t) = 1;
6836 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6839 /* Find the overflow area. */
6840 type = TREE_TYPE (ovf);
6841 t = make_tree (type, crtl->args.internal_arg_pointer);
6842 if (words != 0)
6843 t = build2 (POINTER_PLUS_EXPR, type, t,
6844 size_int (words * UNITS_PER_WORD));
6845 t = build2 (MODIFY_EXPR, type, ovf, t);
6846 TREE_SIDE_EFFECTS (t) = 1;
6847 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6849 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6851 /* Find the register save area.
6852 The function prologue saves it right above the stack frame. */
6853 type = TREE_TYPE (sav);
6854 t = make_tree (type, frame_pointer_rtx);
6855 if (!ix86_varargs_gpr_size)
6856 t = build2 (POINTER_PLUS_EXPR, type, t,
6857 size_int (-8 * X86_64_REGPARM_MAX));
6858 t = build2 (MODIFY_EXPR, type, sav, t);
6859 TREE_SIDE_EFFECTS (t) = 1;
6860 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
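/* For illustration: after va_start in a function declared
   f (int a, ...), one named integer argument has been consumed, so the
   code above initializes gp_offset to 1 * 8 == 8 and fp_offset to
   0 * 16 + 8 * X86_64_REGPARM_MAX == 48, i.e. the offsets of the first
   unconsumed GP and SSE slots within the register save area. */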
6864 /* Implement va_arg. */
6866 static tree
6867 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6868 gimple_seq *post_p)
6870 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6871 tree f_gpr, f_fpr, f_ovf, f_sav;
6872 tree gpr, fpr, ovf, sav, t;
6873 int size, rsize;
6874 tree lab_false, lab_over = NULL_TREE;
6875 tree addr, t2;
6876 rtx container;
6877 int indirect_p = 0;
6878 tree ptrtype;
6879 enum machine_mode nat_mode;
6880 int arg_boundary;
6882 /* Only the 64bit target needs something special. */
6883 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6884 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6886 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6887 f_fpr = TREE_CHAIN (f_gpr);
6888 f_ovf = TREE_CHAIN (f_fpr);
6889 f_sav = TREE_CHAIN (f_ovf);
6891 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
6892 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
6893 valist = build_va_arg_indirect_ref (valist);
6894 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6895 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6896 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6898 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6899 if (indirect_p)
6900 type = build_pointer_type (type);
6901 size = int_size_in_bytes (type);
6902 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6904 nat_mode = type_natural_mode (type, NULL);
6905 switch (nat_mode)
6907 case V8SFmode:
6908 case V8SImode:
6909 case V32QImode:
6910 case V16HImode:
6911 case V4DFmode:
6912 case V4DImode:
6913 /* Unnamed 256bit vector mode parameters are passed on stack. */
6914 if (ix86_cfun_abi () == SYSV_ABI)
6916 container = NULL;
6917 break;
6920 default:
6921 container = construct_container (nat_mode, TYPE_MODE (type),
6922 type, 0, X86_64_REGPARM_MAX,
6923 X86_64_SSE_REGPARM_MAX, intreg,
6924 0);
6925 break;
6928 /* Pull the value out of the saved registers. */
6930 addr = create_tmp_var (ptr_type_node, "addr");
6931 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6933 if (container)
6935 int needed_intregs, needed_sseregs;
6936 bool need_temp;
6937 tree int_addr, sse_addr;
6939 lab_false = create_artificial_label (UNKNOWN_LOCATION);
6940 lab_over = create_artificial_label (UNKNOWN_LOCATION);
6942 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
6944 need_temp = (!REG_P (container)
6945 && ((needed_intregs && TYPE_ALIGN (type) > 64)
6946 || TYPE_ALIGN (type) > 128));
6948 /* In case we are passing a structure, verify that it is a consecutive
6949 block in the register save area. If not, we need to do moves. */
6950 if (!need_temp && !REG_P (container))
6952 /* Verify that all registers are strictly consecutive. */
6953 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
6955 int i;
6957 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6959 rtx slot = XVECEXP (container, 0, i);
6960 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6961 || INTVAL (XEXP (slot, 1)) != i * 16)
6962 need_temp = 1;
6965 else
6967 int i;
6969 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6971 rtx slot = XVECEXP (container, 0, i);
6972 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6973 || INTVAL (XEXP (slot, 1)) != i * 8)
6974 need_temp = 1;
6978 if (!need_temp)
6980 int_addr = addr;
6981 sse_addr = addr;
6983 else
6985 int_addr = create_tmp_var (ptr_type_node, "int_addr");
6986 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6987 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6988 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6991 /* First ensure that we fit completely in registers. */
6992 if (needed_intregs)
6994 t = build_int_cst (TREE_TYPE (gpr),
6995 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6996 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6997 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6998 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6999 gimplify_and_add (t, pre_p);
7001 if (needed_sseregs)
7003 t = build_int_cst (TREE_TYPE (fpr),
7004 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7005 + X86_64_REGPARM_MAX * 8);
7006 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7007 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7008 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7009 gimplify_and_add (t, pre_p);
7012 /* Compute index to start of area used for integer regs. */
7013 if (needed_intregs)
7015 /* int_addr = gpr + sav; */
7016 t = fold_convert (sizetype, gpr);
7017 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7018 gimplify_assign (int_addr, t, pre_p);
7020 if (needed_sseregs)
7022 /* sse_addr = fpr + sav; */
7023 t = fold_convert (sizetype, fpr);
7024 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7025 gimplify_assign (sse_addr, t, pre_p);
7027 if (need_temp)
7029 int i;
7030 tree temp = create_tmp_var (type, "va_arg_tmp");
7032 /* addr = &temp; */
7033 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7034 gimplify_assign (addr, t, pre_p);
7036 for (i = 0; i < XVECLEN (container, 0); i++)
7038 rtx slot = XVECEXP (container, 0, i);
7039 rtx reg = XEXP (slot, 0);
7040 enum machine_mode mode = GET_MODE (reg);
7041 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
7042 tree addr_type = build_pointer_type (piece_type);
7043 tree daddr_type = build_pointer_type_for_mode (piece_type,
7044 ptr_mode, true);
7045 tree src_addr, src;
7046 int src_offset;
7047 tree dest_addr, dest;
7049 if (SSE_REGNO_P (REGNO (reg)))
7051 src_addr = sse_addr;
7052 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7054 else
7056 src_addr = int_addr;
7057 src_offset = REGNO (reg) * 8;
7059 src_addr = fold_convert (addr_type, src_addr);
7060 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7061 size_int (src_offset));
7062 src = build_va_arg_indirect_ref (src_addr);
7064 dest_addr = fold_convert (daddr_type, addr);
7065 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7066 size_int (INTVAL (XEXP (slot, 1))));
7067 dest = build_va_arg_indirect_ref (dest_addr);
7069 gimplify_assign (dest, src, pre_p);
7073 if (needed_intregs)
7075 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7076 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7077 gimplify_assign (gpr, t, pre_p);
7080 if (needed_sseregs)
7082 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7083 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7084 gimplify_assign (fpr, t, pre_p);
7087 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7089 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7092 /* ... otherwise out of the overflow area. */
7094 /* When we align a parameter on the stack for the caller, if its
7095 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7096 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here
7097 with the caller. */
7098 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7099 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7100 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7102 /* Care for on-stack alignment if needed. */
7103 if (arg_boundary <= 64
7104 || integer_zerop (TYPE_SIZE (type)))
7105 t = ovf;
7106 else
7108 HOST_WIDE_INT align = arg_boundary / 8;
7109 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7110 size_int (align - 1));
7111 t = fold_convert (sizetype, t);
7112 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7113 size_int (-align));
7114 t = fold_convert (TREE_TYPE (ovf), t);
7116 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7117 gimplify_assign (addr, t, pre_p);
7119 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7120 size_int (rsize * UNITS_PER_WORD));
7121 gimplify_assign (unshare_expr (ovf), t, pre_p);
7123 if (container)
7124 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7126 ptrtype = build_pointer_type (type);
7127 addr = fold_convert (ptrtype, addr);
7129 if (indirect_p)
7130 addr = build_va_arg_indirect_ref (addr);
7131 return build_va_arg_indirect_ref (addr);
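/* For illustration, the GIMPLE built above for
   d = va_arg (ap, double) behaves like

     if (ap->fp_offset >= 48 + 8 * 16)
       goto overflow;                  (out of SSE save-area slots)
     d = *(double *) (ap->reg_save_area + ap->fp_offset);
     ap->fp_offset += 16;

   with the overflow branch fetching from overflow_arg_area instead. */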
7134 /* Return nonzero if OPNUM's MEM should be matched
7135 in movabs* patterns. */
7137 int
7138 ix86_check_movabs (rtx insn, int opnum)
7140 rtx set, mem;
7142 set = PATTERN (insn);
7143 if (GET_CODE (set) == PARALLEL)
7144 set = XVECEXP (set, 0, 0);
7145 gcc_assert (GET_CODE (set) == SET);
7146 mem = XEXP (set, opnum);
7147 while (GET_CODE (mem) == SUBREG)
7148 mem = SUBREG_REG (mem);
7149 gcc_assert (MEM_P (mem));
7150 return (volatile_ok || !MEM_VOLATILE_P (mem));
7153 /* Initialize the table of extra 80387 mathematical constants. */
7155 static void
7156 init_ext_80387_constants (void)
7158 static const char * cst[5] =
7160 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7161 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7162 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7163 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7164 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7166 int i;
7168 for (i = 0; i < 5; i++)
7170 real_from_string (&ext_80387_constants_table[i], cst[i]);
7171 /* Ensure each constant is rounded to XFmode precision. */
7172 real_convert (&ext_80387_constants_table[i],
7173 XFmode, &ext_80387_constants_table[i]);
7176 ext_80387_constants_init = 1;
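/* For reference (editorial note): the strings above are the constants
   pushed by the corresponding x87 instructions, i.e. fldlg2 == log10(2),
   fldln2 == ln(2), fldl2e == log2(e), fldl2t == log2(10) and
   fldpi == pi.  */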
7179 /* Return true if the constant is something that can be loaded with
7180 a special instruction. */
7183 standard_80387_constant_p (rtx x)
7185 enum machine_mode mode = GET_MODE (x);
7187 REAL_VALUE_TYPE r;
7189 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7190 return -1;
7192 if (x == CONST0_RTX (mode))
7193 return 1;
7194 if (x == CONST1_RTX (mode))
7195 return 2;
7197 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7199 /* For XFmode constants, try to find a special 80387 instruction when
7200 optimizing for size or on those CPUs that benefit from them. */
7201 if (mode == XFmode
7202 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7204 int i;
7206 if (! ext_80387_constants_init)
7207 init_ext_80387_constants ();
7209 for (i = 0; i < 5; i++)
7210 if (real_identical (&r, &ext_80387_constants_table[i]))
7211 return i + 3;
7214 /* Load of the constant -0.0 or -1.0 will be split as
7215 fldz;fchs or fld1;fchs sequence. */
7216 if (real_isnegzero (&r))
7217 return 8;
7218 if (real_identical (&r, &dconstm1))
7219 return 9;
7221 return 0;
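/* Summary of the return-value encoding above (derived from the code, for
   quick reference): -1 = not an 80387 CONST_DOUBLE at all; 0 = no special
   instruction; 1 = fldz; 2 = fld1; 3..7 = index + 3 into
   ext_80387_constants_table (fldlg2, fldln2, fldl2e, fldl2t, fldpi);
   8 = -0.0, split as fldz;fchs; 9 = -1.0, split as fld1;fchs.  */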
7224 /* Return the opcode of the special instruction to be used to load
7225 the constant X. */
7227 const char *
7228 standard_80387_constant_opcode (rtx x)
7230 switch (standard_80387_constant_p (x))
7232 case 1:
7233 return "fldz";
7234 case 2:
7235 return "fld1";
7236 case 3:
7237 return "fldlg2";
7238 case 4:
7239 return "fldln2";
7240 case 5:
7241 return "fldl2e";
7242 case 6:
7243 return "fldl2t";
7244 case 7:
7245 return "fldpi";
7246 case 8:
7247 case 9:
7248 return "#";
7249 default:
7250 gcc_unreachable ();
7254 /* Return the CONST_DOUBLE representing the 80387 constant that is
7255 loaded by the specified special instruction. The argument IDX
7256 matches the return value from standard_80387_constant_p. */
7259 standard_80387_constant_rtx (int idx)
7261 int i;
7263 if (! ext_80387_constants_init)
7264 init_ext_80387_constants ();
7266 switch (idx)
7268 case 3:
7269 case 4:
7270 case 5:
7271 case 6:
7272 case 7:
7273 i = idx - 3;
7274 break;
7276 default:
7277 gcc_unreachable ();
7280 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7281 XFmode);
7284 /* Return 1 if mode is a valid mode for sse. */
7285 static int
7286 standard_sse_mode_p (enum machine_mode mode)
7288 switch (mode)
7290 case V16QImode:
7291 case V8HImode:
7292 case V4SImode:
7293 case V2DImode:
7294 case V4SFmode:
7295 case V2DFmode:
7296 return 1;
7298 default:
7299 return 0;
7303 /* Return 1 if X is all 0s. For all 1s, return 2 if X is in a 128-bit
7304 SSE mode and SSE2 is enabled, or 3 if X is in a 256-bit AVX
7305 mode and AVX is enabled. */
7308 standard_sse_constant_p (rtx x)
7310 enum machine_mode mode = GET_MODE (x);
7312 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7313 return 1;
7314 if (vector_all_ones_operand (x, mode))
7316 if (standard_sse_mode_p (mode))
7317 return TARGET_SSE2 ? 2 : -2;
7318 else if (VALID_AVX256_REG_MODE (mode))
7319 return TARGET_AVX ? 3 : -3;
7322 return 0;
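/* Summary of the encoding above (derived from the code): 0 = not a
   special constant; 1 = all zeros, loadable with xorps/xorpd/pxor;
   2 or 3 = all ones in a 128-bit or 256-bit mode with the required ISA
   enabled, loadable with pcmpeqd (comparing a register against itself
   sets every bit of the destination); -2 or -3 = the mode matches but
   SSE2 or AVX is not enabled.  */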
7325 /* Return the opcode of the special instruction to be used to load
7326 the constant X. */
7328 const char *
7329 standard_sse_constant_opcode (rtx insn, rtx x)
7331 switch (standard_sse_constant_p (x))
7333 case 1:
7334 switch (get_attr_mode (insn))
7336 case MODE_V4SF:
7337 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7338 case MODE_V2DF:
7339 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7340 case MODE_TI:
7341 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7342 case MODE_V8SF:
7343 return "vxorps\t%x0, %x0, %x0";
7344 case MODE_V4DF:
7345 return "vxorpd\t%x0, %x0, %x0";
7346 case MODE_OI:
7347 return "vpxor\t%x0, %x0, %x0";
7348 default:
7349 gcc_unreachable ();
7351 case 2:
7352 if (TARGET_AVX)
7353 switch (get_attr_mode (insn))
7355 case MODE_V4SF:
7356 case MODE_V2DF:
7357 case MODE_TI:
7358 return "vpcmpeqd\t%0, %0, %0";
7359 break;
7360 default:
7361 gcc_unreachable ();
7363 else
7364 return "pcmpeqd\t%0, %0";
7366 gcc_unreachable ();
7369 /* Returns 1 if OP contains a symbol reference */
7372 symbolic_reference_mentioned_p (rtx op)
7374 const char *fmt;
7375 int i;
7377 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7378 return 1;
7380 fmt = GET_RTX_FORMAT (GET_CODE (op));
7381 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7383 if (fmt[i] == 'E')
7385 int j;
7387 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7388 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7389 return 1;
7392 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7393 return 1;
7396 return 0;
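/* Example (illustrative): for (plus (reg %ebx) (symbol_ref "foo")) the
   recursive walk above reaches the SYMBOL_REF and returns 1, while for
   (plus (reg %ebx) (const_int 4)) it returns 0.  */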
7399 /* Return 1 if it is appropriate to emit `ret' instructions in the
7400 body of a function. Do this only if the epilogue is simple, needing a
7401 couple of insns. Prior to reloading, we can't tell how many registers
7402 must be saved, so return 0 then. Return 0 if there is no frame
7403 marker to de-allocate. */
7406 ix86_can_use_return_insn_p (void)
7408 struct ix86_frame frame;
7410 if (! reload_completed || frame_pointer_needed)
7411 return 0;
7413 /* Don't allow more than 32k bytes of pop, since that's all we can do
7414 with one instruction. */
7415 if (crtl->args.pops_args
7416 && crtl->args.size >= 32768)
7417 return 0;
7419 ix86_compute_frame_layout (&frame);
7420 return frame.to_allocate == 0 && (frame.nregs + frame.nsseregs) == 0;
7423 /* Value should be nonzero if functions must have frame pointers.
7424 Zero means the frame pointer need not be set up (and parms may
7425 be accessed via the stack pointer) in functions that seem suitable. */
7428 ix86_frame_pointer_required (void)
7430 /* If we accessed previous frames, then the generated code expects
7431 to be able to access the saved ebp value in our frame. */
7432 if (cfun->machine->accesses_prev_frame)
7433 return 1;
7435 /* Several x86 OSes need a frame pointer for other reasons,
7436 usually pertaining to setjmp. */
7437 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7438 return 1;
7440 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7441 the frame pointer by default. Turn it back on now if we've not
7442 got a leaf function. */
7443 if (TARGET_OMIT_LEAF_FRAME_POINTER
7444 && (!current_function_is_leaf
7445 || ix86_current_function_calls_tls_descriptor))
7446 return 1;
7448 if (crtl->profile)
7449 return 1;
7451 return 0;
7454 /* Record that the current function accesses previous call frames. */
7456 void
7457 ix86_setup_frame_addresses (void)
7459 cfun->machine->accesses_prev_frame = 1;
7462 #ifndef USE_HIDDEN_LINKONCE
7463 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7464 # define USE_HIDDEN_LINKONCE 1
7465 # else
7466 # define USE_HIDDEN_LINKONCE 0
7467 # endif
7468 #endif
7470 static int pic_labels_used;
7472 /* Fills in the label name that should be used for a pc thunk for
7473 the given register. */
7475 static void
7476 get_pc_thunk_name (char name[32], unsigned int regno)
7478 gcc_assert (!TARGET_64BIT);
7480 if (USE_HIDDEN_LINKONCE)
7481 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7482 else
7483 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
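/* Example (illustrative): for the %ebx PIC register this produces
   "__i686.get_pc_thunk.bx" in the USE_HIDDEN_LINKONCE case, and a
   compiler-internal label built from the "LPR" prefix and the register
   number otherwise.  */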
7487 /* This function generates code for -fpic that loads %ebx with
7488 the return address of the caller and then returns. */
7490 void
7491 ix86_file_end (void)
7493 rtx xops[2];
7494 int regno;
7496 for (regno = 0; regno < 8; ++regno)
7498 char name[32];
7500 if (! ((pic_labels_used >> regno) & 1))
7501 continue;
7503 get_pc_thunk_name (name, regno);
7505 #if TARGET_MACHO
7506 if (TARGET_MACHO)
7508 switch_to_section (darwin_sections[text_coal_section]);
7509 fputs ("\t.weak_definition\t", asm_out_file);
7510 assemble_name (asm_out_file, name);
7511 fputs ("\n\t.private_extern\t", asm_out_file);
7512 assemble_name (asm_out_file, name);
7513 fputs ("\n", asm_out_file);
7514 ASM_OUTPUT_LABEL (asm_out_file, name);
7516 else
7517 #endif
7518 if (USE_HIDDEN_LINKONCE)
7520 tree decl;
7522 decl = build_decl (BUILTINS_LOCATION,
7523 FUNCTION_DECL, get_identifier (name),
7524 error_mark_node);
7525 TREE_PUBLIC (decl) = 1;
7526 TREE_STATIC (decl) = 1;
7527 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
7529 (*targetm.asm_out.unique_section) (decl, 0);
7530 switch_to_section (get_named_section (decl, NULL, 0));
7532 (*targetm.asm_out.globalize_label) (asm_out_file, name);
7533 fputs ("\t.hidden\t", asm_out_file);
7534 assemble_name (asm_out_file, name);
7535 fputc ('\n', asm_out_file);
7536 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7538 else
7540 switch_to_section (text_section);
7541 ASM_OUTPUT_LABEL (asm_out_file, name);
7544 xops[0] = gen_rtx_REG (Pmode, regno);
7545 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7546 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7547 output_asm_insn ("ret", xops);
7550 if (NEED_INDICATE_EXEC_STACK)
7551 file_end_indicate_exec_stack ();
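/* Sketch of a thunk body emitted by the loop above for %ebx
   (illustrative; the exact directives depend on the object format):

	__i686.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret

   i.e. copy the return address -- which is the address of the
   instruction following the call -- into the PIC register.  */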
7554 /* Emit code for the SET_GOT patterns. */
7556 const char *
7557 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7559 rtx xops[3];
7561 xops[0] = dest;
7563 if (TARGET_VXWORKS_RTP && flag_pic)
7565 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
7566 xops[2] = gen_rtx_MEM (Pmode,
7567 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7568 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7570 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7571 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7572 an unadorned address. */
7573 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7574 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7575 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7576 return "";
7579 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
7581 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7583 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7585 if (!flag_pic)
7586 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7587 else
7588 output_asm_insn ("call\t%a2", xops);
7590 #if TARGET_MACHO
7591 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7592 is what will be referenced by the Mach-O PIC subsystem. */
7593 if (!label)
7594 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7595 #endif
7597 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7598 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7600 if (flag_pic)
7601 output_asm_insn ("pop%z0\t%0", xops);
7603 else
7605 char name[32];
7606 get_pc_thunk_name (name, REGNO (dest));
7607 pic_labels_used |= 1 << REGNO (dest);
7609 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7610 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7611 output_asm_insn ("call\t%X2", xops);
7612 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7613 is what will be referenced by the Mach-O PIC subsystem. */
7614 #if TARGET_MACHO
7615 if (!label)
7616 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7617 else
7618 targetm.asm_out.internal_label (asm_out_file, "L",
7619 CODE_LABEL_NUMBER (label));
7620 #endif
7623 if (TARGET_MACHO)
7624 return "";
7626 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7627 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7628 else
7629 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7631 return "";
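/* Sketch of the sequences emitted above for %ebx (illustrative, AT&T
   syntax). Without deep branch prediction:

	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   With deep branch prediction, the call/pop pair is replaced by a call
   to the pc thunk emitted in ix86_file_end:

	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx  */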
7634 /* Generate an "push" pattern for input ARG. */
7636 static rtx
7637 gen_push (rtx arg)
7639 if (ix86_cfa_state->reg == stack_pointer_rtx)
7640 ix86_cfa_state->offset += UNITS_PER_WORD;
7642 return gen_rtx_SET (VOIDmode,
7643 gen_rtx_MEM (Pmode,
7644 gen_rtx_PRE_DEC (Pmode,
7645 stack_pointer_rtx)),
7646 arg);
7649 /* Return >= 0 if there is an unused call-clobbered register available
7650 for the entire function. */
7652 static unsigned int
7653 ix86_select_alt_pic_regnum (void)
7655 if (current_function_is_leaf && !crtl->profile
7656 && !ix86_current_function_calls_tls_descriptor)
7658 int i, drap;
7659 /* Can't use the same register for both PIC and DRAP. */
7660 if (crtl->drap_reg)
7661 drap = REGNO (crtl->drap_reg);
7662 else
7663 drap = -1;
7664 for (i = 2; i >= 0; --i)
7665 if (i != drap && !df_regs_ever_live_p (i))
7666 return i;
7669 return INVALID_REGNUM;
7672 /* Return 1 if we need to save REGNO. */
7673 static int
7674 ix86_save_reg (unsigned int regno, int maybe_eh_return)
7676 if (pic_offset_table_rtx
7677 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7678 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7679 || crtl->profile
7680 || crtl->calls_eh_return
7681 || crtl->uses_const_pool))
7683 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
7684 return 0;
7685 return 1;
7688 if (crtl->calls_eh_return && maybe_eh_return)
7690 unsigned i;
7691 for (i = 0; ; i++)
7693 unsigned test = EH_RETURN_DATA_REGNO (i);
7694 if (test == INVALID_REGNUM)
7695 break;
7696 if (test == regno)
7697 return 1;
7701 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
7702 return 1;
7704 return (df_regs_ever_live_p (regno)
7705 && !call_used_regs[regno]
7706 && !fixed_regs[regno]
7707 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7710 /* Return the number of saved general purpose registers. */
7712 static int
7713 ix86_nsaved_regs (void)
7715 int nregs = 0;
7716 int regno;
7718 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7719 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7720 nregs ++;
7721 return nregs;
7724 /* Return the number of saved SSE registers. */
7726 static int
7727 ix86_nsaved_sseregs (void)
7729 int nregs = 0;
7730 int regno;
7732 if (ix86_cfun_abi () != MS_ABI)
7733 return 0;
7734 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7735 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7736 nregs ++;
7737 return nregs;
7740 /* Given FROM and TO register numbers, say whether this elimination is
7741 allowed. If stack alignment is needed, we can only replace argument
7742 pointer with hard frame pointer, or replace frame pointer with stack
7743 pointer. Otherwise, frame pointer elimination is automatically
7744 handled and all other eliminations are valid. */
7747 ix86_can_eliminate (int from, int to)
7749 if (stack_realign_fp)
7750 return ((from == ARG_POINTER_REGNUM
7751 && to == HARD_FRAME_POINTER_REGNUM)
7752 || (from == FRAME_POINTER_REGNUM
7753 && to == STACK_POINTER_REGNUM));
7754 else
7755 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7758 /* Return the offset between two registers, one to be eliminated, and the other
7759 its replacement, at the start of a routine. */
7761 HOST_WIDE_INT
7762 ix86_initial_elimination_offset (int from, int to)
7764 struct ix86_frame frame;
7765 ix86_compute_frame_layout (&frame);
7767 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7768 return frame.hard_frame_pointer_offset;
7769 else if (from == FRAME_POINTER_REGNUM
7770 && to == HARD_FRAME_POINTER_REGNUM)
7771 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
7772 else
7774 gcc_assert (to == STACK_POINTER_REGNUM);
7776 if (from == ARG_POINTER_REGNUM)
7777 return frame.stack_pointer_offset;
7779 gcc_assert (from == FRAME_POINTER_REGNUM);
7780 return frame.stack_pointer_offset - frame.frame_pointer_offset;
7784 /* In a dynamically-aligned function, we can't know the offset from
7785 stack pointer to frame pointer, so we must ensure that setjmp
7786 eliminates fp against the hard fp (%ebp) rather than trying to
7787 index from %esp up to the top of the frame across a gap that is
7788 of unknown (at compile-time) size. */
7789 static rtx
7790 ix86_builtin_setjmp_frame_value (void)
7792 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
7795 /* Fill structure ix86_frame about frame of currently computed function. */
7797 static void
7798 ix86_compute_frame_layout (struct ix86_frame *frame)
7800 HOST_WIDE_INT total_size;
7801 unsigned int stack_alignment_needed;
7802 HOST_WIDE_INT offset;
7803 unsigned int preferred_alignment;
7804 HOST_WIDE_INT size = get_frame_size ();
7806 frame->nregs = ix86_nsaved_regs ();
7807 frame->nsseregs = ix86_nsaved_sseregs ();
7808 total_size = size;
7810 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7811 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7813 /* The MS ABI seems to require stack alignment to always be 16, except
7814 for function prologues. */
7815 if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7817 preferred_alignment = 16;
7818 stack_alignment_needed = 16;
7819 crtl->preferred_stack_boundary = 128;
7820 crtl->stack_alignment_needed = 128;
7823 gcc_assert (!size || stack_alignment_needed);
7824 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7825 gcc_assert (preferred_alignment <= stack_alignment_needed);
7827 /* During reload iteration the number of registers saved can change.
7828 Recompute the value as needed. Do not recompute when the number of
7829 registers didn't change, as reload does multiple calls to the function
7830 and does not expect the decision to change within a single iteration. */
7831 if (!optimize_function_for_size_p (cfun)
7832 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7834 int count = frame->nregs;
7836 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7837 /* The fast prologue uses move instead of push to save registers. This
7838 is significantly longer, but it also executes faster, as modern hardware
7839 can execute the moves in parallel but can't do that for push/pop.
7841 Be careful about choosing which prologue to emit: when the function
7842 takes many instructions to execute, we may use the slow version, as
7843 well as when the function is known to be outside a hot spot (this is
7844 known with feedback only). Weight the size of the function by the
7845 number of registers to save, as it is cheap to use one or two push
7846 instructions but very slow to use many of them. */
7847 if (count)
7848 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7849 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7850 || (flag_branch_probabilities
7851 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7852 cfun->machine->use_fast_prologue_epilogue = false;
7853 else
7854 cfun->machine->use_fast_prologue_epilogue
7855 = !expensive_function_p (count);
7857 if (TARGET_PROLOGUE_USING_MOVE
7858 && cfun->machine->use_fast_prologue_epilogue)
7859 frame->save_regs_using_mov = true;
7860 else
7861 frame->save_regs_using_mov = false;
7864 /* Skip return address and saved base pointer. */
7865 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7867 frame->hard_frame_pointer_offset = offset;
7869 /* Round the offset up to the needed alignment, because the realigned
7870 frame starts here. */
7871 if (stack_realign_fp)
7872 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7874 /* Register save area */
7875 offset += frame->nregs * UNITS_PER_WORD;
7877 /* Align SSE reg save area. */
7878 if (frame->nsseregs)
7879 frame->padding0 = ((offset + 16 - 1) & -16) - offset;
7880 else
7881 frame->padding0 = 0;
7883 /* SSE register save area. */
7884 offset += frame->padding0 + frame->nsseregs * 16;
7886 /* Va-arg area */
7887 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7888 offset += frame->va_arg_size;
7890 /* Align start of frame for local function. */
7891 frame->padding1 = ((offset + stack_alignment_needed - 1)
7892 & -stack_alignment_needed) - offset;
7894 offset += frame->padding1;
7896 /* Frame pointer points here. */
7897 frame->frame_pointer_offset = offset;
7899 offset += size;
7901 /* Add the outgoing arguments area. This can be skipped if we eliminated
7902 all the function calls as dead code.
7903 Skipping is however impossible when the function calls alloca. The
7904 alloca expander assumes that the last crtl->outgoing_args_size bytes
7905 of the stack frame are unused. */
7906 if (ACCUMULATE_OUTGOING_ARGS
7907 && (!current_function_is_leaf || cfun->calls_alloca
7908 || ix86_current_function_calls_tls_descriptor))
7910 offset += crtl->outgoing_args_size;
7911 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7913 else
7914 frame->outgoing_arguments_size = 0;
7916 /* Align stack boundary. Only needed if we're calling another function
7917 or using alloca. */
7918 if (!current_function_is_leaf || cfun->calls_alloca
7919 || ix86_current_function_calls_tls_descriptor)
7920 frame->padding2 = ((offset + preferred_alignment - 1)
7921 & -preferred_alignment) - offset;
7922 else
7923 frame->padding2 = 0;
7925 offset += frame->padding2;
7927 /* We've reached end of stack frame. */
7928 frame->stack_pointer_offset = offset;
7930 /* Size prologue needs to allocate. */
7931 frame->to_allocate =
7932 (size + frame->padding1 + frame->padding2
7933 + frame->outgoing_arguments_size + frame->va_arg_size);
7935 if ((!frame->to_allocate && frame->nregs <= 1)
7936 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
7937 frame->save_regs_using_mov = false;
7939 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
7940 && current_function_is_leaf
7941 && !ix86_current_function_calls_tls_descriptor)
7943 frame->red_zone_size = frame->to_allocate;
7944 if (frame->save_regs_using_mov)
7945 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7946 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7947 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7949 else
7950 frame->red_zone_size = 0;
7951 frame->to_allocate -= frame->red_zone_size;
7952 frame->stack_pointer_offset -= frame->red_zone_size;
7953 #if 0
7954 fprintf (stderr, "\n");
7955 fprintf (stderr, "size: %ld\n", (long)size);
7956 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7957 fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
7958 fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
7959 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7960 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7961 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7962 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7963 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7964 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7965 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7966 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7967 (long)frame->hard_frame_pointer_offset);
7968 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7969 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7970 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7971 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7972 #endif
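/* Resulting frame layout, from higher to lower addresses (an editorial
   sketch of the offsets computed above; the padding fields may be zero):

	incoming arguments
	return address			<- %esp at function entry
	saved frame pointer (if used)	<- hard_frame_pointer_offset
	saved integer registers
	padding0
	saved SSE registers (MS ABI)
	va_arg register save area
	padding1
	local variables			<- frame_pointer_offset
	outgoing arguments
	padding2			<- stack_pointer_offset  */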
7975 /* Emit code to save registers in the prologue. */
7977 static void
7978 ix86_emit_save_regs (void)
7980 unsigned int regno;
7981 rtx insn;
7983 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
7984 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7986 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7987 RTX_FRAME_RELATED_P (insn) = 1;
7991 /* Emit code to save registers using MOV insns. The first register
7992 is stored at POINTER + OFFSET. */
7993 static void
7994 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7996 unsigned int regno;
7997 rtx insn;
7999 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8000 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8002 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
8003 Pmode, offset),
8004 gen_rtx_REG (Pmode, regno));
8005 RTX_FRAME_RELATED_P (insn) = 1;
8006 offset += UNITS_PER_WORD;
8010 /* Emit code to save SSE registers using MOV insns. The first register
8011 is stored at POINTER + OFFSET. */
8012 static void
8013 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
8015 unsigned int regno;
8016 rtx insn;
8017 rtx mem;
8019 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8020 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8022 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
8023 set_mem_align (mem, 128);
8024 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
8025 RTX_FRAME_RELATED_P (insn) = 1;
8026 offset += 16;
8030 static GTY(()) rtx queued_cfa_restores;
8032 /* Add a REG_CFA_RESTORE note for REG to INSN, or queue it until the next
8033 stack manipulation insn. Don't add the note if the previously
8034 saved value will be left untouched within the stack red zone till return,
8035 as unwinders can find the same value in the register and
8036 on the stack. */
8038 static void
8039 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT red_offset)
8041 if (TARGET_RED_ZONE
8042 && !TARGET_64BIT_MS_ABI
8043 && red_offset + RED_ZONE_SIZE >= 0
8044 && crtl->args.pops_args < 65536)
8045 return;
8047 if (insn)
8049 add_reg_note (insn, REG_CFA_RESTORE, reg);
8050 RTX_FRAME_RELATED_P (insn) = 1;
8052 else
8053 queued_cfa_restores
8054 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
8057 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
8059 static void
8060 ix86_add_queued_cfa_restore_notes (rtx insn)
8062 rtx last;
8063 if (!queued_cfa_restores)
8064 return;
8065 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
8067 XEXP (last, 1) = REG_NOTES (insn);
8068 REG_NOTES (insn) = queued_cfa_restores;
8069 queued_cfa_restores = NULL_RTX;
8070 RTX_FRAME_RELATED_P (insn) = 1;
8073 /* Expand prologue or epilogue stack adjustment.
8074 The pattern exists to put a dependency on all ebp-based memory accesses.
8075 STYLE should be negative if instructions should be marked as frame
8076 related, zero if the %r11 register is live and cannot be freely used,
8077 and positive otherwise. */
8079 static void
8080 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
8081 int style, bool set_cfa)
8083 rtx insn;
8085 if (! TARGET_64BIT)
8086 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
8087 else if (x86_64_immediate_operand (offset, DImode))
8088 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
8089 else
8091 rtx r11;
8092 /* r11 is used by indirect sibcall return as well, set before the
8093 epilogue and used after the epilogue. ATM indirect sibcall
8094 shouldn't be used together with huge frame sizes in one
8095 function because of the frame_size check in sibcall.c. */
8096 gcc_assert (style);
8097 r11 = gen_rtx_REG (DImode, R11_REG);
8098 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
8099 if (style < 0)
8100 RTX_FRAME_RELATED_P (insn) = 1;
8101 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
8102 offset));
8105 if (style >= 0)
8106 ix86_add_queued_cfa_restore_notes (insn);
8108 if (set_cfa)
8110 rtx r;
8112 gcc_assert (ix86_cfa_state->reg == src);
8113 ix86_cfa_state->offset += INTVAL (offset);
8114 ix86_cfa_state->reg = dest;
8116 r = gen_rtx_PLUS (Pmode, src, offset);
8117 r = gen_rtx_SET (VOIDmode, dest, r);
8118 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
8119 RTX_FRAME_RELATED_P (insn) = 1;
8121 else if (style < 0)
8122 RTX_FRAME_RELATED_P (insn) = 1;
8125 /* Find an available register to be used as the dynamic realign
8126 argument pointer register. Such a register will be written in the
8127 prologue and used at the beginning of the body, so it must not be
8128 1. a parameter passing register.
8129 2. the GOT pointer.
8130 We reuse the static-chain register if it is available. Otherwise,
8131 we use DI for i386 and R13 for x86-64. We chose R13 since it has
8132 shorter encoding.
8134 Return: the regno of the chosen register. */
8136 static unsigned int
8137 find_drap_reg (void)
8139 tree decl = cfun->decl;
8141 if (TARGET_64BIT)
8143 /* Use R13 for a nested function or a function that needs a static
8144 chain. Since a function with a tail call may use any caller-saved
8145 registers in the epilogue, DRAP must not use a caller-saved
8146 register in such a case. */
8147 if ((decl_function_context (decl)
8148 && !DECL_NO_STATIC_CHAIN (decl))
8149 || crtl->tail_call_emit)
8150 return R13_REG;
8152 return R10_REG;
8154 else
8156 /* Use DI for a nested function or a function that needs a static
8157 chain. Since a function with a tail call may use any caller-saved
8158 registers in the epilogue, DRAP must not use a caller-saved
8159 register in such a case. */
8160 if ((decl_function_context (decl)
8161 && !DECL_NO_STATIC_CHAIN (decl))
8162 || crtl->tail_call_emit)
8163 return DI_REG;
8165 /* Reuse static chain register if it isn't used for parameter
8166 passing. */
8167 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8168 && !lookup_attribute ("fastcall",
8169 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8170 return CX_REG;
8171 else
8172 return DI_REG;
8176 /* Update incoming stack boundary and estimated stack alignment. */
8178 static void
8179 ix86_update_stack_boundary (void)
8181 /* Prefer the one specified at command line. */
8182 ix86_incoming_stack_boundary
8183 = (ix86_user_incoming_stack_boundary
8184 ? ix86_user_incoming_stack_boundary
8185 : ix86_default_incoming_stack_boundary);
8187 /* Incoming stack alignment can be changed on individual functions
8188 via force_align_arg_pointer attribute. We use the smallest
8189 incoming stack boundary. */
8190 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
8191 && lookup_attribute (ix86_force_align_arg_pointer_string,
8192 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8193 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
8195 /* The incoming stack frame has to be aligned at least at
8196 parm_stack_boundary. */
8197 if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
8198 ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
8200 /* The stack at the entry of main is aligned by the runtime. We use
8201 the smallest incoming stack boundary. */
8202 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
8203 && DECL_NAME (current_function_decl)
8204 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8205 && DECL_FILE_SCOPE_P (current_function_decl))
8206 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8208 /* x86_64 varargs need 16-byte stack alignment for the register save
8209 area. */
8210 if (TARGET_64BIT
8211 && cfun->stdarg
8212 && crtl->stack_alignment_estimated < 128)
8213 crtl->stack_alignment_estimated = 128;
8216 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
8217 needed or an rtx for DRAP otherwise. */
8219 static rtx
8220 ix86_get_drap_rtx (void)
8222 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8223 crtl->need_drap = true;
8225 if (stack_realign_drap)
8227 /* Assign DRAP to vDRAP and return vDRAP. */
8228 unsigned int regno = find_drap_reg ();
8229 rtx drap_vreg;
8230 rtx arg_ptr;
8231 rtx seq, insn;
8233 arg_ptr = gen_rtx_REG (Pmode, regno);
8234 crtl->drap_reg = arg_ptr;
8236 start_sequence ();
8237 drap_vreg = copy_to_reg (arg_ptr);
8238 seq = get_insns ();
8239 end_sequence ();
8241 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8242 RTX_FRAME_RELATED_P (insn) = 1;
8243 return drap_vreg;
8245 else
8246 return NULL;
8249 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
8251 static rtx
8252 ix86_internal_arg_pointer (void)
8254 return virtual_incoming_args_rtx;
8257 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
8258 to be generated in correct form. */
8259 static void
8260 ix86_finalize_stack_realign_flags (void)
8262 /* Check if stack realignment is really needed after reload, and
8263 store the result in cfun. */
8264 unsigned int incoming_stack_boundary
8265 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8266 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
8267 unsigned int stack_realign = (incoming_stack_boundary
8268 < (current_function_is_leaf
8269 ? crtl->max_used_stack_slot_alignment
8270 : crtl->stack_alignment_needed));
8272 if (crtl->stack_realign_finalized)
8274 /* After stack_realign_needed is finalized, we can no longer
8275 change it. */
8276 gcc_assert (crtl->stack_realign_needed == stack_realign);
8278 else
8280 crtl->stack_realign_needed = stack_realign;
8281 crtl->stack_realign_finalized = true;
8285 /* Expand the prologue into a bunch of separate insns. */
8287 void
8288 ix86_expand_prologue (void)
8290 rtx insn;
8291 bool pic_reg_used;
8292 struct ix86_frame frame;
8293 HOST_WIDE_INT allocate;
8295 ix86_finalize_stack_realign_flags ();
8297 /* DRAP should not coexist with stack_realign_fp */
8298 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8300 /* Initialize CFA state for before the prologue. */
8301 ix86_cfa_state->reg = stack_pointer_rtx;
8302 ix86_cfa_state->offset = INCOMING_FRAME_SP_OFFSET;
8304 ix86_compute_frame_layout (&frame);
8306 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
8307 DRAP is needed and stack realignment is really needed after reload. */
8308 if (crtl->drap_reg && crtl->stack_realign_needed)
8310 rtx x, y;
8311 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8312 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8313 ? 0 : UNITS_PER_WORD);
8315 gcc_assert (stack_realign_drap);
8317 /* Grab the argument pointer. */
8318 x = plus_constant (stack_pointer_rtx,
8319 (UNITS_PER_WORD + param_ptr_offset));
8320 y = crtl->drap_reg;
8322 /* Only need to push the parameter pointer reg if it is a
8323 caller-saved reg. */
8324 if (!call_used_regs[REGNO (crtl->drap_reg)])
8326 /* Push arg pointer reg */
8327 insn = emit_insn (gen_push (y));
8328 RTX_FRAME_RELATED_P (insn) = 1;
8331 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
8332 RTX_FRAME_RELATED_P (insn) = 1;
8333 ix86_cfa_state->reg = crtl->drap_reg;
8335 /* Align the stack. */
8336 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8337 stack_pointer_rtx,
8338 GEN_INT (-align_bytes)));
8339 RTX_FRAME_RELATED_P (insn) = 1;
8341 /* Replicate the return address on the stack so that return
8342 address can be reached via (argp - 1) slot. This is needed
8343 to implement macro RETURN_ADDR_RTX and intrinsic function
8344 expand_builtin_return_addr etc. */
8345 x = crtl->drap_reg;
8346 x = gen_frame_mem (Pmode,
8347 plus_constant (x, -UNITS_PER_WORD));
8348 insn = emit_insn (gen_push (x));
8349 RTX_FRAME_RELATED_P (insn) = 1;
8352 /* Note: AT&T enter does NOT have reversed args. Enter is probably
8353 slower on all targets. Also sdb doesn't like it. */
8355 if (frame_pointer_needed)
8357 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8358 RTX_FRAME_RELATED_P (insn) = 1;
8360 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8361 RTX_FRAME_RELATED_P (insn) = 1;
8363 if (ix86_cfa_state->reg == stack_pointer_rtx)
8364 ix86_cfa_state->reg = hard_frame_pointer_rtx;
8367 if (stack_realign_fp)
8369 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8370 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8372 /* Align the stack. */
8373 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8374 stack_pointer_rtx,
8375 GEN_INT (-align_bytes)));
8376 RTX_FRAME_RELATED_P (insn) = 1;
8379 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
8381 if (!frame.save_regs_using_mov)
8382 ix86_emit_save_regs ();
8383 else
8384 allocate += frame.nregs * UNITS_PER_WORD;
8386 /* When using the red zone we may start register saving before allocating
8387 the stack frame, saving one cycle of the prologue. However, avoid
8388 doing this if we are going to have to probe the stack, since
8389 at least on x86_64 the stack probe can turn into a call that clobbers
8390 a red zone location. */
8391 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8392 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8393 ix86_emit_save_regs_using_mov ((frame_pointer_needed
8394 && !crtl->stack_realign_needed)
8395 ? hard_frame_pointer_rtx
8396 : stack_pointer_rtx,
8397 -frame.nregs * UNITS_PER_WORD);
8399 if (allocate == 0)
8401 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8402 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8403 GEN_INT (-allocate), -1,
8404 ix86_cfa_state->reg == stack_pointer_rtx);
8405 else
8407 /* Only valid for Win32. */
8408 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8409 bool eax_live;
8410 rtx t;
8412 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
8414 if (cfun->machine->call_abi == MS_ABI)
8415 eax_live = false;
8416 else
8417 eax_live = ix86_eax_live_at_start_p ();
8419 if (eax_live)
8421 emit_insn (gen_push (eax));
8422 allocate -= UNITS_PER_WORD;
8425 emit_move_insn (eax, GEN_INT (allocate));
8427 if (TARGET_64BIT)
8428 insn = gen_allocate_stack_worker_64 (eax, eax);
8429 else
8430 insn = gen_allocate_stack_worker_32 (eax, eax);
8431 insn = emit_insn (insn);
8433 if (ix86_cfa_state->reg == stack_pointer_rtx)
8435 ix86_cfa_state->offset += allocate;
8436 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8437 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8438 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
8439 RTX_FRAME_RELATED_P (insn) = 1;
8442 if (eax_live)
8444 if (frame_pointer_needed)
8445 t = plus_constant (hard_frame_pointer_rtx,
8446 allocate
8447 - frame.to_allocate
8448 - frame.nregs * UNITS_PER_WORD);
8449 else
8450 t = plus_constant (stack_pointer_rtx, allocate);
8451 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
8455 if (frame.save_regs_using_mov
8456 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8457 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8459 if (!frame_pointer_needed
8460 || !frame.to_allocate
8461 || crtl->stack_realign_needed)
8462 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8463 frame.to_allocate
8464 + frame.nsseregs * 16 + frame.padding0);
8465 else
8466 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8467 -frame.nregs * UNITS_PER_WORD);
8469 if (!frame_pointer_needed
8470 || !frame.to_allocate
8471 || crtl->stack_realign_needed)
8472 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8473 frame.to_allocate);
8474 else
8475 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8476 - frame.nregs * UNITS_PER_WORD
8477 - frame.nsseregs * 16
8478 - frame.padding0);
8480 pic_reg_used = false;
8481 if (pic_offset_table_rtx
8482 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8483 || crtl->profile))
8485 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8487 if (alt_pic_reg_used != INVALID_REGNUM)
8488 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8490 pic_reg_used = true;
8493 if (pic_reg_used)
8495 if (TARGET_64BIT)
8497 if (ix86_cmodel == CM_LARGE_PIC)
8499 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8500 rtx label = gen_label_rtx ();
8501 emit_label (label);
8502 LABEL_PRESERVE_P (label) = 1;
8503 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8504 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8505 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8506 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8507 pic_offset_table_rtx, tmp_reg));
8509 else
8510 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8512 else
8513 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8516 /* In the pic_reg_used case, make sure that the GOT load isn't deleted
8517 when mcount needs it. A blockage to avoid call movement across the
8518 mcount call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
8519 note. */
8520 if (crtl->profile && pic_reg_used)
8521 emit_insn (gen_prologue_use (pic_offset_table_rtx));
8523 if (crtl->drap_reg && !crtl->stack_realign_needed)
8525 /* vDRAP is set up, but after reload it turns out stack realignment
8526 isn't necessary; here we emit the prologue to set up DRAP
8527 without the stack realignment adjustment. */
8528 int drap_bp_offset = UNITS_PER_WORD * 2;
8529 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8530 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8533 /* Prevent instructions from being scheduled into the register save push
8534 sequence when access to the red zone area is done through the frame
8535 pointer. The offset between the frame pointer and the stack pointer is
8536 calculated relative to the value of the stack pointer at the end of the
8537 function prologue, and moving instructions that access the red zone area
8538 via the frame pointer inside the push sequence violates this assumption. */
8539 if (frame_pointer_needed && frame.red_zone_size)
8540 emit_insn (gen_memory_blockage ());
8542 /* Emit cld instruction if stringops are used in the function. */
8543 if (TARGET_CLD && ix86_current_function_needs_cld)
8544 emit_insn (gen_cld ());
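/* A typical 32-bit frame-pointer prologue produced by the code above,
   with no stack realignment, DRAP or probing (an illustrative sketch):

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%esi			(one push per saved register)
	subl	$N, %esp		(N == frame.to_allocate)

   When frame.save_regs_using_mov is set, the pushes are replaced by
   movl spills emitted after the subl.  */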
8547 /* Emit code to restore REG using a POP insn. */
8549 static void
8550 ix86_emit_restore_reg_using_pop (rtx reg, HOST_WIDE_INT red_offset)
8552 rtx insn = emit_insn (ix86_gen_pop1 (reg));
8554 if (ix86_cfa_state->reg == crtl->drap_reg
8555 && REGNO (reg) == REGNO (crtl->drap_reg))
8557 /* Previously we'd represented the CFA as an expression
8558 like *(%ebp - 8). We've just popped that value from
8559 the stack, which means we need to reset the CFA to
8560 the drap register. This will remain until we restore
8561 the stack pointer. */
8562 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8563 RTX_FRAME_RELATED_P (insn) = 1;
8564 return;
8567 if (ix86_cfa_state->reg == stack_pointer_rtx)
8569 ix86_cfa_state->offset -= UNITS_PER_WORD;
8570 add_reg_note (insn, REG_CFA_ADJUST_CFA,
8571 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
8572 RTX_FRAME_RELATED_P (insn) = 1;
8575 /* When the frame pointer is the CFA, and we pop it, we are
8576 swapping back to the stack pointer as the CFA. This happens
8577 for stack frames that don't allocate other data, so we assume
8578 the stack pointer is now pointing at the return address, i.e.
8579 the function entry state, which makes the offset be 1 word. */
8580 else if (ix86_cfa_state->reg == hard_frame_pointer_rtx
8581 && reg == hard_frame_pointer_rtx)
8583 ix86_cfa_state->reg = stack_pointer_rtx;
8584 ix86_cfa_state->offset = UNITS_PER_WORD;
8586 add_reg_note (insn, REG_CFA_DEF_CFA,
8587 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8588 GEN_INT (UNITS_PER_WORD)));
8589 RTX_FRAME_RELATED_P (insn) = 1;
8592 ix86_add_cfa_restore_note (insn, reg, red_offset);
8595 /* Emit code to restore saved registers using POP insns. */
8597 static void
8598 ix86_emit_restore_regs_using_pop (HOST_WIDE_INT red_offset)
8600 int regno;
8602 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8603 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8605 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno),
8606 red_offset);
8607 red_offset += UNITS_PER_WORD;
8611 /* Emit code and notes for the LEAVE instruction. */
8613 static void
8614 ix86_emit_leave (HOST_WIDE_INT red_offset)
8616 rtx insn = emit_insn (ix86_gen_leave ());
8618 ix86_add_queued_cfa_restore_notes (insn);
8620 if (ix86_cfa_state->reg == hard_frame_pointer_rtx)
8622 add_reg_note (insn, REG_CFA_ADJUST_CFA,
8623 copy_rtx (XVECEXP (PATTERN (insn), 0, 0)));
8624 RTX_FRAME_RELATED_P (insn) = 1;
8625 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, red_offset);
8629 /* Emit code to restore saved registers using MOV insns. First register
8630 is restored from POINTER + OFFSET. */
8631 static void
8632 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8633 HOST_WIDE_INT red_offset,
8634 int maybe_eh_return)
8636 unsigned int regno;
8637 rtx base_address = gen_rtx_MEM (Pmode, pointer);
8638 rtx insn;
8640 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8641 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8643 rtx reg = gen_rtx_REG (Pmode, regno);
8645 /* Ensure that adjust_address won't be forced to produce pointer
8646 out of range allowed by x86-64 instruction set. */
8647 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8649 rtx r11;
8651 r11 = gen_rtx_REG (DImode, R11_REG);
8652 emit_move_insn (r11, GEN_INT (offset));
8653 emit_insn (gen_adddi3 (r11, r11, pointer));
8654 base_address = gen_rtx_MEM (Pmode, r11);
8655 offset = 0;
8657 insn = emit_move_insn (reg,
8658 adjust_address (base_address, Pmode, offset));
8659 offset += UNITS_PER_WORD;
8661 if (ix86_cfa_state->reg == crtl->drap_reg
8662 && regno == REGNO (crtl->drap_reg))
8664 /* Previously we'd represented the CFA as an expression
8665 like *(%ebp - 8). We've just popped that value from
8666 the stack, which means we need to reset the CFA to
8667 the drap register. This will remain until we restore
8668 the stack pointer. */
8669 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8670 RTX_FRAME_RELATED_P (insn) = 1;
8672 else
8673 ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
8675 red_offset += UNITS_PER_WORD;
8679 /* Emit code to restore saved SSE registers using MOV insns. The first
8680 register is restored from POINTER + OFFSET. */
8681 static void
8682 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8683 HOST_WIDE_INT red_offset,
8684 int maybe_eh_return)
8686 int regno;
8687 rtx base_address = gen_rtx_MEM (TImode, pointer);
8688 rtx mem, insn;
8690 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8691 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8693 rtx reg = gen_rtx_REG (TImode, regno);
8695 /* Ensure that adjust_address won't be forced to produce pointer
8696 out of range allowed by x86-64 instruction set. */
8697 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8699 rtx r11;
8701 r11 = gen_rtx_REG (DImode, R11_REG);
8702 emit_move_insn (r11, GEN_INT (offset));
8703 emit_insn (gen_adddi3 (r11, r11, pointer));
8704 base_address = gen_rtx_MEM (TImode, r11);
8705 offset = 0;
8707 mem = adjust_address (base_address, TImode, offset);
8708 set_mem_align (mem, 128);
8709 insn = emit_move_insn (reg, mem);
8710 offset += 16;
8712 ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
8714 red_offset += 16;
8718 /* Restore function stack, frame, and registers. */
8720 void
8721 ix86_expand_epilogue (int style)
8723 int sp_valid;
8724 struct ix86_frame frame;
8725 HOST_WIDE_INT offset, red_offset;
8726 struct machine_cfa_state cfa_state_save = *ix86_cfa_state;
8727 bool using_drap;
8729 ix86_finalize_stack_realign_flags ();
8731 /* When stack is realigned, SP must be valid. */
8732 sp_valid = (!frame_pointer_needed
8733 || current_function_sp_is_unchanging
8734 || stack_realign_fp);
8736 ix86_compute_frame_layout (&frame);
8738 /* See the comment about red zone and frame
8739 pointer usage in ix86_expand_prologue. */
8740 if (frame_pointer_needed && frame.red_zone_size)
8741 emit_insn (gen_memory_blockage ());
8743 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
8744 gcc_assert (!using_drap || ix86_cfa_state->reg == crtl->drap_reg);
8746 /* Calculate start of saved registers relative to ebp. Special care
8747 must be taken for the normal return case of a function using
8748 eh_return: the eax and edx registers are marked as saved, but not
8749 restored along this path. */
8750 offset = frame.nregs;
8751 if (crtl->calls_eh_return && style != 2)
8752 offset -= 2;
8753 offset *= -UNITS_PER_WORD;
8754 offset -= frame.nsseregs * 16 + frame.padding0;
8756 /* Calculate start of saved registers relative to esp on entry of the
8757 function. When realigning stack, this needs to be the most negative
8758 value possible at runtime. */
8759 red_offset = offset;
8760 if (using_drap)
8761 red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
8762 + UNITS_PER_WORD;
8763 else if (stack_realign_fp)
8764 red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
8765 - UNITS_PER_WORD;
8766 if (frame_pointer_needed)
8767 red_offset -= UNITS_PER_WORD;
8769 /* If we're only restoring one register and sp is not valid, then
8770 use a move instruction to restore the register, since it's
8771 less work than reloading sp and popping the register.
8773 The default code results in a stack adjustment using an add/lea
8774 instruction, while this code results in a LEAVE instruction (or discrete
8775 equivalent), so it is profitable in some other cases as well, especially
8776 when there are no registers to restore. We also use this code when
8777 TARGET_USE_LEAVE and there is exactly one register to pop. This
8778 heuristic may need some tuning in the future. */
8779 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
8780 || (TARGET_EPILOGUE_USING_MOVE
8781 && cfun->machine->use_fast_prologue_epilogue
8782 && ((frame.nregs + frame.nsseregs) > 1 || frame.to_allocate))
8783 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs)
8784 && frame.to_allocate)
8785 || (frame_pointer_needed && TARGET_USE_LEAVE
8786 && cfun->machine->use_fast_prologue_epilogue
8787 && (frame.nregs + frame.nsseregs) == 1)
8788 || crtl->calls_eh_return)
8790 /* Restore registers. We can use ebp or esp to address the memory
8791 locations. If both are available, default to ebp, since offsets
8792 are known to be small. Only exception is esp pointing directly
8793 to the end of block of saved registers, where we may simplify
8794 addressing mode.
8796 If we are realigning the stack with bp and sp, the register restores
8797 can't be addressed via bp; sp must be used instead. */
8799 if (!frame_pointer_needed
8800 || (sp_valid && !frame.to_allocate)
8801 || stack_realign_fp)
8803 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8804 frame.to_allocate, red_offset,
8805 style == 2);
8806 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8807 frame.to_allocate
8808 + frame.nsseregs * 16
8809 + frame.padding0,
8810 red_offset
8811 + frame.nsseregs * 16
8812 + frame.padding0, style == 2);
8814 else
8816 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
8817 offset, red_offset,
8818 style == 2);
8819 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8820 offset
8821 + frame.nsseregs * 16
8822 + frame.padding0,
8823 red_offset
8824 + frame.nsseregs * 16
8825 + frame.padding0, style == 2);
8828 red_offset -= offset;
8830 /* eh_return epilogues need %ecx added to the stack pointer. */
8831 if (style == 2)
8833 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8835 /* Stack align doesn't work with eh_return. */
8836 gcc_assert (!crtl->stack_realign_needed);
8838 if (frame_pointer_needed)
8840 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8841 tmp = plus_constant (tmp, UNITS_PER_WORD);
8842 tmp = emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8844 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8845 tmp = emit_move_insn (hard_frame_pointer_rtx, tmp);
8847 /* Note that we use SA as a temporary CFA, as the return
8848 address is at the proper place relative to it. We
8849 pretend this happens at the FP restore insn because
8850 prior to this insn the FP would be stored at the wrong
8851 offset relative to SA, and after this insn we have no
8852 other reasonable register to use for the CFA. We don't
8853 bother resetting the CFA to the SP for the duration of
8854 the return insn. */
8855 add_reg_note (tmp, REG_CFA_DEF_CFA,
8856 plus_constant (sa, UNITS_PER_WORD));
8857 ix86_add_queued_cfa_restore_notes (tmp);
8858 add_reg_note (tmp, REG_CFA_RESTORE, hard_frame_pointer_rtx);
8859 RTX_FRAME_RELATED_P (tmp) = 1;
8860 ix86_cfa_state->reg = sa;
8861 ix86_cfa_state->offset = UNITS_PER_WORD;
8863 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8864 const0_rtx, style, false);
8866 else
8868 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8869 tmp = plus_constant (tmp, (frame.to_allocate
8870 + frame.nregs * UNITS_PER_WORD
8871 + frame.nsseregs * 16
8872 + frame.padding0));
8873 tmp = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8874 ix86_add_queued_cfa_restore_notes (tmp);
8876 gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
8877 if (ix86_cfa_state->offset != UNITS_PER_WORD)
8879 ix86_cfa_state->offset = UNITS_PER_WORD;
8880 add_reg_note (tmp, REG_CFA_DEF_CFA,
8881 plus_constant (stack_pointer_rtx,
8882 UNITS_PER_WORD));
8883 RTX_FRAME_RELATED_P (tmp) = 1;
8887 else if (!frame_pointer_needed)
8888 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8889 GEN_INT (frame.to_allocate
8890 + frame.nregs * UNITS_PER_WORD
8891 + frame.nsseregs * 16
8892 + frame.padding0),
8893 style, !using_drap);
8894 /* If not an i386, mov & pop is faster than "leave". */
8895 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8896 || !cfun->machine->use_fast_prologue_epilogue)
8897 ix86_emit_leave (red_offset);
8898 else
8900 pro_epilogue_adjust_stack (stack_pointer_rtx,
8901 hard_frame_pointer_rtx,
8902 const0_rtx, style, !using_drap);
8904 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx, red_offset);
8907 else
8909 /* First step is to deallocate the stack frame so that we can
8910 pop the registers.
8912 If we realign the stack with the frame pointer, then the stack
8913 pointer can't be recovered via lea $offset(%bp), %sp, because
8914 there is a padding area between bp and sp for the realignment;
8915 "add $to_allocate, %sp" must be used instead. */
8916 if (!sp_valid)
8918 gcc_assert (frame_pointer_needed);
8919 gcc_assert (!stack_realign_fp);
8920 pro_epilogue_adjust_stack (stack_pointer_rtx,
8921 hard_frame_pointer_rtx,
8922 GEN_INT (offset), style, false);
8923 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8924 frame.to_allocate, red_offset,
8925 style == 2);
8926 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8927 GEN_INT (frame.nsseregs * 16),
8928 style, false);
8930 else if (frame.to_allocate || frame.nsseregs)
8932 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8933 frame.to_allocate, red_offset,
8934 style == 2);
8935 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8936 GEN_INT (frame.to_allocate
8937 + frame.nsseregs * 16
8938 + frame.padding0), style,
8939 !using_drap && !frame_pointer_needed);
8942 ix86_emit_restore_regs_using_pop (red_offset + frame.nsseregs * 16
8943 + frame.padding0);
8944 red_offset -= offset;
8946 if (frame_pointer_needed)
8948 /* Leave results in shorter dependency chains on CPUs that are
8949 able to grok it fast. */
8950 if (TARGET_USE_LEAVE)
8951 ix86_emit_leave (red_offset);
8952 else
8954 /* If stack realignment really happened, recovering the stack
8955 pointer from the hard frame pointer is a must, if not using
8956 leave. */
8957 if (stack_realign_fp)
8958 pro_epilogue_adjust_stack (stack_pointer_rtx,
8959 hard_frame_pointer_rtx,
8960 const0_rtx, style, !using_drap);
8961 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx,
8962 red_offset);
8967 if (using_drap)
8969 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8970 ? 0 : UNITS_PER_WORD);
8971 rtx insn;
8973 gcc_assert (stack_realign_drap);
8975 insn = emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8976 crtl->drap_reg,
8977 GEN_INT (-(UNITS_PER_WORD
8978 + param_ptr_offset))));
8980 ix86_cfa_state->reg = stack_pointer_rtx;
8981 ix86_cfa_state->offset = UNITS_PER_WORD + param_ptr_offset;
8983 add_reg_note (insn, REG_CFA_DEF_CFA,
8984 gen_rtx_PLUS (Pmode, ix86_cfa_state->reg,
8985 GEN_INT (ix86_cfa_state->offset)));
8986 RTX_FRAME_RELATED_P (insn) = 1;
8988 if (param_ptr_offset)
8989 ix86_emit_restore_reg_using_pop (crtl->drap_reg, -UNITS_PER_WORD);
8992 /* Sibcall epilogues don't want a return instruction. */
8993 if (style == 0)
8995 *ix86_cfa_state = cfa_state_save;
8996 return;
8999 if (crtl->args.pops_args && crtl->args.size)
9001 rtx popc = GEN_INT (crtl->args.pops_args);
9003 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
9004 address, do an explicit add, and jump indirectly to the caller. */
9006 if (crtl->args.pops_args >= 65536)
9008 rtx ecx = gen_rtx_REG (SImode, CX_REG);
9009 rtx insn;
9011 /* There is no "pascal" calling convention in any 64bit ABI. */
9012 gcc_assert (!TARGET_64BIT);
9014 insn = emit_insn (gen_popsi1 (ecx));
9015 ix86_cfa_state->offset -= UNITS_PER_WORD;
9017 add_reg_note (insn, REG_CFA_ADJUST_CFA,
9018 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
9019 add_reg_note (insn, REG_CFA_REGISTER,
9020 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
9021 RTX_FRAME_RELATED_P (insn) = 1;
9023 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9024 popc, -1, true);
9025 emit_jump_insn (gen_return_indirect_internal (ecx));
9027 else
9028 emit_jump_insn (gen_return_pop_internal (popc));
9030 else
9031 emit_jump_insn (gen_return_internal ());
9033 /* Restore the state back to the state from the prologue,
9034 so that it's correct for the next epilogue. */
9035 *ix86_cfa_state = cfa_state_save;
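/* Typical epilogues produced above (illustrative sketches). With a
   frame pointer on a TARGET_USE_LEAVE CPU:

	leave
	ret

   On the pop-based path:

	addl	$N, %esp		(deallocate the frame)
	popl	%esi			(one pop per saved register)
	popl	%ebp			(only if a frame pointer was used)
	ret  */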
9038 /* Reset from the function's potential modifications. */
9040 static void
9041 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
9042 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
9044 if (pic_offset_table_rtx)
9045 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
9046 #if TARGET_MACHO
9047 /* Mach-O doesn't support labels at the end of objects, so if
9048 it looks like we might want one, insert a NOP. */
9050 rtx insn = get_last_insn ();
9051 while (insn
9052 && NOTE_P (insn)
9053 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
9054 insn = PREV_INSN (insn);
9055 if (insn
9056 && (LABEL_P (insn)
9057 || (NOTE_P (insn)
9058 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
9059 fputs ("\tnop\n", file);
9061 #endif
9065 /* Extract the parts of an RTL expression that is a valid memory address
9066 for an instruction. Return 0 if the structure of the address is
9067 grossly off. Return -1 if the address contains ASHIFT, so it is not
9068 strictly valid, but still used for computing length of lea instruction. */
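/* Illustrative example of the decomposition below: the address
   (plus:SI (plus:SI (mult:SI (reg:SI B) (const_int 4)) (reg:SI A))
   (const_int 12)), i.e. 12(A,B,4), yields out->base = A,
   out->index = B, out->scale = 4, out->disp = (const_int 12)
   and out->seg = SEG_DEFAULT.  */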
9071 ix86_decompose_address (rtx addr, struct ix86_address *out)
9073 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
9074 rtx base_reg, index_reg;
9075 HOST_WIDE_INT scale = 1;
9076 rtx scale_rtx = NULL_RTX;
9077 int retval = 1;
9078 enum ix86_address_seg seg = SEG_DEFAULT;
9080 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
9081 base = addr;
9082 else if (GET_CODE (addr) == PLUS)
9084 rtx addends[4], op;
9085 int n = 0, i;
9087 op = addr;
9090 if (n >= 4)
9091 return 0;
9092 addends[n++] = XEXP (op, 1);
9093 op = XEXP (op, 0);
9095 while (GET_CODE (op) == PLUS);
9096 if (n >= 4)
9097 return 0;
9098 addends[n] = op;
9100 for (i = n; i >= 0; --i)
9102 op = addends[i];
9103 switch (GET_CODE (op))
9105 case MULT:
9106 if (index)
9107 return 0;
9108 index = XEXP (op, 0);
9109 scale_rtx = XEXP (op, 1);
9110 break;
9112 case UNSPEC:
9113 if (XINT (op, 1) == UNSPEC_TP
9114 && TARGET_TLS_DIRECT_SEG_REFS
9115 && seg == SEG_DEFAULT)
9116 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
9117 else
9118 return 0;
9119 break;
9121 case REG:
9122 case SUBREG:
9123 if (!base)
9124 base = op;
9125 else if (!index)
9126 index = op;
9127 else
9128 return 0;
9129 break;
9131 case CONST:
9132 case CONST_INT:
9133 case SYMBOL_REF:
9134 case LABEL_REF:
9135 if (disp)
9136 return 0;
9137 disp = op;
9138 break;
9140 default:
9141 return 0;
9145 else if (GET_CODE (addr) == MULT)
9147 index = XEXP (addr, 0); /* index*scale */
9148 scale_rtx = XEXP (addr, 1);
9150 else if (GET_CODE (addr) == ASHIFT)
9152 rtx tmp;
9154 /* We're called for lea too, which implements ashift on occasion. */
9155 index = XEXP (addr, 0);
9156 tmp = XEXP (addr, 1);
9157 if (!CONST_INT_P (tmp))
9158 return 0;
9159 scale = INTVAL (tmp);
9160 if ((unsigned HOST_WIDE_INT) scale > 3)
9161 return 0;
9162 scale = 1 << scale;
9163 retval = -1;
9165 else
9166 disp = addr; /* displacement */
9168 /* Extract the integral value of scale. */
9169 if (scale_rtx)
9171 if (!CONST_INT_P (scale_rtx))
9172 return 0;
9173 scale = INTVAL (scale_rtx);
9176 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
9177 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
9179 /* Avoid useless 0 displacement. */
9180 if (disp == const0_rtx && (base || index))
9181 disp = NULL_RTX;
9183 /* Allow arg pointer and stack pointer as index if there is no scaling. */
9184 if (base_reg && index_reg && scale == 1
9185 && (index_reg == arg_pointer_rtx
9186 || index_reg == frame_pointer_rtx
9187 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
9189 rtx tmp;
9190 tmp = base, base = index, index = tmp;
9191 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
9194 /* Special case: %ebp cannot be encoded as a base without a displacement.
9195 Similarly %r13. */
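/* (In encoding terms: mod = 00 with a base field of 101 selects
   disp32 addressing with no base, so a zero displacement byte must
   be emitted; e.g. "(%ebp)" is assembled as "0(%ebp)".)  */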
9196 if (!disp
9197 && base_reg
9198 && (base_reg == hard_frame_pointer_rtx
9199 || base_reg == frame_pointer_rtx
9200 || base_reg == arg_pointer_rtx
9201 || (REG_P (base_reg)
9202 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
9203 || REGNO (base_reg) == R13_REG))))
9204 disp = const0_rtx;
9206 /* Special case: on K6, [%esi] causes the instruction to be vector
9207 decoded. Avoid this by transforming to [%esi+0].
9208 Reload calls address legitimization without cfun defined, so we need
9209 to test cfun for being non-NULL. */
9210 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
9211 && base_reg && !index_reg && !disp
9212 && REG_P (base_reg)
9213 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
9214 disp = const0_rtx;
9216 /* Special case: encode reg+reg instead of reg*2. */
9217 if (!base && index && scale == 2)
9218 base = index, base_reg = index_reg, scale = 1;
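/* E.g. "lea (%eax,%eax), %edx" encodes in fewer bytes than
   "lea 0(,%eax,2), %edx", since an index without a base forces a
   four-byte displacement field.  */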
9220 /* Special case: scaling cannot be encoded without base or displacement. */
9221 if (!base && !disp && index && scale != 1)
9222 disp = const0_rtx;
9224 out->base = base;
9225 out->index = index;
9226 out->disp = disp;
9227 out->scale = scale;
9228 out->seg = seg;
9230 return retval;
9233 /* Return the cost of the memory address x.
9234 For i386, it is better to use a complex address than to let gcc copy
9235 the address into a reg and make a new pseudo, but not if the address
9236 requires two regs - that would mean more pseudos with longer
9237 lifetimes. */
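/* Illustrative costs as computed below: a hard register base such as
   (reg:SI bx) costs 1, a pseudo-register base costs 2, and distinct
   pseudo base and index registers cost 3.  */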
9238 static int
9239 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
9241 struct ix86_address parts;
9242 int cost = 1;
9243 int ok = ix86_decompose_address (x, &parts);
9245 gcc_assert (ok);
9247 if (parts.base && GET_CODE (parts.base) == SUBREG)
9248 parts.base = SUBREG_REG (parts.base);
9249 if (parts.index && GET_CODE (parts.index) == SUBREG)
9250 parts.index = SUBREG_REG (parts.index);
9252 /* Attempt to minimize number of registers in the address. */
9253 if ((parts.base
9254 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
9255 || (parts.index
9256 && (!REG_P (parts.index)
9257 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
9258 cost++;
9260 if (parts.base
9261 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
9262 && parts.index
9263 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
9264 && parts.base != parts.index)
9265 cost++;
9267 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
9268 since its predecode logic can't detect the length of instructions
9269 and they degenerate to vector decoding. Increase the cost of such
9270 addresses here. The penalty is at least 2 cycles. It may be worthwhile
9271 to split such addresses or even to refuse them altogether.
9273 The following addressing modes are affected:
9274 [base+scale*index]
9275 [scale*index+disp]
9276 [base+index]
9278 The first and last cases may be avoidable by explicitly coding the zero
9279 into the memory address, but I don't have an AMD-K6 machine handy to
9280 check this theory. */
9282 if (TARGET_K6
9283 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
9284 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
9285 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
9286 cost += 10;
9288 return cost;
9291 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
9292 this is used to form addresses to local data when -fPIC is in
9293 use. */
9295 static bool
9296 darwin_local_data_pic (rtx disp)
9298 return (GET_CODE (disp) == UNSPEC
9299 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
9302 /* Determine if a given RTX is a valid constant. We already know this
9303 satisfies CONSTANT_P. */
9305 bool
9306 legitimate_constant_p (rtx x)
9308 switch (GET_CODE (x))
9310 case CONST:
9311 x = XEXP (x, 0);
9313 if (GET_CODE (x) == PLUS)
9315 if (!CONST_INT_P (XEXP (x, 1)))
9316 return false;
9317 x = XEXP (x, 0);
9320 if (TARGET_MACHO && darwin_local_data_pic (x))
9321 return true;
9323 /* Only some unspecs are valid as "constants". */
9324 if (GET_CODE (x) == UNSPEC)
9325 switch (XINT (x, 1))
9327 case UNSPEC_GOT:
9328 case UNSPEC_GOTOFF:
9329 case UNSPEC_PLTOFF:
9330 return TARGET_64BIT;
9331 case UNSPEC_TPOFF:
9332 case UNSPEC_NTPOFF:
9333 x = XVECEXP (x, 0, 0);
9334 return (GET_CODE (x) == SYMBOL_REF
9335 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9336 case UNSPEC_DTPOFF:
9337 x = XVECEXP (x, 0, 0);
9338 return (GET_CODE (x) == SYMBOL_REF
9339 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
9340 default:
9341 return false;
9344 /* We must have drilled down to a symbol. */
9345 if (GET_CODE (x) == LABEL_REF)
9346 return true;
9347 if (GET_CODE (x) != SYMBOL_REF)
9348 return false;
9349 /* FALLTHRU */
9351 case SYMBOL_REF:
9352 /* TLS symbols are never valid. */
9353 if (SYMBOL_REF_TLS_MODEL (x))
9354 return false;
9356 /* DLLIMPORT symbols are never valid. */
9357 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9358 && SYMBOL_REF_DLLIMPORT_P (x))
9359 return false;
9360 break;
9362 case CONST_DOUBLE:
9363 if (GET_MODE (x) == TImode
9364 && x != CONST0_RTX (TImode)
9365 && !TARGET_64BIT)
9366 return false;
9367 break;
9369 case CONST_VECTOR:
9370 if (!standard_sse_constant_p (x))
9371 return false;
9373 default:
9374 break;
9377 /* Otherwise we handle everything else in the move patterns. */
9378 return true;
9381 /* Determine if it's legal to put X into the constant pool. This
9382 is not possible for the address of thread-local symbols, which
9383 is checked above. */
9385 static bool
9386 ix86_cannot_force_const_mem (rtx x)
9388 /* We can always put integral constants and vectors in memory. */
9389 switch (GET_CODE (x))
9391 case CONST_INT:
9392 case CONST_DOUBLE:
9393 case CONST_VECTOR:
9394 return false;
9396 default:
9397 break;
9399 return !legitimate_constant_p (x);
9403 /* Nonzero if the constant value X is a legitimate general operand
9404 when generating PIC code. It is given that flag_pic is on and
9405 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
9407 bool
9408 legitimate_pic_operand_p (rtx x)
9410 rtx inner;
9412 switch (GET_CODE (x))
9414 case CONST:
9415 inner = XEXP (x, 0);
9416 if (GET_CODE (inner) == PLUS
9417 && CONST_INT_P (XEXP (inner, 1)))
9418 inner = XEXP (inner, 0);
9420 /* Only some unspecs are valid as "constants". */
9421 if (GET_CODE (inner) == UNSPEC)
9422 switch (XINT (inner, 1))
9424 case UNSPEC_GOT:
9425 case UNSPEC_GOTOFF:
9426 case UNSPEC_PLTOFF:
9427 return TARGET_64BIT;
9428 case UNSPEC_TPOFF:
9429 x = XVECEXP (inner, 0, 0);
9430 return (GET_CODE (x) == SYMBOL_REF
9431 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9432 case UNSPEC_MACHOPIC_OFFSET:
9433 return legitimate_pic_address_disp_p (x);
9434 default:
9435 return false;
9437 /* FALLTHRU */
9439 case SYMBOL_REF:
9440 case LABEL_REF:
9441 return legitimate_pic_address_disp_p (x);
9443 default:
9444 return true;
9448 /* Determine if a given CONST RTX is a valid memory displacement
9449 in PIC mode. */
9452 legitimate_pic_address_disp_p (rtx disp)
9454 bool saw_plus;
9456 /* In 64bit mode we can allow direct addresses of symbols and labels
9457 when they are not dynamic symbols. */
9458 if (TARGET_64BIT)
9460 rtx op0 = disp, op1;
9462 switch (GET_CODE (disp))
9464 case LABEL_REF:
9465 return true;
9467 case CONST:
9468 if (GET_CODE (XEXP (disp, 0)) != PLUS)
9469 break;
9470 op0 = XEXP (XEXP (disp, 0), 0);
9471 op1 = XEXP (XEXP (disp, 0), 1);
9472 if (!CONST_INT_P (op1)
9473 || INTVAL (op1) >= 16*1024*1024
9474 || INTVAL (op1) < -16*1024*1024)
9475 break;
9476 if (GET_CODE (op0) == LABEL_REF)
9477 return true;
9478 if (GET_CODE (op0) != SYMBOL_REF)
9479 break;
9480 /* FALLTHRU */
9482 case SYMBOL_REF:
9483 /* TLS references should always be enclosed in UNSPEC. */
9484 if (SYMBOL_REF_TLS_MODEL (op0))
9485 return false;
9486 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
9487 && ix86_cmodel != CM_LARGE_PIC)
9488 return true;
9489 break;
9491 default:
9492 break;
9495 if (GET_CODE (disp) != CONST)
9496 return 0;
9497 disp = XEXP (disp, 0);
9499 if (TARGET_64BIT)
9501 /* It is unsafe to allow PLUS expressions; this limits the allowed
9502 distance of GOT table entries. We should not need these anyway. */
9503 if (GET_CODE (disp) != UNSPEC
9504 || (XINT (disp, 1) != UNSPEC_GOTPCREL
9505 && XINT (disp, 1) != UNSPEC_GOTOFF
9506 && XINT (disp, 1) != UNSPEC_PLTOFF))
9507 return 0;
9509 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
9510 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
9511 return 0;
9512 return 1;
9515 saw_plus = false;
9516 if (GET_CODE (disp) == PLUS)
9518 if (!CONST_INT_P (XEXP (disp, 1)))
9519 return 0;
9520 disp = XEXP (disp, 0);
9521 saw_plus = true;
9524 if (TARGET_MACHO && darwin_local_data_pic (disp))
9525 return 1;
9527 if (GET_CODE (disp) != UNSPEC)
9528 return 0;
9530 switch (XINT (disp, 1))
9532 case UNSPEC_GOT:
9533 if (saw_plus)
9534 return false;
9535 /* We need to check for both symbols and labels because VxWorks loads
9536 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
9537 details. */
9538 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9539 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
9540 case UNSPEC_GOTOFF:
9541 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
9542 While the ABI also specifies a 32bit relocation, we don't produce
9543 it in the small PIC model at all. */
9544 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9545 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
9546 && !TARGET_64BIT)
9547 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
9548 return false;
9549 case UNSPEC_GOTTPOFF:
9550 case UNSPEC_GOTNTPOFF:
9551 case UNSPEC_INDNTPOFF:
9552 if (saw_plus)
9553 return false;
9554 disp = XVECEXP (disp, 0, 0);
9555 return (GET_CODE (disp) == SYMBOL_REF
9556 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
9557 case UNSPEC_NTPOFF:
9558 disp = XVECEXP (disp, 0, 0);
9559 return (GET_CODE (disp) == SYMBOL_REF
9560 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
9561 case UNSPEC_DTPOFF:
9562 disp = XVECEXP (disp, 0, 0);
9563 return (GET_CODE (disp) == SYMBOL_REF
9564 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
9567 return 0;
9570 /* Recognizes RTL expressions that are valid memory addresses for an
9571 instruction. The MODE argument is the machine mode for the MEM
9572 expression that wants to use this address.
9574 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS
9575 should convert common non-canonical forms to canonical form so that
9576 they will be recognized. */
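/* E.g. (plus:SI (reg:SI A) (mult:SI (reg:SI B) (const_int 4))) is in
   canonical form, whereas the equivalent ASHIFT form is only used for
   computing lea lengths; ix86_legitimize_address rewrites such shifts
   into MULT so that they are accepted here.  */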
9578 static bool
9579 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9580 rtx addr, bool strict)
9582 struct ix86_address parts;
9583 rtx base, index, disp;
9584 HOST_WIDE_INT scale;
9585 const char *reason = NULL;
9586 rtx reason_rtx = NULL_RTX;
9588 if (ix86_decompose_address (addr, &parts) <= 0)
9590 reason = "decomposition failed";
9591 goto report_error;
9594 base = parts.base;
9595 index = parts.index;
9596 disp = parts.disp;
9597 scale = parts.scale;
9599 /* Validate base register.
9601 Don't allow SUBREG's that span more than a word here. It can lead to spill
9602 failures when the base is one word out of a two word structure, which is
9603 represented internally as a DImode int. */
9605 if (base)
9607 rtx reg;
9608 reason_rtx = base;
9610 if (REG_P (base))
9611 reg = base;
9612 else if (GET_CODE (base) == SUBREG
9613 && REG_P (SUBREG_REG (base))
9614 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9615 <= UNITS_PER_WORD)
9616 reg = SUBREG_REG (base);
9617 else
9619 reason = "base is not a register";
9620 goto report_error;
9623 if (GET_MODE (base) != Pmode)
9625 reason = "base is not in Pmode";
9626 goto report_error;
9629 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9630 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9632 reason = "base is not valid";
9633 goto report_error;
9637 /* Validate index register.
9639 Don't allow SUBREG's that span more than a word here -- same as above. */
9641 if (index)
9643 rtx reg;
9644 reason_rtx = index;
9646 if (REG_P (index))
9647 reg = index;
9648 else if (GET_CODE (index) == SUBREG
9649 && REG_P (SUBREG_REG (index))
9650 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9651 <= UNITS_PER_WORD)
9652 reg = SUBREG_REG (index);
9653 else
9655 reason = "index is not a register";
9656 goto report_error;
9659 if (GET_MODE (index) != Pmode)
9661 reason = "index is not in Pmode";
9662 goto report_error;
9665 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9666 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9668 reason = "index is not valid";
9669 goto report_error;
9673 /* Validate scale factor. */
9674 if (scale != 1)
9676 reason_rtx = GEN_INT (scale);
9677 if (!index)
9679 reason = "scale without index";
9680 goto report_error;
9683 if (scale != 2 && scale != 4 && scale != 8)
9685 reason = "scale is not a valid multiplier";
9686 goto report_error;
9690 /* Validate displacement. */
9691 if (disp)
9693 reason_rtx = disp;
9695 if (GET_CODE (disp) == CONST
9696 && GET_CODE (XEXP (disp, 0)) == UNSPEC
9697 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9698 switch (XINT (XEXP (disp, 0), 1))
9700 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit
9701 when used. While the ABI also specifies 32bit relocations, we don't
9702 produce them at all and use IP-relative addressing instead. */
9703 case UNSPEC_GOT:
9704 case UNSPEC_GOTOFF:
9705 gcc_assert (flag_pic);
9706 if (!TARGET_64BIT)
9707 goto is_legitimate_pic;
9708 reason = "64bit address unspec";
9709 goto report_error;
9711 case UNSPEC_GOTPCREL:
9712 gcc_assert (flag_pic);
9713 goto is_legitimate_pic;
9715 case UNSPEC_GOTTPOFF:
9716 case UNSPEC_GOTNTPOFF:
9717 case UNSPEC_INDNTPOFF:
9718 case UNSPEC_NTPOFF:
9719 case UNSPEC_DTPOFF:
9720 break;
9722 default:
9723 reason = "invalid address unspec";
9724 goto report_error;
9727 else if (SYMBOLIC_CONST (disp)
9728 && (flag_pic
9729 || (TARGET_MACHO
9730 #if TARGET_MACHO
9731 && MACHOPIC_INDIRECT
9732 && !machopic_operand_p (disp)
9733 #endif
9737 is_legitimate_pic:
9738 if (TARGET_64BIT && (index || base))
9740 /* foo@dtpoff(%rX) is ok. */
9741 if (GET_CODE (disp) != CONST
9742 || GET_CODE (XEXP (disp, 0)) != PLUS
9743 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9744 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9745 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9746 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9748 reason = "non-constant pic memory reference";
9749 goto report_error;
9752 else if (! legitimate_pic_address_disp_p (disp))
9754 reason = "displacement is an invalid pic construct";
9755 goto report_error;
9758 /* This code used to verify that a symbolic pic displacement
9759 includes the pic_offset_table_rtx register.
9761 While this is a good idea, unfortunately these constructs may
9762 be created by the "adds using lea" optimization for incorrect
9763 code like:
9765 int a;
9766 int foo(int i)
9768 return *(&a+i);
9771 This code is nonsensical, but results in addressing the
9772 GOT table with a pic_offset_table_rtx base. We can't
9773 just refuse it easily, since it gets matched by the
9774 "addsi3" pattern, which later gets split to lea in case
9775 the output register differs from the input. While this
9776 could be handled by a separate addsi pattern for this case
9777 that never results in lea, disabling this test seems to be
9778 the easier and correct fix for the crash. */
9780 else if (GET_CODE (disp) != LABEL_REF
9781 && !CONST_INT_P (disp)
9782 && (GET_CODE (disp) != CONST
9783 || !legitimate_constant_p (disp))
9784 && (GET_CODE (disp) != SYMBOL_REF
9785 || !legitimate_constant_p (disp)))
9787 reason = "displacement is not constant";
9788 goto report_error;
9790 else if (TARGET_64BIT
9791 && !x86_64_immediate_operand (disp, VOIDmode))
9793 reason = "displacement is out of range";
9794 goto report_error;
9798 /* Everything looks valid. */
9799 return TRUE;
9801 report_error:
9802 return FALSE;
9805 /* Determine if a given RTX is a valid constant address. */
9807 bool
9808 constant_address_p (rtx x)
9810 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
9813 /* Return a unique alias set for the GOT. */
9815 static alias_set_type
9816 ix86_GOT_alias_set (void)
9818 static alias_set_type set = -1;
9819 if (set == -1)
9820 set = new_alias_set ();
9821 return set;
9824 /* Return a legitimate reference for ORIG (an address) using the
9825 register REG. If REG is 0, a new pseudo is generated.
9827 There are two types of references that must be handled:
9829 1. Global data references must load the address from the GOT, via
9830 the PIC reg. An insn is emitted to do this load, and the reg is
9831 returned.
9833 2. Static data references, constant pool addresses, and code labels
9834 compute the address as an offset from the GOT, whose base is in
9835 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9836 differentiate them from global data objects. The returned
9837 address is the PIC reg + an unspec constant.
9839 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
9840 reg also appears in the address. */
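/* Concretely, on 32-bit ELF targets: case 1 emits a load such as
   (set (reg R) (mem (plus pic_reg (const (unspec [sym] UNSPEC_GOT))))),
   i.e. "movl sym@GOT(%ebx), R", while case 2 returns the address
   (plus pic_reg (const (unspec [sym] UNSPEC_GOTOFF))), printed as
   "sym@GOTOFF(%ebx)".  */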
9842 static rtx
9843 legitimize_pic_address (rtx orig, rtx reg)
9845 rtx addr = orig;
9846 rtx new_rtx = orig;
9847 rtx base;
9849 #if TARGET_MACHO
9850 if (TARGET_MACHO && !TARGET_64BIT)
9852 if (reg == 0)
9853 reg = gen_reg_rtx (Pmode);
9854 /* Use the generic Mach-O PIC machinery. */
9855 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
9857 #endif
9859 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
9860 new_rtx = addr;
9861 else if (TARGET_64BIT
9862 && ix86_cmodel != CM_SMALL_PIC
9863 && gotoff_operand (addr, Pmode))
9865 rtx tmpreg;
9866 /* This symbol may be referenced via a displacement from the PIC
9867 base address (@GOTOFF). */
9869 if (reload_in_progress)
9870 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9871 if (GET_CODE (addr) == CONST)
9872 addr = XEXP (addr, 0);
9873 if (GET_CODE (addr) == PLUS)
9875 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9876 UNSPEC_GOTOFF);
9877 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9879 else
9880 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9881 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9882 if (!reg)
9883 tmpreg = gen_reg_rtx (Pmode);
9884 else
9885 tmpreg = reg;
9886 emit_move_insn (tmpreg, new_rtx);
9888 if (reg != 0)
9890 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9891 tmpreg, 1, OPTAB_DIRECT);
9892 new_rtx = reg;
9894 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
9896 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9898 /* This symbol may be referenced via a displacement from the PIC
9899 base address (@GOTOFF). */
9901 if (reload_in_progress)
9902 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9903 if (GET_CODE (addr) == CONST)
9904 addr = XEXP (addr, 0);
9905 if (GET_CODE (addr) == PLUS)
9907 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9908 UNSPEC_GOTOFF);
9909 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9911 else
9912 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9913 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9914 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9916 if (reg != 0)
9918 emit_move_insn (reg, new_rtx);
9919 new_rtx = reg;
9922 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9923 /* We can't use @GOTOFF for text labels on VxWorks;
9924 see gotoff_operand. */
9925 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
9927 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9929 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9930 return legitimize_dllimport_symbol (addr, true);
9931 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9932 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9933 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9935 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9936 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
9940 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
9942 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9943 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9944 new_rtx = gen_const_mem (Pmode, new_rtx);
9945 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9947 if (reg == 0)
9948 reg = gen_reg_rtx (Pmode);
9949 /* Use gen_movsi directly; otherwise the address is loaded
9950 into a register for CSE. We don't want to CSE these addresses;
9951 instead we CSE addresses from the GOT table, so skip this. */
9952 emit_insn (gen_movsi (reg, new_rtx));
9953 new_rtx = reg;
9955 else
9957 /* This symbol must be referenced via a load from the
9958 Global Offset Table (@GOT). */
9960 if (reload_in_progress)
9961 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9962 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9963 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9964 if (TARGET_64BIT)
9965 new_rtx = force_reg (Pmode, new_rtx);
9966 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9967 new_rtx = gen_const_mem (Pmode, new_rtx);
9968 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9970 if (reg == 0)
9971 reg = gen_reg_rtx (Pmode);
9972 emit_move_insn (reg, new_rtx);
9973 new_rtx = reg;
9976 else
9978 if (CONST_INT_P (addr)
9979 && !x86_64_immediate_operand (addr, VOIDmode))
9981 if (reg)
9983 emit_move_insn (reg, addr);
9984 new_rtx = reg;
9986 else
9987 new_rtx = force_reg (Pmode, addr);
9989 else if (GET_CODE (addr) == CONST)
9991 addr = XEXP (addr, 0);
9993 /* We must match stuff we generate before. Assume the only
9994 unspecs that can get here are ours. Not that we could do
9995 anything with them anyway.... */
9996 if (GET_CODE (addr) == UNSPEC
9997 || (GET_CODE (addr) == PLUS
9998 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9999 return orig;
10000 gcc_assert (GET_CODE (addr) == PLUS);
10002 if (GET_CODE (addr) == PLUS)
10004 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
10006 /* Check first to see if this is a constant offset from a @GOTOFF
10007 symbol reference. */
10008 if (gotoff_operand (op0, Pmode)
10009 && CONST_INT_P (op1))
10011 if (!TARGET_64BIT)
10013 if (reload_in_progress)
10014 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10015 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
10016 UNSPEC_GOTOFF);
10017 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
10018 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10019 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10021 if (reg != 0)
10023 emit_move_insn (reg, new_rtx);
10024 new_rtx = reg;
10027 else
10029 if (INTVAL (op1) < -16*1024*1024
10030 || INTVAL (op1) >= 16*1024*1024)
10032 if (!x86_64_immediate_operand (op1, Pmode))
10033 op1 = force_reg (Pmode, op1);
10034 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
10038 else
10040 base = legitimize_pic_address (XEXP (addr, 0), reg);
10041 new_rtx = legitimize_pic_address (XEXP (addr, 1),
10042 base == reg ? NULL_RTX : reg);
10044 if (CONST_INT_P (new_rtx))
10045 new_rtx = plus_constant (base, INTVAL (new_rtx));
10046 else
10048 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
10050 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
10051 new_rtx = XEXP (new_rtx, 1);
10053 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
10058 return new_rtx;
10061 /* Load the thread pointer. If TO_REG is true, force it into a register. */
10063 static rtx
10064 get_thread_pointer (int to_reg)
10066 rtx tp, reg, insn;
10068 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
10069 if (!to_reg)
10070 return tp;
10072 reg = gen_reg_rtx (Pmode);
10073 insn = gen_rtx_SET (VOIDmode, reg, tp);
10074 insn = emit_insn (insn);
10076 return reg;
10079 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
10080 false if we expect this to be used for a memory address and true if
10081 we expect to load the address into a register. */
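/* Illustrative expansion for the simplest case handled below:
   local-exec with GNU TLS on ia32 returns
   (plus tp (const (unspec [x] UNSPEC_NTPOFF))), i.e. the thread
   pointer plus an "x@NTPOFF" offset.  */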
10083 static rtx
10084 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
10086 rtx dest, base, off, pic, tp;
10087 int type;
10089 switch (model)
10091 case TLS_MODEL_GLOBAL_DYNAMIC:
10092 dest = gen_reg_rtx (Pmode);
10093 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
10095 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
10097 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
10099 start_sequence ();
10100 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
10101 insns = get_insns ();
10102 end_sequence ();
10104 RTL_CONST_CALL_P (insns) = 1;
10105 emit_libcall_block (insns, dest, rax, x);
10107 else if (TARGET_64BIT && TARGET_GNU2_TLS)
10108 emit_insn (gen_tls_global_dynamic_64 (dest, x));
10109 else
10110 emit_insn (gen_tls_global_dynamic_32 (dest, x));
10112 if (TARGET_GNU2_TLS)
10114 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
10116 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
10118 break;
10120 case TLS_MODEL_LOCAL_DYNAMIC:
10121 base = gen_reg_rtx (Pmode);
10122 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
10124 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
10126 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
10128 start_sequence ();
10129 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
10130 insns = get_insns ();
10131 end_sequence ();
10133 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
10134 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
10135 RTL_CONST_CALL_P (insns) = 1;
10136 emit_libcall_block (insns, base, rax, note);
10138 else if (TARGET_64BIT && TARGET_GNU2_TLS)
10139 emit_insn (gen_tls_local_dynamic_base_64 (base));
10140 else
10141 emit_insn (gen_tls_local_dynamic_base_32 (base));
10143 if (TARGET_GNU2_TLS)
10145 rtx x = ix86_tls_module_base ();
10147 set_unique_reg_note (get_last_insn (), REG_EQUIV,
10148 gen_rtx_MINUS (Pmode, x, tp));
10151 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
10152 off = gen_rtx_CONST (Pmode, off);
10154 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
10156 if (TARGET_GNU2_TLS)
10158 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
10160 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
10163 break;
10165 case TLS_MODEL_INITIAL_EXEC:
10166 if (TARGET_64BIT)
10168 pic = NULL;
10169 type = UNSPEC_GOTNTPOFF;
10171 else if (flag_pic)
10173 if (reload_in_progress)
10174 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10175 pic = pic_offset_table_rtx;
10176 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
10178 else if (!TARGET_ANY_GNU_TLS)
10180 pic = gen_reg_rtx (Pmode);
10181 emit_insn (gen_set_got (pic));
10182 type = UNSPEC_GOTTPOFF;
10184 else
10186 pic = NULL;
10187 type = UNSPEC_INDNTPOFF;
10190 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
10191 off = gen_rtx_CONST (Pmode, off);
10192 if (pic)
10193 off = gen_rtx_PLUS (Pmode, pic, off);
10194 off = gen_const_mem (Pmode, off);
10195 set_mem_alias_set (off, ix86_GOT_alias_set ());
10197 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10199 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
10200 off = force_reg (Pmode, off);
10201 return gen_rtx_PLUS (Pmode, base, off);
10203 else
10205 base = get_thread_pointer (true);
10206 dest = gen_reg_rtx (Pmode);
10207 emit_insn (gen_subsi3 (dest, base, off));
10209 break;
10211 case TLS_MODEL_LOCAL_EXEC:
10212 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
10213 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10214 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
10215 off = gen_rtx_CONST (Pmode, off);
10217 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10219 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
10220 return gen_rtx_PLUS (Pmode, base, off);
10222 else
10224 base = get_thread_pointer (true);
10225 dest = gen_reg_rtx (Pmode);
10226 emit_insn (gen_subsi3 (dest, base, off));
10228 break;
10230 default:
10231 gcc_unreachable ();
10234 return dest;
10237 /* Create or return the unique __imp_DECL dllimport symbol corresponding
10238 to symbol DECL. */
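/* Illustrative: for a dllimported symbol "foo" on a target whose
   user label prefix is "_", the returned decl's RTL is
   (mem (symbol_ref "*__imp__foo")), the import address table slot
   through which all references are indirected.  */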
10240 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
10241 htab_t dllimport_map;
10243 static tree
10244 get_dllimport_decl (tree decl)
10246 struct tree_map *h, in;
10247 void **loc;
10248 const char *name;
10249 const char *prefix;
10250 size_t namelen, prefixlen;
10251 char *imp_name;
10252 tree to;
10253 rtx rtl;
10255 if (!dllimport_map)
10256 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
10258 in.hash = htab_hash_pointer (decl);
10259 in.base.from = decl;
10260 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
10261 h = (struct tree_map *) *loc;
10262 if (h)
10263 return h->to;
10265 *loc = h = GGC_NEW (struct tree_map);
10266 h->hash = in.hash;
10267 h->base.from = decl;
10268 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
10269 VAR_DECL, NULL, ptr_type_node);
10270 DECL_ARTIFICIAL (to) = 1;
10271 DECL_IGNORED_P (to) = 1;
10272 DECL_EXTERNAL (to) = 1;
10273 TREE_READONLY (to) = 1;
10275 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
10276 name = targetm.strip_name_encoding (name);
10277 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
10278 ? "*__imp_" : "*__imp__";
10279 namelen = strlen (name);
10280 prefixlen = strlen (prefix);
10281 imp_name = (char *) alloca (namelen + prefixlen + 1);
10282 memcpy (imp_name, prefix, prefixlen);
10283 memcpy (imp_name + prefixlen, name, namelen + 1);
10285 name = ggc_alloc_string (imp_name, namelen + prefixlen);
10286 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
10287 SET_SYMBOL_REF_DECL (rtl, to);
10288 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
10290 rtl = gen_const_mem (Pmode, rtl);
10291 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
10293 SET_DECL_RTL (to, rtl);
10294 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
10296 return to;
10299 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
10300 true if we require the result be a register. */
10302 static rtx
10303 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
10305 tree imp_decl;
10306 rtx x;
10308 gcc_assert (SYMBOL_REF_DECL (symbol));
10309 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
10311 x = DECL_RTL (imp_decl);
10312 if (want_reg)
10313 x = force_reg (Pmode, x);
10314 return x;
10317 /* Try machine-dependent ways of modifying an illegitimate address
10318 to be legitimate. If we find one, return the new, valid address.
10319 This macro is used in only one place: `memory_address' in explow.c.
10321 OLDX is the address as it was before break_out_memory_refs was called.
10322 In some cases it is useful to look at this to decide what needs to be done.
10324 It is always safe for this macro to do nothing. It exists to recognize
10325 opportunities to optimize the output.
10327 For the 80386, we handle X+REG by loading X into a register R and
10328 using R+REG. R will go in a general reg and indexing will be used.
10329 However, if REG is a broken-out memory address or multiplication,
10330 nothing needs to be done because REG can certainly go in a general reg.
10332 When -fpic is used, special handling is needed for symbolic references.
10333 See comments by legitimize_pic_address in i386.c for details. */
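/* E.g. (plus (reg A) (ashift (reg B) (const_int 2))) is rewritten
   below into (plus (mult (reg B) (const_int 4)) (reg A)), the
   multiply-first canonical form that ix86_legitimate_address_p
   accepts.  */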
10335 static rtx
10336 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
10337 enum machine_mode mode)
10339 int changed = 0;
10340 unsigned log;
10342 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
10343 if (log)
10344 return legitimize_tls_address (x, (enum tls_model) log, false);
10345 if (GET_CODE (x) == CONST
10346 && GET_CODE (XEXP (x, 0)) == PLUS
10347 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10348 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
10350 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
10351 (enum tls_model) log, false);
10352 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10355 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10357 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
10358 return legitimize_dllimport_symbol (x, true);
10359 if (GET_CODE (x) == CONST
10360 && GET_CODE (XEXP (x, 0)) == PLUS
10361 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10362 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
10364 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
10365 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10369 if (flag_pic && SYMBOLIC_CONST (x))
10370 return legitimize_pic_address (x, 0);
10372 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
10373 if (GET_CODE (x) == ASHIFT
10374 && CONST_INT_P (XEXP (x, 1))
10375 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
10377 changed = 1;
10378 log = INTVAL (XEXP (x, 1));
10379 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
10380 GEN_INT (1 << log));
10383 if (GET_CODE (x) == PLUS)
10385 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
10387 if (GET_CODE (XEXP (x, 0)) == ASHIFT
10388 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10389 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
10391 changed = 1;
10392 log = INTVAL (XEXP (XEXP (x, 0), 1));
10393 XEXP (x, 0) = gen_rtx_MULT (Pmode,
10394 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
10395 GEN_INT (1 << log));
10398 if (GET_CODE (XEXP (x, 1)) == ASHIFT
10399 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10400 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
10402 changed = 1;
10403 log = INTVAL (XEXP (XEXP (x, 1), 1));
10404 XEXP (x, 1) = gen_rtx_MULT (Pmode,
10405 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
10406 GEN_INT (1 << log));
10409 /* Put multiply first if it isn't already. */
10410 if (GET_CODE (XEXP (x, 1)) == MULT)
10412 rtx tmp = XEXP (x, 0);
10413 XEXP (x, 0) = XEXP (x, 1);
10414 XEXP (x, 1) = tmp;
10415 changed = 1;
10418 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
10419 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
10420 created by virtual register instantiation, register elimination, and
10421 similar optimizations. */
10422 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
10424 changed = 1;
10425 x = gen_rtx_PLUS (Pmode,
10426 gen_rtx_PLUS (Pmode, XEXP (x, 0),
10427 XEXP (XEXP (x, 1), 0)),
10428 XEXP (XEXP (x, 1), 1));
10431 /* Canonicalize
10432 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
10433 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
10434 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
10435 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10436 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
10437 && CONSTANT_P (XEXP (x, 1)))
10439 rtx constant;
10440 rtx other = NULL_RTX;
10442 if (CONST_INT_P (XEXP (x, 1)))
10444 constant = XEXP (x, 1);
10445 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
10447 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
10449 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
10450 other = XEXP (x, 1);
10452 else
10453 constant = 0;
10455 if (constant)
10457 changed = 1;
10458 x = gen_rtx_PLUS (Pmode,
10459 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
10460 XEXP (XEXP (XEXP (x, 0), 1), 0)),
10461 plus_constant (other, INTVAL (constant)));
10465 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
10466 return x;
10468 if (GET_CODE (XEXP (x, 0)) == MULT)
10470 changed = 1;
10471 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
10474 if (GET_CODE (XEXP (x, 1)) == MULT)
10476 changed = 1;
10477 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
10480 if (changed
10481 && REG_P (XEXP (x, 1))
10482 && REG_P (XEXP (x, 0)))
10483 return x;
10485 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
10487 changed = 1;
10488 x = legitimize_pic_address (x, 0);
10491 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
10492 return x;
10494 if (REG_P (XEXP (x, 0)))
10496 rtx temp = gen_reg_rtx (Pmode);
10497 rtx val = force_operand (XEXP (x, 1), temp);
10498 if (val != temp)
10499 emit_move_insn (temp, val);
10501 XEXP (x, 1) = temp;
10502 return x;
10505 else if (REG_P (XEXP (x, 1)))
10507 rtx temp = gen_reg_rtx (Pmode);
10508 rtx val = force_operand (XEXP (x, 0), temp);
10509 if (val != temp)
10510 emit_move_insn (temp, val);
10512 XEXP (x, 0) = temp;
10513 return x;
10517 return x;
10520 /* Print an integer constant expression in assembler syntax. Addition
10521 and subtraction are the only arithmetic that may appear in these
10522 expressions. FILE is the stdio stream to write to, X is the rtx, and
10523 CODE is the operand print code from the output string. */
10525 static void
10526 output_pic_addr_const (FILE *file, rtx x, int code)
10528 char buf[256];
10530 switch (GET_CODE (x))
10532 case PC:
10533 gcc_assert (flag_pic);
10534 putc ('.', file);
10535 break;
10537 case SYMBOL_REF:
10538 if (! TARGET_MACHO || TARGET_64BIT)
10539 output_addr_const (file, x);
10540 else
10542 const char *name = XSTR (x, 0);
10544 /* Mark the decl as referenced so that cgraph will
10545 output the function. */
10546 if (SYMBOL_REF_DECL (x))
10547 mark_decl_referenced (SYMBOL_REF_DECL (x));
10549 #if TARGET_MACHO
10550 if (MACHOPIC_INDIRECT
10551 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
10552 name = machopic_indirection_name (x, /*stub_p=*/true);
10553 #endif
10554 assemble_name (file, name);
10556 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
10557 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
10558 fputs ("@PLT", file);
10559 break;
10561 case LABEL_REF:
10562 x = XEXP (x, 0);
10563 /* FALLTHRU */
10564 case CODE_LABEL:
10565 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10566 assemble_name (asm_out_file, buf);
10567 break;
10569 case CONST_INT:
10570 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10571 break;
10573 case CONST:
10574 /* This used to output parentheses around the expression,
10575 but that does not work on the 386 (either ATT or BSD assembler). */
10576 output_pic_addr_const (file, XEXP (x, 0), code);
10577 break;
10579 case CONST_DOUBLE:
10580 if (GET_MODE (x) == VOIDmode)
10582 /* We can use %d if the number is <32 bits and positive. */
10583 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10584 fprintf (file, "0x%lx%08lx",
10585 (unsigned long) CONST_DOUBLE_HIGH (x),
10586 (unsigned long) CONST_DOUBLE_LOW (x));
10587 else
10588 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10590 else
10591 /* We can't handle floating point constants;
10592 PRINT_OPERAND must handle them. */
10593 output_operand_lossage ("floating constant misused");
10594 break;
10596 case PLUS:
10597 /* Some assemblers need integer constants to appear first. */
10598 if (CONST_INT_P (XEXP (x, 0)))
10600 output_pic_addr_const (file, XEXP (x, 0), code);
10601 putc ('+', file);
10602 output_pic_addr_const (file, XEXP (x, 1), code);
10604 else
10606 gcc_assert (CONST_INT_P (XEXP (x, 1)));
10607 output_pic_addr_const (file, XEXP (x, 1), code);
10608 putc ('+', file);
10609 output_pic_addr_const (file, XEXP (x, 0), code);
10611 break;
10613 case MINUS:
10614 if (!TARGET_MACHO)
10615 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10616 output_pic_addr_const (file, XEXP (x, 0), code);
10617 putc ('-', file);
10618 output_pic_addr_const (file, XEXP (x, 1), code);
10619 if (!TARGET_MACHO)
10620 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
10621 break;
10623 case UNSPEC:
10624 gcc_assert (XVECLEN (x, 0) == 1);
10625 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10626 switch (XINT (x, 1))
10628 case UNSPEC_GOT:
10629 fputs ("@GOT", file);
10630 break;
10631 case UNSPEC_GOTOFF:
10632 fputs ("@GOTOFF", file);
10633 break;
10634 case UNSPEC_PLTOFF:
10635 fputs ("@PLTOFF", file);
10636 break;
10637 case UNSPEC_GOTPCREL:
10638 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10639 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10640 break;
10641 case UNSPEC_GOTTPOFF:
10642 /* FIXME: This might be @TPOFF in Sun ld too. */
10643 fputs ("@GOTTPOFF", file);
10644 break;
10645 case UNSPEC_TPOFF:
10646 fputs ("@TPOFF", file);
10647 break;
10648 case UNSPEC_NTPOFF:
10649 if (TARGET_64BIT)
10650 fputs ("@TPOFF", file);
10651 else
10652 fputs ("@NTPOFF", file);
10653 break;
10654 case UNSPEC_DTPOFF:
10655 fputs ("@DTPOFF", file);
10656 break;
10657 case UNSPEC_GOTNTPOFF:
10658 if (TARGET_64BIT)
10659 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10660 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
10661 else
10662 fputs ("@GOTNTPOFF", file);
10663 break;
10664 case UNSPEC_INDNTPOFF:
10665 fputs ("@INDNTPOFF", file);
10666 break;
10667 #if TARGET_MACHO
10668 case UNSPEC_MACHOPIC_OFFSET:
10669 putc ('-', file);
10670 machopic_output_function_base_name (file);
10671 break;
10672 #endif
10673 default:
10674 output_operand_lossage ("invalid UNSPEC as operand");
10675 break;
10677 break;
10679 default:
10680 output_operand_lossage ("invalid expression as operand");
10684 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10685 We need to emit DTP-relative relocations. */
10687 static void ATTRIBUTE_UNUSED
10688 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
10690 fputs (ASM_LONG, file);
10691 output_addr_const (file, x);
10692 fputs ("@DTPOFF", file);
10693 switch (size)
10695 case 4:
10696 break;
10697 case 8:
10698 fputs (", 0", file);
10699 break;
10700 default:
10701 gcc_unreachable ();
10705 /* Return true if X is a representation of the PIC register. This copes
10706 with calls from ix86_find_base_term, where the register might have
10707 been replaced by a cselib value. */
10709 static bool
10710 ix86_pic_register_p (rtx x)
10712 if (GET_CODE (x) == VALUE)
10713 return (pic_offset_table_rtx
10714 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
10715 else
10716 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
10719 /* In the name of slightly smaller debug output, and to cater to
10720 general assembler lossage, recognize PIC+GOTOFF and turn it back
10721 into a direct symbol reference.
10723 On Darwin, this is necessary to avoid a crash, because Darwin
10724 has a different PIC label for each routine but the DWARF debugging
10725 information is not associated with any particular routine, so it's
10726 necessary to remove references to the PIC label from RTL stored by
10727 the DWARF output code. */
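/* Illustrative: (plus pic_reg (const (unspec [(symbol_ref "x")]
   UNSPEC_GOTOFF))) is turned back into (symbol_ref "x"), with any
   constant addend re-wrapped around the result.  */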
10729 static rtx
10730 ix86_delegitimize_address (rtx orig_x)
10732 rtx x = orig_x;
10733 /* reg_addend is NULL or a multiple of some register. */
10734 rtx reg_addend = NULL_RTX;
10735 /* const_addend is NULL or a const_int. */
10736 rtx const_addend = NULL_RTX;
10737 /* This is the result, or NULL. */
10738 rtx result = NULL_RTX;
10740 if (MEM_P (x))
10741 x = XEXP (x, 0);
10743 if (TARGET_64BIT)
10745 if (GET_CODE (x) != CONST
10746 || GET_CODE (XEXP (x, 0)) != UNSPEC
10747 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10748 || !MEM_P (orig_x))
10749 return orig_x;
10750 return XVECEXP (XEXP (x, 0), 0, 0);
10753 if (GET_CODE (x) != PLUS
10754 || GET_CODE (XEXP (x, 1)) != CONST)
10755 return orig_x;
10757 if (ix86_pic_register_p (XEXP (x, 0)))
10758 /* %ebx + GOT/GOTOFF */
10760 else if (GET_CODE (XEXP (x, 0)) == PLUS)
10762 /* %ebx + %reg * scale + GOT/GOTOFF */
10763 reg_addend = XEXP (x, 0);
10764 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10765 reg_addend = XEXP (reg_addend, 1);
10766 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10767 reg_addend = XEXP (reg_addend, 0);
10768 else
10769 return orig_x;
10770 if (!REG_P (reg_addend)
10771 && GET_CODE (reg_addend) != MULT
10772 && GET_CODE (reg_addend) != ASHIFT)
10773 return orig_x;
10775 else
10776 return orig_x;
10778 x = XEXP (XEXP (x, 1), 0);
10779 if (GET_CODE (x) == PLUS
10780 && CONST_INT_P (XEXP (x, 1)))
10782 const_addend = XEXP (x, 1);
10783 x = XEXP (x, 0);
10786 if (GET_CODE (x) == UNSPEC
10787 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10788 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10789 result = XVECEXP (x, 0, 0);
10791 if (TARGET_MACHO && darwin_local_data_pic (x)
10792 && !MEM_P (orig_x))
10793 result = XVECEXP (x, 0, 0);
10795 if (! result)
10796 return orig_x;
10798 if (const_addend)
10799 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10800 if (reg_addend)
10801 result = gen_rtx_PLUS (Pmode, reg_addend, result);
10802 return result;
10805 /* If X is a machine specific address (i.e. a symbol or label being
10806 referenced as a displacement from the GOT implemented using an
10807 UNSPEC), then return the base term. Otherwise return X. */
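/* Illustrative, for TARGET_64BIT: the base term of
   (const (unspec [(symbol_ref "x")] UNSPEC_GOTPCREL)) is
   (symbol_ref "x").  */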
10810 ix86_find_base_term (rtx x)
10812 rtx term;
10814 if (TARGET_64BIT)
10816 if (GET_CODE (x) != CONST)
10817 return x;
10818 term = XEXP (x, 0);
10819 if (GET_CODE (term) == PLUS
10820 && (CONST_INT_P (XEXP (term, 1))
10821 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10822 term = XEXP (term, 0);
10823 if (GET_CODE (term) != UNSPEC
10824 || XINT (term, 1) != UNSPEC_GOTPCREL)
10825 return x;
10827 return XVECEXP (term, 0, 0);
10830 return ix86_delegitimize_address (x);
10833 static void
10834 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10835 int fp, FILE *file)
10837 const char *suffix;
10839 if (mode == CCFPmode || mode == CCFPUmode)
10841 code = ix86_fp_compare_code_to_integer (code);
10842 mode = CCmode;
10844 if (reverse)
10845 code = reverse_condition (code);
10847 switch (code)
10849 case EQ:
10850 switch (mode)
10852 case CCAmode:
10853 suffix = "a";
10854 break;
10856 case CCCmode:
10857 suffix = "c";
10858 break;
10860 case CCOmode:
10861 suffix = "o";
10862 break;
10864 case CCSmode:
10865 suffix = "s";
10866 break;
10868 default:
10869 suffix = "e";
10871 break;
10872 case NE:
10873 switch (mode)
10875 case CCAmode:
10876 suffix = "na";
10877 break;
10879 case CCCmode:
10880 suffix = "nc";
10881 break;
10883 case CCOmode:
10884 suffix = "no";
10885 break;
10887 case CCSmode:
10888 suffix = "ns";
10889 break;
10891 default:
10892 suffix = "ne";
10894 break;
10895 case GT:
10896 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10897 suffix = "g";
10898 break;
10899 case GTU:
10900 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10901 Those same assemblers have the same but opposite lossage on cmov. */
10902 if (mode == CCmode)
10903 suffix = fp ? "nbe" : "a";
10904 else if (mode == CCCmode)
10905 suffix = "b";
10906 else
10907 gcc_unreachable ();
10908 break;
10909 case LT:
10910 switch (mode)
10912 case CCNOmode:
10913 case CCGOCmode:
10914 suffix = "s";
10915 break;
10917 case CCmode:
10918 case CCGCmode:
10919 suffix = "l";
10920 break;
10922 default:
10923 gcc_unreachable ();
10925 break;
10926 case LTU:
10927 gcc_assert (mode == CCmode || mode == CCCmode);
10928 suffix = "b";
10929 break;
10930 case GE:
10931 switch (mode)
10933 case CCNOmode:
10934 case CCGOCmode:
10935 suffix = "ns";
10936 break;
10938 case CCmode:
10939 case CCGCmode:
10940 suffix = "ge";
10941 break;
10943 default:
10944 gcc_unreachable ();
10946 break;
10947 case GEU:
10948 /* ??? As above. */
10949 gcc_assert (mode == CCmode || mode == CCCmode);
10950 suffix = fp ? "nb" : "ae";
10951 break;
10952 case LE:
10953 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10954 suffix = "le";
10955 break;
10956 case LEU:
10957 /* ??? As above. */
10958 if (mode == CCmode)
10959 suffix = "be";
10960 else if (mode == CCCmode)
10961 suffix = fp ? "nb" : "ae";
10962 else
10963 gcc_unreachable ();
10964 break;
10965 case UNORDERED:
10966 suffix = fp ? "u" : "p";
10967 break;
10968 case ORDERED:
10969 suffix = fp ? "nu" : "np";
10970 break;
10971 default:
10972 gcc_unreachable ();
10974 fputs (suffix, file);
10977 /* Print the name of register X to FILE based on its machine mode and number.
10978 If CODE is 'w', pretend the mode is HImode.
10979 If CODE is 'b', pretend the mode is QImode.
10980 If CODE is 'k', pretend the mode is SImode.
10981 If CODE is 'q', pretend the mode is DImode.
10982 If CODE is 'x', pretend the mode is V4SFmode.
10983 If CODE is 't', pretend the mode is V8SFmode.
10984 If CODE is 'h', pretend the reg is the 'high' byte register.
10985 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
10986 If CODE is 'd', duplicate the operand for AVX instruction.
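For example, with CODE 'k' the hard register AX prints as "eax",
with 'w' as "ax", with 'b' as "al" and with 'h' as "ah"; ATT syntax
additionally prefixes '%'.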
10989 void
10990 print_reg (rtx x, int code, FILE *file)
10992 const char *reg;
10993 bool duplicated = code == 'd' && TARGET_AVX;
10995 gcc_assert (x == pc_rtx
10996 || (REGNO (x) != ARG_POINTER_REGNUM
10997 && REGNO (x) != FRAME_POINTER_REGNUM
10998 && REGNO (x) != FLAGS_REG
10999 && REGNO (x) != FPSR_REG
11000 && REGNO (x) != FPCR_REG));
11002 if (ASSEMBLER_DIALECT == ASM_ATT)
11003 putc ('%', file);
11005 if (x == pc_rtx)
11007 gcc_assert (TARGET_64BIT);
11008 fputs ("rip", file);
11009 return;
11012 if (code == 'w' || MMX_REG_P (x))
11013 code = 2;
11014 else if (code == 'b')
11015 code = 1;
11016 else if (code == 'k')
11017 code = 4;
11018 else if (code == 'q')
11019 code = 8;
11020 else if (code == 'y')
11021 code = 3;
11022 else if (code == 'h')
11023 code = 0;
11024 else if (code == 'x')
11025 code = 16;
11026 else if (code == 't')
11027 code = 32;
11028 else
11029 code = GET_MODE_SIZE (GET_MODE (x));
11031 /* Irritatingly, AMD extended registers use a different naming
11032 convention from the normal registers. */
11033 if (REX_INT_REG_P (x))
11035 gcc_assert (TARGET_64BIT);
11036 switch (code)
11038 case 0:
11039 error ("extended registers have no high halves");
11040 break;
11041 case 1:
11042 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
11043 break;
11044 case 2:
11045 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
11046 break;
11047 case 4:
11048 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
11049 break;
11050 case 8:
11051 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
11052 break;
11053 default:
11054 error ("unsupported operand size for extended register");
11055 break;
11057 return;
11060 reg = NULL;
11061 switch (code)
11063 case 3:
11064 if (STACK_TOP_P (x))
11066 reg = "st(0)";
11067 break;
11069 /* FALLTHRU */
11070 case 8:
11071 case 4:
11072 case 12:
11073 if (! ANY_FP_REG_P (x))
11074 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
11075 /* FALLTHRU */
11076 case 16:
11077 case 2:
11078 normal:
11079 reg = hi_reg_name[REGNO (x)];
11080 break;
11081 case 1:
11082 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
11083 goto normal;
11084 reg = qi_reg_name[REGNO (x)];
11085 break;
11086 case 0:
11087 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
11088 goto normal;
11089 reg = qi_high_reg_name[REGNO (x)];
11090 break;
11091 case 32:
11092 if (SSE_REG_P (x))
11094 gcc_assert (!duplicated);
11095 putc ('y', file);
11096 fputs (hi_reg_name[REGNO (x)] + 1, file);
11097 return;
11099 break;
11100 default:
11101 gcc_unreachable ();
11104 fputs (reg, file);
11105 if (duplicated)
11107 if (ASSEMBLER_DIALECT == ASM_ATT)
11108 fprintf (file, ", %%%s", reg);
11109 else
11110 fprintf (file, ", %s", reg);
11114 /* Locate some local-dynamic symbol still in use by this function
11115 so that we can print its name in some tls_local_dynamic_base
11116 pattern. */
11118 static int
11119 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
11121 rtx x = *px;
11123 if (GET_CODE (x) == SYMBOL_REF
11124 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
11126 cfun->machine->some_ld_name = XSTR (x, 0);
11127 return 1;
11130 return 0;
11133 static const char *
11134 get_some_local_dynamic_name (void)
11136 rtx insn;
11138 if (cfun->machine->some_ld_name)
11139 return cfun->machine->some_ld_name;
11141 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
11142 if (INSN_P (insn)
11143 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
11144 return cfun->machine->some_ld_name;
11146 gcc_unreachable ();
11149 /* Meaning of CODE:
11150 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
11151 C -- print opcode suffix for set/cmov insn.
11152 c -- like C, but print reversed condition
11153 E,e -- likewise, but for compare-and-branch fused insn.
11154 F,f -- likewise, but for floating-point.
11155 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
11156 otherwise nothing
11157 R -- print the prefix for register names.
11158 z -- print the opcode suffix for the size of the current operand.
11159 Z -- likewise, with special suffixes for x87 instructions.
11160 * -- print a star (in certain assembler syntax)
11161 A -- print an absolute memory reference.
11162 w -- print the operand as if it's a "word" (HImode) even if it isn't.
11163 s -- print a shift double count, followed by the assembler's argument
11164 delimiter.
11165 b -- print the QImode name of the register for the indicated operand.
11166 %b0 would print %al if operands[0] is reg 0.
11167 w -- likewise, print the HImode name of the register.
11168 k -- likewise, print the SImode name of the register.
11169 q -- likewise, print the DImode name of the register.
11170 x -- likewise, print the V4SFmode name of the register.
11171 t -- likewise, print the V8SFmode name of the register.
11172 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
11173 y -- print "st(0)" instead of "st" as a register.
11174 d -- print duplicated register operand for AVX instruction.
11175 D -- print condition for SSE cmp instruction.
11176 P -- if PIC, print an @PLT suffix.
11177 X -- don't print any sort of PIC '@' suffix for a symbol.
11178 & -- print some in-use local-dynamic symbol name.
11179 H -- print a memory address offset by 8; used for sse high-parts
11180 Y -- print condition for SSE5 com* instruction.
11181 + -- print a branch hint as 'cs' or 'ds' prefix
11182 ; -- print a semicolon (after prefixes, due to a bug in older gas). */
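/* Illustrative example of the codes above (hypothetical operands): for
   operands[0] = (reg:SI ax), a template such as
       "add%z0\t{%1, %0|%0, %1}"
   prints "addl" with AT&T operand order (Intel order after the '|'),
   while "%b0", "%w0" and "%k0" print %al, %ax and %eax respectively.  */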
11185 void
11186 print_operand (FILE *file, rtx x, int code)
11188 if (code)
11190 switch (code)
11192 case '*':
11193 if (ASSEMBLER_DIALECT == ASM_ATT)
11194 putc ('*', file);
11195 return;
11197 case '&':
11198 assemble_name (file, get_some_local_dynamic_name ());
11199 return;
11201 case 'A':
11202 switch (ASSEMBLER_DIALECT)
11204 case ASM_ATT:
11205 putc ('*', file);
11206 break;
11208 case ASM_INTEL:
11209 /* Intel syntax. For absolute addresses, registers should not
11210 be surrounded by braces. */
11211 if (!REG_P (x))
11213 putc ('[', file);
11214 PRINT_OPERAND (file, x, 0);
11215 putc (']', file);
11216 return;
11218 break;
11220 default:
11221 gcc_unreachable ();
11224 PRINT_OPERAND (file, x, 0);
11225 return;
11228 case 'L':
11229 if (ASSEMBLER_DIALECT == ASM_ATT)
11230 putc ('l', file);
11231 return;
11233 case 'W':
11234 if (ASSEMBLER_DIALECT == ASM_ATT)
11235 putc ('w', file);
11236 return;
11238 case 'B':
11239 if (ASSEMBLER_DIALECT == ASM_ATT)
11240 putc ('b', file);
11241 return;
11243 case 'Q':
11244 if (ASSEMBLER_DIALECT == ASM_ATT)
11245 putc ('l', file);
11246 return;
11248 case 'S':
11249 if (ASSEMBLER_DIALECT == ASM_ATT)
11250 putc ('s', file);
11251 return;
11253 case 'T':
11254 if (ASSEMBLER_DIALECT == ASM_ATT)
11255 putc ('t', file);
11256 return;
11258 case 'z':
11259 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11261 /* Opcodes don't get size suffixes if using Intel opcodes. */
11262 if (ASSEMBLER_DIALECT == ASM_INTEL)
11263 return;
11265 switch (GET_MODE_SIZE (GET_MODE (x)))
11267 case 1:
11268 putc ('b', file);
11269 return;
11271 case 2:
11272 putc ('w', file);
11273 return;
11275 case 4:
11276 putc ('l', file);
11277 return;
11279 case 8:
11280 putc ('q', file);
11281 return;
11283 default:
11284 output_operand_lossage
11285 ("invalid operand size for operand code '%c'", code);
11286 return;
11290 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11291 warning
11292 (0, "non-integer operand used with operand code '%c'", code);
11293 /* FALLTHRU */
11295 case 'Z':
11296 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
11297 if (ASSEMBLER_DIALECT == ASM_INTEL)
11298 return;
11300 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11302 switch (GET_MODE_SIZE (GET_MODE (x)))
11304 case 2:
11305 #ifdef HAVE_AS_IX86_FILDS
11306 putc ('s', file);
11307 #endif
11308 return;
11310 case 4:
11311 putc ('l', file);
11312 return;
11314 case 8:
11315 #ifdef HAVE_AS_IX86_FILDQ
11316 putc ('q', file);
11317 #else
11318 fputs ("ll", file);
11319 #endif
11320 return;
11322 default:
11323 break;
11326 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11328 /* 387 opcodes don't get size suffixes
11329 if the operands are registers. */
11330 if (STACK_REG_P (x))
11331 return;
11333 switch (GET_MODE_SIZE (GET_MODE (x)))
11335 case 4:
11336 putc ('s', file);
11337 return;
11339 case 8:
11340 putc ('l', file);
11341 return;
11343 case 12:
11344 case 16:
11345 putc ('t', file);
11346 return;
11348 default:
11349 break;
11352 else
11354 output_operand_lossage
11355 ("invalid operand type used with operand code '%c'", code);
11356 return;
11359 output_operand_lossage
11360 ("invalid operand size for operand code '%c'", code);
11361 return;
11363 case 'd':
11364 case 'b':
11365 case 'w':
11366 case 'k':
11367 case 'q':
11368 case 'h':
11369 case 't':
11370 case 'y':
11371 case 'x':
11372 case 'X':
11373 case 'P':
11374 break;
11376 case 's':
11377 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
11379 PRINT_OPERAND (file, x, 0);
11380 fputs (", ", file);
11382 return;
11384 case 'D':
11385 /* Little bit of braindamage here. The SSE compare instructions
11386 use completely different names for the comparisons than the
11387 fp conditional moves do. */
11388 if (TARGET_AVX)
11390 switch (GET_CODE (x))
11392 case EQ:
11393 fputs ("eq", file);
11394 break;
11395 case UNEQ:
11396 fputs ("eq_us", file);
11397 break;
11398 case LT:
11399 fputs ("lt", file);
11400 break;
11401 case UNLT:
11402 fputs ("nge", file);
11403 break;
11404 case LE:
11405 fputs ("le", file);
11406 break;
11407 case UNLE:
11408 fputs ("ngt", file);
11409 break;
11410 case UNORDERED:
11411 fputs ("unord", file);
11412 break;
11413 case NE:
11414 fputs ("neq", file);
11415 break;
11416 case LTGT:
11417 fputs ("neq_oq", file);
11418 break;
11419 case GE:
11420 fputs ("ge", file);
11421 break;
11422 case UNGE:
11423 fputs ("nlt", file);
11424 break;
11425 case GT:
11426 fputs ("gt", file);
11427 break;
11428 case UNGT:
11429 fputs ("nle", file);
11430 break;
11431 case ORDERED:
11432 fputs ("ord", file);
11433 break;
11434 default:
11435 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11436 return;
11439 else
11441 switch (GET_CODE (x))
11443 case EQ:
11444 case UNEQ:
11445 fputs ("eq", file);
11446 break;
11447 case LT:
11448 case UNLT:
11449 fputs ("lt", file);
11450 break;
11451 case LE:
11452 case UNLE:
11453 fputs ("le", file);
11454 break;
11455 case UNORDERED:
11456 fputs ("unord", file);
11457 break;
11458 case NE:
11459 case LTGT:
11460 fputs ("neq", file);
11461 break;
11462 case UNGE:
11463 case GE:
11464 fputs ("nlt", file);
11465 break;
11466 case UNGT:
11467 case GT:
11468 fputs ("nle", file);
11469 break;
11470 case ORDERED:
11471 fputs ("ord", file);
11472 break;
11473 default:
11474 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11475 return;
11478 return;
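/* Illustrative example (hypothetical pattern): for x = (lt ...), "%D"
   prints "lt", so a template along the lines of "cmp%D2ps" assembles
   to "cmpltps". Without AVX the unordered variants collapse onto the
   plain SSE names (UNLT also prints "lt"); with TARGET_AVX each gets
   a distinct predicate such as "nge" for UNLT.  */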
11479 case 'O':
11480 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11481 if (ASSEMBLER_DIALECT == ASM_ATT)
11483 switch (GET_MODE (x))
11485 case HImode: putc ('w', file); break;
11486 case SImode:
11487 case SFmode: putc ('l', file); break;
11488 case DImode:
11489 case DFmode: putc ('q', file); break;
11490 default: gcc_unreachable ();
11492 putc ('.', file);
11494 #endif
11495 return;
11496 case 'C':
11497 if (!COMPARISON_P (x))
11499 output_operand_lossage ("operand is neither a constant nor a "
11500 "condition code, invalid operand code "
11501 "'C'");
11502 return;
11504 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
11505 return;
11506 case 'F':
11507 if (!COMPARISON_P (x))
11509 output_operand_lossage ("operand is neither a constant nor a "
11510 "condition code, invalid operand code "
11511 "'F'");
11512 return;
11514 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11515 if (ASSEMBLER_DIALECT == ASM_ATT)
11516 putc ('.', file);
11517 #endif
11518 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
11519 return;
11521 /* Like above, but reverse condition */
11522 case 'c':
11523 /* Check to see if argument to %c is really a constant
11524 and not a condition code which needs to be reversed. */
11525 if (!COMPARISON_P (x))
11527 output_operand_lossage ("operand is neither a constant nor a "
11528 "condition code, invalid operand "
11529 "code 'c'");
11530 return;
11532 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
11533 return;
11534 case 'f':
11535 if (!COMPARISON_P (x))
11537 output_operand_lossage ("operand is neither a constant nor a "
11538 "condition code, invalid operand "
11539 "code 'f'");
11540 return;
11542 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11543 if (ASSEMBLER_DIALECT == ASM_ATT)
11544 putc ('.', file);
11545 #endif
11546 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
11547 return;
11549 case 'E':
11550 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
11551 return;
11553 case 'e':
11554 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
11555 return;
11557 case 'H':
11558 /* It doesn't actually matter what mode we use here, as we're
11559 only going to use this for printing. */
11560 x = adjust_address_nv (x, DImode, 8);
11561 break;
11563 case '+':
11565 rtx x;
11567 if (!optimize
11568 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
11569 return;
11571 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
11572 if (x)
11574 int pred_val = INTVAL (XEXP (x, 0));
11576 if (pred_val < REG_BR_PROB_BASE * 45 / 100
11577 || pred_val > REG_BR_PROB_BASE * 55 / 100)
11579 int taken = pred_val > REG_BR_PROB_BASE / 2;
11580 int cputaken = final_forward_branch_p (current_output_insn) == 0;
11582 /* Emit hints only in the case default branch prediction
11583 heuristics would fail. */
11584 if (taken != cputaken)
11586 /* We use 3e (DS) prefix for taken branches and
11587 2e (CS) prefix for not taken branches. */
11588 if (taken)
11589 fputs ("ds ; ", file);
11590 else
11591 fputs ("cs ; ", file);
11595 return;
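/* Illustrative example: a forward conditional branch is statically
   predicted not taken, so if its REG_BR_PROB note says taken (above
   55%), "%+" emits the DS (0x3e) prefix and the output reads e.g.
   "ds ; jne .L2" (hypothetical label); no hint is printed when the
   static heuristic already agrees with the note.  */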
11598 case 'Y':
11599 switch (GET_CODE (x))
11601 case NE:
11602 fputs ("neq", file);
11603 break;
11604 case EQ:
11605 fputs ("eq", file);
11606 break;
11607 case GE:
11608 case GEU:
11609 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11610 break;
11611 case GT:
11612 case GTU:
11613 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11614 break;
11615 case LE:
11616 case LEU:
11617 fputs ("le", file);
11618 break;
11619 case LT:
11620 case LTU:
11621 fputs ("lt", file);
11622 break;
11623 case UNORDERED:
11624 fputs ("unord", file);
11625 break;
11626 case ORDERED:
11627 fputs ("ord", file);
11628 break;
11629 case UNEQ:
11630 fputs ("ueq", file);
11631 break;
11632 case UNGE:
11633 fputs ("nlt", file);
11634 break;
11635 case UNGT:
11636 fputs ("nle", file);
11637 break;
11638 case UNLE:
11639 fputs ("ule", file);
11640 break;
11641 case UNLT:
11642 fputs ("ult", file);
11643 break;
11644 case LTGT:
11645 fputs ("une", file);
11646 break;
11647 default:
11648 output_operand_lossage ("operand is not a condition code, invalid operand code 'Y'");
11649 return;
11651 return;
11653 case ';':
11654 #if TARGET_MACHO
11655 fputs (" ; ", file);
11656 #else
11657 fputc (' ', file);
11658 #endif
11659 return;
11661 default:
11662 output_operand_lossage ("invalid operand code '%c'", code);
11666 if (REG_P (x))
11667 print_reg (x, code, file);
11669 else if (MEM_P (x))
11671 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
11672 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11673 && GET_MODE (x) != BLKmode)
11675 const char * size;
11676 switch (GET_MODE_SIZE (GET_MODE (x)))
11678 case 1: size = "BYTE"; break;
11679 case 2: size = "WORD"; break;
11680 case 4: size = "DWORD"; break;
11681 case 8: size = "QWORD"; break;
11682 case 12: size = "XWORD"; break;
11683 case 16:
11684 if (GET_MODE (x) == XFmode)
11685 size = "XWORD";
11686 else
11687 size = "XMMWORD";
11688 break;
11689 default:
11690 gcc_unreachable ();
11693 /* Check for explicit size override (codes 'b', 'w' and 'k') */
11694 if (code == 'b')
11695 size = "BYTE";
11696 else if (code == 'w')
11697 size = "WORD";
11698 else if (code == 'k')
11699 size = "DWORD";
11701 fputs (size, file);
11702 fputs (" PTR ", file);
11705 x = XEXP (x, 0);
11706 /* Avoid (%rip) for call operands. */
11707 if (CONSTANT_ADDRESS_P (x) && code == 'P'
11708 && !CONST_INT_P (x))
11709 output_addr_const (file, x);
11710 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
11711 output_operand_lossage ("invalid constraints for operand");
11712 else
11713 output_address (x);
11716 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
11718 REAL_VALUE_TYPE r;
11719 long l;
11721 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11722 REAL_VALUE_TO_TARGET_SINGLE (r, l);
11724 if (ASSEMBLER_DIALECT == ASM_ATT)
11725 putc ('$', file);
11726 fprintf (file, "0x%08lx", (long unsigned int) l);
11729 /* These float cases don't actually occur as immediate operands. */
11730 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
11732 char dstr[30];
11734 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11735 fprintf (file, "%s", dstr);
11738 else if (GET_CODE (x) == CONST_DOUBLE
11739 && GET_MODE (x) == XFmode)
11741 char dstr[30];
11743 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11744 fprintf (file, "%s", dstr);
11747 else
11749 /* We have patterns that allow zero sets of memory, for instance.
11750 In 64-bit mode, we should probably support all 8-byte vectors,
11751 since we can in fact encode that into an immediate. */
11752 if (GET_CODE (x) == CONST_VECTOR)
11754 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
11755 x = const0_rtx;
11758 if (code != 'P')
11760 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
11762 if (ASSEMBLER_DIALECT == ASM_ATT)
11763 putc ('$', file);
11765 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
11766 || GET_CODE (x) == LABEL_REF)
11768 if (ASSEMBLER_DIALECT == ASM_ATT)
11769 putc ('$', file);
11770 else
11771 fputs ("OFFSET FLAT:", file);
11774 if (CONST_INT_P (x))
11775 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11776 else if (flag_pic)
11777 output_pic_addr_const (file, x, code);
11778 else
11779 output_addr_const (file, x);
11783 /* Print a memory operand whose address is ADDR. */
11785 void
11786 print_operand_address (FILE *file, rtx addr)
11788 struct ix86_address parts;
11789 rtx base, index, disp;
11790 int scale;
11791 int ok = ix86_decompose_address (addr, &parts);
11793 gcc_assert (ok);
11795 base = parts.base;
11796 index = parts.index;
11797 disp = parts.disp;
11798 scale = parts.scale;
11800 switch (parts.seg)
11802 case SEG_DEFAULT:
11803 break;
11804 case SEG_FS:
11805 case SEG_GS:
11806 if (ASSEMBLER_DIALECT == ASM_ATT)
11807 putc ('%', file);
11808 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11809 break;
11810 default:
11811 gcc_unreachable ();
11814 /* Use one byte shorter RIP relative addressing for 64bit mode. */
11815 if (TARGET_64BIT && !base && !index)
11817 rtx symbol = disp;
11819 if (GET_CODE (disp) == CONST
11820 && GET_CODE (XEXP (disp, 0)) == PLUS
11821 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11822 symbol = XEXP (XEXP (disp, 0), 0);
11824 if (GET_CODE (symbol) == LABEL_REF
11825 || (GET_CODE (symbol) == SYMBOL_REF
11826 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11827 base = pc_rtx;
11829 if (!base && !index)
11831 /* Displacement only requires special attention. */
11833 if (CONST_INT_P (disp))
11835 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11836 fputs ("ds:", file);
11837 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11839 else if (flag_pic)
11840 output_pic_addr_const (file, disp, 0);
11841 else
11842 output_addr_const (file, disp);
11844 else
11846 if (ASSEMBLER_DIALECT == ASM_ATT)
11848 if (disp)
11850 if (flag_pic)
11851 output_pic_addr_const (file, disp, 0);
11852 else if (GET_CODE (disp) == LABEL_REF)
11853 output_asm_label (disp);
11854 else
11855 output_addr_const (file, disp);
11858 putc ('(', file);
11859 if (base)
11860 print_reg (base, 0, file);
11861 if (index)
11863 putc (',', file);
11864 print_reg (index, 0, file);
11865 if (scale != 1)
11866 fprintf (file, ",%d", scale);
11868 putc (')', file);
11870 else
11872 rtx offset = NULL_RTX;
11874 if (disp)
11876 /* Pull out the offset of a symbol; print any symbol itself. */
11877 if (GET_CODE (disp) == CONST
11878 && GET_CODE (XEXP (disp, 0)) == PLUS
11879 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11881 offset = XEXP (XEXP (disp, 0), 1);
11882 disp = gen_rtx_CONST (VOIDmode,
11883 XEXP (XEXP (disp, 0), 0));
11886 if (flag_pic)
11887 output_pic_addr_const (file, disp, 0);
11888 else if (GET_CODE (disp) == LABEL_REF)
11889 output_asm_label (disp);
11890 else if (CONST_INT_P (disp))
11891 offset = disp;
11892 else
11893 output_addr_const (file, disp);
11896 putc ('[', file);
11897 if (base)
11899 print_reg (base, 0, file);
11900 if (offset)
11902 if (INTVAL (offset) >= 0)
11903 putc ('+', file);
11904 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11907 else if (offset)
11908 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11909 else
11910 putc ('0', file);
11912 if (index)
11914 putc ('+', file);
11915 print_reg (index, 0, file);
11916 if (scale != 1)
11917 fprintf (file, "*%d", scale);
11919 putc (']', file);
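/* Illustrative example: an address that decomposes to base = %eax,
   index = %ebx, scale = 4, disp = 12 prints as "12(%eax,%ebx,4)"
   under ASM_ATT and as "[eax+12+ebx*4]" under ASM_INTEL, following
   the two branches above.  */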
11924 bool
11925 output_addr_const_extra (FILE *file, rtx x)
11927 rtx op;
11929 if (GET_CODE (x) != UNSPEC)
11930 return false;
11932 op = XVECEXP (x, 0, 0);
11933 switch (XINT (x, 1))
11935 case UNSPEC_GOTTPOFF:
11936 output_addr_const (file, op);
11937 /* FIXME: This might be @TPOFF in Sun ld. */
11938 fputs ("@GOTTPOFF", file);
11939 break;
11940 case UNSPEC_TPOFF:
11941 output_addr_const (file, op);
11942 fputs ("@TPOFF", file);
11943 break;
11944 case UNSPEC_NTPOFF:
11945 output_addr_const (file, op);
11946 if (TARGET_64BIT)
11947 fputs ("@TPOFF", file);
11948 else
11949 fputs ("@NTPOFF", file);
11950 break;
11951 case UNSPEC_DTPOFF:
11952 output_addr_const (file, op);
11953 fputs ("@DTPOFF", file);
11954 break;
11955 case UNSPEC_GOTNTPOFF:
11956 output_addr_const (file, op);
11957 if (TARGET_64BIT)
11958 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11959 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
11960 else
11961 fputs ("@GOTNTPOFF", file);
11962 break;
11963 case UNSPEC_INDNTPOFF:
11964 output_addr_const (file, op);
11965 fputs ("@INDNTPOFF", file);
11966 break;
11967 #if TARGET_MACHO
11968 case UNSPEC_MACHOPIC_OFFSET:
11969 output_addr_const (file, op);
11970 putc ('-', file);
11971 machopic_output_function_base_name (file);
11972 break;
11973 #endif
11975 default:
11976 return false;
11979 return true;
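/* Illustrative example (hypothetical symbol): the rtx
   (unspec [(symbol_ref "x")] UNSPEC_NTPOFF) prints as "x@NTPOFF" on
   ia32 but as "x@TPOFF" on x86-64, matching the TLS relocation names
   each assembler expects.  */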
11982 /* Split one or more DImode RTL references into pairs of SImode
11983 references. The RTL can be REG, offsettable MEM, integer constant, or
11984 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11985 split and "num" is its length. lo_half and hi_half are output arrays
11986 that parallel "operands". */
11988 void
11989 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11991 while (num--)
11993 rtx op = operands[num];
11995 /* simplify_subreg refuses to split volatile memory addresses,
11996 but we still have to handle them. */
11997 if (MEM_P (op))
11999 lo_half[num] = adjust_address (op, SImode, 0);
12000 hi_half[num] = adjust_address (op, SImode, 4);
12002 else
12004 lo_half[num] = simplify_gen_subreg (SImode, op,
12005 GET_MODE (op) == VOIDmode
12006 ? DImode : GET_MODE (op), 0);
12007 hi_half[num] = simplify_gen_subreg (SImode, op,
12008 GET_MODE (op) == VOIDmode
12009 ? DImode : GET_MODE (op), 4);
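/* Illustrative example (hypothetical operand): a DImode value at
   (mem:DI (plus (reg sp) (const_int 8))) splits into SImode halves
   at stack offsets 8 and 12, so one 64-bit move becomes two 32-bit
   moves.  */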
12013 /* Split one or more TImode RTL references into pairs of DImode
12014 references. The RTL can be REG, offsettable MEM, integer constant, or
12015 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
12016 split and "num" is its length. lo_half and hi_half are output arrays
12017 that parallel "operands". */
12019 void
12020 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
12022 while (num--)
12024 rtx op = operands[num];
12026 /* simplify_subreg refuses to split volatile memory addresses, but we
12027 still have to handle them. */
12028 if (MEM_P (op))
12030 lo_half[num] = adjust_address (op, DImode, 0);
12031 hi_half[num] = adjust_address (op, DImode, 8);
12033 else
12035 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
12036 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
12041 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
12042 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
12043 is the expression of the binary operation. The output may either be
12044 emitted here, or returned to the caller, like all output_* functions.
12046 There is no guarantee that the operands are the same mode, as they
12047 might be within FLOAT or FLOAT_EXTEND expressions. */
12049 #ifndef SYSV386_COMPAT
12050 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
12051 wants to fix the assemblers because that causes incompatibility
12052 with gcc. No-one wants to fix gcc because that causes
12053 incompatibility with assemblers... You can use the option of
12054 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
12055 #define SYSV386_COMPAT 1
12056 #endif
12058 const char *
12059 output_387_binary_op (rtx insn, rtx *operands)
12061 static char buf[40];
12062 const char *p;
12063 const char *ssep;
12064 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
12066 #ifdef ENABLE_CHECKING
12067 /* Even if we do not want to check the inputs, this documents input
12068 constraints. Which helps in understanding the following code. */
12069 if (STACK_REG_P (operands[0])
12070 && ((REG_P (operands[1])
12071 && REGNO (operands[0]) == REGNO (operands[1])
12072 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
12073 || (REG_P (operands[2])
12074 && REGNO (operands[0]) == REGNO (operands[2])
12075 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
12076 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
12077 ; /* ok */
12078 else
12079 gcc_assert (is_sse);
12080 #endif
12082 switch (GET_CODE (operands[3]))
12084 case PLUS:
12085 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12086 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12087 p = "fiadd";
12088 else
12089 p = "fadd";
12090 ssep = "vadd";
12091 break;
12093 case MINUS:
12094 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12095 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12096 p = "fisub";
12097 else
12098 p = "fsub";
12099 ssep = "vsub";
12100 break;
12102 case MULT:
12103 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12104 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12105 p = "fimul";
12106 else
12107 p = "fmul";
12108 ssep = "vmul";
12109 break;
12111 case DIV:
12112 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12113 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12114 p = "fidiv";
12115 else
12116 p = "fdiv";
12117 ssep = "vdiv";
12118 break;
12120 default:
12121 gcc_unreachable ();
12124 if (is_sse)
12126 if (TARGET_AVX)
12128 strcpy (buf, ssep);
12129 if (GET_MODE (operands[0]) == SFmode)
12130 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
12131 else
12132 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
12134 else
12136 strcpy (buf, ssep + 1);
12137 if (GET_MODE (operands[0]) == SFmode)
12138 strcat (buf, "ss\t{%2, %0|%0, %2}");
12139 else
12140 strcat (buf, "sd\t{%2, %0|%0, %2}");
12142 return buf;
12144 strcpy (buf, p);
12146 switch (GET_CODE (operands[3]))
12148 case MULT:
12149 case PLUS:
12150 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
12152 rtx temp = operands[2];
12153 operands[2] = operands[1];
12154 operands[1] = temp;
12157 /* We now know operands[0] == operands[1]. */
12159 if (MEM_P (operands[2]))
12161 p = "%Z2\t%2";
12162 break;
12165 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
12167 if (STACK_TOP_P (operands[0]))
12168 /* How is it that we are storing to a dead operand[2]?
12169 Well, presumably operands[1] is dead too. We can't
12170 store the result to st(0) as st(0) gets popped on this
12171 instruction. Instead store to operands[2] (which I
12172 think has to be st(1)). st(1) will be popped later.
12173 gcc <= 2.8.1 didn't have this check and generated
12174 assembly code that the Unixware assembler rejected. */
12175 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
12176 else
12177 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
12178 break;
12181 if (STACK_TOP_P (operands[0]))
12182 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
12183 else
12184 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
12185 break;
12187 case MINUS:
12188 case DIV:
12189 if (MEM_P (operands[1]))
12191 p = "r%Z1\t%1";
12192 break;
12195 if (MEM_P (operands[2]))
12197 p = "%Z2\t%2";
12198 break;
12201 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
12203 #if SYSV386_COMPAT
12204 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
12205 derived assemblers, confusingly reverse the direction of
12206 the operation for fsub{r} and fdiv{r} when the
12207 destination register is not st(0). The Intel assembler
12208 doesn't have this brain damage. Read !SYSV386_COMPAT to
12209 figure out what the hardware really does. */
12210 if (STACK_TOP_P (operands[0]))
12211 p = "{p\t%0, %2|rp\t%2, %0}";
12212 else
12213 p = "{rp\t%2, %0|p\t%0, %2}";
12214 #else
12215 if (STACK_TOP_P (operands[0]))
12216 /* As above for fmul/fadd, we can't store to st(0). */
12217 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
12218 else
12219 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
12220 #endif
12221 break;
12224 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
12226 #if SYSV386_COMPAT
12227 if (STACK_TOP_P (operands[0]))
12228 p = "{rp\t%0, %1|p\t%1, %0}";
12229 else
12230 p = "{p\t%1, %0|rp\t%0, %1}";
12231 #else
12232 if (STACK_TOP_P (operands[0]))
12233 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
12234 else
12235 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
12236 #endif
12237 break;
12240 if (STACK_TOP_P (operands[0]))
12242 if (STACK_TOP_P (operands[1]))
12243 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
12244 else
12245 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
12246 break;
12248 else if (STACK_TOP_P (operands[1]))
12250 #if SYSV386_COMPAT
12251 p = "{\t%1, %0|r\t%0, %1}";
12252 #else
12253 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
12254 #endif
12256 else
12258 #if SYSV386_COMPAT
12259 p = "{r\t%2, %0|\t%0, %2}";
12260 #else
12261 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
12262 #endif
12264 break;
12266 default:
12267 gcc_unreachable ();
12270 strcat (buf, p);
12271 return buf;
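/* Illustrative example (hypothetical operands): for st(0) += an
   8-byte memory operand, the code above builds "fadd%Z2\t%2", which
   prints as "faddl 8(%ebp)" since 'Z' emits the 'l' suffix for a
   DFmode memory operand.  */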
12274 /* Return needed mode for entity in optimize_mode_switching pass. */
12276 int
12277 ix86_mode_needed (int entity, rtx insn)
12279 enum attr_i387_cw mode;
12281 /* The mode UNINITIALIZED is used to store the control word after a
12282 function call or ASM pattern. The mode ANY specifies that the
12283 function has no requirements on the control word and makes no
12284 changes in the bits we are interested in. */
12286 if (CALL_P (insn)
12287 || (NONJUMP_INSN_P (insn)
12288 && (asm_noperands (PATTERN (insn)) >= 0
12289 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
12290 return I387_CW_UNINITIALIZED;
12292 if (recog_memoized (insn) < 0)
12293 return I387_CW_ANY;
12295 mode = get_attr_i387_cw (insn);
12297 switch (entity)
12299 case I387_TRUNC:
12300 if (mode == I387_CW_TRUNC)
12301 return mode;
12302 break;
12304 case I387_FLOOR:
12305 if (mode == I387_CW_FLOOR)
12306 return mode;
12307 break;
12309 case I387_CEIL:
12310 if (mode == I387_CW_CEIL)
12311 return mode;
12312 break;
12314 case I387_MASK_PM:
12315 if (mode == I387_CW_MASK_PM)
12316 return mode;
12317 break;
12319 default:
12320 gcc_unreachable ();
12323 return I387_CW_ANY;
12326 /* Output code to initialize the control word copies used by the
12327 trunc?f?i and rounding patterns. MODE selects which variant of the
12328 control word (truncation, floor, ceil or precision masking) to store. */
12330 void
12331 emit_i387_cw_initialization (int mode)
12333 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
12334 rtx new_mode;
12336 enum ix86_stack_slot slot;
12338 rtx reg = gen_reg_rtx (HImode);
12340 emit_insn (gen_x86_fnstcw_1 (stored_mode));
12341 emit_move_insn (reg, copy_rtx (stored_mode));
12343 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
12344 || optimize_function_for_size_p (cfun))
12346 switch (mode)
12348 case I387_CW_TRUNC:
12349 /* round toward zero (truncate) */
12350 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
12351 slot = SLOT_CW_TRUNC;
12352 break;
12354 case I387_CW_FLOOR:
12355 /* round down toward -oo */
12356 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12357 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
12358 slot = SLOT_CW_FLOOR;
12359 break;
12361 case I387_CW_CEIL:
12362 /* round up toward +oo */
12363 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12364 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
12365 slot = SLOT_CW_CEIL;
12366 break;
12368 case I387_CW_MASK_PM:
12369 /* mask precision exception for nearbyint() */
12370 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12371 slot = SLOT_CW_MASK_PM;
12372 break;
12374 default:
12375 gcc_unreachable ();
12378 else
12380 switch (mode)
12382 case I387_CW_TRUNC:
12383 /* round toward zero (truncate) */
12384 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
12385 slot = SLOT_CW_TRUNC;
12386 break;
12388 case I387_CW_FLOOR:
12389 /* round down toward -oo */
12390 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
12391 slot = SLOT_CW_FLOOR;
12392 break;
12394 case I387_CW_CEIL:
12395 /* round up toward +oo */
12396 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
12397 slot = SLOT_CW_CEIL;
12398 break;
12400 case I387_CW_MASK_PM:
12401 /* mask precision exception for nearbyint() */
12402 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12403 slot = SLOT_CW_MASK_PM;
12404 break;
12406 default:
12407 gcc_unreachable ();
12411 gcc_assert (slot < MAX_386_STACK_LOCALS);
12413 new_mode = assign_386_stack_local (HImode, slot);
12414 emit_move_insn (new_mode, reg);
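/* Illustrative note: the rounding-control field is bits 10-11 of the
   x87 control word, so the masks above select 0x0c00 (11b) truncate,
   0x0400 (01b) round toward -inf and 0x0800 (10b) round toward +inf;
   bit 5 (0x0020) masks the precision exception for nearbyint().  */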
12417 /* Output code for INSN to convert a float to a signed int. OPERANDS
12418 are the insn operands. The output may be [HSD]Imode and the input
12419 operand may be [SDX]Fmode. */
12421 const char *
12422 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
12424 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12425 int dimode_p = GET_MODE (operands[0]) == DImode;
12426 int round_mode = get_attr_i387_cw (insn);
12428 /* Jump through a hoop or two for DImode, since the hardware has no
12429 non-popping instruction. We used to do this a different way, but
12430 that was somewhat fragile and broke with post-reload splitters. */
12431 if ((dimode_p || fisttp) && !stack_top_dies)
12432 output_asm_insn ("fld\t%y1", operands);
12434 gcc_assert (STACK_TOP_P (operands[1]));
12435 gcc_assert (MEM_P (operands[0]));
12436 gcc_assert (GET_MODE (operands[1]) != TFmode);
12438 if (fisttp)
12439 output_asm_insn ("fisttp%Z0\t%0", operands);
12440 else
12442 if (round_mode != I387_CW_ANY)
12443 output_asm_insn ("fldcw\t%3", operands);
12444 if (stack_top_dies || dimode_p)
12445 output_asm_insn ("fistp%Z0\t%0", operands);
12446 else
12447 output_asm_insn ("fist%Z0\t%0", operands);
12448 if (round_mode != I387_CW_ANY)
12449 output_asm_insn ("fldcw\t%2", operands);
12452 return "";
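/* Illustrative example: without fisttp, truncating st(0) to a 32-bit
   integer emits roughly
       fldcw   <truncation control word>
       fistpl  <dest>
       fldcw   <saved control word>
   temporarily forcing the x87 rounding mode to round-toward-zero.  */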
12455 /* Output code for x87 ffreep insn. The OPNO argument, which may only
12456 have the values zero or one, indicates the ffreep insn's operand
12457 from the OPERANDS array. */
12459 static const char *
12460 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
12462 if (TARGET_USE_FFREEP)
12463 #if HAVE_AS_IX86_FFREEP
12464 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
12465 #else
12467 static char retval[] = ".word\t0xc_df";
12468 int regno = REGNO (operands[opno]);
12470 gcc_assert (FP_REGNO_P (regno));
12472 retval[9] = '0' + (regno - FIRST_STACK_REG);
12473 return retval;
12475 #endif
12477 return opno ? "fstp\t%y1" : "fstp\t%y0";
12481 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
12482 should be used. UNORDERED_P is true when fucom should be used. */
12484 const char *
12485 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
12487 int stack_top_dies;
12488 rtx cmp_op0, cmp_op1;
12489 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
12491 if (eflags_p)
12493 cmp_op0 = operands[0];
12494 cmp_op1 = operands[1];
12496 else
12498 cmp_op0 = operands[1];
12499 cmp_op1 = operands[2];
12502 if (is_sse)
12504 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
12505 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
12506 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
12507 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
12509 if (GET_MODE (operands[0]) == SFmode)
12510 if (unordered_p)
12511 return &ucomiss[TARGET_AVX ? 0 : 1];
12512 else
12513 return &comiss[TARGET_AVX ? 0 : 1];
12514 else
12515 if (unordered_p)
12516 return &ucomisd[TARGET_AVX ? 0 : 1];
12517 else
12518 return &comisd[TARGET_AVX ? 0 : 1];
12521 gcc_assert (STACK_TOP_P (cmp_op0));
12523 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12525 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
12527 if (stack_top_dies)
12529 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
12530 return output_387_ffreep (operands, 1);
12532 else
12533 return "ftst\n\tfnstsw\t%0";
12536 if (STACK_REG_P (cmp_op1)
12537 && stack_top_dies
12538 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
12539 && REGNO (cmp_op1) != FIRST_STACK_REG)
12541 /* If the top of the 387 stack dies, and the other operand
12542 is also a stack register that dies, then this must be a
12543 `fcompp' float compare. */
12545 if (eflags_p)
12547 /* There is no double popping fcomi variant. Fortunately,
12548 eflags is immune from the fstp's cc clobbering. */
12549 if (unordered_p)
12550 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
12551 else
12552 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
12553 return output_387_ffreep (operands, 0);
12555 else
12557 if (unordered_p)
12558 return "fucompp\n\tfnstsw\t%0";
12559 else
12560 return "fcompp\n\tfnstsw\t%0";
12563 else
12565 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
12567 static const char * const alt[16] =
12569 "fcom%Z2\t%y2\n\tfnstsw\t%0",
12570 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
12571 "fucom%Z2\t%y2\n\tfnstsw\t%0",
12572 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
12574 "ficom%Z2\t%y2\n\tfnstsw\t%0",
12575 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
12576 NULL,
12577 NULL,
12579 "fcomi\t{%y1, %0|%0, %y1}",
12580 "fcomip\t{%y1, %0|%0, %y1}",
12581 "fucomi\t{%y1, %0|%0, %y1}",
12582 "fucomip\t{%y1, %0|%0, %y1}",
12584 NULL,
12585 NULL,
12586 NULL,
12587 NULL
12590 int mask;
12591 const char *ret;
12593 mask = eflags_p << 3;
12594 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
12595 mask |= unordered_p << 1;
12596 mask |= stack_top_dies;
12598 gcc_assert (mask < 16);
12599 ret = alt[mask];
12600 gcc_assert (ret);
12602 return ret;
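/* Illustrative example: eflags_p = 1, a floating-point cmp_op1,
   unordered_p = 1 and a dying stack top give
   mask = (1 << 3) | (1 << 1) | 1 = 11, selecting
   "fucomip\t{%y1, %0|%0, %y1}" from the table above.  */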
12606 void
12607 ix86_output_addr_vec_elt (FILE *file, int value)
12609 const char *directive = ASM_LONG;
12611 #ifdef ASM_QUAD
12612 if (TARGET_64BIT)
12613 directive = ASM_QUAD;
12614 #else
12615 gcc_assert (!TARGET_64BIT);
12616 #endif
12618 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
12621 void
12622 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12624 const char *directive = ASM_LONG;
12626 #ifdef ASM_QUAD
12627 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12628 directive = ASM_QUAD;
12629 #else
12630 gcc_assert (!TARGET_64BIT);
12631 #endif
12632 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
12633 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12634 fprintf (file, "%s%s%d-%s%d\n",
12635 directive, LPREFIX, value, LPREFIX, rel);
12636 else if (HAVE_AS_GOTOFF_IN_DATA)
12637 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
12638 #if TARGET_MACHO
12639 else if (TARGET_MACHO)
12641 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
12642 machopic_output_function_base_name (file);
12643 fprintf(file, "\n");
12645 #endif
12646 else
12647 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
12648 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
12651 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
12652 for the target. */
12654 void
12655 ix86_expand_clear (rtx dest)
12657 rtx tmp;
12659 /* We play register width games, which are only valid after reload. */
12660 gcc_assert (reload_completed);
12662 /* Avoid HImode and its attendant prefix byte. */
12663 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
12664 dest = gen_rtx_REG (SImode, REGNO (dest));
12665 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
12667 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
12668 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
12670 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12671 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12674 emit_insn (tmp);
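/* Illustrative example: clearing %eax after reload normally emits
   (parallel [(set (reg:SI ax) (const_int 0))
              (clobber (reg:CC flags))])
   i.e. "xorl %eax, %eax"; only TARGET_USE_MOV0 targets that are not
   optimizing the insn for speed keep "movl $0, %eax", which leaves
   the flags untouched.  */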
12677 /* X is an unchanging MEM. If it is a constant pool reference, return
12678 the constant pool rtx, else NULL. */
12680 rtx
12681 maybe_get_pool_constant (rtx x)
12683 x = ix86_delegitimize_address (XEXP (x, 0));
12685 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
12686 return get_pool_constant (x);
12688 return NULL_RTX;
12691 void
12692 ix86_expand_move (enum machine_mode mode, rtx operands[])
12694 rtx op0, op1;
12695 enum tls_model model;
12697 op0 = operands[0];
12698 op1 = operands[1];
12700 if (GET_CODE (op1) == SYMBOL_REF)
12702 model = SYMBOL_REF_TLS_MODEL (op1);
12703 if (model)
12705 op1 = legitimize_tls_address (op1, model, true);
12706 op1 = force_operand (op1, op0);
12707 if (op1 == op0)
12708 return;
12710 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12711 && SYMBOL_REF_DLLIMPORT_P (op1))
12712 op1 = legitimize_dllimport_symbol (op1, false);
12714 else if (GET_CODE (op1) == CONST
12715 && GET_CODE (XEXP (op1, 0)) == PLUS
12716 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
12718 rtx addend = XEXP (XEXP (op1, 0), 1);
12719 rtx symbol = XEXP (XEXP (op1, 0), 0);
12720 rtx tmp = NULL;
12722 model = SYMBOL_REF_TLS_MODEL (symbol);
12723 if (model)
12724 tmp = legitimize_tls_address (symbol, model, true);
12725 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12726 && SYMBOL_REF_DLLIMPORT_P (symbol))
12727 tmp = legitimize_dllimport_symbol (symbol, true);
12729 if (tmp)
12731 tmp = force_operand (tmp, NULL);
12732 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
12733 op0, 1, OPTAB_DIRECT);
12734 if (tmp == op0)
12735 return;
12739 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
12741 if (TARGET_MACHO && !TARGET_64BIT)
12743 #if TARGET_MACHO
12744 if (MACHOPIC_PURE)
12746 rtx temp = ((reload_in_progress
12747 || ((op0 && REG_P (op0))
12748 && mode == Pmode))
12749 ? op0 : gen_reg_rtx (Pmode));
12750 op1 = machopic_indirect_data_reference (op1, temp);
12751 op1 = machopic_legitimize_pic_address (op1, mode,
12752 temp == op1 ? 0 : temp);
12754 else if (MACHOPIC_INDIRECT)
12755 op1 = machopic_indirect_data_reference (op1, 0);
12756 if (op0 == op1)
12757 return;
12758 #endif
12760 else
12762 if (MEM_P (op0))
12763 op1 = force_reg (Pmode, op1);
12764 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
12766 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
12767 op1 = legitimize_pic_address (op1, reg);
12768 if (op0 == op1)
12769 return;
12773 else
12775 if (MEM_P (op0)
12776 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12777 || !push_operand (op0, mode))
12778 && MEM_P (op1))
12779 op1 = force_reg (mode, op1);
12781 if (push_operand (op0, mode)
12782 && ! general_no_elim_operand (op1, mode))
12783 op1 = copy_to_mode_reg (mode, op1);
12785 /* Force large constants in 64bit compilation into register
12786 to get them CSEed. */
12787 if (can_create_pseudo_p ()
12788 && (mode == DImode) && TARGET_64BIT
12789 && immediate_operand (op1, mode)
12790 && !x86_64_zext_immediate_operand (op1, VOIDmode)
12791 && !register_operand (op0, mode)
12792 && optimize)
12793 op1 = copy_to_mode_reg (mode, op1);
12795 if (can_create_pseudo_p ()
12796 && FLOAT_MODE_P (mode)
12797 && GET_CODE (op1) == CONST_DOUBLE)
12799 /* If we are loading a floating point constant to a register,
12800 force the value to memory now, since we'll get better code
12801 out of the back end. */
12803 op1 = validize_mem (force_const_mem (mode, op1));
12804 if (!register_operand (op0, mode))
12806 rtx temp = gen_reg_rtx (mode);
12807 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12808 emit_move_insn (op0, temp);
12809 return;
12814 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12817 void
12818 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12820 rtx op0 = operands[0], op1 = operands[1];
12821 unsigned int align = GET_MODE_ALIGNMENT (mode);
12823 /* Force constants other than zero into memory. We do not know how
12824 the instructions used to build constants modify the upper 64 bits
12825 of the register; once we have that information we may be able
12826 to handle some of them more efficiently. */
12827 if (can_create_pseudo_p ()
12828 && register_operand (op0, mode)
12829 && (CONSTANT_P (op1)
12830 || (GET_CODE (op1) == SUBREG
12831 && CONSTANT_P (SUBREG_REG (op1))))
12832 && standard_sse_constant_p (op1) <= 0)
12833 op1 = validize_mem (force_const_mem (mode, op1));
12835 /* We need to check memory alignment for SSE modes, since an attribute
12836 can make operands unaligned. */
12837 if (can_create_pseudo_p ()
12838 && SSE_REG_MODE_P (mode)
12839 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12840 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12842 rtx tmp[2];
12844 /* ix86_expand_vector_move_misalign() does not like constants ... */
12845 if (CONSTANT_P (op1)
12846 || (GET_CODE (op1) == SUBREG
12847 && CONSTANT_P (SUBREG_REG (op1))))
12848 op1 = validize_mem (force_const_mem (mode, op1));
12850 /* ... nor both arguments in memory. */
12851 if (!register_operand (op0, mode)
12852 && !register_operand (op1, mode))
12853 op1 = force_reg (mode, op1);
12855 tmp[0] = op0; tmp[1] = op1;
12856 ix86_expand_vector_move_misalign (mode, tmp);
12857 return;
12860 /* Make operand1 a register if it isn't already. */
12861 if (can_create_pseudo_p ()
12862 && !register_operand (op0, mode)
12863 && !register_operand (op1, mode))
12865 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
12866 return;
12869 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12872 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
12873 straight to ix86_expand_vector_move. */
12874 /* Code generation for scalar reg-reg moves of single and double precision data:
12875 if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
12876 movaps reg, reg
12877 else
12878 movss reg, reg
12879 if (x86_sse_partial_reg_dependency == true)
12880 movapd reg, reg
12881 else
12882 movsd reg, reg
12884 Code generation for scalar loads of double precision data:
12885 if (x86_sse_split_regs == true)
12886 movlpd mem, reg (gas syntax)
12887 else
12888 movsd mem, reg
12890 Code generation for unaligned packed loads of single precision data
12891 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12892 if (x86_sse_unaligned_move_optimal)
12893 movups mem, reg
12895 if (x86_sse_partial_reg_dependency == true)
12897 xorps reg, reg
12898 movlps mem, reg
12899 movhps mem+8, reg
12901 else
12903 movlps mem, reg
12904 movhps mem+8, reg
12907 Code generation for unaligned packed loads of double precision data
12908 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12909 if (x86_sse_unaligned_move_optimal)
12910 movupd mem, reg
12912 if (x86_sse_split_regs == true)
12914 movlpd mem, reg
12915 movhpd mem+8, reg
12917 else
12919 movsd mem, reg
12920 movhpd mem+8, reg
12924 void
12925 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
12927 rtx op0, op1, m;
12929 op0 = operands[0];
12930 op1 = operands[1];
12932 if (TARGET_AVX)
12934 switch (GET_MODE_CLASS (mode))
12936 case MODE_VECTOR_INT:
12937 case MODE_INT:
12938 switch (GET_MODE_SIZE (mode))
12940 case 16:
12941 op0 = gen_lowpart (V16QImode, op0);
12942 op1 = gen_lowpart (V16QImode, op1);
12943 emit_insn (gen_avx_movdqu (op0, op1));
12944 break;
12945 case 32:
12946 op0 = gen_lowpart (V32QImode, op0);
12947 op1 = gen_lowpart (V32QImode, op1);
12948 emit_insn (gen_avx_movdqu256 (op0, op1));
12949 break;
12950 default:
12951 gcc_unreachable ();
12953 break;
12954 case MODE_VECTOR_FLOAT:
12955 op0 = gen_lowpart (mode, op0);
12956 op1 = gen_lowpart (mode, op1);
12958 switch (mode)
12960 case V4SFmode:
12961 emit_insn (gen_avx_movups (op0, op1));
12962 break;
12963 case V8SFmode:
12964 emit_insn (gen_avx_movups256 (op0, op1));
12965 break;
12966 case V2DFmode:
12967 emit_insn (gen_avx_movupd (op0, op1));
12968 break;
12969 case V4DFmode:
12970 emit_insn (gen_avx_movupd256 (op0, op1));
12971 break;
12972 default:
12973 gcc_unreachable ();
12975 break;
12977 default:
12978 gcc_unreachable ();
12981 return;
12984 if (MEM_P (op1))
12986 /* If we're optimizing for size, movups is the smallest. */
12987 if (optimize_insn_for_size_p ())
12989 op0 = gen_lowpart (V4SFmode, op0);
12990 op1 = gen_lowpart (V4SFmode, op1);
12991 emit_insn (gen_sse_movups (op0, op1));
12992 return;
12995 /* ??? If we have typed data, then it would appear that using
12996 movdqu is the only way to get unaligned data loaded with
12997 integer type. */
12998 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13000 op0 = gen_lowpart (V16QImode, op0);
13001 op1 = gen_lowpart (V16QImode, op1);
13002 emit_insn (gen_sse2_movdqu (op0, op1));
13003 return;
13006 if (TARGET_SSE2 && mode == V2DFmode)
13008 rtx zero;
13010 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
13012 op0 = gen_lowpart (V2DFmode, op0);
13013 op1 = gen_lowpart (V2DFmode, op1);
13014 emit_insn (gen_sse2_movupd (op0, op1));
13015 return;
13018 /* When SSE registers are split into halves, we can avoid
13019 writing to the top half twice. */
13020 if (TARGET_SSE_SPLIT_REGS)
13022 emit_clobber (op0);
13023 zero = op0;
13025 else
13027 /* ??? Not sure about the best option for the Intel chips.
13028 The following would seem to satisfy; the register is
13029 entirely cleared, breaking the dependency chain. We
13030 then store to the upper half, with a dependency depth
13031 of one. A rumor has it that Intel recommends two movsd
13032 followed by an unpacklpd, but this is unconfirmed. And
13033 given that the dependency depth of the unpacklpd would
13034 still be one, I'm not sure why this would be better. */
13035 zero = CONST0_RTX (V2DFmode);
13038 m = adjust_address (op1, DFmode, 0);
13039 emit_insn (gen_sse2_loadlpd (op0, zero, m));
13040 m = adjust_address (op1, DFmode, 8);
13041 emit_insn (gen_sse2_loadhpd (op0, op0, m));
13043 else
13045 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
13047 op0 = gen_lowpart (V4SFmode, op0);
13048 op1 = gen_lowpart (V4SFmode, op1);
13049 emit_insn (gen_sse_movups (op0, op1));
13050 return;
13053 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
13054 emit_move_insn (op0, CONST0_RTX (mode));
13055 else
13056 emit_clobber (op0);
13058 if (mode != V4SFmode)
13059 op0 = gen_lowpart (V4SFmode, op0);
13060 m = adjust_address (op1, V2SFmode, 0);
13061 emit_insn (gen_sse_loadlps (op0, op0, m));
13062 m = adjust_address (op1, V2SFmode, 8);
13063 emit_insn (gen_sse_loadhps (op0, op0, m));
13066 else if (MEM_P (op0))
13068 /* If we're optimizing for size, movups is the smallest. */
13069 if (optimize_insn_for_size_p ())
13071 op0 = gen_lowpart (V4SFmode, op0);
13072 op1 = gen_lowpart (V4SFmode, op1);
13073 emit_insn (gen_sse_movups (op0, op1));
13074 return;
13077 /* ??? Similar to above, only less clear because of quote
13078 typeless stores unquote. */
13079 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
13080 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13082 op0 = gen_lowpart (V16QImode, op0);
13083 op1 = gen_lowpart (V16QImode, op1);
13084 emit_insn (gen_sse2_movdqu (op0, op1));
13085 return;
13088 if (TARGET_SSE2 && mode == V2DFmode)
13090 m = adjust_address (op0, DFmode, 0);
13091 emit_insn (gen_sse2_storelpd (m, op1));
13092 m = adjust_address (op0, DFmode, 8);
13093 emit_insn (gen_sse2_storehpd (m, op1));
13095 else
13097 if (mode != V4SFmode)
13098 op1 = gen_lowpart (V4SFmode, op1);
13099 m = adjust_address (op0, V2SFmode, 0);
13100 emit_insn (gen_sse_storelps (m, op1));
13101 m = adjust_address (op0, V2SFmode, 8);
13102 emit_insn (gen_sse_storehps (m, op1));
13105 else
13106 gcc_unreachable ();
13109 /* Expand a push in MODE. This is some mode for which we do not support
13110 proper push instructions, at least from the registers that we expect
13111 the value to live in. */
13113 void
13114 ix86_expand_push (enum machine_mode mode, rtx x)
13116 rtx tmp;
13118 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
13119 GEN_INT (-GET_MODE_SIZE (mode)),
13120 stack_pointer_rtx, 1, OPTAB_DIRECT);
13121 if (tmp != stack_pointer_rtx)
13122 emit_move_insn (stack_pointer_rtx, tmp);
13124 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
13126 /* When we push an operand onto the stack, it has to be aligned at least
13127 at the function argument boundary. However, since we don't have
13128 the argument type, we can't determine the actual argument
13129 boundary. */
13130 emit_move_insn (tmp, x);
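/* Illustrative example (hypothetical mode): pushing a V4SF value,
   which has no hardware push, expands to roughly
       sub  $16, %esp
       <move the value to (mem:V4SF (reg sp))>
   i.e. an explicit stack-pointer adjustment followed by a store.  */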
13133 /* Helper function of ix86_fixup_binary_operands to canonicalize
13134 operand order. Returns true if the operands should be swapped. */
13136 static bool
13137 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
13138 rtx operands[])
13140 rtx dst = operands[0];
13141 rtx src1 = operands[1];
13142 rtx src2 = operands[2];
13144 /* If the operation is not commutative, we can't do anything. */
13145 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
13146 return false;
13148 /* Highest priority is that src1 should match dst. */
13149 if (rtx_equal_p (dst, src1))
13150 return false;
13151 if (rtx_equal_p (dst, src2))
13152 return true;
13154 /* Next highest priority is that immediate constants come second. */
13155 if (immediate_operand (src2, mode))
13156 return false;
13157 if (immediate_operand (src1, mode))
13158 return true;
13160 /* Lowest priority is that memory references should come second. */
13161 if (MEM_P (src2))
13162 return false;
13163 if (MEM_P (src1))
13164 return true;
13166 return false;
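/* Illustrative example (hypothetical pseudos): for the commutative
   addition a = b + a, dst matches src2, so the operands are swapped;
   src1 then matches dst and the two-address "add" form needs no
   extra register copy.  */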
13170 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
13171 destination to use for the operation. If different from the true
13172 destination in operands[0], a copy operation will be required. */
13174 rtx
13175 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
13176 rtx operands[])
13178 rtx dst = operands[0];
13179 rtx src1 = operands[1];
13180 rtx src2 = operands[2];
13182 /* Canonicalize operand order. */
13183 if (ix86_swap_binary_operands_p (code, mode, operands))
13185 rtx temp;
13187 /* It is invalid to swap operands of different modes. */
13188 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
13190 temp = src1;
13191 src1 = src2;
13192 src2 = temp;
13195 /* Both source operands cannot be in memory. */
13196 if (MEM_P (src1) && MEM_P (src2))
13198 /* Optimization: Only read from memory once. */
13199 if (rtx_equal_p (src1, src2))
13201 src2 = force_reg (mode, src2);
13202 src1 = src2;
13204 else
13205 src2 = force_reg (mode, src2);
13208 /* If the destination is memory, and we do not have matching source
13209 operands, do things in registers. */
13210 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
13211 dst = gen_reg_rtx (mode);
13213 /* Source 1 cannot be a constant. */
13214 if (CONSTANT_P (src1))
13215 src1 = force_reg (mode, src1);
13217 /* Source 1 cannot be a non-matching memory. */
13218 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13219 src1 = force_reg (mode, src1);
13221 operands[1] = src1;
13222 operands[2] = src2;
13223 return dst;
13226 /* Similarly, but assume that the destination has already been
13227 set up properly. */
13229 void
13230 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
13231 enum machine_mode mode, rtx operands[])
13233 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
13234 gcc_assert (dst == operands[0]);
13237 /* Attempt to expand a binary operator. Make the expansion closer to the
13238 actual machine, than just general_operand, which will allow 3 separate
13239 memory references (one output, two input) in a single insn. */
13241 void
13242 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
13243 rtx operands[])
13245 rtx src1, src2, dst, op, clob;
13247 dst = ix86_fixup_binary_operands (code, mode, operands);
13248 src1 = operands[1];
13249 src2 = operands[2];
13251 /* Emit the instruction. */
13253 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
13254 if (reload_in_progress)
13256 /* Reload doesn't know about the flags register, and doesn't know that
13257 it doesn't want to clobber it. We can only do this with PLUS. */
13258 gcc_assert (code == PLUS);
13259 emit_insn (op);
13261 else
13263 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13264 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13267 /* Fix up the destination if needed. */
13268 if (dst != operands[0])
13269 emit_move_insn (operands[0], dst);
13272 /* Return TRUE or FALSE depending on whether the binary operator meets the
13273 appropriate constraints. */
13275 int
13276 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
13277 rtx operands[3])
13279 rtx dst = operands[0];
13280 rtx src1 = operands[1];
13281 rtx src2 = operands[2];
13283 /* Both source operands cannot be in memory. */
13284 if (MEM_P (src1) && MEM_P (src2))
13285 return 0;
13287 /* Canonicalize operand order for commutative operators. */
13288 if (ix86_swap_binary_operands_p (code, mode, operands))
13290 rtx temp = src1;
13291 src1 = src2;
13292 src2 = temp;
13295 /* If the destination is memory, we must have a matching source operand. */
13296 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
13297 return 0;
13299 /* Source 1 cannot be a constant. */
13300 if (CONSTANT_P (src1))
13301 return 0;
13303 /* Source 1 cannot be a non-matching memory. */
13304 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13305 return 0;
13307 return 1;
13310 /* Attempt to expand a unary operator. Make the expansion closer to the
13311 actual machine, than just general_operand, which will allow 2 separate
13312 memory references (one output, one input) in a single insn. */
13314 void
13315 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
13316 rtx operands[])
13318 int matching_memory;
13319 rtx src, dst, op, clob;
13321 dst = operands[0];
13322 src = operands[1];
13324 /* If the destination is memory, and we do not have matching source
13325 operands, do things in registers. */
13326 matching_memory = 0;
13327 if (MEM_P (dst))
13329 if (rtx_equal_p (dst, src))
13330 matching_memory = 1;
13331 else
13332 dst = gen_reg_rtx (mode);
13335 /* When source operand is memory, destination must match. */
13336 if (MEM_P (src) && !matching_memory)
13337 src = force_reg (mode, src);
13339 /* Emit the instruction. */
13341 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
13342 if (reload_in_progress || code == NOT)
13344 /* Reload doesn't know about the flags register, and doesn't know that
13345 it doesn't want to clobber it. */
13346 gcc_assert (code == NOT);
13347 emit_insn (op);
13349 else
13351 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13352 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13355 /* Fix up the destination if needed. */
13356 if (dst != operands[0])
13357 emit_move_insn (operands[0], dst);
13360 #define LEA_SEARCH_THRESHOLD 12
13362 /* Search backward for a non-AGU definition of register number REGNO1
13363 or register number REGNO2 in INSN's basic block until we
13364 1. pass LEA_SEARCH_THRESHOLD instructions, or
13365 2. reach the BB boundary, or
13366 3. reach an AGU definition.
13367 Returns the distance between the non-AGU definition point and INSN.
13368 If there is no definition point, returns -1. */
13370 static int
13371 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
13372 rtx insn)
13374 basic_block bb = BLOCK_FOR_INSN (insn);
13375 int distance = 0;
13376 df_ref *def_rec;
13377 enum attr_type insn_type;
13379 if (insn != BB_HEAD (bb))
13381 rtx prev = PREV_INSN (insn);
13382 while (prev && distance < LEA_SEARCH_THRESHOLD)
13384 if (INSN_P (prev))
13386 distance++;
13387 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13388 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13389 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13390 && (regno1 == DF_REF_REGNO (*def_rec)
13391 || regno2 == DF_REF_REGNO (*def_rec)))
13393 insn_type = get_attr_type (prev);
13394 if (insn_type != TYPE_LEA)
13395 goto done;
13398 if (prev == BB_HEAD (bb))
13399 break;
13400 prev = PREV_INSN (prev);
13404 if (distance < LEA_SEARCH_THRESHOLD)
13406 edge e;
13407 edge_iterator ei;
13408 bool simple_loop = false;
13410 FOR_EACH_EDGE (e, ei, bb->preds)
13411 if (e->src == bb)
13413 simple_loop = true;
13414 break;
13417 if (simple_loop)
13419 rtx prev = BB_END (bb);
13420 while (prev
13421 && prev != insn
13422 && distance < LEA_SEARCH_THRESHOLD)
13424 if (INSN_P (prev))
13426 distance++;
13427 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13428 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13429 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13430 && (regno1 == DF_REF_REGNO (*def_rec)
13431 || regno2 == DF_REF_REGNO (*def_rec)))
13433 insn_type = get_attr_type (prev);
13434 if (insn_type != TYPE_LEA)
13435 goto done;
13438 prev = PREV_INSN (prev);
13443 distance = -1;
13445 done:
13446 /* get_attr_type may modify recog data. Make sure that recog data
13447 is valid for the instruction INSN, on which
13448 distance_non_agu_define was called. INSN itself is unchanged here. */
13449 extract_insn_cached (insn);
13450 return distance;
13453 /* Return the distance between INSN and the next insn that uses
13454 register number REGNO0 in a memory address. Return -1 if no such
13455 use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set. */
13457 static int
13458 distance_agu_use (unsigned int regno0, rtx insn)
13460 basic_block bb = BLOCK_FOR_INSN (insn);
13461 int distance = 0;
13462 df_ref *def_rec;
13463 df_ref *use_rec;
13465 if (insn != BB_END (bb))
13467 rtx next = NEXT_INSN (insn);
13468 while (next && distance < LEA_SEARCH_THRESHOLD)
13470 if (INSN_P (next))
13472 distance++;
13474 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13475 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13476 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13477 && regno0 == DF_REF_REGNO (*use_rec))
13479 /* Return DISTANCE if OP0 is used in a memory
13480 address in NEXT. */
13481 return distance;
13484 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13485 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13486 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13487 && regno0 == DF_REF_REGNO (*def_rec))
13489 /* Return -1 if OP0 is set in NEXT. */
13490 return -1;
13493 if (next == BB_END (bb))
13494 break;
13495 next = NEXT_INSN (next);
13499 if (distance < LEA_SEARCH_THRESHOLD)
13501 edge e;
13502 edge_iterator ei;
13503 bool simple_loop = false;
13505 FOR_EACH_EDGE (e, ei, bb->succs)
13506 if (e->dest == bb)
13508 simple_loop = true;
13509 break;
13512 if (simple_loop)
13514 rtx next = BB_HEAD (bb);
13515 while (next
13516 && next != insn
13517 && distance < LEA_SEARCH_THRESHOLD)
13519 if (INSN_P (next))
13521 distance++;
13523 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13524 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13525 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13526 && regno0 == DF_REF_REGNO (*use_rec))
13528 /* Return DISTANCE if OP0 is used in a memory
13529 address in NEXT. */
13530 return distance;
13533 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13534 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13535 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13536 && regno0 == DF_REF_REGNO (*def_rec))
13538 /* Return -1 if OP0 is set in NEXT. */
13539 return -1;
13543 next = NEXT_INSN (next);
13548 return -1;
13551 /* Define this macro to tune LEA priority vs. ADD; it takes effect when
13552 there is a choice between LEA and ADD.
13553 Negative value: ADD is preferred over LEA
13554 Zero: Neutral
13555 Positive value: LEA is preferred over ADD. */
13556 #define IX86_LEA_PRIORITY 2
13558 /* Return true if it is ok to optimize an ADD operation to an LEA
13559 operation to avoid flag-register consumption. For processors
13560 like ATOM, if the destination register of the LEA holds an actual
13561 address which will be used soon, LEA is better; otherwise ADD
13562 is better. */
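/* An illustrative sketch of the trade-off (assembly and registers
   are hypothetical):

	lea	(%ecx,%edx), %eax	; computed in the AGU, no flags
   vs.
	movl	%ecx, %eax
	addl	%edx, %eax		; computed in the ALU, sets flags

   If %eax soon feeds a memory address, the lea form keeps the value
   in the AGU; if it feeds further arithmetic, the add form avoids an
   AGU-to-ALU forwarding delay on processors like ATOM.  */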
13564 bool
13565 ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13566 rtx insn, rtx operands[])
13568 unsigned int regno0 = true_regnum (operands[0]);
13569 unsigned int regno1 = true_regnum (operands[1]);
13570 unsigned int regno2;
13572 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
13573 return regno0 != regno1;
13575 regno2 = true_regnum (operands[2]);
13577 /* If a = b + c with a != b and a != c, we must use the lea form. */
13578 if (regno0 != regno1 && regno0 != regno2)
13579 return true;
13580 else
13582 int dist_define, dist_use;
13583 dist_define = distance_non_agu_define (regno1, regno2, insn);
13584 if (dist_define <= 0)
13585 return true;
13587 /* If this insn has both a backward non-agu dependence and a forward
13588 agu dependence, the one with the shorter distance takes effect. */
13589 dist_use = distance_agu_use (regno0, insn);
13590 if (dist_use <= 0
13591 || (dist_define + IX86_LEA_PRIORITY) < dist_use)
13592 return false;
13594 return true;
13598 /* Return true if destination reg of SET_BODY is shift count of
13599 USE_BODY. */
13601 static bool
13602 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
13604 rtx set_dest;
13605 rtx shift_rtx;
13606 int i;
13608 /* Retrieve destination of SET_BODY. */
13609 switch (GET_CODE (set_body))
13611 case SET:
13612 set_dest = SET_DEST (set_body);
13613 if (!set_dest || !REG_P (set_dest))
13614 return false;
13615 break;
13616 case PARALLEL:
13617 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
13618 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
13619 use_body))
13620 return true;
13621 default:
13622 return false;
13623 break;
13626 /* Retrieve shift count of USE_BODY. */
13627 switch (GET_CODE (use_body))
13629 case SET:
13630 shift_rtx = XEXP (use_body, 1);
13631 break;
13632 case PARALLEL:
13633 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
13634 if (ix86_dep_by_shift_count_body (set_body,
13635 XVECEXP (use_body, 0, i)))
13636 return true;
13637 default:
13638 return false;
13639 break;
13642 if (shift_rtx
13643 && (GET_CODE (shift_rtx) == ASHIFT
13644 || GET_CODE (shift_rtx) == LSHIFTRT
13645 || GET_CODE (shift_rtx) == ASHIFTRT
13646 || GET_CODE (shift_rtx) == ROTATE
13647 || GET_CODE (shift_rtx) == ROTATERT))
13649 rtx shift_count = XEXP (shift_rtx, 1);
13651 /* Return true if shift count is dest of SET_BODY. */
13652 if (REG_P (shift_count)
13653 && true_regnum (set_dest) == true_regnum (shift_count))
13654 return true;
13657 return false;
13660 /* Return true if destination reg of SET_INSN is shift count of
13661 USE_INSN. */
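/* For example (hypothetical insns), SET_INSN and USE_INSN below are
   dependent through the shift count:

	movl	%eax, %ecx	<- SET_INSN defines %ecx
	sall	%cl, %edx	<- USE_INSN shifts by %cl

   The helper above compares true_regnum values, so hard registers
   and pseudos are handled uniformly.  */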
13663 bool
13664 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
13666 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
13667 PATTERN (use_insn));
13670 /* Return TRUE or FALSE depending on whether the unary operator meets the
13671 appropriate constraints. */
13673 int
13674 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13675 enum machine_mode mode ATTRIBUTE_UNUSED,
13676 rtx operands[2] ATTRIBUTE_UNUSED)
13678 /* If one of operands is memory, source and destination must match. */
13679 if ((MEM_P (operands[0])
13680 || MEM_P (operands[1]))
13681 && ! rtx_equal_p (operands[0], operands[1]))
13682 return FALSE;
13683 return TRUE;
13686 /* Post-reload splitter for converting an SF or DFmode value in an
13687 SSE register into an unsigned SImode. */
13689 void
13690 ix86_split_convert_uns_si_sse (rtx operands[])
13692 enum machine_mode vecmode;
13693 rtx value, large, zero_or_two31, input, two31, x;
13695 large = operands[1];
13696 zero_or_two31 = operands[2];
13697 input = operands[3];
13698 two31 = operands[4];
13699 vecmode = GET_MODE (large);
13700 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
13702 /* Load up the value into the low element. We must ensure that the other
13703 elements are valid floats -- zero is the easiest such value. */
13704 if (MEM_P (input))
13706 if (vecmode == V4SFmode)
13707 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
13708 else
13709 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
13711 else
13713 input = gen_rtx_REG (vecmode, REGNO (input));
13714 emit_move_insn (value, CONST0_RTX (vecmode));
13715 if (vecmode == V4SFmode)
13716 emit_insn (gen_sse_movss (value, value, input));
13717 else
13718 emit_insn (gen_sse2_movsd (value, value, input));
13721 emit_move_insn (large, two31);
13722 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
13724 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
13725 emit_insn (gen_rtx_SET (VOIDmode, large, x));
13727 x = gen_rtx_AND (vecmode, zero_or_two31, large);
13728 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
13730 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
13731 emit_insn (gen_rtx_SET (VOIDmode, value, x));
13733 large = gen_rtx_REG (V4SImode, REGNO (large));
13734 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
13736 x = gen_rtx_REG (V4SImode, REGNO (value));
13737 if (vecmode == V4SFmode)
13738 emit_insn (gen_sse2_cvttps2dq (x, value));
13739 else
13740 emit_insn (gen_sse2_cvttpd2dq (x, value));
13741 value = x;
13743 emit_insn (gen_xorv4si3 (value, value, large));
13746 /* Convert an unsigned DImode value into a DFmode, using only SSE.
13747 Expects the 64-bit DImode to be supplied in a pair of integral
13748 registers. Requires SSE2; will use SSE3 if available. For x86_32,
13749 -mfpmath=sse, !optimize_size only. */
13751 void
13752 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
13754 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
13755 rtx int_xmm, fp_xmm;
13756 rtx biases, exponents;
13757 rtx x;
13759 int_xmm = gen_reg_rtx (V4SImode);
13760 if (TARGET_INTER_UNIT_MOVES)
13761 emit_insn (gen_movdi_to_sse (int_xmm, input));
13762 else if (TARGET_SSE_SPLIT_REGS)
13764 emit_clobber (int_xmm);
13765 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
13767 else
13769 x = gen_reg_rtx (V2DImode);
13770 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
13771 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
13774 x = gen_rtx_CONST_VECTOR (V4SImode,
13775 gen_rtvec (4, GEN_INT (0x43300000UL),
13776 GEN_INT (0x45300000UL),
13777 const0_rtx, const0_rtx));
13778 exponents = validize_mem (force_const_mem (V4SImode, x));
13780 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
13781 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
13783 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
13784 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
13785 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
13786 (0x1.0p84 + double(fp_value_hi_xmm)).
13787 Note these exponents differ by 32. */
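/* A worked example under exact arithmetic (input value assumed):
   for the 64-bit input 0x0000000100000002, lo = 2 and hi = 1, so
   after the punpckldq the two doubles are 0x1.0p52 + 2 and
   0x1.0p84 + 1 * 0x1.0p32.  Subtracting the biases below leaves
   2.0 and 4294967296.0, whose sum 4294967298.0 is exactly the
   unsigned value of the input.  */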
13789 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
13791 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
13792 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
13793 real_ldexp (&bias_lo_rvt, &dconst1, 52);
13794 real_ldexp (&bias_hi_rvt, &dconst1, 84);
13795 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
13796 x = const_double_from_real_value (bias_hi_rvt, DFmode);
13797 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
13798 biases = validize_mem (force_const_mem (V2DFmode, biases));
13799 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
13801 /* Add the upper and lower DFmode values together. */
13802 if (TARGET_SSE3)
13803 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
13804 else
13806 x = copy_to_mode_reg (V2DFmode, fp_xmm);
13807 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
13808 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
13811 ix86_expand_vector_extract (false, target, fp_xmm, 0);
13814 /* Not used, but eases macroization of patterns. */
13815 void
13816 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
13817 rtx input ATTRIBUTE_UNUSED)
13819 gcc_unreachable ();
13822 /* Convert an unsigned SImode value into a DFmode. Only currently used
13823 for SSE, but applicable anywhere. */
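/* A worked example (input value assumed): for input 0xffffffff
   (4294967295), the PLUS below wraps to 0x7fffffff, the signed
   conversion yields 2147483647.0, and adding 0x1.0p31 restores
   4294967295.0.  DFmode holds any 32-bit integer exactly, so no
   rounding occurs.  */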
13825 void
13826 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
13828 REAL_VALUE_TYPE TWO31r;
13829 rtx x, fp;
13831 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
13832 NULL, 1, OPTAB_DIRECT);
13834 fp = gen_reg_rtx (DFmode);
13835 emit_insn (gen_floatsidf2 (fp, x));
13837 real_ldexp (&TWO31r, &dconst1, 31);
13838 x = const_double_from_real_value (TWO31r, DFmode);
13840 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
13841 if (x != target)
13842 emit_move_insn (target, x);
13845 /* Convert a signed DImode value into a DFmode. Only used for SSE in
13846 32-bit mode; otherwise we have a direct convert instruction. */
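/* A worked example (input value assumed): for input -1
   (0xffffffffffffffff), the high part converts to -1.0 and is
   scaled to -0x1.0p32, the low part converts as unsigned to
   4294967295.0, and their sum is -1.0, matching
   hi * 2^32 + (unsigned) lo.  */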
13848 void
13849 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
13851 REAL_VALUE_TYPE TWO32r;
13852 rtx fp_lo, fp_hi, x;
13854 fp_lo = gen_reg_rtx (DFmode);
13855 fp_hi = gen_reg_rtx (DFmode);
13857 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
13859 real_ldexp (&TWO32r, &dconst1, 32);
13860 x = const_double_from_real_value (TWO32r, DFmode);
13861 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
13863 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
13865 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
13866 0, OPTAB_DIRECT);
13867 if (x != target)
13868 emit_move_insn (target, x);
13871 /* Convert an unsigned SImode value into a SFmode, using only SSE.
13872 For x86_32, -mfpmath=sse, !optimize_size only. */
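/* A worked example (input value assumed): for input 0x80000001,
   int_lo = 1 and int_hi = 0x8000; both halves convert to SFmode
   exactly, and fp_hi * 0x1.0p16 + fp_lo = 2147483648.0f + 1.0f.
   Only the final addition rounds, so the result is the correctly
   rounded SFmode value; splitting at 16 bits keeps each half exact.  */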
13873 void
13874 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
13876 REAL_VALUE_TYPE ONE16r;
13877 rtx fp_hi, fp_lo, int_hi, int_lo, x;
13879 real_ldexp (&ONE16r, &dconst1, 16);
13880 x = const_double_from_real_value (ONE16r, SFmode);
13881 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
13882 NULL, 0, OPTAB_DIRECT);
13883 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
13884 NULL, 0, OPTAB_DIRECT);
13885 fp_hi = gen_reg_rtx (SFmode);
13886 fp_lo = gen_reg_rtx (SFmode);
13887 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
13888 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
13889 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
13890 0, OPTAB_DIRECT);
13891 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
13892 0, OPTAB_DIRECT);
13893 if (!rtx_equal_p (target, fp_hi))
13894 emit_move_insn (target, fp_hi);
13897 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
13898 then replicate the value for all elements of the vector
13899 register. */
13901 rtx
13902 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
13904 rtvec v;
13905 switch (mode)
13907 case SImode:
13908 gcc_assert (vect);
13909 v = gen_rtvec (4, value, value, value, value);
13910 return gen_rtx_CONST_VECTOR (V4SImode, v);
13912 case DImode:
13913 gcc_assert (vect);
13914 v = gen_rtvec (2, value, value);
13915 return gen_rtx_CONST_VECTOR (V2DImode, v);
13917 case SFmode:
13918 if (vect)
13919 v = gen_rtvec (4, value, value, value, value);
13920 else
13921 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
13922 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13923 return gen_rtx_CONST_VECTOR (V4SFmode, v);
13925 case DFmode:
13926 if (vect)
13927 v = gen_rtvec (2, value, value);
13928 else
13929 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
13930 return gen_rtx_CONST_VECTOR (V2DFmode, v);
13932 default:
13933 gcc_unreachable ();
13937 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
13938 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
13939 for an SSE register. If VECT is true, then replicate the mask for
13940 all elements of the vector register. If INVERT is true, then create
13941 a mask excluding the sign bit. */
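/* For example, for DFmode the mask is the bit pattern
   0x8000000000000000 (just the IEEE sign bit) in each element, or
   its complement 0x7fffffffffffffff when INVERT; NEG uses the
   former with XOR and ABS uses the latter with AND.  */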
13943 static rtx
13944 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
13946 enum machine_mode vec_mode, imode;
13947 HOST_WIDE_INT hi, lo;
13948 int shift = 63;
13949 rtx v;
13950 rtx mask;
13952 /* Find the sign bit, sign extended to 2*HWI. */
13953 switch (mode)
13955 case SImode:
13956 case SFmode:
13957 imode = SImode;
13958 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
13959 lo = 0x80000000, hi = lo < 0;
13960 break;
13962 case DImode:
13963 case DFmode:
13964 imode = DImode;
13965 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
13966 if (HOST_BITS_PER_WIDE_INT >= 64)
13967 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
13968 else
13969 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13970 break;
13972 case TImode:
13973 case TFmode:
13974 vec_mode = VOIDmode;
13975 if (HOST_BITS_PER_WIDE_INT >= 64)
13977 imode = TImode;
13978 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
13980 else
13982 rtvec vec;
13984 imode = DImode;
13985 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13987 if (invert)
13989 lo = ~lo, hi = ~hi;
13990 v = constm1_rtx;
13992 else
13993 v = const0_rtx;
13995 mask = immed_double_const (lo, hi, imode);
13997 vec = gen_rtvec (2, v, mask);
13998 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
13999 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
14001 return v;
14003 break;
14005 default:
14006 gcc_unreachable ();
14009 if (invert)
14010 lo = ~lo, hi = ~hi;
14012 /* Force this value into the low part of a fp vector constant. */
14013 mask = immed_double_const (lo, hi, imode);
14014 mask = gen_lowpart (mode, mask);
14016 if (vec_mode == VOIDmode)
14017 return force_reg (mode, mask);
14019 v = ix86_build_const_vector (mode, vect, mask);
14020 return force_reg (vec_mode, v);
14023 /* Generate code for floating point ABS or NEG. */
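/* With SSE this becomes a mask operation; e.g. for DFmode the
   generated code corresponds to (illustrative assembly, label names
   hypothetical):

	neg:	xorpd	.LC_sign(%rip), %xmm0	; flip the sign bit
	abs:	andpd	.LC_nosign(%rip), %xmm0	; clear the sign bit

   using the masks built by ix86_build_signbit_mask.  */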
14025 void
14026 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
14027 rtx operands[])
14029 rtx mask, set, use, clob, dst, src;
14030 bool use_sse = false;
14031 bool vector_mode = VECTOR_MODE_P (mode);
14032 enum machine_mode elt_mode = mode;
14034 if (vector_mode)
14036 elt_mode = GET_MODE_INNER (mode);
14037 use_sse = true;
14039 else if (mode == TFmode)
14040 use_sse = true;
14041 else if (TARGET_SSE_MATH)
14042 use_sse = SSE_FLOAT_MODE_P (mode);
14044 /* NEG and ABS performed with SSE use bitwise mask operations.
14045 Create the appropriate mask now. */
14046 if (use_sse)
14047 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
14048 else
14049 mask = NULL_RTX;
14051 dst = operands[0];
14052 src = operands[1];
14054 if (vector_mode)
14056 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
14057 set = gen_rtx_SET (VOIDmode, dst, set);
14058 emit_insn (set);
14060 else
14062 set = gen_rtx_fmt_e (code, mode, src);
14063 set = gen_rtx_SET (VOIDmode, dst, set);
14064 if (mask)
14066 use = gen_rtx_USE (VOIDmode, mask);
14067 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14068 emit_insn (gen_rtx_PARALLEL (VOIDmode,
14069 gen_rtvec (3, set, use, clob)));
14071 else
14072 emit_insn (set);
14076 /* Expand a copysign operation. Special case operand 0 being a constant. */
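/* The expansion is bitwise: copysign (x, y) = (x & ~signmask)
   | (y & signmask).  When x is a constant we can fold |x| at compile
   time, so only the AND of y with the sign mask and the final IOR
   remain; that is the _const variant emitted below.  */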
14078 void
14079 ix86_expand_copysign (rtx operands[])
14081 enum machine_mode mode;
14082 rtx dest, op0, op1, mask, nmask;
14084 dest = operands[0];
14085 op0 = operands[1];
14086 op1 = operands[2];
14088 mode = GET_MODE (dest);
14090 if (GET_CODE (op0) == CONST_DOUBLE)
14092 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
14094 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
14095 op0 = simplify_unary_operation (ABS, mode, op0, mode);
14097 if (mode == SFmode || mode == DFmode)
14099 enum machine_mode vmode;
14101 vmode = mode == SFmode ? V4SFmode : V2DFmode;
14103 if (op0 == CONST0_RTX (mode))
14104 op0 = CONST0_RTX (vmode);
14105 else
14107 rtvec v;
14109 if (mode == SFmode)
14110 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
14111 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
14112 else
14113 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
14115 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
14118 else if (op0 != CONST0_RTX (mode))
14119 op0 = force_reg (mode, op0);
14121 mask = ix86_build_signbit_mask (mode, 0, 0);
14123 if (mode == SFmode)
14124 copysign_insn = gen_copysignsf3_const;
14125 else if (mode == DFmode)
14126 copysign_insn = gen_copysigndf3_const;
14127 else
14128 copysign_insn = gen_copysigntf3_const;
14130 emit_insn (copysign_insn (dest, op0, op1, mask));
14132 else
14134 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
14136 nmask = ix86_build_signbit_mask (mode, 0, 1);
14137 mask = ix86_build_signbit_mask (mode, 0, 0);
14139 if (mode == SFmode)
14140 copysign_insn = gen_copysignsf3_var;
14141 else if (mode == DFmode)
14142 copysign_insn = gen_copysigndf3_var;
14143 else
14144 copysign_insn = gen_copysigntf3_var;
14146 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
14150 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
14151 be a constant, and so has already been expanded into a vector constant. */
14153 void
14154 ix86_split_copysign_const (rtx operands[])
14156 enum machine_mode mode, vmode;
14157 rtx dest, op0, op1, mask, x;
14159 dest = operands[0];
14160 op0 = operands[1];
14161 op1 = operands[2];
14162 mask = operands[3];
14164 mode = GET_MODE (dest);
14165 vmode = GET_MODE (mask);
14167 dest = simplify_gen_subreg (vmode, dest, mode, 0);
14168 x = gen_rtx_AND (vmode, dest, mask);
14169 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14171 if (op0 != CONST0_RTX (vmode))
14173 x = gen_rtx_IOR (vmode, dest, op0);
14174 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14178 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
14179 so we have to do two masks. */
14181 void
14182 ix86_split_copysign_var (rtx operands[])
14184 enum machine_mode mode, vmode;
14185 rtx dest, scratch, op0, op1, mask, nmask, x;
14187 dest = operands[0];
14188 scratch = operands[1];
14189 op0 = operands[2];
14190 op1 = operands[3];
14191 nmask = operands[4];
14192 mask = operands[5];
14194 mode = GET_MODE (dest);
14195 vmode = GET_MODE (mask);
14197 if (rtx_equal_p (op0, op1))
14199 /* Shouldn't happen often (it's useless, obviously), but when it does
14200 we'd generate incorrect code if we continue below. */
14201 emit_move_insn (dest, op0);
14202 return;
14205 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
14207 gcc_assert (REGNO (op1) == REGNO (scratch));
14209 x = gen_rtx_AND (vmode, scratch, mask);
14210 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
14212 dest = mask;
14213 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
14214 x = gen_rtx_NOT (vmode, dest);
14215 x = gen_rtx_AND (vmode, x, op0);
14216 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14218 else
14220 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
14222 x = gen_rtx_AND (vmode, scratch, mask);
14224 else /* alternative 2,4 */
14226 gcc_assert (REGNO (mask) == REGNO (scratch));
14227 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
14228 x = gen_rtx_AND (vmode, scratch, op1);
14230 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
14232 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
14234 dest = simplify_gen_subreg (vmode, op0, mode, 0);
14235 x = gen_rtx_AND (vmode, dest, nmask);
14237 else /* alternative 3,4 */
14239 gcc_assert (REGNO (nmask) == REGNO (dest));
14240 dest = nmask;
14241 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
14242 x = gen_rtx_AND (vmode, dest, op0);
14244 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14247 x = gen_rtx_IOR (vmode, dest, scratch);
14248 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14251 /* Return TRUE or FALSE depending on whether the first SET in INSN
14252 has source and destination with matching CC modes, and whether the
14253 CC mode is at least as constrained as REQ_MODE. */
14255 int
14256 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
14258 rtx set;
14259 enum machine_mode set_mode;
14261 set = PATTERN (insn);
14262 if (GET_CODE (set) == PARALLEL)
14263 set = XVECEXP (set, 0, 0);
14264 gcc_assert (GET_CODE (set) == SET);
14265 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
14267 set_mode = GET_MODE (SET_DEST (set));
14268 switch (set_mode)
14270 case CCNOmode:
14271 if (req_mode != CCNOmode
14272 && (req_mode != CCmode
14273 || XEXP (SET_SRC (set), 1) != const0_rtx))
14274 return 0;
14275 break;
14276 case CCmode:
14277 if (req_mode == CCGCmode)
14278 return 0;
14279 /* FALLTHRU */
14280 case CCGCmode:
14281 if (req_mode == CCGOCmode || req_mode == CCNOmode)
14282 return 0;
14283 /* FALLTHRU */
14284 case CCGOCmode:
14285 if (req_mode == CCZmode)
14286 return 0;
14287 /* FALLTHRU */
14288 case CCAmode:
14289 case CCCmode:
14290 case CCOmode:
14291 case CCSmode:
14292 case CCZmode:
14293 break;
14295 default:
14296 gcc_unreachable ();
14299 return (GET_MODE (SET_SRC (set)) == set_mode);
14302 /* Generate insn patterns to do an integer compare of OPERANDS. */
14304 static rtx
14305 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
14307 enum machine_mode cmpmode;
14308 rtx tmp, flags;
14310 cmpmode = SELECT_CC_MODE (code, op0, op1);
14311 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
14313 /* This is very simple, but making the interface the same as in the
14314 FP case makes the rest of the code easier. */
14315 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
14316 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
14318 /* Return the test that should be put into the flags user, i.e.
14319 the bcc, scc, or cmov instruction. */
14320 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
14323 /* Figure out whether to use ordered or unordered fp comparisons.
14324 Return the appropriate mode to use. */
14326 enum machine_mode
14327 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
14329 /* ??? In order to make all comparisons reversible, we do all comparisons
14330 non-trapping when compiling for IEEE. Once gcc is able to distinguish
14331 between all forms of trapping and nontrapping comparisons, we can make
14332 inequality comparisons trapping again, since it results in better code when
14333 using FCOM based compares. */
14334 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
14337 enum machine_mode
14338 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
14340 enum machine_mode mode = GET_MODE (op0);
14342 if (SCALAR_FLOAT_MODE_P (mode))
14344 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14345 return ix86_fp_compare_mode (code);
14348 switch (code)
14350 /* Only zero flag is needed. */
14351 case EQ: /* ZF=0 */
14352 case NE: /* ZF!=0 */
14353 return CCZmode;
14354 /* Codes needing carry flag. */
14355 case GEU: /* CF=0 */
14356 case LTU: /* CF=1 */
14357 /* Detect overflow checks. They need just the carry flag. */
14358 if (GET_CODE (op0) == PLUS
14359 && rtx_equal_p (op1, XEXP (op0, 0)))
14360 return CCCmode;
14361 else
14362 return CCmode;
14363 case GTU: /* CF=0 & ZF=0 */
14364 case LEU: /* CF=1 | ZF=1 */
14365 /* Detect overflow checks. They need just the carry flag. */
14366 if (GET_CODE (op0) == MINUS
14367 && rtx_equal_p (op1, XEXP (op0, 0)))
14368 return CCCmode;
14369 else
14370 return CCmode;
14371 /* Codes possibly doable only with sign flag when
14372 comparing against zero. */
14373 case GE: /* SF=OF or SF=0 */
14374 case LT: /* SF<>OF or SF=1 */
14375 if (op1 == const0_rtx)
14376 return CCGOCmode;
14377 else
14378 /* For the other cases the carry flag is not required. */
14379 return CCGCmode;
14380 /* Codes doable only with the sign flag when comparing
14381 against zero, but for which we miss the jump instruction,
14382 so we need to use relational tests against overflow,
14383 which thus needs to be zero. */
14384 case GT: /* ZF=0 & SF=OF */
14385 case LE: /* ZF=1 | SF<>OF */
14386 if (op1 == const0_rtx)
14387 return CCNOmode;
14388 else
14389 return CCGCmode;
14390 /* The strcmp pattern does (use flags), and combine may ask us for the
14391 proper mode. */
14392 case USE:
14393 return CCmode;
14394 default:
14395 gcc_unreachable ();
14399 /* Return the fixed registers used for condition codes. */
14401 static bool
14402 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
14404 *p1 = FLAGS_REG;
14405 *p2 = FPSR_REG;
14406 return true;
14409 /* If two condition code modes are compatible, return a condition code
14410 mode which is compatible with both. Otherwise, return
14411 VOIDmode. */
14413 static enum machine_mode
14414 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
14416 if (m1 == m2)
14417 return m1;
14419 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
14420 return VOIDmode;
14422 if ((m1 == CCGCmode && m2 == CCGOCmode)
14423 || (m1 == CCGOCmode && m2 == CCGCmode))
14424 return CCGCmode;
14426 switch (m1)
14428 default:
14429 gcc_unreachable ();
14431 case CCmode:
14432 case CCGCmode:
14433 case CCGOCmode:
14434 case CCNOmode:
14435 case CCAmode:
14436 case CCCmode:
14437 case CCOmode:
14438 case CCSmode:
14439 case CCZmode:
14440 switch (m2)
14442 default:
14443 return VOIDmode;
14445 case CCmode:
14446 case CCGCmode:
14447 case CCGOCmode:
14448 case CCNOmode:
14449 case CCAmode:
14450 case CCCmode:
14451 case CCOmode:
14452 case CCSmode:
14453 case CCZmode:
14454 return CCmode;
14457 case CCFPmode:
14458 case CCFPUmode:
14459 /* These are only compatible with themselves, which we already
14460 checked above. */
14461 return VOIDmode;
14466 /* Return a comparison we can do that is equivalent to
14467 swap_condition (code), apart possibly from orderedness.
14468 Never change orderedness if TARGET_IEEE_FP, returning
14469 UNKNOWN in that case if necessary. */
14471 static enum rtx_code
14472 ix86_fp_swap_condition (enum rtx_code code)
14474 switch (code)
14476 case GT: /* GTU - CF=0 & ZF=0 */
14477 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
14478 case GE: /* GEU - CF=0 */
14479 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
14480 case UNLT: /* LTU - CF=1 */
14481 return TARGET_IEEE_FP ? UNKNOWN : GT;
14482 case UNLE: /* LEU - CF=1 | ZF=1 */
14483 return TARGET_IEEE_FP ? UNKNOWN : GE;
14484 default:
14485 return swap_condition (code);
14489 /* Return the cost of comparison CODE using the best strategy for performance.
14490 All following functions use the number of instructions as the cost metric.
14491 In the future this should be tweaked to compute bytes for optimize_size and
14492 take into account the performance of various instructions on various CPUs. */
14494 static int
14495 ix86_fp_comparison_cost (enum rtx_code code)
14497 int arith_cost;
14499 /* The cost of code using bit-twiddling on %ah. */
14500 switch (code)
14502 case UNLE:
14503 case UNLT:
14504 case LTGT:
14505 case GT:
14506 case GE:
14507 case UNORDERED:
14508 case ORDERED:
14509 case UNEQ:
14510 arith_cost = 4;
14511 break;
14512 case LT:
14513 case NE:
14514 case EQ:
14515 case UNGE:
14516 arith_cost = TARGET_IEEE_FP ? 5 : 4;
14517 break;
14518 case LE:
14519 case UNGT:
14520 arith_cost = TARGET_IEEE_FP ? 6 : 4;
14521 break;
14522 default:
14523 gcc_unreachable ();
14526 switch (ix86_fp_comparison_strategy (code))
14528 case IX86_FPCMP_COMI:
14529 return arith_cost > 4 ? 3 : 2;
14530 case IX86_FPCMP_SAHF:
14531 return arith_cost > 4 ? 4 : 3;
14532 default:
14533 return arith_cost;
14537 /* Return the strategy to use for a floating-point comparison. We assume that
14538 fcomi is always preferable where available, since that is also true when
14539 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
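/* The three strategies correspond roughly to the following sketches
   (illustrative assembly, comparing %st(0) against %st(1)):

	IX86_FPCMP_COMI:   fcomi  %st(1), %st	; EFLAGS set directly
	IX86_FPCMP_SAHF:   fcom   %st(1)
			   fnstsw %ax
			   sahf			; AH -> EFLAGS
	IX86_FPCMP_ARITH:  fcom   %st(1)
			   fnstsw %ax
			   test/and/cmp on %ah	; bit-twiddling  */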
14541 enum ix86_fpcmp_strategy
14542 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
14544 /* Do fcomi/sahf based test when profitable. */
14546 if (TARGET_CMOVE)
14547 return IX86_FPCMP_COMI;
14549 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
14550 return IX86_FPCMP_SAHF;
14552 return IX86_FPCMP_ARITH;
14555 /* Swap, force into registers, or otherwise massage the two operands
14556 to a fp comparison. The operands are updated in place; the new
14557 comparison code is returned. */
14559 static enum rtx_code
14560 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
14562 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
14563 rtx op0 = *pop0, op1 = *pop1;
14564 enum machine_mode op_mode = GET_MODE (op0);
14565 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
14567 /* All of the unordered compare instructions only work on registers.
14568 The same is true of the fcomi compare instructions. The XFmode
14569 compare instructions require registers except when comparing
14570 against zero or when converting operand 1 from fixed point to
14571 floating point. */
14573 if (!is_sse
14574 && (fpcmp_mode == CCFPUmode
14575 || (op_mode == XFmode
14576 && ! (standard_80387_constant_p (op0) == 1
14577 || standard_80387_constant_p (op1) == 1)
14578 && GET_CODE (op1) != FLOAT)
14579 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
14581 op0 = force_reg (op_mode, op0);
14582 op1 = force_reg (op_mode, op1);
14584 else
14586 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
14587 things around if that appears profitable; otherwise force op0
14588 into a register. */
14590 if (standard_80387_constant_p (op0) == 0
14591 || (MEM_P (op0)
14592 && ! (standard_80387_constant_p (op1) == 0
14593 || MEM_P (op1))))
14595 enum rtx_code new_code = ix86_fp_swap_condition (code);
14596 if (new_code != UNKNOWN)
14598 rtx tmp;
14599 tmp = op0, op0 = op1, op1 = tmp;
14600 code = new_code;
14604 if (!REG_P (op0))
14605 op0 = force_reg (op_mode, op0);
14607 if (CONSTANT_P (op1))
14609 int tmp = standard_80387_constant_p (op1);
14610 if (tmp == 0)
14611 op1 = validize_mem (force_const_mem (op_mode, op1));
14612 else if (tmp == 1)
14614 if (TARGET_CMOVE)
14615 op1 = force_reg (op_mode, op1);
14617 else
14618 op1 = force_reg (op_mode, op1);
14622 /* Try to rearrange the comparison to make it cheaper. */
14623 if (ix86_fp_comparison_cost (code)
14624 > ix86_fp_comparison_cost (swap_condition (code))
14625 && (REG_P (op1) || can_create_pseudo_p ()))
14627 rtx tmp;
14628 tmp = op0, op0 = op1, op1 = tmp;
14629 code = swap_condition (code);
14630 if (!REG_P (op0))
14631 op0 = force_reg (op_mode, op0);
14634 *pop0 = op0;
14635 *pop1 = op1;
14636 return code;
14639 /* Convert the comparison codes we use to represent an FP comparison to the
14640 integer code that will result in a proper branch. Return UNKNOWN if no such
14641 code is available. */
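/* For example, after an fcomi or fnstsw/sahf sequence the flags look
   like those of an unsigned integer compare, with PF set for
   unordered results; hence GT maps to GTU ("ja") and UNLT maps to
   LTU ("jb"), while ORDERED/UNORDERED test PF directly.  */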
14643 enum rtx_code
14644 ix86_fp_compare_code_to_integer (enum rtx_code code)
14646 switch (code)
14648 case GT:
14649 return GTU;
14650 case GE:
14651 return GEU;
14652 case ORDERED:
14653 case UNORDERED:
14654 return code;
14655 break;
14656 case UNEQ:
14657 return EQ;
14658 break;
14659 case UNLT:
14660 return LTU;
14661 break;
14662 case UNLE:
14663 return LEU;
14664 break;
14665 case LTGT:
14666 return NE;
14667 break;
14668 default:
14669 return UNKNOWN;
14673 /* Generate insn patterns to do a floating point compare of OPERANDS. */
14675 static rtx
14676 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
14678 enum machine_mode fpcmp_mode, intcmp_mode;
14679 rtx tmp, tmp2;
14681 fpcmp_mode = ix86_fp_compare_mode (code);
14682 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
14684 /* Do fcomi/sahf based test when profitable. */
14685 switch (ix86_fp_comparison_strategy (code))
14687 case IX86_FPCMP_COMI:
14688 intcmp_mode = fpcmp_mode;
14689 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14690 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
14691 tmp);
14692 emit_insn (tmp);
14693 break;
14695 case IX86_FPCMP_SAHF:
14696 intcmp_mode = fpcmp_mode;
14697 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14698 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
14699 tmp);
14701 if (!scratch)
14702 scratch = gen_reg_rtx (HImode);
14703 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
14704 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
14705 break;
14707 case IX86_FPCMP_ARITH:
14708 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
14709 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14710 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
14711 if (!scratch)
14712 scratch = gen_reg_rtx (HImode);
14713 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
14715 /* In the unordered case, we have to check C2 for NaN's, which
14716 doesn't happen to work out to anything nice combination-wise.
14717 So do some bit twiddling on the value we've got in AH to come
14718 up with an appropriate set of condition codes. */
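/* After fnstsw the relevant x87 condition bits land in AH as
   C0 = 0x01, C2 = 0x04 and C3 = 0x40, with C2 set for unordered
   results; the 0x45, 0x05, 0x40, 0x01 and 0x04 masks used below
   are combinations of these bits.  */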
14720 intcmp_mode = CCNOmode;
14721 switch (code)
14723 case GT:
14724 case UNGT:
14725 if (code == GT || !TARGET_IEEE_FP)
14727 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14728 code = EQ;
14730 else
14732 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14733 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14734 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
14735 intcmp_mode = CCmode;
14736 code = GEU;
14738 break;
14739 case LT:
14740 case UNLT:
14741 if (code == LT && TARGET_IEEE_FP)
14743 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14744 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
14745 intcmp_mode = CCmode;
14746 code = EQ;
14748 else
14750 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
14751 code = NE;
14753 break;
14754 case GE:
14755 case UNGE:
14756 if (code == GE || !TARGET_IEEE_FP)
14758 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
14759 code = EQ;
14761 else
14763 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14764 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14765 GEN_INT (0x01)));
14766 code = NE;
14768 break;
14769 case LE:
14770 case UNLE:
14771 if (code == LE && TARGET_IEEE_FP)
14773 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14774 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14775 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14776 intcmp_mode = CCmode;
14777 code = LTU;
14779 else
14781 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14782 code = NE;
14784 break;
14785 case EQ:
14786 case UNEQ:
14787 if (code == EQ && TARGET_IEEE_FP)
14789 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14790 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14791 intcmp_mode = CCmode;
14792 code = EQ;
14794 else
14796 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14797 code = NE;
14798 break;
14800 break;
14801 case NE:
14802 case LTGT:
14803 if (code == NE && TARGET_IEEE_FP)
14805 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14806 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14807 GEN_INT (0x40)));
14808 code = NE;
14810 else
14812 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14813 code = EQ;
14815 break;
14817 case UNORDERED:
14818 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14819 code = NE;
14820 break;
14821 case ORDERED:
14822 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14823 code = EQ;
14824 break;
14826 default:
14827 gcc_unreachable ();
14829 break;
14831 default:
14832 gcc_unreachable();
14835 /* Return the test that should be put into the flags user, i.e.
14836 the bcc, scc, or cmov instruction. */
14837 return gen_rtx_fmt_ee (code, VOIDmode,
14838 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14839 const0_rtx);
14842 rtx
14843 ix86_expand_compare (enum rtx_code code)
14845 rtx op0, op1, ret;
14846 op0 = ix86_compare_op0;
14847 op1 = ix86_compare_op1;
14849 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC)
14850 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1);
14852 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
14854 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
14855 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
14857 else
14858 ret = ix86_expand_int_compare (code, op0, op1);
14860 return ret;
14863 void
14864 ix86_expand_branch (enum rtx_code code, rtx label)
14866 rtx tmp;
14868 switch (GET_MODE (ix86_compare_op0))
14870 case SFmode:
14871 case DFmode:
14872 case XFmode:
14873 case QImode:
14874 case HImode:
14875 case SImode:
14876 simple:
14877 tmp = ix86_expand_compare (code);
14878 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14879 gen_rtx_LABEL_REF (VOIDmode, label),
14880 pc_rtx);
14881 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14882 return;
14884 case DImode:
14885 if (TARGET_64BIT)
14886 goto simple;
14887 case TImode:
14888 /* Expand DImode branch into multiple compare+branch. */
14890 rtx lo[2], hi[2], label2;
14891 enum rtx_code code1, code2, code3;
14892 enum machine_mode submode;
14894 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
14896 tmp = ix86_compare_op0;
14897 ix86_compare_op0 = ix86_compare_op1;
14898 ix86_compare_op1 = tmp;
14899 code = swap_condition (code);
14901 if (GET_MODE (ix86_compare_op0) == DImode)
14903 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
14904 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
14905 submode = SImode;
14907 else
14909 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
14910 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
14911 submode = DImode;
14914 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
14915 avoid two branches. This costs one extra insn, so disable when
14916 optimizing for size. */
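/* E.g. for a DImode a == b on a 32-bit target, this emits
   (illustrative assembly, pseudo registers):

	movl	hi(a), %t1
	xorl	hi(b), %t1
	movl	lo(a), %t2
	xorl	lo(b), %t2
	orl	%t2, %t1
	je	label

   instead of two compare-and-branch pairs.  */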
14918 if ((code == EQ || code == NE)
14919 && (!optimize_insn_for_size_p ()
14920 || hi[1] == const0_rtx || lo[1] == const0_rtx))
14922 rtx xor0, xor1;
14924 xor1 = hi[0];
14925 if (hi[1] != const0_rtx)
14926 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
14927 NULL_RTX, 0, OPTAB_WIDEN);
14929 xor0 = lo[0];
14930 if (lo[1] != const0_rtx)
14931 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
14932 NULL_RTX, 0, OPTAB_WIDEN);
14934 tmp = expand_binop (submode, ior_optab, xor1, xor0,
14935 NULL_RTX, 0, OPTAB_WIDEN);
14937 ix86_compare_op0 = tmp;
14938 ix86_compare_op1 = const0_rtx;
14939 ix86_expand_branch (code, label);
14940 return;
14943 /* Otherwise, if we are doing a less-than or greater-or-equal-than
14944 comparison, op1 is a constant, and the low word is zero, then we can
14945 just examine the high word. Similarly for a low word of -1 and
14946 less-or-equal-than or greater-than. */
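/* For example, for unsigned a < 0x1234567800000000 the low word of
   the constant is zero, so the test is equivalent to
   hi(a) < 0x12345678; the low words cannot change the outcome.  */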
14948 if (CONST_INT_P (hi[1]))
14949 switch (code)
14951 case LT: case LTU: case GE: case GEU:
14952 if (lo[1] == const0_rtx)
14954 ix86_compare_op0 = hi[0];
14955 ix86_compare_op1 = hi[1];
14956 ix86_expand_branch (code, label);
14957 return;
14959 break;
14960 case LE: case LEU: case GT: case GTU:
14961 if (lo[1] == constm1_rtx)
14963 ix86_compare_op0 = hi[0];
14964 ix86_compare_op1 = hi[1];
14965 ix86_expand_branch (code, label);
14966 return;
14968 break;
14969 default:
14970 break;
14973 /* Otherwise, we need two or three jumps. */
14975 label2 = gen_label_rtx ();
14977 code1 = code;
14978 code2 = swap_condition (code);
14979 code3 = unsigned_condition (code);
14981 switch (code)
14983 case LT: case GT: case LTU: case GTU:
14984 break;
14986 case LE: code1 = LT; code2 = GT; break;
14987 case GE: code1 = GT; code2 = LT; break;
14988 case LEU: code1 = LTU; code2 = GTU; break;
14989 case GEU: code1 = GTU; code2 = LTU; break;
14991 case EQ: code1 = UNKNOWN; code2 = NE; break;
14992 case NE: code2 = UNKNOWN; break;
14994 default:
14995 gcc_unreachable ();
14999 * a < b =>
15000 * if (hi(a) < hi(b)) goto true;
15001 * if (hi(a) > hi(b)) goto false;
15002 * if (lo(a) < lo(b)) goto true;
15003 * false:
15006 ix86_compare_op0 = hi[0];
15007 ix86_compare_op1 = hi[1];
15009 if (code1 != UNKNOWN)
15010 ix86_expand_branch (code1, label);
15011 if (code2 != UNKNOWN)
15012 ix86_expand_branch (code2, label2);
15014 ix86_compare_op0 = lo[0];
15015 ix86_compare_op1 = lo[1];
15016 ix86_expand_branch (code3, label);
15018 if (code2 != UNKNOWN)
15019 emit_label (label2);
15020 return;
15023 default:
15024 /* If we have already emitted a compare insn, go straight to simple.
15025 ix86_expand_compare won't emit anything if ix86_compare_emitted
15026 is non-NULL. */
15027 gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC);
15028 goto simple;
15032 /* Split a branch based on a floating-point condition. */
15033 void
15034 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
15035 rtx target1, rtx target2, rtx tmp, rtx pushed)
15037 rtx condition;
15038 rtx i;
15040 if (target2 != pc_rtx)
15042 rtx tmp = target2;
15043 code = reverse_condition_maybe_unordered (code);
15044 target2 = target1;
15045 target1 = tmp;
15048 condition = ix86_expand_fp_compare (code, op1, op2,
15049 tmp);
15051 /* Remove pushed operand from stack. */
15052 if (pushed)
15053 ix86_free_from_memory (GET_MODE (pushed));
15055 i = emit_jump_insn (gen_rtx_SET
15056 (VOIDmode, pc_rtx,
15057 gen_rtx_IF_THEN_ELSE (VOIDmode,
15058 condition, target1, target2)));
15059 if (split_branch_probability >= 0)
15060 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
15063 void
15064 ix86_expand_setcc (enum rtx_code code, rtx dest)
15066 rtx ret;
15068 gcc_assert (GET_MODE (dest) == QImode);
15070 ret = ix86_expand_compare (code);
15071 PUT_MODE (ret, QImode);
15072 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
15075 /* Expand a comparison setting or clearing the carry flag. Return true
15076 when successful, and set *POP to the comparison rtx on success. */
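/* A carry-flag compare enables sbb-based conditional moves; e.g. an
   unsigned a < b materialized as -1/0 looks like (illustrative
   assembly):

	cmpl	b, a		; CF = (a < b)
	sbbl	%eax, %eax	; %eax = CF ? -1 : 0

   which is the pattern ix86_expand_int_movcc builds below.  */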
15077 static bool
15078 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
15080 enum machine_mode mode =
15081 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
15083 /* Do not handle DImode compares that go through the special path. */
15084 if (mode == (TARGET_64BIT ? TImode : DImode))
15085 return false;
15087 if (SCALAR_FLOAT_MODE_P (mode))
15089 rtx compare_op, compare_seq;
15091 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15093 /* Shortcut: the following common codes never translate
15094 into carry-flag compares. */
15095 if (code == EQ || code == NE || code == UNEQ || code == LTGT
15096 || code == ORDERED || code == UNORDERED)
15097 return false;
15099 /* These comparisons require the zero flag; swap operands so they won't. */
15100 if ((code == GT || code == UNLE || code == LE || code == UNGT)
15101 && !TARGET_IEEE_FP)
15103 rtx tmp = op0;
15104 op0 = op1;
15105 op1 = tmp;
15106 code = swap_condition (code);
15109 /* Try to expand the comparison and verify that we end up with a
15110 carry-flag-based comparison. This fails only when we decide to
15111 expand the comparison using arithmetic, which is not a
15112 common scenario. */
15113 start_sequence ();
15114 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
15115 compare_seq = get_insns ();
15116 end_sequence ();
15118 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15119 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15120 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
15121 else
15122 code = GET_CODE (compare_op);
15124 if (code != LTU && code != GEU)
15125 return false;
15127 emit_insn (compare_seq);
15128 *pop = compare_op;
15129 return true;
15132 if (!INTEGRAL_MODE_P (mode))
15133 return false;
15135 switch (code)
15137 case LTU:
15138 case GEU:
15139 break;
15141 /* Convert a==0 into (unsigned)a<1. */
15142 case EQ:
15143 case NE:
15144 if (op1 != const0_rtx)
15145 return false;
15146 op1 = const1_rtx;
15147 code = (code == EQ ? LTU : GEU);
15148 break;
15150 /* Convert a>b into b<a or a>=b+1. */
15151 case GTU:
15152 case LEU:
15153 if (CONST_INT_P (op1))
15155 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
15156 /* Bail out on overflow. We can still swap the operands, but that
15157 would force loading of the constant into a register. */
15158 if (op1 == const0_rtx
15159 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
15160 return false;
15161 code = (code == GTU ? GEU : LTU);
15163 else
15165 rtx tmp = op1;
15166 op1 = op0;
15167 op0 = tmp;
15168 code = (code == GTU ? LTU : GEU);
15170 break;
15172 /* Convert a>=0 into (unsigned)a<0x80000000. */
15173 case LT:
15174 case GE:
15175 if (mode == DImode || op1 != const0_rtx)
15176 return false;
15177 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15178 code = (code == LT ? GEU : LTU);
15179 break;
15180 case LE:
15181 case GT:
15182 if (mode == DImode || op1 != constm1_rtx)
15183 return false;
15184 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15185 code = (code == LE ? GEU : LTU);
15186 break;
15188 default:
15189 return false;
15191 /* Swapping operands may cause a constant to appear as the first operand. */
15192 if (!nonimmediate_operand (op0, VOIDmode))
15194 if (!can_create_pseudo_p ())
15195 return false;
15196 op0 = force_reg (mode, op0);
15198 ix86_compare_op0 = op0;
15199 ix86_compare_op1 = op1;
15200 *pop = ix86_expand_compare (code);
15201 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
15202 return true;
15205 int
15206 ix86_expand_int_movcc (rtx operands[])
15208 enum rtx_code code = GET_CODE (operands[1]), compare_code;
15209 rtx compare_seq, compare_op;
15210 enum machine_mode mode = GET_MODE (operands[0]);
15211 bool sign_bit_compare_p = false;
15213 start_sequence ();
15214 ix86_compare_op0 = XEXP (operands[1], 0);
15215 ix86_compare_op1 = XEXP (operands[1], 1);
15216 compare_op = ix86_expand_compare (code);
15217 compare_seq = get_insns ();
15218 end_sequence ();
15220 compare_code = GET_CODE (compare_op);
15222 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
15223 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
15224 sign_bit_compare_p = true;
15226 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
15227 HImode insns, we'd be swallowed in word prefix ops. */
15229 if ((mode != HImode || TARGET_FAST_PREFIX)
15230 && (mode != (TARGET_64BIT ? TImode : DImode))
15231 && CONST_INT_P (operands[2])
15232 && CONST_INT_P (operands[3]))
15234 rtx out = operands[0];
15235 HOST_WIDE_INT ct = INTVAL (operands[2]);
15236 HOST_WIDE_INT cf = INTVAL (operands[3]);
15237 HOST_WIDE_INT diff;
15239 diff = ct - cf;
15240 /* Sign-bit compares are better done using shifts than by using
15241 sbb. */
15242 if (sign_bit_compare_p
15243 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15244 ix86_compare_op1, &compare_op))
15246 /* Detect overlap between destination and compare sources. */
15247 rtx tmp = out;
15249 if (!sign_bit_compare_p)
15251 bool fpcmp = false;
15253 compare_code = GET_CODE (compare_op);
15255 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15256 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15258 fpcmp = true;
15259 compare_code = ix86_fp_compare_code_to_integer (compare_code);
15262 /* To simplify the rest of the code, restrict to the GEU case. */
15263 if (compare_code == LTU)
15265 HOST_WIDE_INT tmp = ct;
15266 ct = cf;
15267 cf = tmp;
15268 compare_code = reverse_condition (compare_code);
15269 code = reverse_condition (code);
15271 else
15273 if (fpcmp)
15274 PUT_CODE (compare_op,
15275 reverse_condition_maybe_unordered
15276 (GET_CODE (compare_op)));
15277 else
15278 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
15280 diff = ct - cf;
15282 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
15283 || reg_overlap_mentioned_p (out, ix86_compare_op1))
15284 tmp = gen_reg_rtx (mode);
15286 if (mode == DImode)
15287 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
15288 else
15289 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
15291 else
15293 if (code == GT || code == GE)
15294 code = reverse_condition (code);
15295 else
15297 HOST_WIDE_INT tmp = ct;
15298 ct = cf;
15299 cf = tmp;
15300 diff = ct - cf;
15302 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
15303 ix86_compare_op1, VOIDmode, 0, -1);
15306 if (diff == 1)
15309 * cmpl op0,op1
15310 * sbbl dest,dest
15311 * [addl dest, ct]
15313 * Size 5 - 8.
15315 if (ct)
15316 tmp = expand_simple_binop (mode, PLUS,
15317 tmp, GEN_INT (ct),
15318 copy_rtx (tmp), 1, OPTAB_DIRECT);
15320 else if (cf == -1)
15323 * cmpl op0,op1
15324 * sbbl dest,dest
15325 * orl $ct, dest
15327 * Size 8.
15329 tmp = expand_simple_binop (mode, IOR,
15330 tmp, GEN_INT (ct),
15331 copy_rtx (tmp), 1, OPTAB_DIRECT);
15333 else if (diff == -1 && ct)
15336 * cmpl op0,op1
15337 * sbbl dest,dest
15338 * notl dest
15339 * [addl dest, cf]
15341 * Size 8 - 11.
15343 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15344 if (cf)
15345 tmp = expand_simple_binop (mode, PLUS,
15346 copy_rtx (tmp), GEN_INT (cf),
15347 copy_rtx (tmp), 1, OPTAB_DIRECT);
15349 else
15352 * cmpl op0,op1
15353 * sbbl dest,dest
15354 * [notl dest]
15355 * andl cf - ct, dest
15356 * [addl dest, ct]
15358 * Size 8 - 11.
15361 if (cf == 0)
15363 cf = ct;
15364 ct = 0;
15365 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15368 tmp = expand_simple_binop (mode, AND,
15369 copy_rtx (tmp),
15370 gen_int_mode (cf - ct, mode),
15371 copy_rtx (tmp), 1, OPTAB_DIRECT);
15372 if (ct)
15373 tmp = expand_simple_binop (mode, PLUS,
15374 copy_rtx (tmp), GEN_INT (ct),
15375 copy_rtx (tmp), 1, OPTAB_DIRECT);
15378 if (!rtx_equal_p (tmp, out))
15379 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
15381 return 1; /* DONE */
15384 if (diff < 0)
15386 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15388 HOST_WIDE_INT tmp;
15389 tmp = ct, ct = cf, cf = tmp;
15390 diff = -diff;
15392 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15394 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15396 /* We may be reversing an unordered compare to a normal compare, which
15397 is not valid in general (we may convert a non-trapping condition
15398 into a trapping one); however, on i386 we currently emit all
15399 comparisons unordered. */
15400 compare_code = reverse_condition_maybe_unordered (compare_code);
15401 code = reverse_condition_maybe_unordered (code);
15403 else
15405 compare_code = reverse_condition (compare_code);
15406 code = reverse_condition (code);
15410 compare_code = UNKNOWN;
15411 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
15412 && CONST_INT_P (ix86_compare_op1))
15414 if (ix86_compare_op1 == const0_rtx
15415 && (code == LT || code == GE))
15416 compare_code = code;
15417 else if (ix86_compare_op1 == constm1_rtx)
15419 if (code == LE)
15420 compare_code = LT;
15421 else if (code == GT)
15422 compare_code = GE;
15426 /* Optimize dest = (op0 < 0) ? -1 : cf. */
15427 if (compare_code != UNKNOWN
15428 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
15429 && (cf == -1 || ct == -1))
15431 /* If the lea code below could be used, only optimize
15432 if it results in a 2-insn sequence. */
15434 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
15435 || diff == 3 || diff == 5 || diff == 9)
15436 || (compare_code == LT && ct == -1)
15437 || (compare_code == GE && cf == -1))
15440 * notl op1 (if necessary)
15441 * sarl $31, op1
15442 * orl cf, op1
15444 if (ct != -1)
15446 cf = ct;
15447 ct = -1;
15448 code = reverse_condition (code);
15451 out = emit_store_flag (out, code, ix86_compare_op0,
15452 ix86_compare_op1, VOIDmode, 0, -1);
15454 out = expand_simple_binop (mode, IOR,
15455 out, GEN_INT (cf),
15456 out, 1, OPTAB_DIRECT);
15457 if (out != operands[0])
15458 emit_move_insn (operands[0], out);
15460 return 1; /* DONE */
15465 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
15466 || diff == 3 || diff == 5 || diff == 9)
15467 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
15468 && (mode != DImode
15469 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
15472 * xorl dest,dest
15473 * cmpl op1,op2
15474 * setcc dest
15475 * lea cf(dest*(ct-cf)),dest
15477 * Size 14.
15479 * This also catches the degenerate setcc-only case.
15482 rtx tmp;
15483 int nops;
15485 out = emit_store_flag (out, code, ix86_compare_op0,
15486 ix86_compare_op1, VOIDmode, 0, 1);
15488 nops = 0;
15489 /* On x86_64 the lea instruction operates on Pmode, so we need
15490 to get the arithmetic done in the proper mode to match. */
15491 if (diff == 1)
15492 tmp = copy_rtx (out);
15493 else
15495 rtx out1;
15496 out1 = copy_rtx (out);
15497 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
15498 nops++;
15499 if (diff & 1)
15501 tmp = gen_rtx_PLUS (mode, tmp, out1);
15502 nops++;
15505 if (cf != 0)
15507 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
15508 nops++;
15510 if (!rtx_equal_p (tmp, out))
15512 if (nops == 1)
15513 out = force_operand (tmp, copy_rtx (out));
15514 else
15515 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
15517 if (!rtx_equal_p (out, operands[0]))
15518 emit_move_insn (operands[0], copy_rtx (out));
15520 return 1; /* DONE */
15524 * General case: Jumpful:
15525 * xorl dest,dest cmpl op1, op2
15526 * cmpl op1, op2 movl ct, dest
15527 * setcc dest jcc 1f
15528 * decl dest movl cf, dest
15529 * andl (cf-ct),dest 1:
15530 * addl ct,dest
15532 * Size 20. Size 14.
15534 * This is reasonably steep, but branch mispredict costs are
15535 * high on modern cpus, so consider failing only if optimizing
15536 * for space.
15539 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15540 && BRANCH_COST (optimize_insn_for_speed_p (),
15541 false) >= 2)
15543 if (cf == 0)
15545 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15547 cf = ct;
15548 ct = 0;
15550 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15552 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15554 /* We may be reversing an unordered compare to a normal compare,
15555 which is not valid in general (we may convert a non-trapping
15556 condition into a trapping one); however, on i386 we currently
15557 emit all comparisons unordered. */
15558 code = reverse_condition_maybe_unordered (code);
15560 else
15562 code = reverse_condition (code);
15563 if (compare_code != UNKNOWN)
15564 compare_code = reverse_condition (compare_code);
15568 if (compare_code != UNKNOWN)
15570 /* notl op1 (if needed)
15571 sarl $31, op1
15572 andl (cf-ct), op1
15573 addl ct, op1
15575 For x < 0 (resp. x <= -1) there will be no notl,
15576 so if possible swap the constants to get rid of the
15577 complement.
15578 True/false will be -1/0 while code below (store flag
15579 followed by decrement) is 0/-1, so the constants need
15580 to be exchanged once more. */
15582 if (compare_code == GE || !cf)
15584 code = reverse_condition (code);
15585 compare_code = LT;
15587 else
15589 HOST_WIDE_INT tmp = cf;
15590 cf = ct;
15591 ct = tmp;
15594 out = emit_store_flag (out, code, ix86_compare_op0,
15595 ix86_compare_op1, VOIDmode, 0, -1);
15597 else
15599 out = emit_store_flag (out, code, ix86_compare_op0,
15600 ix86_compare_op1, VOIDmode, 0, 1);
15602 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
15603 copy_rtx (out), 1, OPTAB_DIRECT);
15606 out = expand_simple_binop (mode, AND, copy_rtx (out),
15607 gen_int_mode (cf - ct, mode),
15608 copy_rtx (out), 1, OPTAB_DIRECT);
15609 if (ct)
15610 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
15611 copy_rtx (out), 1, OPTAB_DIRECT);
15612 if (!rtx_equal_p (out, operands[0]))
15613 emit_move_insn (operands[0], copy_rtx (out));
15615 return 1; /* DONE */
15619 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15621 /* Try a few more things with specific constants and a variable. */
15623 optab op;
15624 rtx var, orig_out, out, tmp;
15626 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
15627 return 0; /* FAIL */
15629 /* If one of the two operands is an interesting constant, load a
15630 constant with the above and mask it in with a logical operation. */
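/* For example, (cond ? 0 : x) is computed as (cond ? 0 : -1) & x
and (cond ? -1 : x) as (cond ? -1 : 0) | x; the recursive call
below loads the -1/0 mask and the AND/IOR then folds the
variable in. */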
15632 if (CONST_INT_P (operands[2]))
15634 var = operands[3];
15635 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
15636 operands[3] = constm1_rtx, op = and_optab;
15637 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
15638 operands[3] = const0_rtx, op = ior_optab;
15639 else
15640 return 0; /* FAIL */
15642 else if (CONST_INT_P (operands[3]))
15644 var = operands[2];
15645 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
15646 operands[2] = constm1_rtx, op = and_optab;
15647 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
15648 operands[2] = const0_rtx, op = ior_optab;
15649 else
15650 return 0; /* FAIL */
15652 else
15653 return 0; /* FAIL */
15655 orig_out = operands[0];
15656 tmp = gen_reg_rtx (mode);
15657 operands[0] = tmp;
15659 /* Recurse to get the constant loaded. */
15660 if (ix86_expand_int_movcc (operands) == 0)
15661 return 0; /* FAIL */
15663 /* Mask in the interesting variable. */
15664 out = expand_binop (mode, op, var, tmp, orig_out, 0,
15665 OPTAB_WIDEN);
15666 if (!rtx_equal_p (out, orig_out))
15667 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15669 return 1; /* DONE */
15673 * For comparison with above,
15675 * movl cf,dest
15676 * movl ct,tmp
15677 * cmpl op1,op2
15678 * cmovcc tmp,dest
15680 * Size 15.
15683 if (! nonimmediate_operand (operands[2], mode))
15684 operands[2] = force_reg (mode, operands[2]);
15685 if (! nonimmediate_operand (operands[3], mode))
15686 operands[3] = force_reg (mode, operands[3]);
15688 if (! register_operand (operands[2], VOIDmode)
15689 && (mode == QImode
15690 || ! register_operand (operands[3], VOIDmode)))
15691 operands[2] = force_reg (mode, operands[2]);
15693 if (mode == QImode
15694 && ! register_operand (operands[3], VOIDmode))
15695 operands[3] = force_reg (mode, operands[3]);
15697 emit_insn (compare_seq);
15698 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15699 gen_rtx_IF_THEN_ELSE (mode,
15700 compare_op, operands[2],
15701 operands[3])));
15703 return 1; /* DONE */
15706 /* Swap, force into registers, or otherwise massage the two operands
15707 to an sse comparison with a mask result. Thus we differ a bit from
15708 ix86_prepare_fp_compare_args which expects to produce a flags result.
15710 The DEST operand exists to help determine whether to commute commutative
15711 operators. The POP0/POP1 operands are updated in place. The new
15712 comparison code is returned, or UNKNOWN if not implementable. */
15714 static enum rtx_code
15715 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
15716 rtx *pop0, rtx *pop1)
15718 rtx tmp;
15720 switch (code)
15722 case LTGT:
15723 case UNEQ:
15724 /* We have no LTGT as an operator. We could implement it with
15725 NE & ORDERED, but this requires an extra temporary. It's
15726 not clear that it's worth it. */
15727 return UNKNOWN;
15729 case LT:
15730 case LE:
15731 case UNGT:
15732 case UNGE:
15733 /* These are supported directly. */
15734 break;
15736 case EQ:
15737 case NE:
15738 case UNORDERED:
15739 case ORDERED:
15740 /* For commutative operators, try to canonicalize the destination
15741 operand to be first in the comparison - this helps reload to
15742 avoid extra moves. */
15743 if (!dest || !rtx_equal_p (dest, *pop1))
15744 break;
15745 /* FALLTHRU */
15747 case GE:
15748 case GT:
15749 case UNLE:
15750 case UNLT:
15751 /* These are not supported directly. Swap the comparison operands
15752 to transform into something that is supported. */
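/* For example, GT is emitted as LT with the operands exchanged,
since a > b iff b < a; the SSE compare encodings provide LT/LE
(and their unordered complements UNGE/UNGT) but no GT/GE forms. */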
15753 tmp = *pop0;
15754 *pop0 = *pop1;
15755 *pop1 = tmp;
15756 code = swap_condition (code);
15757 break;
15759 default:
15760 gcc_unreachable ();
15763 return code;
15766 /* Detect conditional moves that exactly match min/max operational
15767 semantics. Note that this is IEEE safe, as long as we don't
15768 interchange the operands.
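(For instance, the hardware minss/minps return their second
operand when the operands are unordered or compare equal, so
(a < b ? a : b) matches min (a, b) exactly, while interchanging
the operands or arms would change the NaN and -0.0/+0.0
behavior.)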
15770 Returns FALSE if this conditional move doesn't match a MIN/MAX,
15771 and TRUE if the operation is successful and instructions are emitted. */
15773 static bool
15774 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
15775 rtx cmp_op1, rtx if_true, rtx if_false)
15777 enum machine_mode mode;
15778 bool is_min;
15779 rtx tmp;
15781 if (code == LT)
15783 else if (code == UNGE)
15785 tmp = if_true;
15786 if_true = if_false;
15787 if_false = tmp;
15789 else
15790 return false;
15792 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
15793 is_min = true;
15794 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
15795 is_min = false;
15796 else
15797 return false;
15799 mode = GET_MODE (dest);
15801 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
15802 but MODE may be a vector mode and thus not appropriate. */
15803 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
15805 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
15806 rtvec v;
15808 if_true = force_reg (mode, if_true);
15809 v = gen_rtvec (2, if_true, if_false);
15810 tmp = gen_rtx_UNSPEC (mode, v, u);
15812 else
15814 code = is_min ? SMIN : SMAX;
15815 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
15818 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
15819 return true;
15822 /* Expand an sse vector comparison. Return the register with the result. */
15824 static rtx
15825 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
15826 rtx op_true, rtx op_false)
15828 enum machine_mode mode = GET_MODE (dest);
15829 rtx x;
15831 cmp_op0 = force_reg (mode, cmp_op0);
15832 if (!nonimmediate_operand (cmp_op1, mode))
15833 cmp_op1 = force_reg (mode, cmp_op1);
15835 if (optimize
15836 || reg_overlap_mentioned_p (dest, op_true)
15837 || reg_overlap_mentioned_p (dest, op_false))
15838 dest = gen_reg_rtx (mode);
15840 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
15841 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15843 return dest;
15846 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
15847 operations. This is used for both scalar and vector conditional moves. */
15849 static void
15850 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
15852 enum machine_mode mode = GET_MODE (dest);
15853 rtx t2, t3, x;
15855 if (op_false == CONST0_RTX (mode))
15857 op_true = force_reg (mode, op_true);
15858 x = gen_rtx_AND (mode, cmp, op_true);
15859 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15861 else if (op_true == CONST0_RTX (mode))
15863 op_false = force_reg (mode, op_false);
15864 x = gen_rtx_NOT (mode, cmp);
15865 x = gen_rtx_AND (mode, x, op_false);
15866 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15868 else if (TARGET_SSE5)
15870 rtx pcmov = gen_rtx_SET (mode, dest,
15871 gen_rtx_IF_THEN_ELSE (mode, cmp,
15872 op_true,
15873 op_false));
15874 emit_insn (pcmov);
15876 else
15878 op_true = force_reg (mode, op_true);
15879 op_false = force_reg (mode, op_false);
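/* Without a variable blend instruction, build the classic SSE
select: dest = (cmp & op_true) | (~cmp & op_false), relying on
CMP being an all-ones/all-zeros mask per element. */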
15881 t2 = gen_reg_rtx (mode);
15882 if (optimize)
15883 t3 = gen_reg_rtx (mode);
15884 else
15885 t3 = dest;
15887 x = gen_rtx_AND (mode, op_true, cmp);
15888 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
15890 x = gen_rtx_NOT (mode, cmp);
15891 x = gen_rtx_AND (mode, x, op_false);
15892 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
15894 x = gen_rtx_IOR (mode, t3, t2);
15895 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15899 /* Expand a floating-point conditional move. Return true if successful. */
15901 int
15902 ix86_expand_fp_movcc (rtx operands[])
15904 enum machine_mode mode = GET_MODE (operands[0]);
15905 enum rtx_code code = GET_CODE (operands[1]);
15906 rtx tmp, compare_op;
15908 ix86_compare_op0 = XEXP (operands[1], 0);
15909 ix86_compare_op1 = XEXP (operands[1], 1);
15910 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
15912 enum machine_mode cmode;
15914 /* Since we have no cmove for SSE registers, don't force bad register
15915 allocation just to gain access to it. Deny movcc when the
15916 comparison mode doesn't match the move mode. */
15917 cmode = GET_MODE (ix86_compare_op0);
15918 if (cmode == VOIDmode)
15919 cmode = GET_MODE (ix86_compare_op1);
15920 if (cmode != mode)
15921 return 0;
15923 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15924 &ix86_compare_op0,
15925 &ix86_compare_op1);
15926 if (code == UNKNOWN)
15927 return 0;
15929 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
15930 ix86_compare_op1, operands[2],
15931 operands[3]))
15932 return 1;
15934 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
15935 ix86_compare_op1, operands[2], operands[3]);
15936 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
15937 return 1;
15940 /* The floating point conditional move instructions don't directly
15941 support conditions resulting from a signed integer comparison. */
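/* fcmov can test only the CF/ZF/PF-style (unsigned-flavored)
conditions, so for a signed predicate we materialize the
condition with setcc into a byte register below and cmov on
that byte being nonzero instead. */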
15943 compare_op = ix86_expand_compare (code);
15944 if (!fcmov_comparison_operator (compare_op, VOIDmode))
15946 tmp = gen_reg_rtx (QImode);
15947 ix86_expand_setcc (code, tmp);
15948 code = NE;
15949 ix86_compare_op0 = tmp;
15950 ix86_compare_op1 = const0_rtx;
15951 compare_op = ix86_expand_compare (code);
15954 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15955 gen_rtx_IF_THEN_ELSE (mode, compare_op,
15956 operands[2], operands[3])));
15958 return 1;
15961 /* Expand a floating-point vector conditional move; a vcond operation
15962 rather than a movcc operation. */
15964 bool
15965 ix86_expand_fp_vcond (rtx operands[])
15967 enum rtx_code code = GET_CODE (operands[3]);
15968 rtx cmp;
15970 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15971 &operands[4], &operands[5]);
15972 if (code == UNKNOWN)
15973 return false;
15975 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
15976 operands[5], operands[1], operands[2]))
15977 return true;
15979 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
15980 operands[1], operands[2]);
15981 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
15982 return true;
15985 /* Expand a signed/unsigned integral vector conditional move. */
15987 bool
15988 ix86_expand_int_vcond (rtx operands[])
15990 enum machine_mode mode = GET_MODE (operands[0]);
15991 enum rtx_code code = GET_CODE (operands[3]);
15992 bool negate = false;
15993 rtx x, cop0, cop1;
15995 cop0 = operands[4];
15996 cop1 = operands[5];
15998 /* SSE5 supports all of the comparisons on all vector int types. */
15999 if (!TARGET_SSE5)
16001 /* Canonicalize the comparison to EQ, GT, GTU. */
16002 switch (code)
16004 case EQ:
16005 case GT:
16006 case GTU:
16007 break;
16009 case NE:
16010 case LE:
16011 case LEU:
16012 code = reverse_condition (code);
16013 negate = true;
16014 break;
16016 case GE:
16017 case GEU:
16018 code = reverse_condition (code);
16019 negate = true;
16020 /* FALLTHRU */
16022 case LT:
16023 case LTU:
16024 code = swap_condition (code);
16025 x = cop0, cop0 = cop1, cop1 = x;
16026 break;
16028 default:
16029 gcc_unreachable ();
16032 /* Only SSE4.1/SSE4.2 supports V2DImode. */
16033 if (mode == V2DImode)
16035 switch (code)
16037 case EQ:
16038 /* SSE4.1 supports EQ. */
16039 if (!TARGET_SSE4_1)
16040 return false;
16041 break;
16043 case GT:
16044 case GTU:
16045 /* SSE4.2 supports GT/GTU. */
16046 if (!TARGET_SSE4_2)
16047 return false;
16048 break;
16050 default:
16051 gcc_unreachable ();
16055 /* Unsigned parallel compare is not supported by the hardware. Play some
16056 tricks to turn this into a signed comparison. */
16057 if (code == GTU)
16059 cop0 = force_reg (mode, cop0);
16061 switch (mode)
16063 case V4SImode:
16064 case V2DImode:
16066 rtx t1, t2, mask;
16068 /* There is no unsigned vector compare instruction; flip the
16069 sign bit of both operands so that the unsigned comparison
16070 becomes a signed one on the biased values:
16071 a >u b iff (a ^ msb) >s (b ^ msb), where msb is the
16072 per-element sign bit mask. */
16073 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
16074 true, false);
16076 cop1 = force_reg (mode, cop1);
16077 t1 = gen_reg_rtx (mode);
16078 emit_insn ((mode == V4SImode
16079 ? gen_xorv4si3
16080 : gen_xorv2di3) (t1, cop0, mask));
16082 t2 = gen_reg_rtx (mode);
16083 emit_insn ((mode == V4SImode
16084 ? gen_xorv4si3
16085 : gen_xorv2di3) (t2, cop1, mask));
16087 cop0 = t1;
16088 cop1 = t2;
16089 code = GT;
16091 break;
16093 case V16QImode:
16094 case V8HImode:
16095 /* Perform a parallel unsigned saturating subtraction. */
16096 x = gen_reg_rtx (mode);
16097 emit_insn (gen_rtx_SET (VOIDmode, x,
16098 gen_rtx_US_MINUS (mode, cop0, cop1)));
16100 cop0 = x;
16101 cop1 = CONST0_RTX (mode);
16102 code = EQ;
16103 negate = !negate;
16104 break;
16106 default:
16107 gcc_unreachable ();
16113 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
16114 operands[1+negate], operands[2-negate]);
16116 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
16117 operands[2-negate]);
16118 return true;
16121 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
16122 true if we should do zero extension, else sign extension. HIGH_P is
16123 true if we want the N/2 high elements, else the low elements. */
16125 void
16126 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16128 enum machine_mode imode = GET_MODE (operands[1]);
16129 rtx (*unpack)(rtx, rtx, rtx);
16130 rtx se, dest;
16132 switch (imode)
16134 case V16QImode:
16135 if (high_p)
16136 unpack = gen_vec_interleave_highv16qi;
16137 else
16138 unpack = gen_vec_interleave_lowv16qi;
16139 break;
16140 case V8HImode:
16141 if (high_p)
16142 unpack = gen_vec_interleave_highv8hi;
16143 else
16144 unpack = gen_vec_interleave_lowv8hi;
16145 break;
16146 case V4SImode:
16147 if (high_p)
16148 unpack = gen_vec_interleave_highv4si;
16149 else
16150 unpack = gen_vec_interleave_lowv4si;
16151 break;
16152 default:
16153 gcc_unreachable ();
16156 dest = gen_lowpart (imode, operands[0]);
16158 if (unsigned_p)
16159 se = force_reg (imode, CONST0_RTX (imode));
16160 else
16161 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
16162 operands[1], pc_rtx, pc_rtx);
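/* SE is now either a zero vector (zero extension) or, via the
0 > OPERANDS[1] compare above, a vector of each element's sign
bits; e.g. interleaving low bytes a0..a7 with zeroes yields
{ a0, 0, a1, 0, ... }, the V8HI zero extension of the low half. */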
16164 emit_insn (unpack (dest, operands[1], se));
16167 /* This function performs the same task as ix86_expand_sse_unpack,
16168 but with SSE4.1 instructions. */
16170 void
16171 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16173 enum machine_mode imode = GET_MODE (operands[1]);
16174 rtx (*unpack)(rtx, rtx);
16175 rtx src, dest;
16177 switch (imode)
16179 case V16QImode:
16180 if (unsigned_p)
16181 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
16182 else
16183 unpack = gen_sse4_1_extendv8qiv8hi2;
16184 break;
16185 case V8HImode:
16186 if (unsigned_p)
16187 unpack = gen_sse4_1_zero_extendv4hiv4si2;
16188 else
16189 unpack = gen_sse4_1_extendv4hiv4si2;
16190 break;
16191 case V4SImode:
16192 if (unsigned_p)
16193 unpack = gen_sse4_1_zero_extendv2siv2di2;
16194 else
16195 unpack = gen_sse4_1_extendv2siv2di2;
16196 break;
16197 default:
16198 gcc_unreachable ();
16201 dest = operands[0];
16202 if (high_p)
16204 /* Shift higher 8 bytes to lower 8 bytes. */
16205 src = gen_reg_rtx (imode);
16206 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
16207 gen_lowpart (TImode, operands[1]),
16208 GEN_INT (64)));
16210 else
16211 src = operands[1];
16213 emit_insn (unpack (dest, src));
16216 /* This function performs the same task as ix86_expand_sse_unpack,
16217 but with sse5 instructions. */
16219 void
16220 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16222 enum machine_mode imode = GET_MODE (operands[1]);
16223 int pperm_bytes[16];
16224 int i;
16225 int h = (high_p) ? 8 : 0;
16226 int h2;
16227 int sign_extend;
16228 rtvec v = rtvec_alloc (16);
16229 rtvec vs;
16230 rtx x, p;
16231 rtx op0 = operands[0], op1 = operands[1];
16233 switch (imode)
16235 case V16QImode:
16236 vs = rtvec_alloc (8);
16237 h2 = (high_p) ? 8 : 0;
16238 for (i = 0; i < 8; i++)
16240 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
16241 pperm_bytes[2*i+1] = ((unsigned_p)
16242 ? PPERM_ZERO
16243 : PPERM_SIGN | PPERM_SRC2 | i | h);
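/* Each output word thus pairs source byte i + h with an
extension byte: PPERM_ZERO supplies 0x00 for zero extension,
while PPERM_SIGN | PPERM_SRC2 replicates that byte's sign. */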
16246 for (i = 0; i < 16; i++)
16247 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16249 for (i = 0; i < 8; i++)
16250 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16252 p = gen_rtx_PARALLEL (VOIDmode, vs);
16253 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16254 if (unsigned_p)
16255 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
16256 else
16257 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
16258 break;
16260 case V8HImode:
16261 vs = rtvec_alloc (4);
16262 h2 = (high_p) ? 4 : 0;
16263 for (i = 0; i < 4; i++)
16265 sign_extend = ((unsigned_p)
16266 ? PPERM_ZERO
16267 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
16268 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
16269 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
16270 pperm_bytes[4*i+2] = sign_extend;
16271 pperm_bytes[4*i+3] = sign_extend;
16274 for (i = 0; i < 16; i++)
16275 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16277 for (i = 0; i < 4; i++)
16278 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16280 p = gen_rtx_PARALLEL (VOIDmode, vs);
16281 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16282 if (unsigned_p)
16283 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
16284 else
16285 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
16286 break;
16288 case V4SImode:
16289 vs = rtvec_alloc (2);
16290 h2 = (high_p) ? 2 : 0;
16291 for (i = 0; i < 2; i++)
16293 sign_extend = ((unsigned_p)
16294 ? PPERM_ZERO
16295 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
16296 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
16297 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
16298 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
16299 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
16300 pperm_bytes[8*i+4] = sign_extend;
16301 pperm_bytes[8*i+5] = sign_extend;
16302 pperm_bytes[8*i+6] = sign_extend;
16303 pperm_bytes[8*i+7] = sign_extend;
16306 for (i = 0; i < 16; i++)
16307 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16309 for (i = 0; i < 2; i++)
16310 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16312 p = gen_rtx_PARALLEL (VOIDmode, vs);
16313 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16314 if (unsigned_p)
16315 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
16316 else
16317 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
16318 break;
16320 default:
16321 gcc_unreachable ();
16324 return;
16327 /* Pack OPERANDS[1] into the low half and OPERANDS[2] into the high half
16328 of the next narrower integer vector type, truncating each element. */
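/* For a V16QImode result, for instance, the selector built below
routes the low byte of each V8HI element of OPERANDS[1] into
result bytes 0-7 and those of OPERANDS[2] into bytes 8-15. */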
16329 void
16330 ix86_expand_sse5_pack (rtx operands[3])
16332 enum machine_mode imode = GET_MODE (operands[0]);
16333 int pperm_bytes[16];
16334 int i;
16335 rtvec v = rtvec_alloc (16);
16336 rtx x;
16337 rtx op0 = operands[0];
16338 rtx op1 = operands[1];
16339 rtx op2 = operands[2];
16341 switch (imode)
16343 case V16QImode:
16344 for (i = 0; i < 8; i++)
16346 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
16347 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
16350 for (i = 0; i < 16; i++)
16351 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16353 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16354 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
16355 break;
16357 case V8HImode:
16358 for (i = 0; i < 4; i++)
16360 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
16361 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
16362 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
16363 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
16366 for (i = 0; i < 16; i++)
16367 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16369 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16370 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
16371 break;
16373 case V4SImode:
16374 for (i = 0; i < 2; i++)
16376 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
16377 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
16378 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
16379 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
16380 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
16381 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
16382 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
16383 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
16386 for (i = 0; i < 16; i++)
16387 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16389 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16390 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
16391 break;
16393 default:
16394 gcc_unreachable ();
16397 return;
16400 /* Expand conditional increment or decrement using adc/sbb instructions.
16401 The default case using setcc followed by the conditional move can be
16402 done by generic code. */
16403 int
16404 ix86_expand_int_addcc (rtx operands[])
16406 enum rtx_code code = GET_CODE (operands[1]);
16407 rtx compare_op;
16408 rtx val = const0_rtx;
16409 bool fpcmp = false;
16410 enum machine_mode mode = GET_MODE (operands[0]);
16412 ix86_compare_op0 = XEXP (operands[1], 0);
16413 ix86_compare_op1 = XEXP (operands[1], 1);
16414 if (operands[3] != const1_rtx
16415 && operands[3] != constm1_rtx)
16416 return 0;
16417 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16418 ix86_compare_op1, &compare_op))
16419 return 0;
16420 code = GET_CODE (compare_op);
16422 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16423 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16425 fpcmp = true;
16426 code = ix86_fp_compare_code_to_integer (code);
16429 if (code != LTU)
16431 val = constm1_rtx;
16432 if (fpcmp)
16433 PUT_CODE (compare_op,
16434 reverse_condition_maybe_unordered
16435 (GET_CODE (compare_op)));
16436 else
16437 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
16439 PUT_MODE (compare_op, mode);
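/* Roughly, the carry patterns compute dest = op2 + val + CF (adc)
or dest = op2 - CF - val (sbb); with VAL of 0 or -1 and the
condition mapped onto the carry flag above, this yields
op2 +/- 1 exactly when the condition holds, without a branch. */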
16441 /* Construct either adc or sbb insn. */
16442 if ((code == LTU) == (operands[3] == constm1_rtx))
16444 switch (GET_MODE (operands[0]))
16446 case QImode:
16447 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
16448 break;
16449 case HImode:
16450 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
16451 break;
16452 case SImode:
16453 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
16454 break;
16455 case DImode:
16456 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16457 break;
16458 default:
16459 gcc_unreachable ();
16462 else
16464 switch (GET_MODE (operands[0]))
16466 case QImode:
16467 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
16468 break;
16469 case HImode:
16470 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
16471 break;
16472 case SImode:
16473 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
16474 break;
16475 case DImode:
16476 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16477 break;
16478 default:
16479 gcc_unreachable ();
16482 return 1; /* DONE */
16486 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
16487 works for floating point parameters and non-offsettable memories.
16488 For pushes, it returns just stack offsets; the values will be saved
16489 in the right order. Maximally four parts are generated. */
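/* For example, with !TARGET_64BIT a DFmode operand yields two SImode
parts and an XFmode operand three, while with TARGET_64BIT an
XFmode or TFmode operand yields a DImode part plus one more part
in the appropriate upper mode. */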
16491 static int
16492 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
16494 int size;
16496 if (!TARGET_64BIT)
16497 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
16498 else
16499 size = (GET_MODE_SIZE (mode) + 4) / 8;
16501 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
16502 gcc_assert (size >= 2 && size <= 4);
16504 /* Optimize constant pool reference to immediates. This is used by fp
16505 moves, which force all constants to memory to allow combining. */
16506 if (MEM_P (operand) && MEM_READONLY_P (operand))
16508 rtx tmp = maybe_get_pool_constant (operand);
16509 if (tmp)
16510 operand = tmp;
16513 if (MEM_P (operand) && !offsettable_memref_p (operand))
16515 /* The only non-offsettable memories we handle are pushes. */
16516 int ok = push_operand (operand, VOIDmode);
16518 gcc_assert (ok);
16520 operand = copy_rtx (operand);
16521 PUT_MODE (operand, Pmode);
16522 parts[0] = parts[1] = parts[2] = parts[3] = operand;
16523 return size;
16526 if (GET_CODE (operand) == CONST_VECTOR)
16528 enum machine_mode imode = int_mode_for_mode (mode);
16529 /* Caution: if we looked through a constant pool memory above,
16530 the operand may actually have a different mode now. That's
16531 ok, since we want to pun this all the way back to an integer. */
16532 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
16533 gcc_assert (operand != NULL);
16534 mode = imode;
16537 if (!TARGET_64BIT)
16539 if (mode == DImode)
16540 split_di (&operand, 1, &parts[0], &parts[1]);
16541 else
16543 int i;
16545 if (REG_P (operand))
16547 gcc_assert (reload_completed);
16548 for (i = 0; i < size; i++)
16549 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
16551 else if (offsettable_memref_p (operand))
16553 operand = adjust_address (operand, SImode, 0);
16554 parts[0] = operand;
16555 for (i = 1; i < size; i++)
16556 parts[i] = adjust_address (operand, SImode, 4 * i);
16558 else if (GET_CODE (operand) == CONST_DOUBLE)
16560 REAL_VALUE_TYPE r;
16561 long l[4];
16563 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16564 switch (mode)
16566 case TFmode:
16567 real_to_target (l, &r, mode);
16568 parts[3] = gen_int_mode (l[3], SImode);
16569 parts[2] = gen_int_mode (l[2], SImode);
16570 break;
16571 case XFmode:
16572 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
16573 parts[2] = gen_int_mode (l[2], SImode);
16574 break;
16575 case DFmode:
16576 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16577 break;
16578 default:
16579 gcc_unreachable ();
16581 parts[1] = gen_int_mode (l[1], SImode);
16582 parts[0] = gen_int_mode (l[0], SImode);
16584 else
16585 gcc_unreachable ();
16588 else
16590 if (mode == TImode)
16591 split_ti (&operand, 1, &parts[0], &parts[1]);
16592 if (mode == XFmode || mode == TFmode)
16594 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
16595 if (REG_P (operand))
16597 gcc_assert (reload_completed);
16598 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
16599 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
16601 else if (offsettable_memref_p (operand))
16603 operand = adjust_address (operand, DImode, 0);
16604 parts[0] = operand;
16605 parts[1] = adjust_address (operand, upper_mode, 8);
16607 else if (GET_CODE (operand) == CONST_DOUBLE)
16609 REAL_VALUE_TYPE r;
16610 long l[4];
16612 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16613 real_to_target (l, &r, mode);
16615 /* Do not use shift by 32 to avoid warning on 32bit systems. */
16616 if (HOST_BITS_PER_WIDE_INT >= 64)
16617 parts[0]
16618 = gen_int_mode
16619 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16620 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16621 DImode);
16622 else
16623 parts[0] = immed_double_const (l[0], l[1], DImode);
16625 if (upper_mode == SImode)
16626 parts[1] = gen_int_mode (l[2], SImode);
16627 else if (HOST_BITS_PER_WIDE_INT >= 64)
16628 parts[1]
16629 = gen_int_mode
16630 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16631 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16632 DImode);
16633 else
16634 parts[1] = immed_double_const (l[2], l[3], DImode);
16636 else
16637 gcc_unreachable ();
16641 return size;
16644 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
16645 All required insns are emitted here. Operands 2-5 receive the
16646 destination parts in the correct order; operands 6-9 receive the
16647 corresponding source parts. */
16649 void
16650 ix86_split_long_move (rtx operands[])
16652 rtx part[2][4];
16653 int nparts, i, j;
16654 int push = 0;
16655 int collisions = 0;
16656 enum machine_mode mode = GET_MODE (operands[0]);
16657 bool collisionparts[4];
16659 /* The DFmode expanders may ask us to move a double.
16660 For a 64-bit target this is a single move. By hiding the fact
16661 here we simplify the i386.md splitters. */
16662 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
16664 /* Optimize constant pool reference to immediates. This is used by
16665 fp moves, which force all constants to memory to allow combining. */
16667 if (MEM_P (operands[1])
16668 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16669 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16670 operands[1] = get_pool_constant (XEXP (operands[1], 0));
16671 if (push_operand (operands[0], VOIDmode))
16673 operands[0] = copy_rtx (operands[0]);
16674 PUT_MODE (operands[0], Pmode);
16676 else
16677 operands[0] = gen_lowpart (DImode, operands[0]);
16678 operands[1] = gen_lowpart (DImode, operands[1]);
16679 emit_move_insn (operands[0], operands[1]);
16680 return;
16683 /* The only non-offsettable memory we handle is a push. */
16684 if (push_operand (operands[0], VOIDmode))
16685 push = 1;
16686 else
16687 gcc_assert (!MEM_P (operands[0])
16688 || offsettable_memref_p (operands[0]));
16690 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16691 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
16693 /* When emitting a push, take care of source operands on the stack. */
16694 if (push && MEM_P (operands[1])
16695 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
16696 for (i = 0; i < nparts - 1; i++)
16697 part[1][i] = change_address (part[1][i],
16698 GET_MODE (part[1][i]),
16699 XEXP (part[1][i + 1], 0));
16701 /* We need to do the copy in the right order in case an address register
16702 of the source overlaps the destination. */
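/* For instance, when copying (mem (reg A)) into registers A and B,
the part addressed through A must be loaded before A itself is
overwritten; with more than one such collision we instead lea the
source address into the last destination register and rewrite the
source parts to use it. */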
16703 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
16705 rtx tmp;
16707 for (i = 0; i < nparts; i++)
16709 collisionparts[i]
16710 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
16711 if (collisionparts[i])
16712 collisions++;
16715 /* Collision in the middle part can be handled by reordering. */
16716 if (collisions == 1 && nparts == 3 && collisionparts [1])
16718 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16719 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16721 else if (collisions == 1
16722 && nparts == 4
16723 && (collisionparts [1] || collisionparts [2]))
16725 if (collisionparts [1])
16727 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16728 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16730 else
16732 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
16733 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
16737 /* If there are more collisions, we can't handle it by reordering.
16738 Do an lea to the last part and use only one colliding move. */
16739 else if (collisions > 1)
16741 rtx base;
16743 collisions = 1;
16745 base = part[0][nparts - 1];
16747 /* Handle the case when the last part isn't valid for lea.
16748 Happens in 64-bit mode storing the 12-byte XFmode. */
16749 if (GET_MODE (base) != Pmode)
16750 base = gen_rtx_REG (Pmode, REGNO (base));
16752 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
16753 part[1][0] = replace_equiv_address (part[1][0], base);
16754 for (i = 1; i < nparts; i++)
16756 tmp = plus_constant (base, UNITS_PER_WORD * i);
16757 part[1][i] = replace_equiv_address (part[1][i], tmp);
16762 if (push)
16764 if (!TARGET_64BIT)
16766 if (nparts == 3)
16768 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
16769 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
16770 emit_move_insn (part[0][2], part[1][2]);
16772 else if (nparts == 4)
16774 emit_move_insn (part[0][3], part[1][3]);
16775 emit_move_insn (part[0][2], part[1][2]);
16778 else
16780 /* In 64-bit mode we don't have a 32-bit push available. If the operand
16781 is a register, that is OK - we just use the larger counterpart. We also
16782 retype memory - this comes from an attempt to avoid a REX prefix on
16783 moving the second half of a TFmode value. */
16784 if (GET_MODE (part[1][1]) == SImode)
16786 switch (GET_CODE (part[1][1]))
16788 case MEM:
16789 part[1][1] = adjust_address (part[1][1], DImode, 0);
16790 break;
16792 case REG:
16793 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16794 break;
16796 default:
16797 gcc_unreachable ();
16800 if (GET_MODE (part[1][0]) == SImode)
16801 part[1][0] = part[1][1];
16804 emit_move_insn (part[0][1], part[1][1]);
16805 emit_move_insn (part[0][0], part[1][0]);
16806 return;
16809 /* Choose the correct order so as not to overwrite the source before it is copied. */
16810 if ((REG_P (part[0][0])
16811 && REG_P (part[1][1])
16812 && (REGNO (part[0][0]) == REGNO (part[1][1])
16813 || (nparts == 3
16814 && REGNO (part[0][0]) == REGNO (part[1][2]))
16815 || (nparts == 4
16816 && REGNO (part[0][0]) == REGNO (part[1][3]))))
16817 || (collisions > 0
16818 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
16820 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16822 operands[2 + i] = part[0][j];
16823 operands[6 + i] = part[1][j];
16826 else
16828 for (i = 0; i < nparts; i++)
16830 operands[2 + i] = part[0][i];
16831 operands[6 + i] = part[1][i];
16835 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
16836 if (optimize_insn_for_size_p ())
16838 for (j = 0; j < nparts - 1; j++)
16839 if (CONST_INT_P (operands[6 + j])
16840 && operands[6 + j] != const0_rtx
16841 && REG_P (operands[2 + j]))
16842 for (i = j; i < nparts - 1; i++)
16843 if (CONST_INT_P (operands[7 + i])
16844 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
16845 operands[7 + i] = operands[2 + j];
16848 for (i = 0; i < nparts; i++)
16849 emit_move_insn (operands[2 + i], operands[6 + i]);
16851 return;
16854 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16855 left shift by a constant, either using a single shift or
16856 a sequence of add instructions. */
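/* Adding a register to itself is a left shift by one, so e.g. a
shift by 2 may be emitted as two adds whenever
2 * ix86_cost->add <= ix86_cost->shift_const and we are not
optimizing for size. */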
16858 static void
16859 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
16861 if (count == 1)
16863 emit_insn ((mode == DImode
16864 ? gen_addsi3
16865 : gen_adddi3) (operand, operand, operand));
16867 else if (!optimize_insn_for_size_p ()
16868 && count * ix86_cost->add <= ix86_cost->shift_const)
16870 int i;
16871 for (i = 0; i < count; i++)
16873 emit_insn ((mode == DImode
16874 ? gen_addsi3
16875 : gen_adddi3) (operand, operand, operand));
16878 else
16879 emit_insn ((mode == DImode
16880 ? gen_ashlsi3
16881 : gen_ashldi3) (operand, operand, GEN_INT (count)));
16884 void
16885 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16887 rtx low[2], high[2];
16888 int count;
16889 const int single_width = mode == DImode ? 32 : 64;
16891 if (CONST_INT_P (operands[2]))
16893 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16894 count = INTVAL (operands[2]) & (single_width * 2 - 1);
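/* For example, a DImode shift by 40 moves the low word into the
high word, clears the low word, and then shifts the high word
left by the remaining 40 - 32 = 8 bits. */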
16896 if (count >= single_width)
16898 emit_move_insn (high[0], low[1]);
16899 emit_move_insn (low[0], const0_rtx);
16901 if (count > single_width)
16902 ix86_expand_ashl_const (high[0], count - single_width, mode);
16904 else
16906 if (!rtx_equal_p (operands[0], operands[1]))
16907 emit_move_insn (operands[0], operands[1]);
16908 emit_insn ((mode == DImode
16909 ? gen_x86_shld
16910 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
16911 ix86_expand_ashl_const (low[0], count, mode);
16913 return;
16916 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16918 if (operands[1] == const1_rtx)
16920 /* Assuming we've chosen QImode-capable registers, then 1 << N
16921 can be done with two 32/64-bit shifts, no branches, no cmoves. */
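/* A sketch of the DImode sequence (illustrative register
choices): clear both words, "testb $32, %cl" checks whether the
count reaches the high word, sete/setne deposit the 0/1 seed
into the low/high word, and the final shifts by %cl use only
the low 5 bits of the count. */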
16922 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
16924 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
16926 ix86_expand_clear (low[0]);
16927 ix86_expand_clear (high[0]);
16928 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
16930 d = gen_lowpart (QImode, low[0]);
16931 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16932 s = gen_rtx_EQ (QImode, flags, const0_rtx);
16933 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16935 d = gen_lowpart (QImode, high[0]);
16936 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16937 s = gen_rtx_NE (QImode, flags, const0_rtx);
16938 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16941 /* Otherwise, we can get the same results by manually performing
16942 a bit extract operation on bit 5/6, and then performing the two
16943 shifts. The two methods of getting 0/1 into low/high are exactly
16944 the same size. Avoiding the shift in the bit extract case helps
16945 pentium4 a bit; no one else seems to care much either way. */
16946 else
16948 rtx x;
16950 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
16951 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
16952 else
16953 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
16954 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
16956 emit_insn ((mode == DImode
16957 ? gen_lshrsi3
16958 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
16959 emit_insn ((mode == DImode
16960 ? gen_andsi3
16961 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
16962 emit_move_insn (low[0], high[0]);
16963 emit_insn ((mode == DImode
16964 ? gen_xorsi3
16965 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
16968 emit_insn ((mode == DImode
16969 ? gen_ashlsi3
16970 : gen_ashldi3) (low[0], low[0], operands[2]));
16971 emit_insn ((mode == DImode
16972 ? gen_ashlsi3
16973 : gen_ashldi3) (high[0], high[0], operands[2]));
16974 return;
16977 if (operands[1] == constm1_rtx)
16979 /* For -1 << N, we can avoid the shld instruction, because we
16980 know that we're shifting 0...31/63 ones into a -1. */
16981 emit_move_insn (low[0], constm1_rtx);
16982 if (optimize_insn_for_size_p ())
16983 emit_move_insn (high[0], low[0]);
16984 else
16985 emit_move_insn (high[0], constm1_rtx);
16987 else
16989 if (!rtx_equal_p (operands[0], operands[1]))
16990 emit_move_insn (operands[0], operands[1]);
16992 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16993 emit_insn ((mode == DImode
16994 ? gen_x86_shld
16995 : gen_x86_64_shld) (high[0], low[0], operands[2]));
16998 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
17000 if (TARGET_CMOVE && scratch)
17002 ix86_expand_clear (scratch);
17003 emit_insn ((mode == DImode
17004 ? gen_x86_shift_adj_1
17005 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
17006 scratch));
17008 else
17009 emit_insn ((mode == DImode
17010 ? gen_x86_shift_adj_2
17011 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
17014 void
17015 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
17017 rtx low[2], high[2];
17018 int count;
17019 const int single_width = mode == DImode ? 32 : 64;
17021 if (CONST_INT_P (operands[2]))
17023 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17024 count = INTVAL (operands[2]) & (single_width * 2 - 1);
17026 if (count == single_width * 2 - 1)
17028 emit_move_insn (high[0], high[1]);
17029 emit_insn ((mode == DImode
17030 ? gen_ashrsi3
17031 : gen_ashrdi3) (high[0], high[0],
17032 GEN_INT (single_width - 1)));
17033 emit_move_insn (low[0], high[0]);
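/* Shifting right by all value bits (63, or 127 for TImode)
leaves only the sign: both result words equal the original high
word shifted arithmetically by single_width - 1, i.e. 0 or -1. */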
17036 else if (count >= single_width)
17038 emit_move_insn (low[0], high[1]);
17039 emit_move_insn (high[0], low[0]);
17040 emit_insn ((mode == DImode
17041 ? gen_ashrsi3
17042 : gen_ashrdi3) (high[0], high[0],
17043 GEN_INT (single_width - 1)));
17044 if (count > single_width)
17045 emit_insn ((mode == DImode
17046 ? gen_ashrsi3
17047 : gen_ashrdi3) (low[0], low[0],
17048 GEN_INT (count - single_width)));
17050 else
17052 if (!rtx_equal_p (operands[0], operands[1]))
17053 emit_move_insn (operands[0], operands[1]);
17054 emit_insn ((mode == DImode
17055 ? gen_x86_shrd
17056 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17057 emit_insn ((mode == DImode
17058 ? gen_ashrsi3
17059 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
17062 else
17064 if (!rtx_equal_p (operands[0], operands[1]))
17065 emit_move_insn (operands[0], operands[1]);
17067 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17069 emit_insn ((mode == DImode
17070 ? gen_x86_shrd
17071 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17072 emit_insn ((mode == DImode
17073 ? gen_ashrsi3
17074 : gen_ashrdi3) (high[0], high[0], operands[2]));
17076 if (TARGET_CMOVE && scratch)
17078 emit_move_insn (scratch, high[0]);
17079 emit_insn ((mode == DImode
17080 ? gen_ashrsi3
17081 : gen_ashrdi3) (scratch, scratch,
17082 GEN_INT (single_width - 1)));
17083 emit_insn ((mode == DImode
17084 ? gen_x86_shift_adj_1
17085 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17086 scratch));
17088 else
17089 emit_insn ((mode == DImode
17090 ? gen_x86_shift_adj_3
17091 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
17095 void
17096 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
17098 rtx low[2], high[2];
17099 int count;
17100 const int single_width = mode == DImode ? 32 : 64;
17102 if (CONST_INT_P (operands[2]))
17104 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17105 count = INTVAL (operands[2]) & (single_width * 2 - 1);
17107 if (count >= single_width)
17109 emit_move_insn (low[0], high[1]);
17110 ix86_expand_clear (high[0]);
17112 if (count > single_width)
17113 emit_insn ((mode == DImode
17114 ? gen_lshrsi3
17115 : gen_lshrdi3) (low[0], low[0],
17116 GEN_INT (count - single_width)));
17118 else
17120 if (!rtx_equal_p (operands[0], operands[1]))
17121 emit_move_insn (operands[0], operands[1]);
17122 emit_insn ((mode == DImode
17123 ? gen_x86_shrd
17124 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17125 emit_insn ((mode == DImode
17126 ? gen_lshrsi3
17127 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
17130 else
17132 if (!rtx_equal_p (operands[0], operands[1]))
17133 emit_move_insn (operands[0], operands[1]);
17135 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17137 emit_insn ((mode == DImode
17138 ? gen_x86_shrd
17139 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17140 emit_insn ((mode == DImode
17141 ? gen_lshrsi3
17142 : gen_lshrdi3) (high[0], high[0], operands[2]));
17144 /* Heh. By reversing the arguments, we can reuse this pattern. */
17145 if (TARGET_CMOVE && scratch)
17147 ix86_expand_clear (scratch);
17148 emit_insn ((mode == DImode
17149 ? gen_x86_shift_adj_1
17150 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17151 scratch));
17153 else
17154 emit_insn ((mode == DImode
17155 ? gen_x86_shift_adj_2
17156 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
17160 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
17161 static void
17162 predict_jump (int prob)
17164 rtx insn = get_last_insn ();
17165 gcc_assert (JUMP_P (insn));
17166 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
17169 /* Helper function for the string operations below. Test VARIABLE for
17170 whether it is aligned to VALUE bytes. If it is, jump to the label. */
17171 static rtx
17172 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
17174 rtx label = gen_label_rtx ();
17175 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
17176 if (GET_MODE (variable) == DImode)
17177 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
17178 else
17179 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
17180 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
17181 1, label);
17182 if (epilogue)
17183 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17184 else
17185 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17186 return label;
17189 /* Decrease COUNTREG by VALUE. */
17190 static void
17191 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
17193 if (GET_MODE (countreg) == DImode)
17194 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
17195 else
17196 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
17199 /* Zero-extend the possibly SImode EXP to a Pmode register. */
17200 rtx
17201 ix86_zero_extend_to_Pmode (rtx exp)
17203 rtx r;
17204 if (GET_MODE (exp) == VOIDmode)
17205 return force_reg (Pmode, exp);
17206 if (GET_MODE (exp) == Pmode)
17207 return copy_to_mode_reg (Pmode, exp);
17208 r = gen_reg_rtx (Pmode);
17209 emit_insn (gen_zero_extendsidi2 (r, exp));
17210 return r;
17213 /* Divide COUNTREG by SCALE. */
17214 static rtx
17215 scale_counter (rtx countreg, int scale)
17217 rtx sc;
17218 rtx piece_size_mask;
17220 if (scale == 1)
17221 return countreg;
17222 if (CONST_INT_P (countreg))
17223 return GEN_INT (INTVAL (countreg) / scale);
17224 gcc_assert (REG_P (countreg));
17226 piece_size_mask = GEN_INT (scale - 1);
17227 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
17228 GEN_INT (exact_log2 (scale)),
17229 NULL, 1, OPTAB_DIRECT);
17230 return sc;
17233 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
17234 DImode for constant loop counts. */
17236 static enum machine_mode
17237 counter_mode (rtx count_exp)
17239 if (GET_MODE (count_exp) != VOIDmode)
17240 return GET_MODE (count_exp);
17241 if (!CONST_INT_P (count_exp))
17242 return Pmode;
17243 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
17244 return DImode;
17245 return SImode;
17248 /* When SRCPTR is non-NULL, output a simple loop that moves the memory
17249 pointed to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL
17250 times; the overall size is COUNT, specified in bytes. When SRCPTR is
17251 NULL, output the equivalent loop that sets memory to VALUE (which is
17252 expected to be in MODE).
17253 The size is rounded down to a whole number of chunks moved at once.
17254 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
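/* A rough sketch of the emitted control flow (the explicit zero
test is emitted only when the chunk is a single byte):

	iter = 0;
   top:
	copy or store UNROLL chunks of MODE at destptr + iter
	  (and srcptr + iter);
	iter += GET_MODE_SIZE (mode) * unroll;
	if (iter < (count & piece_size_mask)) goto top;
	destptr += iter; srcptr += iter;
   out:  */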
17257 static void
17258 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
17259 rtx destptr, rtx srcptr, rtx value,
17260 rtx count, enum machine_mode mode, int unroll,
17261 int expected_size)
17263 rtx out_label, top_label, iter, tmp;
17264 enum machine_mode iter_mode = counter_mode (count);
17265 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
17266 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
17267 rtx size;
17268 rtx x_addr;
17269 rtx y_addr;
17270 int i;
17272 top_label = gen_label_rtx ();
17273 out_label = gen_label_rtx ();
17274 iter = gen_reg_rtx (iter_mode);
17276 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
17277 NULL, 1, OPTAB_DIRECT);
17278 /* Those two should combine. */
17279 if (piece_size == const1_rtx)
17281 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
17282 true, out_label);
17283 predict_jump (REG_BR_PROB_BASE * 10 / 100);
17285 emit_move_insn (iter, const0_rtx);
17287 emit_label (top_label);
17289 tmp = convert_modes (Pmode, iter_mode, iter, true);
17290 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
17291 destmem = change_address (destmem, mode, x_addr);
17293 if (srcmem)
17295 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
17296 srcmem = change_address (srcmem, mode, y_addr);
17298 /* When unrolling for chips that reorder memory reads and writes,
17299 we can save registers by using a single temporary.
17300 Also, using 4 temporaries is overkill in 32-bit mode. */
17301 if (!TARGET_64BIT && 0)
17303 for (i = 0; i < unroll; i++)
17305 if (i)
17307 destmem =
17308 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17309 srcmem =
17310 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17312 emit_move_insn (destmem, srcmem);
17315 else
17317 rtx tmpreg[4];
17318 gcc_assert (unroll <= 4);
17319 for (i = 0; i < unroll; i++)
17321 tmpreg[i] = gen_reg_rtx (mode);
17322 if (i)
17324 srcmem =
17325 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17327 emit_move_insn (tmpreg[i], srcmem);
17329 for (i = 0; i < unroll; i++)
17331 if (i)
17333 destmem =
17334 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17336 emit_move_insn (destmem, tmpreg[i]);
17340 else
17341 for (i = 0; i < unroll; i++)
17343 if (i)
17344 destmem =
17345 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17346 emit_move_insn (destmem, value);
17349 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
17350 true, OPTAB_LIB_WIDEN);
17351 if (tmp != iter)
17352 emit_move_insn (iter, tmp);
17354 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
17355 true, top_label);
17356 if (expected_size != -1)
17358 expected_size /= GET_MODE_SIZE (mode) * unroll;
17359 if (expected_size == 0)
17360 predict_jump (0);
17361 else if (expected_size > REG_BR_PROB_BASE)
17362 predict_jump (REG_BR_PROB_BASE - 1);
17363 else
17364 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
17366 else
17367 predict_jump (REG_BR_PROB_BASE * 80 / 100);
17368 iter = ix86_zero_extend_to_Pmode (iter);
17369 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
17370 true, OPTAB_LIB_WIDEN);
17371 if (tmp != destptr)
17372 emit_move_insn (destptr, tmp);
17373 if (srcptr)
17375 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
17376 true, OPTAB_LIB_WIDEN);
17377 if (tmp != srcptr)
17378 emit_move_insn (srcptr, tmp);
17380 emit_label (out_label);
17383 /* Output a "rep; mov" instruction.
17384 Arguments have the same meaning as for the previous function. */
17385 static void
17386 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
17387 rtx destptr, rtx srcptr,
17388 rtx count,
17389 enum machine_mode mode)
17391 rtx destexp;
17392 rtx srcexp;
17393 rtx countreg;
17395 /* If the size is known and a multiple of 4, it is shorter to use rep movs with SImode chunks. */
17396 if (mode == QImode && CONST_INT_P (count)
17397 && !(INTVAL (count) & 3))
17398 mode = SImode;
17400 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17401 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17402 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
17403 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
17404 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17405 if (mode != QImode)
17407 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17408 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17409 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17410 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
17411 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17412 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
17414 else
17416 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17417 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
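/* DESTEXP and SRCEXP describe the final pointer values,
base + count * chunk size (the ASHIFT above scales the chunk
count back into bytes); the rep_mov pattern uses them to expose
the stringop's pointer updates. */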
17419 if (CONST_INT_P (count))
17421 count = GEN_INT (INTVAL (count)
17422 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17423 destmem = shallow_copy_rtx (destmem);
17424 srcmem = shallow_copy_rtx (srcmem);
17425 set_mem_size (destmem, count);
17426 set_mem_size (srcmem, count);
17428 else
17430 if (MEM_SIZE (destmem))
17431 set_mem_size (destmem, NULL_RTX);
17432 if (MEM_SIZE (srcmem))
17433 set_mem_size (srcmem, NULL_RTX);
17435 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
17436 destexp, srcexp));
17439 /* Output a "rep; stos" instruction.
17440 Arguments have the same meaning as for the previous function. */
17441 static void
17442 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
17443 rtx count, enum machine_mode mode,
17444 rtx orig_value)
17446 rtx destexp;
17447 rtx countreg;
17449 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17450 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17451 value = force_reg (mode, gen_lowpart (mode, value));
17452 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17453 if (mode != QImode)
17455 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17456 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17457 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17459 else
17460 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17461 if (orig_value == const0_rtx && CONST_INT_P (count))
17463 count = GEN_INT (INTVAL (count)
17464 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17465 destmem = shallow_copy_rtx (destmem);
17466 set_mem_size (destmem, count);
17468 else if (MEM_SIZE (destmem))
17469 set_mem_size (destmem, NULL_RTX);
17470 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
17473 static void
17474 emit_strmov (rtx destmem, rtx srcmem,
17475 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
17477 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
17478 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
17479 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17482 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
17483 static void
17484 expand_movmem_epilogue (rtx destmem, rtx srcmem,
17485 rtx destptr, rtx srcptr, rtx count, int max_size)
17487 rtx src, dest;
17488 if (CONST_INT_P (count))
17490 HOST_WIDE_INT countval = INTVAL (count);
17491 int offset = 0;
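/* With a known count, the tail is copied by testing its low
bits; e.g. 13 remaining bytes (8 + 4 + 1) emit one DImode move
(or two SImode moves on 32-bit), one SImode move and one QImode
move. */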
17493 if ((countval & 0x10) && max_size > 16)
17495 if (TARGET_64BIT)
17497 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17498 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
17500 else
17501 gcc_unreachable ();
17502 offset += 16;
17504 if ((countval & 0x08) && max_size > 8)
17506 if (TARGET_64BIT)
17507 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17508 else
17510 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17511 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
17513 offset += 8;
17515 if ((countval & 0x04) && max_size > 4)
17517 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17518 offset += 4;
17520 if ((countval & 0x02) && max_size > 2)
17522 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
17523 offset += 2;
17525 if ((countval & 0x01) && max_size > 1)
17527 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
17528 offset += 1;
17530 return;
17532 if (max_size > 8)
17534 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
17535 count, 1, OPTAB_DIRECT);
17536 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
17537 count, QImode, 1, 4);
17538 return;
17541 /* When single-instruction stringops are available, they can cheaply
17542 advance the dest and src pointers for us. Otherwise we save code size
17543 by maintaining an offset (zero is readily available from the preceding rep operation) and using x86 addressing modes.
17545 if (TARGET_SINGLE_STRINGOP)
17547 if (max_size > 4)
17549 rtx label = ix86_expand_aligntest (count, 4, true);
17550 src = change_address (srcmem, SImode, srcptr);
17551 dest = change_address (destmem, SImode, destptr);
17552 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17553 emit_label (label);
17554 LABEL_NUSES (label) = 1;
17556 if (max_size > 2)
17558 rtx label = ix86_expand_aligntest (count, 2, true);
17559 src = change_address (srcmem, HImode, srcptr);
17560 dest = change_address (destmem, HImode, destptr);
17561 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17562 emit_label (label);
17563 LABEL_NUSES (label) = 1;
17565 if (max_size > 1)
17567 rtx label = ix86_expand_aligntest (count, 1, true);
17568 src = change_address (srcmem, QImode, srcptr);
17569 dest = change_address (destmem, QImode, destptr);
17570 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17571 emit_label (label);
17572 LABEL_NUSES (label) = 1;
17575 else
17577 rtx offset = force_reg (Pmode, const0_rtx);
17578 rtx tmp;
17580 if (max_size > 4)
17582 rtx label = ix86_expand_aligntest (count, 4, true);
17583 src = change_address (srcmem, SImode, srcptr);
17584 dest = change_address (destmem, SImode, destptr);
17585 emit_move_insn (dest, src);
17586 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
17587 true, OPTAB_LIB_WIDEN);
17588 if (tmp != offset)
17589 emit_move_insn (offset, tmp);
17590 emit_label (label);
17591 LABEL_NUSES (label) = 1;
17593 if (max_size > 2)
17595 rtx label = ix86_expand_aligntest (count, 2, true);
17596 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17597 src = change_address (srcmem, HImode, tmp);
17598 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17599 dest = change_address (destmem, HImode, tmp);
17600 emit_move_insn (dest, src);
17601 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
17602 true, OPTAB_LIB_WIDEN);
17603 if (tmp != offset)
17604 emit_move_insn (offset, tmp);
17605 emit_label (label);
17606 LABEL_NUSES (label) = 1;
17608 if (max_size > 1)
17610 rtx label = ix86_expand_aligntest (count, 1, true);
17611 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17612 src = change_address (srcmem, QImode, tmp);
17613 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17614 dest = change_address (destmem, QImode, tmp);
17615 emit_move_insn (dest, src);
17616 emit_label (label);
17617 LABEL_NUSES (label) = 1;
17622 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
17623 static void
17624 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17625 rtx count, int max_size)
17627 count =
17628 expand_simple_binop (counter_mode (count), AND, count,
17629 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
17630 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17631 gen_lowpart (QImode, value), count, QImode,
17632 1, max_size / 2);
17635 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
17636 static void
17637 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
17639 rtx dest;
17641 if (CONST_INT_P (count))
17643 HOST_WIDE_INT countval = INTVAL (count);
17644 int offset = 0;
17646 if ((countval & 0x10) && max_size > 16)
17648 if (TARGET_64BIT)
17650 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17651 emit_insn (gen_strset (destptr, dest, value));
17652 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17653 emit_insn (gen_strset (destptr, dest, value));
17655 else
17656 gcc_unreachable ();
17657 offset += 16;
17659 if ((countval & 0x08) && max_size > 8)
17661 if (TARGET_64BIT)
17663 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17664 emit_insn (gen_strset (destptr, dest, value));
17666 else
17668 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17669 emit_insn (gen_strset (destptr, dest, value));
17670 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17671 emit_insn (gen_strset (destptr, dest, value));
17673 offset += 8;
17675 if ((countval & 0x04) && max_size > 4)
17677 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17678 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17679 offset += 4;
17681 if ((countval & 0x02) && max_size > 2)
17683 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17684 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17685 offset += 2;
17687 if ((countval & 0x01) && max_size > 1)
17689 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17690 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17691 offset += 1;
17693 return;
17695 if (max_size > 32)
17697 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
17698 return;
17700 if (max_size > 16)
17702 rtx label = ix86_expand_aligntest (count, 16, true);
17703 if (TARGET_64BIT)
17705 dest = change_address (destmem, DImode, destptr);
17706 emit_insn (gen_strset (destptr, dest, value));
17707 emit_insn (gen_strset (destptr, dest, value));
17709 else
17711 dest = change_address (destmem, SImode, destptr);
17712 emit_insn (gen_strset (destptr, dest, value));
17713 emit_insn (gen_strset (destptr, dest, value));
17714 emit_insn (gen_strset (destptr, dest, value));
17715 emit_insn (gen_strset (destptr, dest, value));
17717 emit_label (label);
17718 LABEL_NUSES (label) = 1;
17720 if (max_size > 8)
17722 rtx label = ix86_expand_aligntest (count, 8, true);
17723 if (TARGET_64BIT)
17725 dest = change_address (destmem, DImode, destptr);
17726 emit_insn (gen_strset (destptr, dest, value));
17728 else
17730 dest = change_address (destmem, SImode, destptr);
17731 emit_insn (gen_strset (destptr, dest, value));
17732 emit_insn (gen_strset (destptr, dest, value));
17734 emit_label (label);
17735 LABEL_NUSES (label) = 1;
17737 if (max_size > 4)
17739 rtx label = ix86_expand_aligntest (count, 4, true);
17740 dest = change_address (destmem, SImode, destptr);
17741 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17742 emit_label (label);
17743 LABEL_NUSES (label) = 1;
17745 if (max_size > 2)
17747 rtx label = ix86_expand_aligntest (count, 2, true);
17748 dest = change_address (destmem, HImode, destptr);
17749 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17750 emit_label (label);
17751 LABEL_NUSES (label) = 1;
17753 if (max_size > 1)
17755 rtx label = ix86_expand_aligntest (count, 1, true);
17756 dest = change_address (destmem, QImode, destptr);
17757 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17758 emit_label (label);
17759 LABEL_NUSES (label) = 1;
17763 /* Copy enough from SRC to DEST to align DEST, known to be aligned to ALIGN,
17764 to DESIRED_ALIGNMENT. */
17765 static void
17766 expand_movmem_prologue (rtx destmem, rtx srcmem,
17767 rtx destptr, rtx srcptr, rtx count,
17768 int align, int desired_alignment)
17770 if (align <= 1 && desired_alignment > 1)
17772 rtx label = ix86_expand_aligntest (destptr, 1, false);
17773 srcmem = change_address (srcmem, QImode, srcptr);
17774 destmem = change_address (destmem, QImode, destptr);
17775 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17776 ix86_adjust_counter (count, 1);
17777 emit_label (label);
17778 LABEL_NUSES (label) = 1;
17780 if (align <= 2 && desired_alignment > 2)
17782 rtx label = ix86_expand_aligntest (destptr, 2, false);
17783 srcmem = change_address (srcmem, HImode, srcptr);
17784 destmem = change_address (destmem, HImode, destptr);
17785 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17786 ix86_adjust_counter (count, 2);
17787 emit_label (label);
17788 LABEL_NUSES (label) = 1;
17790 if (align <= 4 && desired_alignment > 4)
17792 rtx label = ix86_expand_aligntest (destptr, 4, false);
17793 srcmem = change_address (srcmem, SImode, srcptr);
17794 destmem = change_address (destmem, SImode, destptr);
17795 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17796 ix86_adjust_counter (count, 4);
17797 emit_label (label);
17798 LABEL_NUSES (label) = 1;
17800 gcc_assert (desired_alignment <= 8);
17803 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
17804 ALIGN_BYTES is how many bytes need to be copied. */
17805 static rtx
17806 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
17807 int desired_align, int align_bytes)
17809 rtx src = *srcp;
17810 rtx src_size, dst_size;
17811 int off = 0;
17812 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
17813 if (src_align_bytes >= 0)
17814 src_align_bytes = desired_align - src_align_bytes;
17815 src_size = MEM_SIZE (src);
17816 dst_size = MEM_SIZE (dst);
17817 if (align_bytes & 1)
17819 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17820 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
17821 off = 1;
17822 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17824 if (align_bytes & 2)
17826 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17827 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
17828 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17829 set_mem_align (dst, 2 * BITS_PER_UNIT);
17830 if (src_align_bytes >= 0
17831 && (src_align_bytes & 1) == (align_bytes & 1)
17832 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
17833 set_mem_align (src, 2 * BITS_PER_UNIT);
17834 off = 2;
17835 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17837 if (align_bytes & 4)
17839 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17840 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
17841 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17842 set_mem_align (dst, 4 * BITS_PER_UNIT);
17843 if (src_align_bytes >= 0)
17845 unsigned int src_align = 0;
17846 if ((src_align_bytes & 3) == (align_bytes & 3))
17847 src_align = 4;
17848 else if ((src_align_bytes & 1) == (align_bytes & 1))
17849 src_align = 2;
17850 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17851 set_mem_align (src, src_align * BITS_PER_UNIT);
17853 off = 4;
17854 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17856 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17857 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
17858 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17859 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17860 if (src_align_bytes >= 0)
17862 unsigned int src_align = 0;
17863 if ((src_align_bytes & 7) == (align_bytes & 7))
17864 src_align = 8;
17865 else if ((src_align_bytes & 3) == (align_bytes & 3))
17866 src_align = 4;
17867 else if ((src_align_bytes & 1) == (align_bytes & 1))
17868 src_align = 2;
17869 if (src_align > (unsigned int) desired_align)
17870 src_align = desired_align;
17871 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17872 set_mem_align (src, src_align * BITS_PER_UNIT);
17874 if (dst_size)
17875 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17876 if (src_size)
17877 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
17878 *srcp = src;
17879 return dst;
17882 /* Set enough of DEST to align DEST, known to be aligned to ALIGN, to
17883 DESIRED_ALIGNMENT. */
17884 static void
17885 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
17886 int align, int desired_alignment)
17888 if (align <= 1 && desired_alignment > 1)
17890 rtx label = ix86_expand_aligntest (destptr, 1, false);
17891 destmem = change_address (destmem, QImode, destptr);
17892 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
17893 ix86_adjust_counter (count, 1);
17894 emit_label (label);
17895 LABEL_NUSES (label) = 1;
17897 if (align <= 2 && desired_alignment > 2)
17899 rtx label = ix86_expand_aligntest (destptr, 2, false);
17900 destmem = change_address (destmem, HImode, destptr);
17901 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
17902 ix86_adjust_counter (count, 2);
17903 emit_label (label);
17904 LABEL_NUSES (label) = 1;
17906 if (align <= 4 && desired_alignment > 4)
17908 rtx label = ix86_expand_aligntest (destptr, 4, false);
17909 destmem = change_address (destmem, SImode, destptr);
17910 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
17911 ix86_adjust_counter (count, 4);
17912 emit_label (label);
17913 LABEL_NUSES (label) = 1;
17915 gcc_assert (desired_alignment <= 8);
17918 /* Set enough of DST to align DST, known to be aligned to ALIGN, to
17919 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
17920 static rtx
17921 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
17922 int desired_align, int align_bytes)
17924 int off = 0;
17925 rtx dst_size = MEM_SIZE (dst);
17926 if (align_bytes & 1)
17928 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17929 off = 1;
17930 emit_insn (gen_strset (destreg, dst,
17931 gen_lowpart (QImode, value)));
17933 if (align_bytes & 2)
17935 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17936 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17937 set_mem_align (dst, 2 * BITS_PER_UNIT);
17938 off = 2;
17939 emit_insn (gen_strset (destreg, dst,
17940 gen_lowpart (HImode, value)));
17942 if (align_bytes & 4)
17944 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17945 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17946 set_mem_align (dst, 4 * BITS_PER_UNIT);
17947 off = 4;
17948 emit_insn (gen_strset (destreg, dst,
17949 gen_lowpart (SImode, value)));
17951 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17952 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17953 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17954 if (dst_size)
17955 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17956 return dst;
17959 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
17960 static enum stringop_alg
17961 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
17962 int *dynamic_check)
17964 const struct stringop_algs * algs;
17965 bool optimize_for_speed;
17966 /* Algorithms using the rep prefix want at least edi and ecx;
17967 additionally, memset wants eax and memcpy wants esi. Don't
17968 consider such algorithms if the user has appropriated those
17969 registers for their own purposes. */
17970 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
17971 || (memset
17972 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
17974 #define ALG_USABLE_P(alg) (rep_prefix_usable \
17975 || (alg != rep_prefix_1_byte \
17976 && alg != rep_prefix_4_byte \
17977 && alg != rep_prefix_8_byte))
17978 const struct processor_costs *cost;
17980 /* Even if the string operation call is cold, we still might spend a lot
17981 of time processing large blocks. */
17982 if (optimize_function_for_size_p (cfun)
17983 || (optimize_insn_for_size_p ()
17984 && expected_size != -1 && expected_size < 256))
17985 optimize_for_speed = false;
17986 else
17987 optimize_for_speed = true;
17989 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
17991 *dynamic_check = -1;
17992 if (memset)
17993 algs = &cost->memset[TARGET_64BIT != 0];
17994 else
17995 algs = &cost->memcpy[TARGET_64BIT != 0];
17996 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
17997 return stringop_alg;
17998 /* rep; movq or rep; movl is the smallest variant. */
17999 else if (!optimize_for_speed)
18001 if (!count || (count & 3))
18002 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
18003 else
18004 return rep_prefix_usable ? rep_prefix_4_byte : loop;
18006 /* Very tiny blocks are best handled via the loop; REP is expensive to set up.
18008 else if (expected_size != -1 && expected_size < 4)
18009 return loop_1_byte;
18010 else if (expected_size != -1)
18012 unsigned int i;
18013 enum stringop_alg alg = libcall;
18014 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
18016 /* We get here if the algorithms that were not libcall-based
18017 were rep-prefix based and we are unable to use rep prefixes
18018 based on global register usage. Break out of the loop and
18019 use the heuristic below. */
18020 if (algs->size[i].max == 0)
18021 break;
18022 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
18024 enum stringop_alg candidate = algs->size[i].alg;
18026 if (candidate != libcall && ALG_USABLE_P (candidate))
18027 alg = candidate;
18028 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
18029 last non-libcall inline algorithm. */
18030 if (TARGET_INLINE_ALL_STRINGOPS)
18032 /* When the current size is best copied by a libcall, but we
18033 are still forced to inline, run the heuristic below that
18034 will pick code for medium-sized blocks. */
18035 if (alg != libcall)
18036 return alg;
18037 break;
18039 else if (ALG_USABLE_P (candidate))
18040 return candidate;
18043 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
18045 /* When asked to inline the call anyway, try to pick a meaningful choice.
18046 We look for the maximal block size that is faster to copy by hand, and
18047 take blocks of at most that size, guessing that the average size will
18048 be roughly half of the block.
18050 If this turns out to be bad, we might simply specify the preferred
18051 choice in ix86_costs. */
18052 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18053 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
18055 int max = -1;
18056 enum stringop_alg alg;
18057 int i;
18058 bool any_alg_usable_p = true;
18060 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
18062 enum stringop_alg candidate = algs->size[i].alg;
18063 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
18065 if (candidate != libcall && candidate
18066 && ALG_USABLE_P (candidate))
18067 max = algs->size[i].max;
18069 /* If there aren't any usable algorithms, then recursing on
18070 smaller sizes isn't going to find anything. Just return the
18071 simple byte-at-a-time copy loop. */
18072 if (!any_alg_usable_p)
18074 /* Pick something reasonable. */
18075 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18076 *dynamic_check = 128;
18077 return loop_1_byte;
18079 if (max == -1)
18080 max = 4096;
18081 alg = decide_alg (count, max / 2, memset, dynamic_check);
18082 gcc_assert (*dynamic_check == -1);
18083 gcc_assert (alg != libcall);
18084 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18085 *dynamic_check = max;
18086 return alg;
18088 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
18089 #undef ALG_USABLE_P
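/* Editor's worked example (hypothetical cost table): if the largest
   non-libcall entry in the table covers blocks of up to 4096 bytes and
   -minline-stringops-dynamically is in effect, an unknown-size operation
   recurses with expected_size = 2048 to pick an algorithm for "typical"
   blocks, and *DYNAMIC_CHECK is set to 4096 so that larger blocks take
   the library call at run time.  */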
18092 /* Decide on alignment. We know that the operand is already aligned to ALIGN
18093 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
18094 static int
18095 decide_alignment (int align,
18096 enum stringop_alg alg,
18097 int expected_size)
18099 int desired_align = 0;
18100 switch (alg)
18102 case no_stringop:
18103 gcc_unreachable ();
18104 case loop:
18105 case unrolled_loop:
18106 desired_align = GET_MODE_SIZE (Pmode);
18107 break;
18108 case rep_prefix_8_byte:
18109 desired_align = 8;
18110 break;
18111 case rep_prefix_4_byte:
18112 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
18113 copying a whole cache line at once. */
18114 if (TARGET_PENTIUMPRO)
18115 desired_align = 8;
18116 else
18117 desired_align = 4;
18118 break;
18119 case rep_prefix_1_byte:
18120 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
18121 copying a whole cache line at once. */
18122 if (TARGET_PENTIUMPRO)
18123 desired_align = 8;
18124 else
18125 desired_align = 1;
18126 break;
18127 case loop_1_byte:
18128 desired_align = 1;
18129 break;
18130 case libcall:
18131 return 0;
18134 if (optimize_size)
18135 desired_align = 1;
18136 if (desired_align < align)
18137 desired_align = align;
18138 if (expected_size != -1 && expected_size < 4)
18139 desired_align = align;
18140 return desired_align;
18143 /* Return the smallest power of 2 greater than VAL. */
18144 static int
18145 smallest_pow2_greater_than (int val)
18147 int ret = 1;
18148 while (ret <= val)
18149 ret <<= 1;
18150 return ret;
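/* Editor's note: the result is strictly greater than VAL, e.g.
   smallest_pow2_greater_than (4) == 8 and smallest_pow2_greater_than (7) == 8.
   Callers therefore pass SIZE - 1 when SIZE itself should be representable.  */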
18153 /* Expand string move (memcpy) operation. Use i386 string operations when
18154 profitable. expand_setmem contains similar code. The code depends upon
18155 architecture, block size and alignment, but always has the same
18156 overall structure:
18158 1) Prologue guard: Conditional that jumps up to epilogues for small
18159 blocks that can be handled by epilogue alone. This is faster but
18160 also needed for correctness, since prologue assume the block is larger
18161 than the desired alignment.
18163 An optional dynamic check for size and a libcall for large
18164 blocks are emitted here too, with -minline-stringops-dynamically.
18166 2) Prologue: copy first few bytes in order to get destination aligned
18167 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
18168 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
18169 We emit either a jump tree on power-of-two-sized blocks, or a byte loop.
18171 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
18172 with specified algorithm.
18174 4) Epilogue: code copying tail of the block that is too small to be
18175 handled by main body (or up to size guarded by prologue guard). */
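/* Editor's sketch of the resulting control flow (illustrative only):

       if (count < epilogue_size_needed) goto epilogue;             <- step 1
       <copy 1/2/4-byte pieces until DST % DESIRED_ALIGN == 0>      <- step 2
       <main loop or rep-prefixed insn over SIZE_NEEDED chunks>     <- step 3
     epilogue:
       <copy the remaining count & (epilogue_size_needed - 1) bytes>  <- step 4  */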
18177 int
18178 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
18179 rtx expected_align_exp, rtx expected_size_exp)
18181 rtx destreg;
18182 rtx srcreg;
18183 rtx label = NULL;
18184 rtx tmp;
18185 rtx jump_around_label = NULL;
18186 HOST_WIDE_INT align = 1;
18187 unsigned HOST_WIDE_INT count = 0;
18188 HOST_WIDE_INT expected_size = -1;
18189 int size_needed = 0, epilogue_size_needed;
18190 int desired_align = 0, align_bytes = 0;
18191 enum stringop_alg alg;
18192 int dynamic_check;
18193 bool need_zero_guard = false;
18195 if (CONST_INT_P (align_exp))
18196 align = INTVAL (align_exp);
18197 /* i386 can do misaligned access at reasonably increased cost. */
18198 if (CONST_INT_P (expected_align_exp)
18199 && INTVAL (expected_align_exp) > align)
18200 align = INTVAL (expected_align_exp);
18201 /* ALIGN is the minimum of destination and source alignment, but we care here
18202 just about destination alignment. */
18203 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
18204 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
18206 if (CONST_INT_P (count_exp))
18207 count = expected_size = INTVAL (count_exp);
18208 if (CONST_INT_P (expected_size_exp) && count == 0)
18209 expected_size = INTVAL (expected_size_exp);
18211 /* Make sure we don't need to care about overflow later on. */
18212 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18213 return 0;
18215 /* Step 0: Decide on preferred algorithm, desired alignment and
18216 size of chunks to be copied by main loop. */
18218 alg = decide_alg (count, expected_size, false, &dynamic_check);
18219 desired_align = decide_alignment (align, alg, expected_size);
18221 if (!TARGET_ALIGN_STRINGOPS)
18222 align = desired_align;
18224 if (alg == libcall)
18225 return 0;
18226 gcc_assert (alg != no_stringop);
18227 if (!count)
18228 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
18229 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18230 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
18231 switch (alg)
18233 case libcall:
18234 case no_stringop:
18235 gcc_unreachable ();
18236 case loop:
18237 need_zero_guard = true;
18238 size_needed = GET_MODE_SIZE (Pmode);
18239 break;
18240 case unrolled_loop:
18241 need_zero_guard = true;
18242 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
18243 break;
18244 case rep_prefix_8_byte:
18245 size_needed = 8;
18246 break;
18247 case rep_prefix_4_byte:
18248 size_needed = 4;
18249 break;
18250 case rep_prefix_1_byte:
18251 size_needed = 1;
18252 break;
18253 case loop_1_byte:
18254 need_zero_guard = true;
18255 size_needed = 1;
18256 break;
18259 epilogue_size_needed = size_needed;
18261 /* Step 1: Prologue guard. */
18263 /* Alignment code needs count to be in register. */
18264 if (CONST_INT_P (count_exp) && desired_align > align)
18266 if (INTVAL (count_exp) > desired_align
18267 && INTVAL (count_exp) > size_needed)
18269 align_bytes
18270 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18271 if (align_bytes <= 0)
18272 align_bytes = 0;
18273 else
18274 align_bytes = desired_align - align_bytes;
18276 if (align_bytes == 0)
18277 count_exp = force_reg (counter_mode (count_exp), count_exp);
18279 gcc_assert (desired_align >= 1 && align >= 1);
18281 /* Ensure that alignment prologue won't copy past end of block. */
18282 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18284 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18285 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18286 Make sure it is a power of 2. */
18287 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18289 if (count)
18291 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18293 /* If main algorithm works on QImode, no epilogue is needed.
18294 For small sizes just don't align anything. */
18295 if (size_needed == 1)
18296 desired_align = align;
18297 else
18298 goto epilogue;
18301 else
18303 label = gen_label_rtx ();
18304 emit_cmp_and_jump_insns (count_exp,
18305 GEN_INT (epilogue_size_needed),
18306 LTU, 0, counter_mode (count_exp), 1, label);
18307 if (expected_size == -1 || expected_size < epilogue_size_needed)
18308 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18309 else
18310 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18314 /* Emit code to decide at runtime whether a library call or inline code
18315 should be used. */
18316 if (dynamic_check != -1)
18318 if (CONST_INT_P (count_exp))
18320 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
18322 emit_block_move_via_libcall (dst, src, count_exp, false);
18323 count_exp = const0_rtx;
18324 goto epilogue;
18327 else
18329 rtx hot_label = gen_label_rtx ();
18330 jump_around_label = gen_label_rtx ();
18331 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18332 LEU, 0, GET_MODE (count_exp), 1, hot_label);
18333 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18334 emit_block_move_via_libcall (dst, src, count_exp, false);
18335 emit_jump (jump_around_label);
18336 emit_label (hot_label);
18340 /* Step 2: Alignment prologue. */
18342 if (desired_align > align)
18344 if (align_bytes == 0)
18346 /* Except for the first move in the epilogue, we no longer know
18347 the constant offset in aliasing info. It does not seem worth
18348 the pain to maintain it for the first move, so throw away
18349 the info early. */
18350 src = change_address (src, BLKmode, srcreg);
18351 dst = change_address (dst, BLKmode, destreg);
18352 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
18353 desired_align);
18355 else
18357 /* If we know how many bytes need to be stored before dst is
18358 sufficiently aligned, maintain aliasing info accurately. */
18359 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
18360 desired_align, align_bytes);
18361 count_exp = plus_constant (count_exp, -align_bytes);
18362 count -= align_bytes;
18364 if (need_zero_guard
18365 && (count < (unsigned HOST_WIDE_INT) size_needed
18366 || (align_bytes == 0
18367 && count < ((unsigned HOST_WIDE_INT) size_needed
18368 + desired_align - align))))
18370 /* It is possible that we copied enough so the main loop will not
18371 execute. */
18372 gcc_assert (size_needed > 1);
18373 if (label == NULL_RTX)
18374 label = gen_label_rtx ();
18375 emit_cmp_and_jump_insns (count_exp,
18376 GEN_INT (size_needed),
18377 LTU, 0, counter_mode (count_exp), 1, label);
18378 if (expected_size == -1
18379 || expected_size < (desired_align - align) / 2 + size_needed)
18380 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18381 else
18382 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18385 if (label && size_needed == 1)
18387 emit_label (label);
18388 LABEL_NUSES (label) = 1;
18389 label = NULL;
18390 epilogue_size_needed = 1;
18392 else if (label == NULL_RTX)
18393 epilogue_size_needed = size_needed;
18395 /* Step 3: Main loop. */
18397 switch (alg)
18399 case libcall:
18400 case no_stringop:
18401 gcc_unreachable ();
18402 case loop_1_byte:
18403 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18404 count_exp, QImode, 1, expected_size);
18405 break;
18406 case loop:
18407 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18408 count_exp, Pmode, 1, expected_size);
18409 break;
18410 case unrolled_loop:
18411 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
18412 registers for 4 temporaries anyway. */
18413 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18414 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
18415 expected_size);
18416 break;
18417 case rep_prefix_8_byte:
18418 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18419 DImode);
18420 break;
18421 case rep_prefix_4_byte:
18422 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18423 SImode);
18424 break;
18425 case rep_prefix_1_byte:
18426 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18427 QImode);
18428 break;
18430 /* Properly adjust the offset of the src and dest memory for aliasing. */
18431 if (CONST_INT_P (count_exp))
18433 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
18434 (count / size_needed) * size_needed);
18435 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18436 (count / size_needed) * size_needed);
18438 else
18440 src = change_address (src, BLKmode, srcreg);
18441 dst = change_address (dst, BLKmode, destreg);
18444 /* Step 4: Epilogue to copy the remaining bytes. */
18445 epilogue:
18446 if (label)
18448 /* When the main loop is done, COUNT_EXP might hold the original count,
18449 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
18450 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
18451 bytes. Compensate if needed. */
18453 if (size_needed < epilogue_size_needed)
18455 tmp =
18456 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18457 GEN_INT (size_needed - 1), count_exp, 1,
18458 OPTAB_DIRECT);
18459 if (tmp != count_exp)
18460 emit_move_insn (count_exp, tmp);
18462 emit_label (label);
18463 LABEL_NUSES (label) = 1;
18466 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18467 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
18468 epilogue_size_needed);
18469 if (jump_around_label)
18470 emit_label (jump_around_label);
18471 return 1;
18474 /* Helper function for memset. For QImode value 0xXY produce
18475 0xXYXYXYXY of the width specified by MODE. This is essentially
18476 a * 0x10101010, but we can do slightly better than
18477 synth_mult by unwinding the sequence by hand on CPUs with
18478 slow multiply. */
18479 static rtx
18480 promote_duplicated_reg (enum machine_mode mode, rtx val)
18482 enum machine_mode valmode = GET_MODE (val);
18483 rtx tmp;
18484 int nops = mode == DImode ? 3 : 2;
18486 gcc_assert (mode == SImode || mode == DImode);
18487 if (val == const0_rtx)
18488 return copy_to_mode_reg (mode, const0_rtx);
18489 if (CONST_INT_P (val))
18491 HOST_WIDE_INT v = INTVAL (val) & 255;
18493 v |= v << 8;
18494 v |= v << 16;
18495 if (mode == DImode)
18496 v |= (v << 16) << 16;
18497 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
18500 if (valmode == VOIDmode)
18501 valmode = QImode;
18502 if (valmode != QImode)
18503 val = gen_lowpart (QImode, val);
18504 if (mode == QImode)
18505 return val;
18506 if (!TARGET_PARTIAL_REG_STALL)
18507 nops--;
18508 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
18509 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
18510 <= (ix86_cost->shift_const + ix86_cost->add) * nops
18511 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
18513 rtx reg = convert_modes (mode, QImode, val, true);
18514 tmp = promote_duplicated_reg (mode, const1_rtx);
18515 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
18516 OPTAB_DIRECT);
18518 else
18520 rtx reg = convert_modes (mode, QImode, val, true);
18522 if (!TARGET_PARTIAL_REG_STALL)
18523 if (mode == SImode)
18524 emit_insn (gen_movsi_insv_1 (reg, reg));
18525 else
18526 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
18527 else
18529 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
18530 NULL, 1, OPTAB_DIRECT);
18531 reg =
18532 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18534 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
18535 NULL, 1, OPTAB_DIRECT);
18536 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18537 if (mode == SImode)
18538 return reg;
18539 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
18540 NULL, 1, OPTAB_DIRECT);
18541 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18542 return reg;
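/* Editor's illustration (hypothetical helper, not part of GCC): the scalar
   equivalent of the shift/or sequence emitted above for a 64-bit target.  */
static inline unsigned long long
example_promote_byte (unsigned char xy)
{
  unsigned long long v = xy;
  v |= v << 8;   /* 0x00XY     -> 0xXYXY */
  v |= v << 16;  /* 0xXYXY     -> 0xXYXYXYXY */
  v |= v << 32;  /* 0xXYXYXYXY -> 0xXYXYXYXYXYXYXYXY */
  return v;
}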
18546 /* Duplicate value VAL using promote_duplicated_reg into the maximal size
18547 needed by the main loop copying SIZE_NEEDED chunks and by the prologue
18548 getting alignment from ALIGN to DESIRED_ALIGN. */
18549 static rtx
18550 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
18552 rtx promoted_val;
18554 if (TARGET_64BIT
18555 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
18556 promoted_val = promote_duplicated_reg (DImode, val);
18557 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
18558 promoted_val = promote_duplicated_reg (SImode, val);
18559 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
18560 promoted_val = promote_duplicated_reg (HImode, val);
18561 else
18562 promoted_val = val;
18564 return promoted_val;
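/* Editor's example: on a 64-bit target a main loop working in 8-byte chunks
   (SIZE_NEEDED == 8) gets a DImode-promoted value, while SIZE_NEEDED == 2
   with no extra alignment work to do only needs an HImode promotion.  */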
18567 /* Expand string set operation (memset). Use i386 string operations when
18568 profitable. See the expand_movmem comment for an explanation of the
18569 individual steps performed. */
18570 int
18571 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
18572 rtx expected_align_exp, rtx expected_size_exp)
18574 rtx destreg;
18575 rtx label = NULL;
18576 rtx tmp;
18577 rtx jump_around_label = NULL;
18578 HOST_WIDE_INT align = 1;
18579 unsigned HOST_WIDE_INT count = 0;
18580 HOST_WIDE_INT expected_size = -1;
18581 int size_needed = 0, epilogue_size_needed;
18582 int desired_align = 0, align_bytes = 0;
18583 enum stringop_alg alg;
18584 rtx promoted_val = NULL;
18585 bool force_loopy_epilogue = false;
18586 int dynamic_check;
18587 bool need_zero_guard = false;
18589 if (CONST_INT_P (align_exp))
18590 align = INTVAL (align_exp);
18591 /* i386 can do misaligned access at reasonably increased cost. */
18592 if (CONST_INT_P (expected_align_exp)
18593 && INTVAL (expected_align_exp) > align)
18594 align = INTVAL (expected_align_exp);
18595 if (CONST_INT_P (count_exp))
18596 count = expected_size = INTVAL (count_exp);
18597 if (CONST_INT_P (expected_size_exp) && count == 0)
18598 expected_size = INTVAL (expected_size_exp);
18600 /* Make sure we don't need to care about overflow later on. */
18601 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18602 return 0;
18604 /* Step 0: Decide on preferred algorithm, desired alignment and
18605 size of chunks to be copied by main loop. */
18607 alg = decide_alg (count, expected_size, true, &dynamic_check);
18608 desired_align = decide_alignment (align, alg, expected_size);
18610 if (!TARGET_ALIGN_STRINGOPS)
18611 align = desired_align;
18613 if (alg == libcall)
18614 return 0;
18615 gcc_assert (alg != no_stringop);
18616 if (!count)
18617 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18618 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18619 switch (alg)
18621 case libcall:
18622 case no_stringop:
18623 gcc_unreachable ();
18624 case loop:
18625 need_zero_guard = true;
18626 size_needed = GET_MODE_SIZE (Pmode);
18627 break;
18628 case unrolled_loop:
18629 need_zero_guard = true;
18630 size_needed = GET_MODE_SIZE (Pmode) * 4;
18631 break;
18632 case rep_prefix_8_byte:
18633 size_needed = 8;
18634 break;
18635 case rep_prefix_4_byte:
18636 size_needed = 4;
18637 break;
18638 case rep_prefix_1_byte:
18639 size_needed = 1;
18640 break;
18641 case loop_1_byte:
18642 need_zero_guard = true;
18643 size_needed = 1;
18644 break;
18646 epilogue_size_needed = size_needed;
18648 /* Step 1: Prologue guard. */
18650 /* Alignment code needs count to be in register. */
18651 if (CONST_INT_P (count_exp) && desired_align > align)
18653 if (INTVAL (count_exp) > desired_align
18654 && INTVAL (count_exp) > size_needed)
18656 align_bytes
18657 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18658 if (align_bytes <= 0)
18659 align_bytes = 0;
18660 else
18661 align_bytes = desired_align - align_bytes;
18663 if (align_bytes == 0)
18665 enum machine_mode mode = SImode;
18666 if (TARGET_64BIT && (count & ~0xffffffff))
18667 mode = DImode;
18668 count_exp = force_reg (mode, count_exp);
18671 /* Do the cheap promotion to allow better CSE across the
18672 main loop and epilogue (i.e. one load of the big constant in
18673 front of all the code). */
18674 if (CONST_INT_P (val_exp))
18675 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18676 desired_align, align);
18677 /* Ensure that alignment prologue won't copy past end of block. */
18678 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18680 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18681 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18682 Make sure it is a power of 2. */
18683 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18685 /* To improve performance of small blocks, we jump around the VAL
18686 promoting code. This means that if the promoted VAL is not constant,
18687 we might not use it in the epilogue and have to use the byte
18688 loop variant. */
18689 if (epilogue_size_needed > 2 && !promoted_val)
18690 force_loopy_epilogue = true;
18691 if (count)
18693 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18695 /* If main algorithm works on QImode, no epilogue is needed.
18696 For small sizes just don't align anything. */
18697 if (size_needed == 1)
18698 desired_align = align;
18699 else
18700 goto epilogue;
18703 else
18705 label = gen_label_rtx ();
18706 emit_cmp_and_jump_insns (count_exp,
18707 GEN_INT (epilogue_size_needed),
18708 LTU, 0, counter_mode (count_exp), 1, label);
18709 if (expected_size == -1 || expected_size <= epilogue_size_needed)
18710 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18711 else
18712 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18715 if (dynamic_check != -1)
18717 rtx hot_label = gen_label_rtx ();
18718 jump_around_label = gen_label_rtx ();
18719 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18720 LEU, 0, counter_mode (count_exp), 1, hot_label);
18721 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18722 set_storage_via_libcall (dst, count_exp, val_exp, false);
18723 emit_jump (jump_around_label);
18724 emit_label (hot_label);
18727 /* Step 2: Alignment prologue. */
18729 /* Do the expensive promotion once we branched off the small blocks. */
18730 if (!promoted_val)
18731 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18732 desired_align, align);
18733 gcc_assert (desired_align >= 1 && align >= 1);
18735 if (desired_align > align)
18737 if (align_bytes == 0)
18739 /* Except for the first move in the epilogue, we no longer know
18740 the constant offset in aliasing info. It does not seem worth
18741 the pain to maintain it for the first move, so throw away
18742 the info early. */
18743 dst = change_address (dst, BLKmode, destreg);
18744 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
18745 desired_align);
18747 else
18749 /* If we know how many bytes need to be stored before dst is
18750 sufficiently aligned, maintain aliasing info accurately. */
18751 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
18752 desired_align, align_bytes);
18753 count_exp = plus_constant (count_exp, -align_bytes);
18754 count -= align_bytes;
18756 if (need_zero_guard
18757 && (count < (unsigned HOST_WIDE_INT) size_needed
18758 || (align_bytes == 0
18759 && count < ((unsigned HOST_WIDE_INT) size_needed
18760 + desired_align - align))))
18762 /* It is possible that we copied enough so the main loop will not
18763 execute. */
18764 gcc_assert (size_needed > 1);
18765 if (label == NULL_RTX)
18766 label = gen_label_rtx ();
18767 emit_cmp_and_jump_insns (count_exp,
18768 GEN_INT (size_needed),
18769 LTU, 0, counter_mode (count_exp), 1, label);
18770 if (expected_size == -1
18771 || expected_size < (desired_align - align) / 2 + size_needed)
18772 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18773 else
18774 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18777 if (label && size_needed == 1)
18779 emit_label (label);
18780 LABEL_NUSES (label) = 1;
18781 label = NULL;
18782 promoted_val = val_exp;
18783 epilogue_size_needed = 1;
18785 else if (label == NULL_RTX)
18786 epilogue_size_needed = size_needed;
18788 /* Step 3: Main loop. */
18790 switch (alg)
18792 case libcall:
18793 case no_stringop:
18794 gcc_unreachable ();
18795 case loop_1_byte:
18796 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18797 count_exp, QImode, 1, expected_size);
18798 break;
18799 case loop:
18800 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18801 count_exp, Pmode, 1, expected_size);
18802 break;
18803 case unrolled_loop:
18804 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18805 count_exp, Pmode, 4, expected_size);
18806 break;
18807 case rep_prefix_8_byte:
18808 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18809 DImode, val_exp);
18810 break;
18811 case rep_prefix_4_byte:
18812 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18813 SImode, val_exp);
18814 break;
18815 case rep_prefix_1_byte:
18816 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18817 QImode, val_exp);
18818 break;
18820 /* Properly adjust the offset of the dest memory for aliasing. */
18821 if (CONST_INT_P (count_exp))
18822 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18823 (count / size_needed) * size_needed);
18824 else
18825 dst = change_address (dst, BLKmode, destreg);
18827 /* Step 4: Epilogue to copy the remaining bytes. */
18829 if (label)
18831 /* When the main loop is done, COUNT_EXP might hold the original count,
18832 while we want to set only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
18833 Epilogue code will actually set COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
18834 bytes. Compensate if needed. */
18836 if (size_needed < epilogue_size_needed)
18838 tmp =
18839 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18840 GEN_INT (size_needed - 1), count_exp, 1,
18841 OPTAB_DIRECT);
18842 if (tmp != count_exp)
18843 emit_move_insn (count_exp, tmp);
18845 emit_label (label);
18846 LABEL_NUSES (label) = 1;
18848 epilogue:
18849 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18851 if (force_loopy_epilogue)
18852 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
18853 epilogue_size_needed);
18854 else
18855 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
18856 epilogue_size_needed);
18858 if (jump_around_label)
18859 emit_label (jump_around_label);
18860 return 1;
18863 /* Expand the appropriate insns for doing strlen if not just doing
18864 repnz; scasb
18866 out = result, initialized with the start address
18867 align_rtx = alignment of the address.
18868 scratch = scratch register, initialized with the start address when
18869 not aligned, otherwise undefined
18871 This is just the body. It needs the initializations mentioned above and
18872 some address computing at the end. These things are done in i386.md. */
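/* Editor's sketch of the emitted sequence (illustrative):
     1. compare up to 3 leading bytes one at a time until OUT is 4-byte
        aligned, jumping to end_0_label as soon as a zero byte is seen;
     2. loop: load 4 bytes, apply the zero-byte test below, continue
        while no byte is zero;
     3. use the sign bits left in TMPREG to step OUT to the exact
        position of the zero byte.  */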
18874 static void
18875 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
18877 int align;
18878 rtx tmp;
18879 rtx align_2_label = NULL_RTX;
18880 rtx align_3_label = NULL_RTX;
18881 rtx align_4_label = gen_label_rtx ();
18882 rtx end_0_label = gen_label_rtx ();
18883 rtx mem;
18884 rtx tmpreg = gen_reg_rtx (SImode);
18885 rtx scratch = gen_reg_rtx (SImode);
18886 rtx cmp;
18888 align = 0;
18889 if (CONST_INT_P (align_rtx))
18890 align = INTVAL (align_rtx);
18892 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
18894 /* Is there a known alignment and is it less than 4? */
18895 if (align < 4)
18897 rtx scratch1 = gen_reg_rtx (Pmode);
18898 emit_move_insn (scratch1, out);
18899 /* Is there a known alignment and is it not 2? */
18900 if (align != 2)
18902 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
18903 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
18905 /* Leave just the 3 lower bits. */
18906 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
18907 NULL_RTX, 0, OPTAB_WIDEN);
18909 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18910 Pmode, 1, align_4_label);
18911 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
18912 Pmode, 1, align_2_label);
18913 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
18914 Pmode, 1, align_3_label);
18916 else
18918 /* Since the alignment is 2, we have to check 2 or 0 bytes;
18919 check whether it is aligned to a 4-byte boundary. */
18921 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
18922 NULL_RTX, 0, OPTAB_WIDEN);
18924 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18925 Pmode, 1, align_4_label);
18928 mem = change_address (src, QImode, out);
18930 /* Now compare the bytes. */
18932 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
18933 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
18934 QImode, 1, end_0_label);
18936 /* Increment the address. */
18937 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18939 /* Not needed with an alignment of 2 */
18940 if (align != 2)
18942 emit_label (align_2_label);
18944 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18945 end_0_label);
18947 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18949 emit_label (align_3_label);
18952 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18953 end_0_label);
18955 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18958 /* Generate a loop to check 4 bytes at a time. It is not a good idea
18959 to align this loop; it only makes the program larger and does not
18960 help to speed it up. */
18961 emit_label (align_4_label);
18963 mem = change_address (src, SImode, out);
18964 emit_move_insn (scratch, mem);
18965 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
18967 /* This formula yields a nonzero result iff one of the bytes is zero.
18968 This saves three branches inside the loop and many cycles. */
18970 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
18971 emit_insn (gen_one_cmplsi2 (scratch, scratch));
18972 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
18973 emit_insn (gen_andsi3 (tmpreg, tmpreg,
18974 gen_int_mode (0x80808080, SImode)));
18975 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
18976 align_4_label);
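/* Editor's note: the four insns above compute
   (X - 0x01010101) & ~X & 0x80808080,
   in C ((x - 0x01010101u) & ~x & 0x80808080u), which is nonzero iff
   some byte of X is zero.  */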
18978 if (TARGET_CMOVE)
18980 rtx reg = gen_reg_rtx (SImode);
18981 rtx reg2 = gen_reg_rtx (Pmode);
18982 emit_move_insn (reg, tmpreg);
18983 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
18985 /* If zero is not in the first two bytes, move two bytes forward. */
18986 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18987 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18988 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18989 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
18990 gen_rtx_IF_THEN_ELSE (SImode, tmp,
18991 reg,
18992 tmpreg)));
18993 /* Emit the lea manually to avoid clobbering the flags. */
18994 emit_insn (gen_rtx_SET (SImode, reg2,
18995 gen_rtx_PLUS (Pmode, out, const2_rtx)));
18997 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18998 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18999 emit_insn (gen_rtx_SET (VOIDmode, out,
19000 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
19001 reg2,
19002 out)));
19005 else
19007 rtx end_2_label = gen_label_rtx ();
19008 /* Is zero in the first two bytes? */
19010 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19011 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19012 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
19013 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19014 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
19015 pc_rtx);
19016 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19017 JUMP_LABEL (tmp) = end_2_label;
19019 /* Not in the first two. Move two bytes forward. */
19020 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
19021 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
19023 emit_label (end_2_label);
19027 /* Avoid a branch when fixing up the final byte position. */
19028 tmpreg = gen_lowpart (QImode, tmpreg);
19029 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
19030 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
19031 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
19033 emit_label (end_0_label);
19036 /* Expand strlen. */
19038 int
19039 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
19041 rtx addr, scratch1, scratch2, scratch3, scratch4;
19043 /* The generic case of the strlen expander is long. Avoid expanding
19044 it unless TARGET_INLINE_ALL_STRINGOPS. */
19046 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19047 && !TARGET_INLINE_ALL_STRINGOPS
19048 && !optimize_insn_for_size_p ()
19049 && (!CONST_INT_P (align) || INTVAL (align) < 4))
19050 return 0;
19052 addr = force_reg (Pmode, XEXP (src, 0));
19053 scratch1 = gen_reg_rtx (Pmode);
19055 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19056 && !optimize_insn_for_size_p ())
19058 /* Well, it seems that some optimizer does not combine a call like
19059 foo(strlen(bar), strlen(bar));
19060 when the move and the subtraction are done here. It does calculate
19061 the length just once when these instructions are done inside
19062 output_strlen_unroll(). But since &bar[strlen(bar)] is often used,
19063 and one fewer register is live for the lifetime of
19064 output_strlen_unroll(), this is better. */
19066 emit_move_insn (out, addr);
19068 ix86_expand_strlensi_unroll_1 (out, src, align);
19070 /* strlensi_unroll_1 returns the address of the zero at the end of
19071 the string, like memchr(), so compute the length by subtracting
19072 the start address. */
19073 emit_insn ((*ix86_gen_sub3) (out, out, addr));
19075 else
19077 rtx unspec;
19079 /* Can't use this if the user has appropriated eax, ecx, or edi. */
19080 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
19081 return false;
19083 scratch2 = gen_reg_rtx (Pmode);
19084 scratch3 = gen_reg_rtx (Pmode);
19085 scratch4 = force_reg (Pmode, constm1_rtx);
19087 emit_move_insn (scratch3, addr);
19088 eoschar = force_reg (QImode, eoschar);
19090 src = replace_equiv_address_nv (src, scratch3);
19092 /* If .md starts supporting :P, this can be done in .md. */
19093 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
19094 scratch4), UNSPEC_SCAS);
19095 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
19096 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
19097 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
19099 return 1;
19102 /* For a given symbol (function) construct code to compute the address of
19103 its PLT entry in the large x86-64 PIC model. */
19104 rtx
19105 construct_plt_address (rtx symbol)
19107 rtx tmp = gen_reg_rtx (Pmode);
19108 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
19110 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
19111 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
19113 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
19114 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
19115 return tmp;
19118 void
19119 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
19120 rtx callarg2,
19121 rtx pop, int sibcall)
19123 rtx use = NULL, call;
19125 if (pop == const0_rtx)
19126 pop = NULL;
19127 gcc_assert (!TARGET_64BIT || !pop);
19129 if (TARGET_MACHO && !TARGET_64BIT)
19131 #if TARGET_MACHO
19132 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
19133 fnaddr = machopic_indirect_call_target (fnaddr);
19134 #endif
19136 else
19138 /* Static functions and indirect calls don't need the pic register. */
19139 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
19140 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19141 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
19142 use_reg (&use, pic_offset_table_rtx);
19145 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
19147 rtx al = gen_rtx_REG (QImode, AX_REG);
19148 emit_move_insn (al, callarg2);
19149 use_reg (&use, al);
19152 if (ix86_cmodel == CM_LARGE_PIC
19153 && MEM_P (fnaddr)
19154 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19155 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
19156 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
19157 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
19159 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19160 fnaddr = gen_rtx_MEM (QImode, fnaddr);
19162 if (sibcall && TARGET_64BIT
19163 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
19165 rtx addr;
19166 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19167 fnaddr = gen_rtx_REG (Pmode, R11_REG);
19168 emit_move_insn (fnaddr, addr);
19169 fnaddr = gen_rtx_MEM (QImode, fnaddr);
19172 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
19173 if (retval)
19174 call = gen_rtx_SET (VOIDmode, retval, call);
19175 if (pop)
19177 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
19178 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
19179 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
19181 if (TARGET_64BIT
19182 && ix86_cfun_abi () == MS_ABI
19183 && (!callarg2 || INTVAL (callarg2) != -2))
19185 /* We need to represent that SI and DI registers are clobbered
19186 by SYSV calls. */
19187 static int clobbered_registers[] = {
19188 XMM6_REG, XMM7_REG, XMM8_REG,
19189 XMM9_REG, XMM10_REG, XMM11_REG,
19190 XMM12_REG, XMM13_REG, XMM14_REG,
19191 XMM15_REG, SI_REG, DI_REG
19193 unsigned int i;
19194 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
19195 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
19196 UNSPEC_MS_TO_SYSV_CALL);
19198 vec[0] = call;
19199 vec[1] = unspec;
19200 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
19201 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
19202 ? TImode : DImode,
19203 gen_rtx_REG
19204 (SSE_REGNO_P (clobbered_registers[i])
19205 ? TImode : DImode,
19206 clobbered_registers[i]));
19208 call = gen_rtx_PARALLEL (VOIDmode,
19209 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
19210 + 2, vec));
19213 call = emit_call_insn (call);
19214 if (use)
19215 CALL_INSN_FUNCTION_USAGE (call) = use;
19219 /* Clear stack slot assignments remembered from previous functions.
19220 This is called from INIT_EXPANDERS once before RTL is emitted for each
19221 function. */
19223 static struct machine_function *
19224 ix86_init_machine_status (void)
19226 struct machine_function *f;
19228 f = GGC_CNEW (struct machine_function);
19229 f->use_fast_prologue_epilogue_nregs = -1;
19230 f->tls_descriptor_call_expanded_p = 0;
19231 f->call_abi = ix86_abi;
19233 return f;
19236 /* Return a MEM corresponding to a stack slot with mode MODE.
19237 Allocate a new slot if necessary.
19239 The RTL for a function can have several slots available: N is
19240 which slot to use. */
19242 rtx
19243 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
19245 struct stack_local_entry *s;
19247 gcc_assert (n < MAX_386_STACK_LOCALS);
19249 /* Virtual slot is valid only before vregs are instantiated. */
19250 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
19252 for (s = ix86_stack_locals; s; s = s->next)
19253 if (s->mode == mode && s->n == n)
19254 return copy_rtx (s->rtl);
19256 s = (struct stack_local_entry *)
19257 ggc_alloc (sizeof (struct stack_local_entry));
19258 s->n = n;
19259 s->mode = mode;
19260 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
19262 s->next = ix86_stack_locals;
19263 ix86_stack_locals = s;
19264 return s->rtl;
19267 /* Construct the SYMBOL_REF for the tls_get_addr function. */
19269 static GTY(()) rtx ix86_tls_symbol;
19270 static rtx
19271 ix86_tls_get_addr (void)
19274 if (!ix86_tls_symbol)
19276 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
19277 (TARGET_ANY_GNU_TLS
19278 && !TARGET_64BIT)
19279 ? "___tls_get_addr"
19280 : "__tls_get_addr");
19283 return ix86_tls_symbol;
19286 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
19288 static GTY(()) rtx ix86_tls_module_base_symbol;
19289 static rtx
19290 ix86_tls_module_base (void)
19293 if (!ix86_tls_module_base_symbol)
19295 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
19296 "_TLS_MODULE_BASE_");
19297 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
19298 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
19301 return ix86_tls_module_base_symbol;
19304 /* Calculate the length of the memory address in the instruction
19305 encoding. Does not include the one-byte modrm, opcode, or prefix. */
19307 int
19308 memory_address_length (rtx addr)
19310 struct ix86_address parts;
19311 rtx base, index, disp;
19312 int len;
19313 int ok;
19315 if (GET_CODE (addr) == PRE_DEC
19316 || GET_CODE (addr) == POST_INC
19317 || GET_CODE (addr) == PRE_MODIFY
19318 || GET_CODE (addr) == POST_MODIFY)
19319 return 0;
19321 ok = ix86_decompose_address (addr, &parts);
19322 gcc_assert (ok);
19324 if (parts.base && GET_CODE (parts.base) == SUBREG)
19325 parts.base = SUBREG_REG (parts.base);
19326 if (parts.index && GET_CODE (parts.index) == SUBREG)
19327 parts.index = SUBREG_REG (parts.index);
19329 base = parts.base;
19330 index = parts.index;
19331 disp = parts.disp;
19332 len = 0;
19334 /* Rule of thumb:
19335 - esp as the base always wants an index,
19336 - ebp as the base always wants a displacement,
19337 - r12 as the base always wants an index,
19338 - r13 as the base always wants a displacement. */
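/* Editorial worked examples, not in the original source: under these
   rules (%eax) encodes with no SIB byte or displacement, len 0; (%esp)
   needs a SIB byte, len 1; (%ebp) needs a disp8 of zero, len 1; and
   8(%eax,%ebx,2) needs a disp8 plus a SIB byte, len 2.  */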
19340 /* Register Indirect. */
19341 if (base && !index && !disp)
19343 /* esp (for its index) and ebp (for its displacement) need
19344 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
19345 code. */
19346 if (REG_P (addr)
19347 && (addr == arg_pointer_rtx
19348 || addr == frame_pointer_rtx
19349 || REGNO (addr) == SP_REG
19350 || REGNO (addr) == BP_REG
19351 || REGNO (addr) == R12_REG
19352 || REGNO (addr) == R13_REG))
19353 len = 1;
19356 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
19357 is not disp32, but disp32(%rip), so for disp32
19358 SIB byte is needed, unless print_operand_address
19359 optimizes it into disp32(%rip) or (%rip) is implied
19360 by UNSPEC. */
19361 else if (disp && !base && !index)
19363 len = 4;
19364 if (TARGET_64BIT)
19366 rtx symbol = disp;
19368 if (GET_CODE (disp) == CONST)
19369 symbol = XEXP (disp, 0);
19370 if (GET_CODE (symbol) == PLUS
19371 && CONST_INT_P (XEXP (symbol, 1)))
19372 symbol = XEXP (symbol, 0);
19374 if (GET_CODE (symbol) != LABEL_REF
19375 && (GET_CODE (symbol) != SYMBOL_REF
19376 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
19377 && (GET_CODE (symbol) != UNSPEC
19378 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
19379 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
19380 len += 1;
19384 else
19386 /* Find the length of the displacement constant. */
19387 if (disp)
19389 if (base && satisfies_constraint_K (disp))
19390 len = 1;
19391 else
19392 len = 4;
19394 /* ebp always wants a displacement. Similarly r13. */
19395 else if (REG_P (base)
19396 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
19397 len = 1;
19399 /* An index requires the two-byte modrm form.... */
19400 if (index
19401 /* ...like esp (or r12), which always wants an index. */
19402 || base == arg_pointer_rtx
19403 || base == frame_pointer_rtx
19404 || (REG_P (base)
19405 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
19406 len += 1;
19409 switch (parts.seg)
19411 case SEG_FS:
19412 case SEG_GS:
19413 len += 1;
19414 break;
19415 default:
19416 break;
19419 return len;
19422 /* Compute default value for "length_immediate" attribute. When SHORTFORM
19423 is set, expect that the insn has an 8-bit immediate alternative. */
19424 int
19425 ix86_attr_length_immediate_default (rtx insn, int shortform)
19427 int len = 0;
19428 int i;
19429 extract_insn_cached (insn);
19430 for (i = recog_data.n_operands - 1; i >= 0; --i)
19431 if (CONSTANT_P (recog_data.operand[i]))
19433 enum attr_mode mode = get_attr_mode (insn);
19435 gcc_assert (!len);
19436 if (shortform && CONST_INT_P (recog_data.operand[i]))
19438 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
19439 switch (mode)
19441 case MODE_QI:
19442 len = 1;
19443 continue;
19444 case MODE_HI:
19445 ival = trunc_int_for_mode (ival, HImode);
19446 break;
19447 case MODE_SI:
19448 ival = trunc_int_for_mode (ival, SImode);
19449 break;
19450 default:
19451 break;
19453 if (IN_RANGE (ival, -128, 127))
19455 len = 1;
19456 continue;
19459 switch (mode)
19461 case MODE_QI:
19462 len = 1;
19463 break;
19464 case MODE_HI:
19465 len = 2;
19466 break;
19467 case MODE_SI:
19468 len = 4;
19469 break;
19470 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
19471 case MODE_DI:
19472 len = 4;
19473 break;
19474 default:
19475 fatal_insn ("unknown insn mode", insn);
19478 return len;
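/* Editorial worked example, not in the original source: for
   "addl $100, %eax" with SHORTFORM set, 100 is within [-128, 127], so
   the immediate length is 1; for "addl $1000, %eax" it is not, and the
   MODE_SI length of 4 is used.  */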
19480 /* Compute default value for "length_address" attribute. */
19481 int
19482 ix86_attr_length_address_default (rtx insn)
19484 int i;
19486 if (get_attr_type (insn) == TYPE_LEA)
19488 rtx set = PATTERN (insn), addr;
19490 if (GET_CODE (set) == PARALLEL)
19491 set = XVECEXP (set, 0, 0);
19493 gcc_assert (GET_CODE (set) == SET);
19495 addr = SET_SRC (set);
19496 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
19498 if (GET_CODE (addr) == ZERO_EXTEND)
19499 addr = XEXP (addr, 0);
19500 if (GET_CODE (addr) == SUBREG)
19501 addr = SUBREG_REG (addr);
19504 return memory_address_length (addr);
19507 extract_insn_cached (insn);
19508 for (i = recog_data.n_operands - 1; i >= 0; --i)
19509 if (MEM_P (recog_data.operand[i]))
19511 constrain_operands_cached (reload_completed);
19512 if (which_alternative != -1)
19514 const char *constraints = recog_data.constraints[i];
19515 int alt = which_alternative;
19517 while (*constraints == '=' || *constraints == '+')
19518 constraints++;
19519 while (alt-- > 0)
19520 while (*constraints++ != ',')
19521 ;
19522 /* Skip ignored operands. */
19523 if (*constraints == 'X')
19524 continue;
19526 return memory_address_length (XEXP (recog_data.operand[i], 0));
19528 return 0;
19531 /* Compute default value for "length_vex" attribute. It includes
19532 2 or 3 byte VEX prefix and 1 opcode byte. */
19534 int
19535 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
19536 int has_vex_w)
19538 int i;
19540 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX W bit
19541 requires the 3-byte VEX prefix. */
19542 if (!has_0f_opcode || has_vex_w)
19543 return 3 + 1;
19545 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
19546 if (!TARGET_64BIT)
19547 return 2 + 1;
19549 extract_insn_cached (insn);
19551 for (i = recog_data.n_operands - 1; i >= 0; --i)
19552 if (REG_P (recog_data.operand[i]))
19554 /* The REX.W bit requires the 3-byte VEX prefix. */
19555 if (GET_MODE (recog_data.operand[i]) == DImode
19556 && GENERAL_REG_P (recog_data.operand[i]))
19557 return 3 + 1;
19559 else
19561 /* The REX.X or REX.B bits require the 3-byte VEX prefix. */
19562 if (MEM_P (recog_data.operand[i])
19563 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
19564 return 3 + 1;
19567 return 2 + 1;
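/* Editorial worked example, not in the original source: in 64-bit
   mode "vaddps %xmm1, %xmm2, %xmm3" has a 0f opcode, no VEX.W and no
   extended registers, so it gets the 2-byte prefix (2 + 1 = 3); a
   DImode general register operand (REX.W) or a memory address that
   mentions an extended register (REX.X/REX.B) forces the 3-byte
   prefix (3 + 1 = 4).  */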
19570 /* Return the maximum number of instructions a cpu can issue. */
19572 static int
19573 ix86_issue_rate (void)
19575 switch (ix86_tune)
19577 case PROCESSOR_PENTIUM:
19578 case PROCESSOR_ATOM:
19579 case PROCESSOR_K6:
19580 return 2;
19582 case PROCESSOR_PENTIUMPRO:
19583 case PROCESSOR_PENTIUM4:
19584 case PROCESSOR_ATHLON:
19585 case PROCESSOR_K8:
19586 case PROCESSOR_AMDFAM10:
19587 case PROCESSOR_NOCONA:
19588 case PROCESSOR_GENERIC32:
19589 case PROCESSOR_GENERIC64:
19590 return 3;
19592 case PROCESSOR_CORE2:
19593 return 4;
19595 default:
19596 return 1;
19600 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
19601 set by DEP_INSN and nothing else that DEP_INSN sets. */
19603 static int
19604 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19606 rtx set, set2;
19608 /* Simplify the test for uninteresting insns. */
19609 if (insn_type != TYPE_SETCC
19610 && insn_type != TYPE_ICMOV
19611 && insn_type != TYPE_FCMOV
19612 && insn_type != TYPE_IBR)
19613 return 0;
19615 if ((set = single_set (dep_insn)) != 0)
19617 set = SET_DEST (set);
19618 set2 = NULL_RTX;
19620 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
19621 && XVECLEN (PATTERN (dep_insn), 0) == 2
19622 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
19623 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
19625 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
19626 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
19628 else
19629 return 0;
19631 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
19632 return 0;
19634 /* This test is true if the dependent insn reads the flags but
19635 not any other potentially set register. */
19636 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
19637 return 0;
19639 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
19640 return 0;
19642 return 1;
19645 /* Return true iff USE_INSN has a memory address with operands set by
19646 SET_INSN. */
19648 bool
19649 ix86_agi_dependent (rtx set_insn, rtx use_insn)
19651 int i;
19652 extract_insn_cached (use_insn);
19653 for (i = recog_data.n_operands - 1; i >= 0; --i)
19654 if (MEM_P (recog_data.operand[i]))
19656 rtx addr = XEXP (recog_data.operand[i], 0);
19657 return modified_in_p (addr, set_insn) != 0;
19659 return false;
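/* Editorial example, not in the original source: with SET_INSN
   "movl %edx, %eax" and USE_INSN "movl (%eax), %ecx", the address
   (%eax) is modified by SET_INSN, so this returns true.  */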
19662 static int
19663 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
19665 enum attr_type insn_type, dep_insn_type;
19666 enum attr_memory memory;
19667 rtx set, set2;
19668 int dep_insn_code_number;
19670 /* Anti and output dependencies have zero cost on all CPUs. */
19671 if (REG_NOTE_KIND (link) != 0)
19672 return 0;
19674 dep_insn_code_number = recog_memoized (dep_insn);
19676 /* If we can't recognize the insns, we can't really do anything. */
19677 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
19678 return cost;
19680 insn_type = get_attr_type (insn);
19681 dep_insn_type = get_attr_type (dep_insn);
19683 switch (ix86_tune)
19685 case PROCESSOR_PENTIUM:
19686 /* Address Generation Interlock adds a cycle of latency. */
19687 if (insn_type == TYPE_LEA)
19689 rtx addr = PATTERN (insn);
19691 if (GET_CODE (addr) == PARALLEL)
19692 addr = XVECEXP (addr, 0, 0);
19694 gcc_assert (GET_CODE (addr) == SET);
19696 addr = SET_SRC (addr);
19697 if (modified_in_p (addr, dep_insn))
19698 cost += 1;
19700 else if (ix86_agi_dependent (dep_insn, insn))
19701 cost += 1;
19703 /* ??? Compares pair with jump/setcc. */
19704 if (ix86_flags_dependent (insn, dep_insn, insn_type))
19705 cost = 0;
19707 /* Floating point stores require the value to be ready one cycle earlier. */
19708 if (insn_type == TYPE_FMOV
19709 && get_attr_memory (insn) == MEMORY_STORE
19710 && !ix86_agi_dependent (dep_insn, insn))
19711 cost += 1;
19712 break;
19714 case PROCESSOR_PENTIUMPRO:
19715 memory = get_attr_memory (insn);
19717 /* INT->FP conversion is expensive. */
19718 if (get_attr_fp_int_src (dep_insn))
19719 cost += 5;
19721 /* There is one cycle extra latency between an FP op and a store. */
19722 if (insn_type == TYPE_FMOV
19723 && (set = single_set (dep_insn)) != NULL_RTX
19724 && (set2 = single_set (insn)) != NULL_RTX
19725 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
19726 && MEM_P (SET_DEST (set2)))
19727 cost += 1;
19729 /* Show the ability of the reorder buffer to hide the latency of a load
19730 by executing it in parallel with the previous instruction when the
19731 previous instruction is not needed to compute the address. */
19732 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19733 && !ix86_agi_dependent (dep_insn, insn))
19735 /* Claim moves take one cycle, as the core can issue one load at a
19736 time and the next load can start a cycle later. */
19737 if (dep_insn_type == TYPE_IMOV
19738 || dep_insn_type == TYPE_FMOV)
19739 cost = 1;
19740 else if (cost > 1)
19741 cost--;
19743 break;
19745 case PROCESSOR_K6:
19746 memory = get_attr_memory (insn);
19748 /* The esp dependency is resolved before the instruction is really
19749 finished. */
19750 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
19751 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
19752 return 1;
19754 /* INT->FP conversion is expensive. */
19755 if (get_attr_fp_int_src (dep_insn))
19756 cost += 5;
19758 /* Show the ability of the reorder buffer to hide the latency of a load
19759 by executing it in parallel with the previous instruction when the
19760 previous instruction is not needed to compute the address. */
19761 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19762 && !ix86_agi_dependent (dep_insn, insn))
19764 /* Claim moves take one cycle, as the core can issue one load at a
19765 time and the next load can start a cycle later. */
19766 if (dep_insn_type == TYPE_IMOV
19767 || dep_insn_type == TYPE_FMOV)
19768 cost = 1;
19769 else if (cost > 2)
19770 cost -= 2;
19771 else
19772 cost = 1;
19774 break;
19776 case PROCESSOR_ATHLON:
19777 case PROCESSOR_K8:
19778 case PROCESSOR_AMDFAM10:
19779 case PROCESSOR_ATOM:
19780 case PROCESSOR_GENERIC32:
19781 case PROCESSOR_GENERIC64:
19782 memory = get_attr_memory (insn);
19784 /* Show the ability of the reorder buffer to hide the latency of a load
19785 by executing it in parallel with the previous instruction when the
19786 previous instruction is not needed to compute the address. */
19787 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19788 && !ix86_agi_dependent (dep_insn, insn))
19790 enum attr_unit unit = get_attr_unit (insn);
19791 int loadcost = 3;
19793 /* Because of the difference between the lengths of the integer and
19794 floating unit pipeline preparation stages, memory operands for
19795 floating point are cheaper.
19797 ??? For Athlon the difference is most probably 2. */
19798 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
19799 loadcost = 3;
19800 else
19801 loadcost = TARGET_ATHLON ? 2 : 0;
19803 if (cost >= loadcost)
19804 cost -= loadcost;
19805 else
19806 cost = 0;
19809 default:
19810 break;
19813 return cost;
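/* Editorial example, not in the original source: when tuning for
   PROCESSOR_PENTIUM, if DEP_INSN is "movl 4(%ebx), %eax" and INSN is
   "movl (%eax), %ecx", INSN's address depends on DEP_INSN, so
   ix86_agi_dependent holds and one cycle is added to the cost.  */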
19816 /* How many alternative schedules to try. This should be as wide as the
19817 scheduling freedom in the DFA, but no wider. Making this value too
19818 large results in extra work for the scheduler. */
19820 static int
19821 ia32_multipass_dfa_lookahead (void)
19823 switch (ix86_tune)
19825 case PROCESSOR_PENTIUM:
19826 return 2;
19828 case PROCESSOR_PENTIUMPRO:
19829 case PROCESSOR_K6:
19830 return 1;
19832 default:
19833 return 0;
19838 /* Compute the alignment given to a constant that is being placed in memory.
19839 EXP is the constant and ALIGN is the alignment that the object would
19840 ordinarily have.
19841 The value of this function is used instead of that alignment to align
19842 the object. */
19844 int
19845 ix86_constant_alignment (tree exp, int align)
19847 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
19848 || TREE_CODE (exp) == INTEGER_CST)
19850 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
19851 return 64;
19852 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
19853 return 128;
19855 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
19856 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
19857 return BITS_PER_WORD;
19859 return align;
19862 /* Compute the alignment for a static variable.
19863 TYPE is the data type, and ALIGN is the alignment that
19864 the object would ordinarily have. The value of this function is used
19865 instead of that alignment to align the object. */
19867 int
19868 ix86_data_alignment (tree type, int align)
19870 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
19872 if (AGGREGATE_TYPE_P (type)
19873 && TYPE_SIZE (type)
19874 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19875 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
19876 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
19877 && align < max_align)
19878 align = max_align;
19880 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
19881 to a 16-byte boundary. */
19882 if (TARGET_64BIT)
19884 if (AGGREGATE_TYPE_P (type)
19885 && TYPE_SIZE (type)
19886 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19887 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
19888 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19889 return 128;
19892 if (TREE_CODE (type) == ARRAY_TYPE)
19894 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19895 return 64;
19896 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19897 return 128;
19899 else if (TREE_CODE (type) == COMPLEX_TYPE)
19902 if (TYPE_MODE (type) == DCmode && align < 64)
19903 return 64;
19904 if ((TYPE_MODE (type) == XCmode
19905 || TYPE_MODE (type) == TCmode) && align < 128)
19906 return 128;
19908 else if ((TREE_CODE (type) == RECORD_TYPE
19909 || TREE_CODE (type) == UNION_TYPE
19910 || TREE_CODE (type) == QUAL_UNION_TYPE)
19911 && TYPE_FIELDS (type))
19913 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19914 return 64;
19915 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19916 return 128;
19918 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19919 || TREE_CODE (type) == INTEGER_TYPE)
19921 if (TYPE_MODE (type) == DFmode && align < 64)
19922 return 64;
19923 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19924 return 128;
19927 return align;
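/* Editorial example, not in the original source: in 64-bit mode a
   static "char buf[17]" has TYPE_SIZE 136 bits >= 128, so the check
   above raises its alignment to 128 bits (16 bytes).  */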
19930 /* Compute the alignment for a local variable or a stack slot. EXP is
19931 the data type or decl itself, MODE is the widest mode available and
19932 ALIGN is the alignment that the object would ordinarily have. The
19933 value of this macro is used instead of that alignment to align the
19934 object. */
19936 unsigned int
19937 ix86_local_alignment (tree exp, enum machine_mode mode,
19938 unsigned int align)
19940 tree type, decl;
19942 if (exp && DECL_P (exp))
19944 type = TREE_TYPE (exp);
19945 decl = exp;
19947 else
19949 type = exp;
19950 decl = NULL;
19953 /* Don't do dynamic stack realignment for long long objects with
19954 -mpreferred-stack-boundary=2. */
19955 if (!TARGET_64BIT
19956 && align == 64
19957 && ix86_preferred_stack_boundary < 64
19958 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
19959 && (!type || !TYPE_USER_ALIGN (type))
19960 && (!decl || !DECL_USER_ALIGN (decl)))
19961 align = 32;
19963 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
19964 register in MODE. We will return the larger alignment of XF
19965 and DF. */
19966 if (!type)
19968 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
19969 align = GET_MODE_ALIGNMENT (DFmode);
19970 return align;
19973 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
19974 to a 16-byte boundary. */
19975 if (TARGET_64BIT)
19977 if (AGGREGATE_TYPE_P (type)
19978 && TYPE_SIZE (type)
19979 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19980 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
19981 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19982 return 128;
19984 if (TREE_CODE (type) == ARRAY_TYPE)
19986 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19987 return 64;
19988 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19989 return 128;
19991 else if (TREE_CODE (type) == COMPLEX_TYPE)
19993 if (TYPE_MODE (type) == DCmode && align < 64)
19994 return 64;
19995 if ((TYPE_MODE (type) == XCmode
19996 || TYPE_MODE (type) == TCmode) && align < 128)
19997 return 128;
19999 else if ((TREE_CODE (type) == RECORD_TYPE
20000 || TREE_CODE (type) == UNION_TYPE
20001 || TREE_CODE (type) == QUAL_UNION_TYPE)
20002 && TYPE_FIELDS (type))
20004 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
20005 return 64;
20006 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
20007 return 128;
20009 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20010 || TREE_CODE (type) == INTEGER_TYPE)
20013 if (TYPE_MODE (type) == DFmode && align < 64)
20014 return 64;
20015 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
20016 return 128;
20018 return align;
20021 /* Emit RTL insns to initialize the variable parts of a trampoline.
20022 FNADDR is an RTX for the address of the function's pure code.
20023 CXT is an RTX for the static chain value for the function. */
20024 void
20025 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
20027 if (!TARGET_64BIT)
20029 /* Compute offset from the end of the jmp to the target function. */
20030 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
20031 plus_constant (tramp, 10),
20032 NULL_RTX, 1, OPTAB_DIRECT);
20033 emit_move_insn (gen_rtx_MEM (QImode, tramp),
20034 gen_int_mode (0xb9, QImode));
20035 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
20036 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
20037 gen_int_mode (0xe9, QImode));
20038 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
20040 else
20042 int offset = 0;
20043 /* Try to load the address using the shorter movl instead of movabs.
20044 We may want to support movq for kernel mode, but the kernel does not
20045 use trampolines at the moment. */
20046 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
20048 fnaddr = copy_to_mode_reg (DImode, fnaddr);
20049 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20050 gen_int_mode (0xbb41, HImode));
20051 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
20052 gen_lowpart (SImode, fnaddr));
20053 offset += 6;
20055 else
20057 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20058 gen_int_mode (0xbb49, HImode));
20059 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
20060 fnaddr);
20061 offset += 10;
20063 /* Load static chain using movabs to r10. */
20064 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20065 gen_int_mode (0xba49, HImode));
20066 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
20067 cxt);
20068 offset += 10;
20069 /* Jump to r11. */
20070 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20071 gen_int_mode (0xff49, HImode));
20072 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
20073 gen_int_mode (0xe3, QImode));
20074 offset += 3;
20075 gcc_assert (offset <= TRAMPOLINE_SIZE);
20078 #ifdef ENABLE_EXECUTE_STACK
20079 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
20080 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
20081 #endif
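/* Editorial sketch of the trampolines emitted above, not part of the
   original source.  32-bit:

       b9 <cxt:4>         movl   $cxt, %ecx
       e9 <disp:4>        jmp    fnaddr

   64-bit, movabs form (constants are stored little-endian, so the
   HImode value 0xbb49 emits the bytes 49 bb):

       49 bb <fnaddr:8>   movabs $fnaddr, %r11
       49 ba <cxt:8>      movabs $cxt, %r10
       49 ff e3           jmpq   *%r11  */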
20084 /* Codes for all the SSE/MMX builtins. */
20085 enum ix86_builtins
20087 IX86_BUILTIN_ADDPS,
20088 IX86_BUILTIN_ADDSS,
20089 IX86_BUILTIN_DIVPS,
20090 IX86_BUILTIN_DIVSS,
20091 IX86_BUILTIN_MULPS,
20092 IX86_BUILTIN_MULSS,
20093 IX86_BUILTIN_SUBPS,
20094 IX86_BUILTIN_SUBSS,
20096 IX86_BUILTIN_CMPEQPS,
20097 IX86_BUILTIN_CMPLTPS,
20098 IX86_BUILTIN_CMPLEPS,
20099 IX86_BUILTIN_CMPGTPS,
20100 IX86_BUILTIN_CMPGEPS,
20101 IX86_BUILTIN_CMPNEQPS,
20102 IX86_BUILTIN_CMPNLTPS,
20103 IX86_BUILTIN_CMPNLEPS,
20104 IX86_BUILTIN_CMPNGTPS,
20105 IX86_BUILTIN_CMPNGEPS,
20106 IX86_BUILTIN_CMPORDPS,
20107 IX86_BUILTIN_CMPUNORDPS,
20108 IX86_BUILTIN_CMPEQSS,
20109 IX86_BUILTIN_CMPLTSS,
20110 IX86_BUILTIN_CMPLESS,
20111 IX86_BUILTIN_CMPNEQSS,
20112 IX86_BUILTIN_CMPNLTSS,
20113 IX86_BUILTIN_CMPNLESS,
20114 IX86_BUILTIN_CMPNGTSS,
20115 IX86_BUILTIN_CMPNGESS,
20116 IX86_BUILTIN_CMPORDSS,
20117 IX86_BUILTIN_CMPUNORDSS,
20119 IX86_BUILTIN_COMIEQSS,
20120 IX86_BUILTIN_COMILTSS,
20121 IX86_BUILTIN_COMILESS,
20122 IX86_BUILTIN_COMIGTSS,
20123 IX86_BUILTIN_COMIGESS,
20124 IX86_BUILTIN_COMINEQSS,
20125 IX86_BUILTIN_UCOMIEQSS,
20126 IX86_BUILTIN_UCOMILTSS,
20127 IX86_BUILTIN_UCOMILESS,
20128 IX86_BUILTIN_UCOMIGTSS,
20129 IX86_BUILTIN_UCOMIGESS,
20130 IX86_BUILTIN_UCOMINEQSS,
20132 IX86_BUILTIN_CVTPI2PS,
20133 IX86_BUILTIN_CVTPS2PI,
20134 IX86_BUILTIN_CVTSI2SS,
20135 IX86_BUILTIN_CVTSI642SS,
20136 IX86_BUILTIN_CVTSS2SI,
20137 IX86_BUILTIN_CVTSS2SI64,
20138 IX86_BUILTIN_CVTTPS2PI,
20139 IX86_BUILTIN_CVTTSS2SI,
20140 IX86_BUILTIN_CVTTSS2SI64,
20142 IX86_BUILTIN_MAXPS,
20143 IX86_BUILTIN_MAXSS,
20144 IX86_BUILTIN_MINPS,
20145 IX86_BUILTIN_MINSS,
20147 IX86_BUILTIN_LOADUPS,
20148 IX86_BUILTIN_STOREUPS,
20149 IX86_BUILTIN_MOVSS,
20151 IX86_BUILTIN_MOVHLPS,
20152 IX86_BUILTIN_MOVLHPS,
20153 IX86_BUILTIN_LOADHPS,
20154 IX86_BUILTIN_LOADLPS,
20155 IX86_BUILTIN_STOREHPS,
20156 IX86_BUILTIN_STORELPS,
20158 IX86_BUILTIN_MASKMOVQ,
20159 IX86_BUILTIN_MOVMSKPS,
20160 IX86_BUILTIN_PMOVMSKB,
20162 IX86_BUILTIN_MOVNTPS,
20163 IX86_BUILTIN_MOVNTQ,
20165 IX86_BUILTIN_LOADDQU,
20166 IX86_BUILTIN_STOREDQU,
20168 IX86_BUILTIN_PACKSSWB,
20169 IX86_BUILTIN_PACKSSDW,
20170 IX86_BUILTIN_PACKUSWB,
20172 IX86_BUILTIN_PADDB,
20173 IX86_BUILTIN_PADDW,
20174 IX86_BUILTIN_PADDD,
20175 IX86_BUILTIN_PADDQ,
20176 IX86_BUILTIN_PADDSB,
20177 IX86_BUILTIN_PADDSW,
20178 IX86_BUILTIN_PADDUSB,
20179 IX86_BUILTIN_PADDUSW,
20180 IX86_BUILTIN_PSUBB,
20181 IX86_BUILTIN_PSUBW,
20182 IX86_BUILTIN_PSUBD,
20183 IX86_BUILTIN_PSUBQ,
20184 IX86_BUILTIN_PSUBSB,
20185 IX86_BUILTIN_PSUBSW,
20186 IX86_BUILTIN_PSUBUSB,
20187 IX86_BUILTIN_PSUBUSW,
20189 IX86_BUILTIN_PAND,
20190 IX86_BUILTIN_PANDN,
20191 IX86_BUILTIN_POR,
20192 IX86_BUILTIN_PXOR,
20194 IX86_BUILTIN_PAVGB,
20195 IX86_BUILTIN_PAVGW,
20197 IX86_BUILTIN_PCMPEQB,
20198 IX86_BUILTIN_PCMPEQW,
20199 IX86_BUILTIN_PCMPEQD,
20200 IX86_BUILTIN_PCMPGTB,
20201 IX86_BUILTIN_PCMPGTW,
20202 IX86_BUILTIN_PCMPGTD,
20204 IX86_BUILTIN_PMADDWD,
20206 IX86_BUILTIN_PMAXSW,
20207 IX86_BUILTIN_PMAXUB,
20208 IX86_BUILTIN_PMINSW,
20209 IX86_BUILTIN_PMINUB,
20211 IX86_BUILTIN_PMULHUW,
20212 IX86_BUILTIN_PMULHW,
20213 IX86_BUILTIN_PMULLW,
20215 IX86_BUILTIN_PSADBW,
20216 IX86_BUILTIN_PSHUFW,
20218 IX86_BUILTIN_PSLLW,
20219 IX86_BUILTIN_PSLLD,
20220 IX86_BUILTIN_PSLLQ,
20221 IX86_BUILTIN_PSRAW,
20222 IX86_BUILTIN_PSRAD,
20223 IX86_BUILTIN_PSRLW,
20224 IX86_BUILTIN_PSRLD,
20225 IX86_BUILTIN_PSRLQ,
20226 IX86_BUILTIN_PSLLWI,
20227 IX86_BUILTIN_PSLLDI,
20228 IX86_BUILTIN_PSLLQI,
20229 IX86_BUILTIN_PSRAWI,
20230 IX86_BUILTIN_PSRADI,
20231 IX86_BUILTIN_PSRLWI,
20232 IX86_BUILTIN_PSRLDI,
20233 IX86_BUILTIN_PSRLQI,
20235 IX86_BUILTIN_PUNPCKHBW,
20236 IX86_BUILTIN_PUNPCKHWD,
20237 IX86_BUILTIN_PUNPCKHDQ,
20238 IX86_BUILTIN_PUNPCKLBW,
20239 IX86_BUILTIN_PUNPCKLWD,
20240 IX86_BUILTIN_PUNPCKLDQ,
20242 IX86_BUILTIN_SHUFPS,
20244 IX86_BUILTIN_RCPPS,
20245 IX86_BUILTIN_RCPSS,
20246 IX86_BUILTIN_RSQRTPS,
20247 IX86_BUILTIN_RSQRTPS_NR,
20248 IX86_BUILTIN_RSQRTSS,
20249 IX86_BUILTIN_RSQRTF,
20250 IX86_BUILTIN_SQRTPS,
20251 IX86_BUILTIN_SQRTPS_NR,
20252 IX86_BUILTIN_SQRTSS,
20254 IX86_BUILTIN_UNPCKHPS,
20255 IX86_BUILTIN_UNPCKLPS,
20257 IX86_BUILTIN_ANDPS,
20258 IX86_BUILTIN_ANDNPS,
20259 IX86_BUILTIN_ORPS,
20260 IX86_BUILTIN_XORPS,
20262 IX86_BUILTIN_EMMS,
20263 IX86_BUILTIN_LDMXCSR,
20264 IX86_BUILTIN_STMXCSR,
20265 IX86_BUILTIN_SFENCE,
20267 /* 3DNow! Original */
20268 IX86_BUILTIN_FEMMS,
20269 IX86_BUILTIN_PAVGUSB,
20270 IX86_BUILTIN_PF2ID,
20271 IX86_BUILTIN_PFACC,
20272 IX86_BUILTIN_PFADD,
20273 IX86_BUILTIN_PFCMPEQ,
20274 IX86_BUILTIN_PFCMPGE,
20275 IX86_BUILTIN_PFCMPGT,
20276 IX86_BUILTIN_PFMAX,
20277 IX86_BUILTIN_PFMIN,
20278 IX86_BUILTIN_PFMUL,
20279 IX86_BUILTIN_PFRCP,
20280 IX86_BUILTIN_PFRCPIT1,
20281 IX86_BUILTIN_PFRCPIT2,
20282 IX86_BUILTIN_PFRSQIT1,
20283 IX86_BUILTIN_PFRSQRT,
20284 IX86_BUILTIN_PFSUB,
20285 IX86_BUILTIN_PFSUBR,
20286 IX86_BUILTIN_PI2FD,
20287 IX86_BUILTIN_PMULHRW,
20289 /* 3DNow! Athlon Extensions */
20290 IX86_BUILTIN_PF2IW,
20291 IX86_BUILTIN_PFNACC,
20292 IX86_BUILTIN_PFPNACC,
20293 IX86_BUILTIN_PI2FW,
20294 IX86_BUILTIN_PSWAPDSI,
20295 IX86_BUILTIN_PSWAPDSF,
20297 /* SSE2 */
20298 IX86_BUILTIN_ADDPD,
20299 IX86_BUILTIN_ADDSD,
20300 IX86_BUILTIN_DIVPD,
20301 IX86_BUILTIN_DIVSD,
20302 IX86_BUILTIN_MULPD,
20303 IX86_BUILTIN_MULSD,
20304 IX86_BUILTIN_SUBPD,
20305 IX86_BUILTIN_SUBSD,
20307 IX86_BUILTIN_CMPEQPD,
20308 IX86_BUILTIN_CMPLTPD,
20309 IX86_BUILTIN_CMPLEPD,
20310 IX86_BUILTIN_CMPGTPD,
20311 IX86_BUILTIN_CMPGEPD,
20312 IX86_BUILTIN_CMPNEQPD,
20313 IX86_BUILTIN_CMPNLTPD,
20314 IX86_BUILTIN_CMPNLEPD,
20315 IX86_BUILTIN_CMPNGTPD,
20316 IX86_BUILTIN_CMPNGEPD,
20317 IX86_BUILTIN_CMPORDPD,
20318 IX86_BUILTIN_CMPUNORDPD,
20319 IX86_BUILTIN_CMPEQSD,
20320 IX86_BUILTIN_CMPLTSD,
20321 IX86_BUILTIN_CMPLESD,
20322 IX86_BUILTIN_CMPNEQSD,
20323 IX86_BUILTIN_CMPNLTSD,
20324 IX86_BUILTIN_CMPNLESD,
20325 IX86_BUILTIN_CMPORDSD,
20326 IX86_BUILTIN_CMPUNORDSD,
20328 IX86_BUILTIN_COMIEQSD,
20329 IX86_BUILTIN_COMILTSD,
20330 IX86_BUILTIN_COMILESD,
20331 IX86_BUILTIN_COMIGTSD,
20332 IX86_BUILTIN_COMIGESD,
20333 IX86_BUILTIN_COMINEQSD,
20334 IX86_BUILTIN_UCOMIEQSD,
20335 IX86_BUILTIN_UCOMILTSD,
20336 IX86_BUILTIN_UCOMILESD,
20337 IX86_BUILTIN_UCOMIGTSD,
20338 IX86_BUILTIN_UCOMIGESD,
20339 IX86_BUILTIN_UCOMINEQSD,
20341 IX86_BUILTIN_MAXPD,
20342 IX86_BUILTIN_MAXSD,
20343 IX86_BUILTIN_MINPD,
20344 IX86_BUILTIN_MINSD,
20346 IX86_BUILTIN_ANDPD,
20347 IX86_BUILTIN_ANDNPD,
20348 IX86_BUILTIN_ORPD,
20349 IX86_BUILTIN_XORPD,
20351 IX86_BUILTIN_SQRTPD,
20352 IX86_BUILTIN_SQRTSD,
20354 IX86_BUILTIN_UNPCKHPD,
20355 IX86_BUILTIN_UNPCKLPD,
20357 IX86_BUILTIN_SHUFPD,
20359 IX86_BUILTIN_LOADUPD,
20360 IX86_BUILTIN_STOREUPD,
20361 IX86_BUILTIN_MOVSD,
20363 IX86_BUILTIN_LOADHPD,
20364 IX86_BUILTIN_LOADLPD,
20366 IX86_BUILTIN_CVTDQ2PD,
20367 IX86_BUILTIN_CVTDQ2PS,
20369 IX86_BUILTIN_CVTPD2DQ,
20370 IX86_BUILTIN_CVTPD2PI,
20371 IX86_BUILTIN_CVTPD2PS,
20372 IX86_BUILTIN_CVTTPD2DQ,
20373 IX86_BUILTIN_CVTTPD2PI,
20375 IX86_BUILTIN_CVTPI2PD,
20376 IX86_BUILTIN_CVTSI2SD,
20377 IX86_BUILTIN_CVTSI642SD,
20379 IX86_BUILTIN_CVTSD2SI,
20380 IX86_BUILTIN_CVTSD2SI64,
20381 IX86_BUILTIN_CVTSD2SS,
20382 IX86_BUILTIN_CVTSS2SD,
20383 IX86_BUILTIN_CVTTSD2SI,
20384 IX86_BUILTIN_CVTTSD2SI64,
20386 IX86_BUILTIN_CVTPS2DQ,
20387 IX86_BUILTIN_CVTPS2PD,
20388 IX86_BUILTIN_CVTTPS2DQ,
20390 IX86_BUILTIN_MOVNTI,
20391 IX86_BUILTIN_MOVNTPD,
20392 IX86_BUILTIN_MOVNTDQ,
20394 IX86_BUILTIN_MOVQ128,
20396 /* SSE2 MMX */
20397 IX86_BUILTIN_MASKMOVDQU,
20398 IX86_BUILTIN_MOVMSKPD,
20399 IX86_BUILTIN_PMOVMSKB128,
20401 IX86_BUILTIN_PACKSSWB128,
20402 IX86_BUILTIN_PACKSSDW128,
20403 IX86_BUILTIN_PACKUSWB128,
20405 IX86_BUILTIN_PADDB128,
20406 IX86_BUILTIN_PADDW128,
20407 IX86_BUILTIN_PADDD128,
20408 IX86_BUILTIN_PADDQ128,
20409 IX86_BUILTIN_PADDSB128,
20410 IX86_BUILTIN_PADDSW128,
20411 IX86_BUILTIN_PADDUSB128,
20412 IX86_BUILTIN_PADDUSW128,
20413 IX86_BUILTIN_PSUBB128,
20414 IX86_BUILTIN_PSUBW128,
20415 IX86_BUILTIN_PSUBD128,
20416 IX86_BUILTIN_PSUBQ128,
20417 IX86_BUILTIN_PSUBSB128,
20418 IX86_BUILTIN_PSUBSW128,
20419 IX86_BUILTIN_PSUBUSB128,
20420 IX86_BUILTIN_PSUBUSW128,
20422 IX86_BUILTIN_PAND128,
20423 IX86_BUILTIN_PANDN128,
20424 IX86_BUILTIN_POR128,
20425 IX86_BUILTIN_PXOR128,
20427 IX86_BUILTIN_PAVGB128,
20428 IX86_BUILTIN_PAVGW128,
20430 IX86_BUILTIN_PCMPEQB128,
20431 IX86_BUILTIN_PCMPEQW128,
20432 IX86_BUILTIN_PCMPEQD128,
20433 IX86_BUILTIN_PCMPGTB128,
20434 IX86_BUILTIN_PCMPGTW128,
20435 IX86_BUILTIN_PCMPGTD128,
20437 IX86_BUILTIN_PMADDWD128,
20439 IX86_BUILTIN_PMAXSW128,
20440 IX86_BUILTIN_PMAXUB128,
20441 IX86_BUILTIN_PMINSW128,
20442 IX86_BUILTIN_PMINUB128,
20444 IX86_BUILTIN_PMULUDQ,
20445 IX86_BUILTIN_PMULUDQ128,
20446 IX86_BUILTIN_PMULHUW128,
20447 IX86_BUILTIN_PMULHW128,
20448 IX86_BUILTIN_PMULLW128,
20450 IX86_BUILTIN_PSADBW128,
20451 IX86_BUILTIN_PSHUFHW,
20452 IX86_BUILTIN_PSHUFLW,
20453 IX86_BUILTIN_PSHUFD,
20455 IX86_BUILTIN_PSLLDQI128,
20456 IX86_BUILTIN_PSLLWI128,
20457 IX86_BUILTIN_PSLLDI128,
20458 IX86_BUILTIN_PSLLQI128,
20459 IX86_BUILTIN_PSRAWI128,
20460 IX86_BUILTIN_PSRADI128,
20461 IX86_BUILTIN_PSRLDQI128,
20462 IX86_BUILTIN_PSRLWI128,
20463 IX86_BUILTIN_PSRLDI128,
20464 IX86_BUILTIN_PSRLQI128,
20466 IX86_BUILTIN_PSLLDQ128,
20467 IX86_BUILTIN_PSLLW128,
20468 IX86_BUILTIN_PSLLD128,
20469 IX86_BUILTIN_PSLLQ128,
20470 IX86_BUILTIN_PSRAW128,
20471 IX86_BUILTIN_PSRAD128,
20472 IX86_BUILTIN_PSRLW128,
20473 IX86_BUILTIN_PSRLD128,
20474 IX86_BUILTIN_PSRLQ128,
20476 IX86_BUILTIN_PUNPCKHBW128,
20477 IX86_BUILTIN_PUNPCKHWD128,
20478 IX86_BUILTIN_PUNPCKHDQ128,
20479 IX86_BUILTIN_PUNPCKHQDQ128,
20480 IX86_BUILTIN_PUNPCKLBW128,
20481 IX86_BUILTIN_PUNPCKLWD128,
20482 IX86_BUILTIN_PUNPCKLDQ128,
20483 IX86_BUILTIN_PUNPCKLQDQ128,
20485 IX86_BUILTIN_CLFLUSH,
20486 IX86_BUILTIN_MFENCE,
20487 IX86_BUILTIN_LFENCE,
20489 IX86_BUILTIN_BSRSI,
20490 IX86_BUILTIN_BSRDI,
20491 IX86_BUILTIN_RDPMC,
20492 IX86_BUILTIN_RDTSC,
20493 IX86_BUILTIN_RDTSCP,
20494 IX86_BUILTIN_ROLQI,
20495 IX86_BUILTIN_ROLHI,
20496 IX86_BUILTIN_RORQI,
20497 IX86_BUILTIN_RORHI,
20499 /* SSE3. */
20500 IX86_BUILTIN_ADDSUBPS,
20501 IX86_BUILTIN_HADDPS,
20502 IX86_BUILTIN_HSUBPS,
20503 IX86_BUILTIN_MOVSHDUP,
20504 IX86_BUILTIN_MOVSLDUP,
20505 IX86_BUILTIN_ADDSUBPD,
20506 IX86_BUILTIN_HADDPD,
20507 IX86_BUILTIN_HSUBPD,
20508 IX86_BUILTIN_LDDQU,
20510 IX86_BUILTIN_MONITOR,
20511 IX86_BUILTIN_MWAIT,
20513 /* SSSE3. */
20514 IX86_BUILTIN_PHADDW,
20515 IX86_BUILTIN_PHADDD,
20516 IX86_BUILTIN_PHADDSW,
20517 IX86_BUILTIN_PHSUBW,
20518 IX86_BUILTIN_PHSUBD,
20519 IX86_BUILTIN_PHSUBSW,
20520 IX86_BUILTIN_PMADDUBSW,
20521 IX86_BUILTIN_PMULHRSW,
20522 IX86_BUILTIN_PSHUFB,
20523 IX86_BUILTIN_PSIGNB,
20524 IX86_BUILTIN_PSIGNW,
20525 IX86_BUILTIN_PSIGND,
20526 IX86_BUILTIN_PALIGNR,
20527 IX86_BUILTIN_PABSB,
20528 IX86_BUILTIN_PABSW,
20529 IX86_BUILTIN_PABSD,
20531 IX86_BUILTIN_PHADDW128,
20532 IX86_BUILTIN_PHADDD128,
20533 IX86_BUILTIN_PHADDSW128,
20534 IX86_BUILTIN_PHSUBW128,
20535 IX86_BUILTIN_PHSUBD128,
20536 IX86_BUILTIN_PHSUBSW128,
20537 IX86_BUILTIN_PMADDUBSW128,
20538 IX86_BUILTIN_PMULHRSW128,
20539 IX86_BUILTIN_PSHUFB128,
20540 IX86_BUILTIN_PSIGNB128,
20541 IX86_BUILTIN_PSIGNW128,
20542 IX86_BUILTIN_PSIGND128,
20543 IX86_BUILTIN_PALIGNR128,
20544 IX86_BUILTIN_PABSB128,
20545 IX86_BUILTIN_PABSW128,
20546 IX86_BUILTIN_PABSD128,
20548 /* AMDFAM10 - SSE4A New Instructions. */
20549 IX86_BUILTIN_MOVNTSD,
20550 IX86_BUILTIN_MOVNTSS,
20551 IX86_BUILTIN_EXTRQI,
20552 IX86_BUILTIN_EXTRQ,
20553 IX86_BUILTIN_INSERTQI,
20554 IX86_BUILTIN_INSERTQ,
20556 /* SSE4.1. */
20557 IX86_BUILTIN_BLENDPD,
20558 IX86_BUILTIN_BLENDPS,
20559 IX86_BUILTIN_BLENDVPD,
20560 IX86_BUILTIN_BLENDVPS,
20561 IX86_BUILTIN_PBLENDVB128,
20562 IX86_BUILTIN_PBLENDW128,
20564 IX86_BUILTIN_DPPD,
20565 IX86_BUILTIN_DPPS,
20567 IX86_BUILTIN_INSERTPS128,
20569 IX86_BUILTIN_MOVNTDQA,
20570 IX86_BUILTIN_MPSADBW128,
20571 IX86_BUILTIN_PACKUSDW128,
20572 IX86_BUILTIN_PCMPEQQ,
20573 IX86_BUILTIN_PHMINPOSUW128,
20575 IX86_BUILTIN_PMAXSB128,
20576 IX86_BUILTIN_PMAXSD128,
20577 IX86_BUILTIN_PMAXUD128,
20578 IX86_BUILTIN_PMAXUW128,
20580 IX86_BUILTIN_PMINSB128,
20581 IX86_BUILTIN_PMINSD128,
20582 IX86_BUILTIN_PMINUD128,
20583 IX86_BUILTIN_PMINUW128,
20585 IX86_BUILTIN_PMOVSXBW128,
20586 IX86_BUILTIN_PMOVSXBD128,
20587 IX86_BUILTIN_PMOVSXBQ128,
20588 IX86_BUILTIN_PMOVSXWD128,
20589 IX86_BUILTIN_PMOVSXWQ128,
20590 IX86_BUILTIN_PMOVSXDQ128,
20592 IX86_BUILTIN_PMOVZXBW128,
20593 IX86_BUILTIN_PMOVZXBD128,
20594 IX86_BUILTIN_PMOVZXBQ128,
20595 IX86_BUILTIN_PMOVZXWD128,
20596 IX86_BUILTIN_PMOVZXWQ128,
20597 IX86_BUILTIN_PMOVZXDQ128,
20599 IX86_BUILTIN_PMULDQ128,
20600 IX86_BUILTIN_PMULLD128,
20602 IX86_BUILTIN_ROUNDPD,
20603 IX86_BUILTIN_ROUNDPS,
20604 IX86_BUILTIN_ROUNDSD,
20605 IX86_BUILTIN_ROUNDSS,
20607 IX86_BUILTIN_PTESTZ,
20608 IX86_BUILTIN_PTESTC,
20609 IX86_BUILTIN_PTESTNZC,
20611 IX86_BUILTIN_VEC_INIT_V2SI,
20612 IX86_BUILTIN_VEC_INIT_V4HI,
20613 IX86_BUILTIN_VEC_INIT_V8QI,
20614 IX86_BUILTIN_VEC_EXT_V2DF,
20615 IX86_BUILTIN_VEC_EXT_V2DI,
20616 IX86_BUILTIN_VEC_EXT_V4SF,
20617 IX86_BUILTIN_VEC_EXT_V4SI,
20618 IX86_BUILTIN_VEC_EXT_V8HI,
20619 IX86_BUILTIN_VEC_EXT_V2SI,
20620 IX86_BUILTIN_VEC_EXT_V4HI,
20621 IX86_BUILTIN_VEC_EXT_V16QI,
20622 IX86_BUILTIN_VEC_SET_V2DI,
20623 IX86_BUILTIN_VEC_SET_V4SF,
20624 IX86_BUILTIN_VEC_SET_V4SI,
20625 IX86_BUILTIN_VEC_SET_V8HI,
20626 IX86_BUILTIN_VEC_SET_V4HI,
20627 IX86_BUILTIN_VEC_SET_V16QI,
20629 IX86_BUILTIN_VEC_PACK_SFIX,
20631 /* SSE4.2. */
20632 IX86_BUILTIN_CRC32QI,
20633 IX86_BUILTIN_CRC32HI,
20634 IX86_BUILTIN_CRC32SI,
20635 IX86_BUILTIN_CRC32DI,
20637 IX86_BUILTIN_PCMPESTRI128,
20638 IX86_BUILTIN_PCMPESTRM128,
20639 IX86_BUILTIN_PCMPESTRA128,
20640 IX86_BUILTIN_PCMPESTRC128,
20641 IX86_BUILTIN_PCMPESTRO128,
20642 IX86_BUILTIN_PCMPESTRS128,
20643 IX86_BUILTIN_PCMPESTRZ128,
20644 IX86_BUILTIN_PCMPISTRI128,
20645 IX86_BUILTIN_PCMPISTRM128,
20646 IX86_BUILTIN_PCMPISTRA128,
20647 IX86_BUILTIN_PCMPISTRC128,
20648 IX86_BUILTIN_PCMPISTRO128,
20649 IX86_BUILTIN_PCMPISTRS128,
20650 IX86_BUILTIN_PCMPISTRZ128,
20652 IX86_BUILTIN_PCMPGTQ,
20654 /* AES instructions */
20655 IX86_BUILTIN_AESENC128,
20656 IX86_BUILTIN_AESENCLAST128,
20657 IX86_BUILTIN_AESDEC128,
20658 IX86_BUILTIN_AESDECLAST128,
20659 IX86_BUILTIN_AESIMC128,
20660 IX86_BUILTIN_AESKEYGENASSIST128,
20662 /* PCLMUL instruction */
20663 IX86_BUILTIN_PCLMULQDQ128,
20665 /* AVX */
20666 IX86_BUILTIN_ADDPD256,
20667 IX86_BUILTIN_ADDPS256,
20668 IX86_BUILTIN_ADDSUBPD256,
20669 IX86_BUILTIN_ADDSUBPS256,
20670 IX86_BUILTIN_ANDPD256,
20671 IX86_BUILTIN_ANDPS256,
20672 IX86_BUILTIN_ANDNPD256,
20673 IX86_BUILTIN_ANDNPS256,
20674 IX86_BUILTIN_BLENDPD256,
20675 IX86_BUILTIN_BLENDPS256,
20676 IX86_BUILTIN_BLENDVPD256,
20677 IX86_BUILTIN_BLENDVPS256,
20678 IX86_BUILTIN_DIVPD256,
20679 IX86_BUILTIN_DIVPS256,
20680 IX86_BUILTIN_DPPS256,
20681 IX86_BUILTIN_HADDPD256,
20682 IX86_BUILTIN_HADDPS256,
20683 IX86_BUILTIN_HSUBPD256,
20684 IX86_BUILTIN_HSUBPS256,
20685 IX86_BUILTIN_MAXPD256,
20686 IX86_BUILTIN_MAXPS256,
20687 IX86_BUILTIN_MINPD256,
20688 IX86_BUILTIN_MINPS256,
20689 IX86_BUILTIN_MULPD256,
20690 IX86_BUILTIN_MULPS256,
20691 IX86_BUILTIN_ORPD256,
20692 IX86_BUILTIN_ORPS256,
20693 IX86_BUILTIN_SHUFPD256,
20694 IX86_BUILTIN_SHUFPS256,
20695 IX86_BUILTIN_SUBPD256,
20696 IX86_BUILTIN_SUBPS256,
20697 IX86_BUILTIN_XORPD256,
20698 IX86_BUILTIN_XORPS256,
20699 IX86_BUILTIN_CMPSD,
20700 IX86_BUILTIN_CMPSS,
20701 IX86_BUILTIN_CMPPD,
20702 IX86_BUILTIN_CMPPS,
20703 IX86_BUILTIN_CMPPD256,
20704 IX86_BUILTIN_CMPPS256,
20705 IX86_BUILTIN_CVTDQ2PD256,
20706 IX86_BUILTIN_CVTDQ2PS256,
20707 IX86_BUILTIN_CVTPD2PS256,
20708 IX86_BUILTIN_CVTPS2DQ256,
20709 IX86_BUILTIN_CVTPS2PD256,
20710 IX86_BUILTIN_CVTTPD2DQ256,
20711 IX86_BUILTIN_CVTPD2DQ256,
20712 IX86_BUILTIN_CVTTPS2DQ256,
20713 IX86_BUILTIN_EXTRACTF128PD256,
20714 IX86_BUILTIN_EXTRACTF128PS256,
20715 IX86_BUILTIN_EXTRACTF128SI256,
20716 IX86_BUILTIN_VZEROALL,
20717 IX86_BUILTIN_VZEROUPPER,
20718 IX86_BUILTIN_VZEROUPPER_REX64,
20719 IX86_BUILTIN_VPERMILVARPD,
20720 IX86_BUILTIN_VPERMILVARPS,
20721 IX86_BUILTIN_VPERMILVARPD256,
20722 IX86_BUILTIN_VPERMILVARPS256,
20723 IX86_BUILTIN_VPERMILPD,
20724 IX86_BUILTIN_VPERMILPS,
20725 IX86_BUILTIN_VPERMILPD256,
20726 IX86_BUILTIN_VPERMILPS256,
20727 IX86_BUILTIN_VPERM2F128PD256,
20728 IX86_BUILTIN_VPERM2F128PS256,
20729 IX86_BUILTIN_VPERM2F128SI256,
20730 IX86_BUILTIN_VBROADCASTSS,
20731 IX86_BUILTIN_VBROADCASTSD256,
20732 IX86_BUILTIN_VBROADCASTSS256,
20733 IX86_BUILTIN_VBROADCASTPD256,
20734 IX86_BUILTIN_VBROADCASTPS256,
20735 IX86_BUILTIN_VINSERTF128PD256,
20736 IX86_BUILTIN_VINSERTF128PS256,
20737 IX86_BUILTIN_VINSERTF128SI256,
20738 IX86_BUILTIN_LOADUPD256,
20739 IX86_BUILTIN_LOADUPS256,
20740 IX86_BUILTIN_STOREUPD256,
20741 IX86_BUILTIN_STOREUPS256,
20742 IX86_BUILTIN_LDDQU256,
20743 IX86_BUILTIN_MOVNTDQ256,
20744 IX86_BUILTIN_MOVNTPD256,
20745 IX86_BUILTIN_MOVNTPS256,
20746 IX86_BUILTIN_LOADDQU256,
20747 IX86_BUILTIN_STOREDQU256,
20748 IX86_BUILTIN_MASKLOADPD,
20749 IX86_BUILTIN_MASKLOADPS,
20750 IX86_BUILTIN_MASKSTOREPD,
20751 IX86_BUILTIN_MASKSTOREPS,
20752 IX86_BUILTIN_MASKLOADPD256,
20753 IX86_BUILTIN_MASKLOADPS256,
20754 IX86_BUILTIN_MASKSTOREPD256,
20755 IX86_BUILTIN_MASKSTOREPS256,
20756 IX86_BUILTIN_MOVSHDUP256,
20757 IX86_BUILTIN_MOVSLDUP256,
20758 IX86_BUILTIN_MOVDDUP256,
20760 IX86_BUILTIN_SQRTPD256,
20761 IX86_BUILTIN_SQRTPS256,
20762 IX86_BUILTIN_SQRTPS_NR256,
20763 IX86_BUILTIN_RSQRTPS256,
20764 IX86_BUILTIN_RSQRTPS_NR256,
20766 IX86_BUILTIN_RCPPS256,
20768 IX86_BUILTIN_ROUNDPD256,
20769 IX86_BUILTIN_ROUNDPS256,
20771 IX86_BUILTIN_UNPCKHPD256,
20772 IX86_BUILTIN_UNPCKLPD256,
20773 IX86_BUILTIN_UNPCKHPS256,
20774 IX86_BUILTIN_UNPCKLPS256,
20776 IX86_BUILTIN_SI256_SI,
20777 IX86_BUILTIN_PS256_PS,
20778 IX86_BUILTIN_PD256_PD,
20779 IX86_BUILTIN_SI_SI256,
20780 IX86_BUILTIN_PS_PS256,
20781 IX86_BUILTIN_PD_PD256,
20783 IX86_BUILTIN_VTESTZPD,
20784 IX86_BUILTIN_VTESTCPD,
20785 IX86_BUILTIN_VTESTNZCPD,
20786 IX86_BUILTIN_VTESTZPS,
20787 IX86_BUILTIN_VTESTCPS,
20788 IX86_BUILTIN_VTESTNZCPS,
20789 IX86_BUILTIN_VTESTZPD256,
20790 IX86_BUILTIN_VTESTCPD256,
20791 IX86_BUILTIN_VTESTNZCPD256,
20792 IX86_BUILTIN_VTESTZPS256,
20793 IX86_BUILTIN_VTESTCPS256,
20794 IX86_BUILTIN_VTESTNZCPS256,
20795 IX86_BUILTIN_PTESTZ256,
20796 IX86_BUILTIN_PTESTC256,
20797 IX86_BUILTIN_PTESTNZC256,
20799 IX86_BUILTIN_MOVMSKPD256,
20800 IX86_BUILTIN_MOVMSKPS256,
20802 /* TFmode support builtins. */
20803 IX86_BUILTIN_INFQ,
20804 IX86_BUILTIN_HUGE_VALQ,
20805 IX86_BUILTIN_FABSQ,
20806 IX86_BUILTIN_COPYSIGNQ,
20808 /* SSE5 instructions */
20809 IX86_BUILTIN_FMADDSS,
20810 IX86_BUILTIN_FMADDSD,
20811 IX86_BUILTIN_FMADDPS,
20812 IX86_BUILTIN_FMADDPD,
20813 IX86_BUILTIN_FMSUBSS,
20814 IX86_BUILTIN_FMSUBSD,
20815 IX86_BUILTIN_FMSUBPS,
20816 IX86_BUILTIN_FMSUBPD,
20817 IX86_BUILTIN_FNMADDSS,
20818 IX86_BUILTIN_FNMADDSD,
20819 IX86_BUILTIN_FNMADDPS,
20820 IX86_BUILTIN_FNMADDPD,
20821 IX86_BUILTIN_FNMSUBSS,
20822 IX86_BUILTIN_FNMSUBSD,
20823 IX86_BUILTIN_FNMSUBPS,
20824 IX86_BUILTIN_FNMSUBPD,
20825 IX86_BUILTIN_PCMOV,
20826 IX86_BUILTIN_PCMOV_V2DI,
20827 IX86_BUILTIN_PCMOV_V4SI,
20828 IX86_BUILTIN_PCMOV_V8HI,
20829 IX86_BUILTIN_PCMOV_V16QI,
20830 IX86_BUILTIN_PCMOV_V4SF,
20831 IX86_BUILTIN_PCMOV_V2DF,
20832 IX86_BUILTIN_PPERM,
20833 IX86_BUILTIN_PERMPS,
20834 IX86_BUILTIN_PERMPD,
20835 IX86_BUILTIN_PMACSSWW,
20836 IX86_BUILTIN_PMACSWW,
20837 IX86_BUILTIN_PMACSSWD,
20838 IX86_BUILTIN_PMACSWD,
20839 IX86_BUILTIN_PMACSSDD,
20840 IX86_BUILTIN_PMACSDD,
20841 IX86_BUILTIN_PMACSSDQL,
20842 IX86_BUILTIN_PMACSSDQH,
20843 IX86_BUILTIN_PMACSDQL,
20844 IX86_BUILTIN_PMACSDQH,
20845 IX86_BUILTIN_PMADCSSWD,
20846 IX86_BUILTIN_PMADCSWD,
20847 IX86_BUILTIN_PHADDBW,
20848 IX86_BUILTIN_PHADDBD,
20849 IX86_BUILTIN_PHADDBQ,
20850 IX86_BUILTIN_PHADDWD,
20851 IX86_BUILTIN_PHADDWQ,
20852 IX86_BUILTIN_PHADDDQ,
20853 IX86_BUILTIN_PHADDUBW,
20854 IX86_BUILTIN_PHADDUBD,
20855 IX86_BUILTIN_PHADDUBQ,
20856 IX86_BUILTIN_PHADDUWD,
20857 IX86_BUILTIN_PHADDUWQ,
20858 IX86_BUILTIN_PHADDUDQ,
20859 IX86_BUILTIN_PHSUBBW,
20860 IX86_BUILTIN_PHSUBWD,
20861 IX86_BUILTIN_PHSUBDQ,
20862 IX86_BUILTIN_PROTB,
20863 IX86_BUILTIN_PROTW,
20864 IX86_BUILTIN_PROTD,
20865 IX86_BUILTIN_PROTQ,
20866 IX86_BUILTIN_PROTB_IMM,
20867 IX86_BUILTIN_PROTW_IMM,
20868 IX86_BUILTIN_PROTD_IMM,
20869 IX86_BUILTIN_PROTQ_IMM,
20870 IX86_BUILTIN_PSHLB,
20871 IX86_BUILTIN_PSHLW,
20872 IX86_BUILTIN_PSHLD,
20873 IX86_BUILTIN_PSHLQ,
20874 IX86_BUILTIN_PSHAB,
20875 IX86_BUILTIN_PSHAW,
20876 IX86_BUILTIN_PSHAD,
20877 IX86_BUILTIN_PSHAQ,
20878 IX86_BUILTIN_FRCZSS,
20879 IX86_BUILTIN_FRCZSD,
20880 IX86_BUILTIN_FRCZPS,
20881 IX86_BUILTIN_FRCZPD,
20882 IX86_BUILTIN_CVTPH2PS,
20883 IX86_BUILTIN_CVTPS2PH,
20885 IX86_BUILTIN_COMEQSS,
20886 IX86_BUILTIN_COMNESS,
20887 IX86_BUILTIN_COMLTSS,
20888 IX86_BUILTIN_COMLESS,
20889 IX86_BUILTIN_COMGTSS,
20890 IX86_BUILTIN_COMGESS,
20891 IX86_BUILTIN_COMUEQSS,
20892 IX86_BUILTIN_COMUNESS,
20893 IX86_BUILTIN_COMULTSS,
20894 IX86_BUILTIN_COMULESS,
20895 IX86_BUILTIN_COMUGTSS,
20896 IX86_BUILTIN_COMUGESS,
20897 IX86_BUILTIN_COMORDSS,
20898 IX86_BUILTIN_COMUNORDSS,
20899 IX86_BUILTIN_COMFALSESS,
20900 IX86_BUILTIN_COMTRUESS,
20902 IX86_BUILTIN_COMEQSD,
20903 IX86_BUILTIN_COMNESD,
20904 IX86_BUILTIN_COMLTSD,
20905 IX86_BUILTIN_COMLESD,
20906 IX86_BUILTIN_COMGTSD,
20907 IX86_BUILTIN_COMGESD,
20908 IX86_BUILTIN_COMUEQSD,
20909 IX86_BUILTIN_COMUNESD,
20910 IX86_BUILTIN_COMULTSD,
20911 IX86_BUILTIN_COMULESD,
20912 IX86_BUILTIN_COMUGTSD,
20913 IX86_BUILTIN_COMUGESD,
20914 IX86_BUILTIN_COMORDSD,
20915 IX86_BUILTIN_COMUNORDSD,
20916 IX86_BUILTIN_COMFALSESD,
20917 IX86_BUILTIN_COMTRUESD,
20919 IX86_BUILTIN_COMEQPS,
20920 IX86_BUILTIN_COMNEPS,
20921 IX86_BUILTIN_COMLTPS,
20922 IX86_BUILTIN_COMLEPS,
20923 IX86_BUILTIN_COMGTPS,
20924 IX86_BUILTIN_COMGEPS,
20925 IX86_BUILTIN_COMUEQPS,
20926 IX86_BUILTIN_COMUNEPS,
20927 IX86_BUILTIN_COMULTPS,
20928 IX86_BUILTIN_COMULEPS,
20929 IX86_BUILTIN_COMUGTPS,
20930 IX86_BUILTIN_COMUGEPS,
20931 IX86_BUILTIN_COMORDPS,
20932 IX86_BUILTIN_COMUNORDPS,
20933 IX86_BUILTIN_COMFALSEPS,
20934 IX86_BUILTIN_COMTRUEPS,
20936 IX86_BUILTIN_COMEQPD,
20937 IX86_BUILTIN_COMNEPD,
20938 IX86_BUILTIN_COMLTPD,
20939 IX86_BUILTIN_COMLEPD,
20940 IX86_BUILTIN_COMGTPD,
20941 IX86_BUILTIN_COMGEPD,
20942 IX86_BUILTIN_COMUEQPD,
20943 IX86_BUILTIN_COMUNEPD,
20944 IX86_BUILTIN_COMULTPD,
20945 IX86_BUILTIN_COMULEPD,
20946 IX86_BUILTIN_COMUGTPD,
20947 IX86_BUILTIN_COMUGEPD,
20948 IX86_BUILTIN_COMORDPD,
20949 IX86_BUILTIN_COMUNORDPD,
20950 IX86_BUILTIN_COMFALSEPD,
20951 IX86_BUILTIN_COMTRUEPD,
20953 IX86_BUILTIN_PCOMEQUB,
20954 IX86_BUILTIN_PCOMNEUB,
20955 IX86_BUILTIN_PCOMLTUB,
20956 IX86_BUILTIN_PCOMLEUB,
20957 IX86_BUILTIN_PCOMGTUB,
20958 IX86_BUILTIN_PCOMGEUB,
20959 IX86_BUILTIN_PCOMFALSEUB,
20960 IX86_BUILTIN_PCOMTRUEUB,
20961 IX86_BUILTIN_PCOMEQUW,
20962 IX86_BUILTIN_PCOMNEUW,
20963 IX86_BUILTIN_PCOMLTUW,
20964 IX86_BUILTIN_PCOMLEUW,
20965 IX86_BUILTIN_PCOMGTUW,
20966 IX86_BUILTIN_PCOMGEUW,
20967 IX86_BUILTIN_PCOMFALSEUW,
20968 IX86_BUILTIN_PCOMTRUEUW,
20969 IX86_BUILTIN_PCOMEQUD,
20970 IX86_BUILTIN_PCOMNEUD,
20971 IX86_BUILTIN_PCOMLTUD,
20972 IX86_BUILTIN_PCOMLEUD,
20973 IX86_BUILTIN_PCOMGTUD,
20974 IX86_BUILTIN_PCOMGEUD,
20975 IX86_BUILTIN_PCOMFALSEUD,
20976 IX86_BUILTIN_PCOMTRUEUD,
20977 IX86_BUILTIN_PCOMEQUQ,
20978 IX86_BUILTIN_PCOMNEUQ,
20979 IX86_BUILTIN_PCOMLTUQ,
20980 IX86_BUILTIN_PCOMLEUQ,
20981 IX86_BUILTIN_PCOMGTUQ,
20982 IX86_BUILTIN_PCOMGEUQ,
20983 IX86_BUILTIN_PCOMFALSEUQ,
20984 IX86_BUILTIN_PCOMTRUEUQ,
20986 IX86_BUILTIN_PCOMEQB,
20987 IX86_BUILTIN_PCOMNEB,
20988 IX86_BUILTIN_PCOMLTB,
20989 IX86_BUILTIN_PCOMLEB,
20990 IX86_BUILTIN_PCOMGTB,
20991 IX86_BUILTIN_PCOMGEB,
20992 IX86_BUILTIN_PCOMFALSEB,
20993 IX86_BUILTIN_PCOMTRUEB,
20994 IX86_BUILTIN_PCOMEQW,
20995 IX86_BUILTIN_PCOMNEW,
20996 IX86_BUILTIN_PCOMLTW,
20997 IX86_BUILTIN_PCOMLEW,
20998 IX86_BUILTIN_PCOMGTW,
20999 IX86_BUILTIN_PCOMGEW,
21000 IX86_BUILTIN_PCOMFALSEW,
21001 IX86_BUILTIN_PCOMTRUEW,
21002 IX86_BUILTIN_PCOMEQD,
21003 IX86_BUILTIN_PCOMNED,
21004 IX86_BUILTIN_PCOMLTD,
21005 IX86_BUILTIN_PCOMLED,
21006 IX86_BUILTIN_PCOMGTD,
21007 IX86_BUILTIN_PCOMGED,
21008 IX86_BUILTIN_PCOMFALSED,
21009 IX86_BUILTIN_PCOMTRUED,
21010 IX86_BUILTIN_PCOMEQQ,
21011 IX86_BUILTIN_PCOMNEQ,
21012 IX86_BUILTIN_PCOMLTQ,
21013 IX86_BUILTIN_PCOMLEQ,
21014 IX86_BUILTIN_PCOMGTQ,
21015 IX86_BUILTIN_PCOMGEQ,
21016 IX86_BUILTIN_PCOMFALSEQ,
21017 IX86_BUILTIN_PCOMTRUEQ,
21019 IX86_BUILTIN_MAX
21022 /* Table for the ix86 builtin decls. */
21023 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
21025 /* Table of all of the builtin functions that are possible with different ISAs
21026 but are waiting to be built until a function is declared to use that
21027 ISA. */
21028 struct GTY(()) builtin_isa {
21029 tree type; /* builtin type to use in the declaration */
21030 const char *name; /* function name */
21031 int isa; /* isa_flags this builtin is defined for */
21032 bool const_p; /* true if the declaration is constant */
21035 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
21038 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
21039 * of which isa_flags to use in the ix86_builtins_isa array. Store the
21040 * function decl in the ix86_builtins array. Return the function decl,
21041 * or NULL_TREE if the builtin was not added.
21043 * If the front end has a special hook for builtin functions, delay adding
21044 * builtin functions that aren't in the current ISA until the ISA is changed
21045 * with function specific optimization. Doing so can save about 300K for the
21046 * default compiler. When the builtin is expanded, check at that time whether
21047 * it is valid.
21049 * If the front end doesn't have a special hook, record all builtins, even if
21050 * they aren't in the current ISA, in case the user uses function specific
21051 * options for a different ISA, so that we don't get scope errors if a
21052 * builtin is added in the middle of a function scope. */
21054 static inline tree
21055 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
21057 tree decl = NULL_TREE;
21059 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
21061 ix86_builtins_isa[(int) code].isa = mask;
21063 if ((mask & ix86_isa_flags) != 0
21064 || (lang_hooks.builtin_function
21065 == lang_hooks.builtin_function_ext_scope))
21068 decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
21069 NULL_TREE);
21070 ix86_builtins[(int) code] = decl;
21071 ix86_builtins_isa[(int) code].type = NULL_TREE;
21073 else
21075 ix86_builtins[(int) code] = NULL_TREE;
21076 ix86_builtins_isa[(int) code].const_p = false;
21077 ix86_builtins_isa[(int) code].type = type;
21078 ix86_builtins_isa[(int) code].name = name;
21082 return decl;
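/* Editorial usage sketch, not part of the original source; the real
   calls appear further down in this file, and void_ftype_unsigned is
   assumed to be a function type tree built there:

       def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
                    void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);

   With -msse in effect the decl is created immediately; otherwise it
   is parked in ix86_builtins_isa until ix86_add_new_builtins is run
   for a function whose ISA includes SSE.  */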
21085 /* Like def_builtin, but also marks the function decl "const". */
21087 static inline tree
21088 def_builtin_const (int mask, const char *name, tree type,
21089 enum ix86_builtins code)
21091 tree decl = def_builtin (mask, name, type, code);
21092 if (decl)
21093 TREE_READONLY (decl) = 1;
21094 else
21095 ix86_builtins_isa[(int) code].const_p = true;
21097 return decl;
21100 /* Add any new builtin functions for a given ISA that may not have been
21101 declared. This saves a bit of space compared to adding all of the
21102 declarations to the tree, even if we didn't use them. */
21104 static void
21105 ix86_add_new_builtins (int isa)
21107 int i;
21108 tree decl;
21110 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
21112 if ((ix86_builtins_isa[i].isa & isa) != 0
21113 && ix86_builtins_isa[i].type != NULL_TREE)
21115 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
21116 ix86_builtins_isa[i].type,
21117 i, BUILT_IN_MD, NULL,
21118 NULL_TREE);
21120 ix86_builtins[i] = decl;
21121 ix86_builtins_isa[i].type = NULL_TREE;
21122 if (ix86_builtins_isa[i].const_p)
21123 TREE_READONLY (decl) = 1;
21128 /* Bits for builtin_description.flag. */
21130 /* Set when we don't support the comparison natively, and should
21131 swap_comparison in order to support it. */
21132 #define BUILTIN_DESC_SWAP_OPERANDS 1
21134 struct builtin_description
21136 const unsigned int mask;
21137 const enum insn_code icode;
21138 const char *const name;
21139 const enum ix86_builtins code;
21140 const enum rtx_code comparison;
21141 const int flag;
21144 static const struct builtin_description bdesc_comi[] =
21146 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
21147 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
21148 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
21149 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
21150 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
21151 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
21152 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
21153 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
21154 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
21155 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
21156 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
21157 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
21158 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
21159 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
21160 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
21161 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
21162 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
21163 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
21164 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
21165 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
21166 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
21167 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
21168 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
21169 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
21172 static const struct builtin_description bdesc_pcmpestr[] =
21174 /* SSE4.2 */
21175 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
21176 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
21177 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
21178 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
21179 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
21180 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
21181 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
21184 static const struct builtin_description bdesc_pcmpistr[] =
21186 /* SSE4.2 */
21187 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
21188 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
21189 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
21190 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
21191 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
21192 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
21193 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
21196 /* Special builtin types */
21197 enum ix86_special_builtin_type
21199 SPECIAL_FTYPE_UNKNOWN,
21200 VOID_FTYPE_VOID,
21201 UINT64_FTYPE_VOID,
21202 UINT64_FTYPE_PUNSIGNED,
21203 V32QI_FTYPE_PCCHAR,
21204 V16QI_FTYPE_PCCHAR,
21205 V8SF_FTYPE_PCV4SF,
21206 V8SF_FTYPE_PCFLOAT,
21207 V4DF_FTYPE_PCV2DF,
21208 V4DF_FTYPE_PCDOUBLE,
21209 V4SF_FTYPE_PCFLOAT,
21210 V2DF_FTYPE_PCDOUBLE,
21211 V8SF_FTYPE_PCV8SF_V8SF,
21212 V4DF_FTYPE_PCV4DF_V4DF,
21213 V4SF_FTYPE_V4SF_PCV2SF,
21214 V4SF_FTYPE_PCV4SF_V4SF,
21215 V2DF_FTYPE_V2DF_PCDOUBLE,
21216 V2DF_FTYPE_PCV2DF_V2DF,
21217 V2DI_FTYPE_PV2DI,
21218 VOID_FTYPE_PV2SF_V4SF,
21219 VOID_FTYPE_PV4DI_V4DI,
21220 VOID_FTYPE_PV2DI_V2DI,
21221 VOID_FTYPE_PCHAR_V32QI,
21222 VOID_FTYPE_PCHAR_V16QI,
21223 VOID_FTYPE_PFLOAT_V8SF,
21224 VOID_FTYPE_PFLOAT_V4SF,
21225 VOID_FTYPE_PDOUBLE_V4DF,
21226 VOID_FTYPE_PDOUBLE_V2DF,
21227 VOID_FTYPE_PDI_DI,
21228 VOID_FTYPE_PINT_INT,
21229 VOID_FTYPE_PV8SF_V8SF_V8SF,
21230 VOID_FTYPE_PV4DF_V4DF_V4DF,
21231 VOID_FTYPE_PV4SF_V4SF_V4SF,
21232 VOID_FTYPE_PV2DF_V2DF_V2DF
21235 /* Builtin types */
21236 enum ix86_builtin_type
21238 FTYPE_UNKNOWN,
21239 FLOAT128_FTYPE_FLOAT128,
21240 FLOAT_FTYPE_FLOAT,
21241 FLOAT128_FTYPE_FLOAT128_FLOAT128,
21242 INT_FTYPE_V8SF_V8SF_PTEST,
21243 INT_FTYPE_V4DI_V4DI_PTEST,
21244 INT_FTYPE_V4DF_V4DF_PTEST,
21245 INT_FTYPE_V4SF_V4SF_PTEST,
21246 INT_FTYPE_V2DI_V2DI_PTEST,
21247 INT_FTYPE_V2DF_V2DF_PTEST,
21248 INT_FTYPE_INT,
21249 UINT64_FTYPE_INT,
21250 INT64_FTYPE_INT64,
21251 INT64_FTYPE_V4SF,
21252 INT64_FTYPE_V2DF,
21253 INT_FTYPE_V16QI,
21254 INT_FTYPE_V8QI,
21255 INT_FTYPE_V8SF,
21256 INT_FTYPE_V4DF,
21257 INT_FTYPE_V4SF,
21258 INT_FTYPE_V2DF,
21259 V16QI_FTYPE_V16QI,
21260 V8SI_FTYPE_V8SF,
21261 V8SI_FTYPE_V4SI,
21262 V8HI_FTYPE_V8HI,
21263 V8HI_FTYPE_V16QI,
21264 V8QI_FTYPE_V8QI,
21265 V8SF_FTYPE_V8SF,
21266 V8SF_FTYPE_V8SI,
21267 V8SF_FTYPE_V4SF,
21268 V4SI_FTYPE_V4SI,
21269 V4SI_FTYPE_V16QI,
21270 V4SI_FTYPE_V8SI,
21271 V4SI_FTYPE_V8HI,
21272 V4SI_FTYPE_V4DF,
21273 V4SI_FTYPE_V4SF,
21274 V4SI_FTYPE_V2DF,
21275 V4HI_FTYPE_V4HI,
21276 V4DF_FTYPE_V4DF,
21277 V4DF_FTYPE_V4SI,
21278 V4DF_FTYPE_V4SF,
21279 V4DF_FTYPE_V2DF,
21280 V4SF_FTYPE_V4DF,
21281 V4SF_FTYPE_V4SF,
21282 V4SF_FTYPE_V4SF_VEC_MERGE,
21283 V4SF_FTYPE_V8SF,
21284 V4SF_FTYPE_V4SI,
21285 V4SF_FTYPE_V2DF,
21286 V2DI_FTYPE_V2DI,
21287 V2DI_FTYPE_V16QI,
21288 V2DI_FTYPE_V8HI,
21289 V2DI_FTYPE_V4SI,
21290 V2DF_FTYPE_V2DF,
21291 V2DF_FTYPE_V2DF_VEC_MERGE,
21292 V2DF_FTYPE_V4SI,
21293 V2DF_FTYPE_V4DF,
21294 V2DF_FTYPE_V4SF,
21295 V2DF_FTYPE_V2SI,
21296 V2SI_FTYPE_V2SI,
21297 V2SI_FTYPE_V4SF,
21298 V2SI_FTYPE_V2SF,
21299 V2SI_FTYPE_V2DF,
21300 V2SF_FTYPE_V2SF,
21301 V2SF_FTYPE_V2SI,
21302 V16QI_FTYPE_V16QI_V16QI,
21303 V16QI_FTYPE_V8HI_V8HI,
21304 V8QI_FTYPE_V8QI_V8QI,
21305 V8QI_FTYPE_V4HI_V4HI,
21306 V8HI_FTYPE_V8HI_V8HI,
21307 V8HI_FTYPE_V8HI_V8HI_COUNT,
21308 V8HI_FTYPE_V16QI_V16QI,
21309 V8HI_FTYPE_V4SI_V4SI,
21310 V8HI_FTYPE_V8HI_SI_COUNT,
21311 V8SF_FTYPE_V8SF_V8SF,
21312 V8SF_FTYPE_V8SF_V8SI,
21313 V4SI_FTYPE_V4SI_V4SI,
21314 V4SI_FTYPE_V4SI_V4SI_COUNT,
21315 V4SI_FTYPE_V8HI_V8HI,
21316 V4SI_FTYPE_V4SF_V4SF,
21317 V4SI_FTYPE_V2DF_V2DF,
21318 V4SI_FTYPE_V4SI_SI_COUNT,
21319 V4HI_FTYPE_V4HI_V4HI,
21320 V4HI_FTYPE_V4HI_V4HI_COUNT,
21321 V4HI_FTYPE_V8QI_V8QI,
21322 V4HI_FTYPE_V2SI_V2SI,
21323 V4HI_FTYPE_V4HI_SI_COUNT,
21324 V4DF_FTYPE_V4DF_V4DF,
21325 V4DF_FTYPE_V4DF_V4DI,
21326 V4SF_FTYPE_V4SF_V4SF,
21327 V4SF_FTYPE_V4SF_V4SF_SWAP,
21328 V4SF_FTYPE_V4SF_V4SI,
21329 V4SF_FTYPE_V4SF_V2SI,
21330 V4SF_FTYPE_V4SF_V2DF,
21331 V4SF_FTYPE_V4SF_DI,
21332 V4SF_FTYPE_V4SF_SI,
21333 V2DI_FTYPE_V2DI_V2DI,
21334 V2DI_FTYPE_V2DI_V2DI_COUNT,
21335 V2DI_FTYPE_V16QI_V16QI,
21336 V2DI_FTYPE_V4SI_V4SI,
21337 V2DI_FTYPE_V2DI_V16QI,
21338 V2DI_FTYPE_V2DF_V2DF,
21339 V2DI_FTYPE_V2DI_SI_COUNT,
21340 V2SI_FTYPE_V2SI_V2SI,
21341 V2SI_FTYPE_V2SI_V2SI_COUNT,
21342 V2SI_FTYPE_V4HI_V4HI,
21343 V2SI_FTYPE_V2SF_V2SF,
21344 V2SI_FTYPE_V2SI_SI_COUNT,
21345 V2DF_FTYPE_V2DF_V2DF,
21346 V2DF_FTYPE_V2DF_V2DF_SWAP,
21347 V2DF_FTYPE_V2DF_V4SF,
21348 V2DF_FTYPE_V2DF_V2DI,
21349 V2DF_FTYPE_V2DF_DI,
21350 V2DF_FTYPE_V2DF_SI,
21351 V2SF_FTYPE_V2SF_V2SF,
21352 V1DI_FTYPE_V1DI_V1DI,
21353 V1DI_FTYPE_V1DI_V1DI_COUNT,
21354 V1DI_FTYPE_V8QI_V8QI,
21355 V1DI_FTYPE_V2SI_V2SI,
21356 V1DI_FTYPE_V1DI_SI_COUNT,
21357 UINT64_FTYPE_UINT64_UINT64,
21358 UINT_FTYPE_UINT_UINT,
21359 UINT_FTYPE_UINT_USHORT,
21360 UINT_FTYPE_UINT_UCHAR,
21361 UINT16_FTYPE_UINT16_INT,
21362 UINT8_FTYPE_UINT8_INT,
21363 V8HI_FTYPE_V8HI_INT,
21364 V4SI_FTYPE_V4SI_INT,
21365 V4HI_FTYPE_V4HI_INT,
21366 V8SF_FTYPE_V8SF_INT,
21367 V4SI_FTYPE_V8SI_INT,
21368 V4SF_FTYPE_V8SF_INT,
21369 V2DF_FTYPE_V4DF_INT,
21370 V4DF_FTYPE_V4DF_INT,
21371 V4SF_FTYPE_V4SF_INT,
21372 V2DI_FTYPE_V2DI_INT,
21373 V2DI2TI_FTYPE_V2DI_INT,
21374 V2DF_FTYPE_V2DF_INT,
21375 V16QI_FTYPE_V16QI_V16QI_V16QI,
21376 V8SF_FTYPE_V8SF_V8SF_V8SF,
21377 V4DF_FTYPE_V4DF_V4DF_V4DF,
21378 V4SF_FTYPE_V4SF_V4SF_V4SF,
21379 V2DF_FTYPE_V2DF_V2DF_V2DF,
21380 V16QI_FTYPE_V16QI_V16QI_INT,
21381 V8SI_FTYPE_V8SI_V8SI_INT,
21382 V8SI_FTYPE_V8SI_V4SI_INT,
21383 V8HI_FTYPE_V8HI_V8HI_INT,
21384 V8SF_FTYPE_V8SF_V8SF_INT,
21385 V8SF_FTYPE_V8SF_V4SF_INT,
21386 V4SI_FTYPE_V4SI_V4SI_INT,
21387 V4DF_FTYPE_V4DF_V4DF_INT,
21388 V4DF_FTYPE_V4DF_V2DF_INT,
21389 V4SF_FTYPE_V4SF_V4SF_INT,
21390 V2DI_FTYPE_V2DI_V2DI_INT,
21391 V2DI2TI_FTYPE_V2DI_V2DI_INT,
21392 V1DI2DI_FTYPE_V1DI_V1DI_INT,
21393 V2DF_FTYPE_V2DF_V2DF_INT,
21394 V2DI_FTYPE_V2DI_UINT_UINT,
21395 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
21396 };
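/* Beyond the plain RESULT_FTYPE_ARGS scheme, some enumerators carry a
   trailing modifier telling the expander to treat the operands
   specially: _COUNT marks a shift-count operand (integer or vector),
   _SWAP exchanges the two operands before matching the insn, _VEC_MERGE
   requests the scalar operation that leaves the upper elements of the
   source untouched, and _PTEST covers the flag-reading ptest forms.
   The V2DI2TI and V1DI2DI spellings view a V2DI (resp. V1DI) value as
   TImode (resp. DImode) inside the insn.  This summary is inferred from
   how the tables below use the enumerators.  */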
21398 /* Special builtins with variable number of arguments. */
21399 static const struct builtin_description bdesc_special_args[] =
21400 {
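/* Each row is { isa mask, insn code, name, builtin code, comparison,
   function type }.  The mask gates registration on the enabled ISAs; a
   name of 0 registers no user-visible __builtin_ia32_* symbol, leaving
   the builtin reachable only through its IX86_BUILTIN_* code; and
   ~OPTION_MASK_ISA_64BIT appears to mean "no ISA requirement" rather
   than "32-bit only" -- def_builtin holds the authoritative
   interpretation of the mask.  */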
21401 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
21402 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
21404 /* MMX */
21405 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
21407 /* 3DNow! */
21408 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
21410 /* SSE */
21411 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21412 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21413 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21415 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21416 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21417 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21418 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21420 /* SSE or 3DNow!A */
21421 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21422 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
21424 /* SSE2 */
21425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
21429 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
21431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
21432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
21433 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21435 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21436 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21438 /* SSE3 */
21439 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21441 /* SSE4.1 */
21442 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
21444 /* SSE4A */
21445 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21446 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21448 /* AVX */
21449 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
21450 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
21451 { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
21453 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21454 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21455 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21456 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
21457 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
21459 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21460 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21461 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21462 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21463 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21464 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
21465 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21467 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
21468 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21469 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21471 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
21472 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
21473 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
21474 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
21475 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
21476 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
21477 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
21478 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
21479 };
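/* The table above collects the builtins with memory operands or other
   side effects (loads, stores, fences, vzero*); pure value operations
   live in bdesc_args below.  The AVX masked entries show the two
   shapes: maskload takes (const pointer, mask) and returns a vector,
   maskstore takes (pointer, mask, value) and returns void.  A sketch,
   assuming AVX and suitably typed p and m:

     v2df x = __builtin_ia32_maskloadpd (p, m);
     __builtin_ia32_maskstorepd (p, m, x);
*/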
21481 /* Builtins with variable number of arguments. */
21482 static const struct builtin_description bdesc_args[] =
21483 {
21484 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
21485 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
21486 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
21487 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
21488 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
21489 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
21490 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
21492 /* MMX */
21493 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21494 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21495 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21496 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21497 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21498 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21500 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21501 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21502 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21503 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21504 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21505 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21506 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21507 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21509 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21510 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21512 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21513 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21514 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21515 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21517 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21518 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21519 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21520 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21521 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21522 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21524 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21525 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21526 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21527 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21528 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21529 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21531 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21532 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
21533 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21535 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
21537 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21538 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21539 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21540 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21541 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21542 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21544 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21545 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21546 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21547 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21548 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21549 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21551 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21552 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21553 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21554 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
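/* Each MMX shift is entered twice with the same insn code: the ...i
   form (e.g. __builtin_ia32_psllwi) takes an int count and is typed
   with _SI_COUNT, while the plain form (__builtin_ia32_psllw) takes the
   count in an MMX register and is typed with a vector _COUNT.  The
   _COUNT modifier lets one expander accept either shape.  Roughly, for
   suitably typed a and cnt:

     v4hi r1 = __builtin_ia32_psllwi (a, 3);      int count
     v4hi r2 = __builtin_ia32_psllw (a, cnt);     v4hi count
*/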
21556 /* 3DNow! */
21557 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21558 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21559 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21560 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21562 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21563 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21564 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21565 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21566 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21567 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21568 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21569 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21570 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21571 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21572 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21573 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21574 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21575 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21576 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21578 /* 3DNow!A */
21579 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21580 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21581 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21582 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21583 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21584 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21586 /* SSE */
21587 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
21588 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21589 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21590 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21591 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21592 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21593 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21594 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21595 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21596 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21597 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21598 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21600 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21602 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21603 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21604 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21605 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21606 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21607 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21608 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21609 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21611 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21612 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21613 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21614 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21615 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21616 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21617 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21618 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21619 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21620 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21621 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21622 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21623 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21624 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21625 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21626 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21627 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21628 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21629 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21630 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21631 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21632 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
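/* Several of the compare builtins have no SSE compare of their own:
   the gt/ge and ngt/nge forms reuse the lt/le and unge/ungt rows with
   the _SWAP modifier, i.e. the expander exchanges the operands, so
   __builtin_ia32_cmpgtps (a, b) is in effect emitted as the LT compare
   of (b, a).  */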
21634 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21635 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21636 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21637 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21639 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21640 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21641 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21642 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21644 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21645 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21646 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21647 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21648 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21650 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
21651 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
21652 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
21654 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
21656 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21657 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21658 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21660 /* SSE MMX or 3DNow!A */
21661 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21662 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21663 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21665 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21666 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21667 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21668 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21670 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
21671 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
21673 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
21675 /* SSE2 */
21676 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21678 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
21679 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
21680 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
21681 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
21682 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
21684 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21685 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21686 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
21687 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21688 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21690 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
21692 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21693 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21694 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21695 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21697 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21698 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
21699 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21701 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21702 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21703 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21704 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21705 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21706 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21707 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21708 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21710 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21711 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21712 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21713 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21714 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21715 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21716 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21717 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21718 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21719 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21720 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21721 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21722 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21723 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21724 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21725 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21726 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21727 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21728 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21729 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21731 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21732 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21733 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21734 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21736 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21737 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21738 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21739 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21741 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21742 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21743 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21745 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
21747 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21748 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21749 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21750 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21751 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21752 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21753 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21754 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21756 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21757 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21758 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21759 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21760 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21761 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21762 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21763 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21765 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21766 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21768 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21769 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21770 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21771 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21773 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21774 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21776 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21777 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21778 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21779 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21780 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21781 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21783 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21784 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21785 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21786 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21788 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21789 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21790 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21791 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21792 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21793 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21794 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21795 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21797 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21798 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21799 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21801 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21802 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
21804 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
21805 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21807 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
21809 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
21810 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
21811 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
21812 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
21814 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21815 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21816 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21817 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21818 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21819 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21820 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21822 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21823 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21824 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21825 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21826 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21827 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21828 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21830 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21831 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21832 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21833 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
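/* The pslldqi128/psrldqi128 rows are unlike the element shifts around
   them: ashlti3/lshrti3 shift the entire 128-bit value, so the operand
   is viewed as TImode and the rows carry V2DI2TI_FTYPE_V2DI_INT --
   V2DI at the builtin boundary with a TImode detour inside.  */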
21835 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
21836 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21837 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
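/* A trailing _INT in the type names here (and for shufps, shufpd and
   palignr) denotes an immediate selector rather than a run-time
   operand.  Illustratively, assuming SSE2 and a suitably typed v:

     v4si r = __builtin_ia32_pshufd (v, 0x1b);

   where the selector 0x1b picks elements 3,2,1,0 and so reverses the
   vector.  */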
21839 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
21841 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
21842 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
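/* The two rows above carry no assembler-level name: a 0 in the name
   slot means IX86_BUILTIN_FABSQ and IX86_BUILTIN_COPYSIGNQ are not
   exposed as __builtin_ia32_* functions, and are presumably wired up
   elsewhere as the __float128 fabs/copysign builtins.  */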
21844 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21846 /* SSE2 MMX */
21847 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21848 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21850 /* SSE3 */
21851 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21852 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21854 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21855 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21856 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21857 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21858 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21859 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21861 /* SSSE3 */
21862 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
21863 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
21864 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21865 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
21866 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
21867 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21869 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21870 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21871 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21872 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21873 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21874 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21875 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21876 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21877 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21878 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21879 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21880 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21881 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
21882 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
21883 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21884 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21885 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21886 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21887 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21888 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21889 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21890 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21891 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21892 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21894 /* SSSE3. */
21895 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
21896 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
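/* Editorial note: the palignr builtins take the shift amount as a *bit*
   count, so tmmintrin.h scales the intrinsic's byte offset, roughly:

     #define _mm_alignr_epi8(X, Y, N) \
       ((__m128i) __builtin_ia32_palignr128 ((__v2di)(X), (__v2di)(Y), (N) * 8))
*/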
21898 /* SSE4.1 */
21899 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21900 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21901 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
21902 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
21903 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21904 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21905 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21906 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
21907 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
21908 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
21910 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21911 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21912 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21913 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21914 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21915 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21916 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21917 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21918 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21919 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21920 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21921 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21922 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21924 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21925 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21926 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21927 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21928 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21929 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21930 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21931 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21932 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21933 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21934 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21935 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
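/* Editorial note: rows that use generic named patterns (smaxv16qi3,
   uminv4si3, mulv4si3, ...) share their expander with the vectorizer;
   smmintrin.h reaches the same instruction through the builtin, roughly:

     _mm_max_epi8 (x, y)  ==>  (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) x, (__v16qi) y)
*/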
21937 /* SSE4.1 and SSE5 */
21938 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21939 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21940 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21941 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21943 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21944 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21945 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
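/* Editorial note: the three ptest rows share CODE_FOR_sse4_1_ptest; the
   comparison-code column selects which flag of PTEST is read (EQ -> ZF,
   LTU -> CF, GTU -> neither ZF nor CF).  smmintrin.h wraps them as, e.g.,

     _mm_testz_si128 (m, v)  ==>  __builtin_ia32_ptestz128 (m, v)
*/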
21947 /* SSE4.2 */
21948 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21949 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
21950 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
21951 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
21952 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
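/* Editorial example: the crc32 builtins fold one more chunk into a
   running CRC-32C value; smmintrin.h wraps them roughly as

     unsigned int
     _mm_crc32_u8 (unsigned int __C, unsigned char __V)
     {
       return __builtin_ia32_crc32qi (__C, __V);
     }
*/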
21954 /* SSE4A */
21955 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
21956 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
21957 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
21958 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21960 /* AES */
21961 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
21962 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21964 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21965 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21966 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21967 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21969 /* PCLMUL */
21970 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
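/* Editorial note: the AES and PCLMUL rows carry a null name, so the
   generic registration loop (which skips entries with name == 0) leaves
   them alone; they are registered separately under their own ISA masks,
   presumably along the lines of

     def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
                        V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
*/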
21972 /* AVX */
21973 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21974 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21975 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21976 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21977 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21978 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21979 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21980 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21981 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21982 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21983 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21984 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21985 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21986 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21987 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21988 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21989 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21990 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21991 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21992 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21993 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21994 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21995 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21996 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21997 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21998 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
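/* Editorial example: avxintrin.h wraps these 256-bit rows directly,
   e.g., roughly,

     _mm256_add_pd (a, b)  ==>  (__m256d) __builtin_ia32_addpd256 ((__v4df) a, (__v4df) b)
*/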
22000 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
22001 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
22002 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
22003 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
22005 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22006 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22007 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
22008 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
22009 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22010 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22011 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22012 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22013 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22014 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22015 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22016 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22017 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22018 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
22019 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
22020 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
22021 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
22022 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
22023 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
22024 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
22025 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
22026 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
22027 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
22028 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
22029 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22030 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22031 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
22032 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
22033 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
22034 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
22035 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
22036 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
22037 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
22038 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
22040 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22041 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22042 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
22044 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
22045 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22046 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22047 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22048 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22050 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22052 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
22053 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
22055 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22056 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22057 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22058 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22060 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
22061 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
22062 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
22063 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
22064 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
22065 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
22067 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22068 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22069 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22070 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22071 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22072 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22073 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22074 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22075 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22076 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22077 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22078 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22079 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22080 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22081 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
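/* Editorial note: the vtest/ptest256 rows reuse the ptest flag scheme
   described above; avxintrin.h exposes e.g. the 256-bit zero test
   roughly as

     _mm256_testz_si256 (m, v)  ==>  __builtin_ia32_ptestz256 (m, v)
*/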
22083 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
22084 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
22085 };
22087 /* SSE5 */
22088 enum multi_arg_type {
22089 MULTI_ARG_UNKNOWN,
22090 MULTI_ARG_3_SF,
22091 MULTI_ARG_3_DF,
22092 MULTI_ARG_3_DI,
22093 MULTI_ARG_3_SI,
22094 MULTI_ARG_3_SI_DI,
22095 MULTI_ARG_3_HI,
22096 MULTI_ARG_3_HI_SI,
22097 MULTI_ARG_3_QI,
22098 MULTI_ARG_3_PERMPS,
22099 MULTI_ARG_3_PERMPD,
22100 MULTI_ARG_2_SF,
22101 MULTI_ARG_2_DF,
22102 MULTI_ARG_2_DI,
22103 MULTI_ARG_2_SI,
22104 MULTI_ARG_2_HI,
22105 MULTI_ARG_2_QI,
22106 MULTI_ARG_2_DI_IMM,
22107 MULTI_ARG_2_SI_IMM,
22108 MULTI_ARG_2_HI_IMM,
22109 MULTI_ARG_2_QI_IMM,
22110 MULTI_ARG_2_SF_CMP,
22111 MULTI_ARG_2_DF_CMP,
22112 MULTI_ARG_2_DI_CMP,
22113 MULTI_ARG_2_SI_CMP,
22114 MULTI_ARG_2_HI_CMP,
22115 MULTI_ARG_2_QI_CMP,
22116 MULTI_ARG_2_DI_TF,
22117 MULTI_ARG_2_SI_TF,
22118 MULTI_ARG_2_HI_TF,
22119 MULTI_ARG_2_QI_TF,
22120 MULTI_ARG_2_SF_TF,
22121 MULTI_ARG_2_DF_TF,
22122 MULTI_ARG_1_SF,
22123 MULTI_ARG_1_DF,
22124 MULTI_ARG_1_DI,
22125 MULTI_ARG_1_SI,
22126 MULTI_ARG_1_HI,
22127 MULTI_ARG_1_QI,
22128 MULTI_ARG_1_SI_DI,
22129 MULTI_ARG_1_HI_DI,
22130 MULTI_ARG_1_HI_SI,
22131 MULTI_ARG_1_QI_DI,
22132 MULTI_ARG_1_QI_SI,
22133 MULTI_ARG_1_QI_HI,
22134 MULTI_ARG_1_PH2PS,
22135 MULTI_ARG_1_PS2PH
22136 };
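/* Editorial note: each MULTI_ARG_* value is decoded when the builtins
   are registered into a concrete function type, and again at expansion
   time to pick operand modes; MULTI_ARG_3_SF, for instance, stands for
   roughly

     __m128 fn (__m128, __m128, __m128);
*/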
22138 static const struct builtin_description bdesc_multi_arg[] =
22139 {
22140 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22141 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22142 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22143 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22144 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22145 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22146 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22147 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22148 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22149 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22150 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22151 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22152 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22153 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22154 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22155 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
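/* Editorial example: the fused rows above compute a multiply-add in one
   step, e.g. roughly

     __builtin_ia32_fmaddps (a, b, c)  ==>  a * b + c   (per element)

   (SSE5 was AMD's proposal, later superseded by XOP/FMA4.)  */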
22156 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
22157 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
22158 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
22159 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
22160 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi", IX86_BUILTIN_PCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
22161 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
22162 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
22163 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
22164 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, UNKNOWN, (int)MULTI_ARG_3_PERMPS },
22165 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, UNKNOWN, (int)MULTI_ARG_3_PERMPD },
22166 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
22167 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
22168 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22169 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22170 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
22171 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
22172 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22173 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22174 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22175 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22176 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22177 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22178 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22179 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
22180 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
22181 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
22182 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
22183 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
22184 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
22185 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
22186 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22187 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
22188 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
22189 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
22190 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22191 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
22192 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
22193 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
22194 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
22195 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
22196 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
22197 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
22198 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int)MULTI_ARG_1_PH2PS },
22199 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int)MULTI_ARG_1_PS2PH },
22200 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22201 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
22202 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
22203 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22204 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
22205 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22206 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22207 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
22208 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
22209 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22210 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
22211 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22212 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22213 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22214 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22216 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
22217 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22218 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22219 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
22220 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
22221 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
22222 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
22223 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22224 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22225 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22226 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22227 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22228 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22229 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22230 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22231 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
22233 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
22234 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22235 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22236 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
22237 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
22238 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
22239 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
22240 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22241 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22242 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22243 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22244 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22245 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22246 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22247 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22248 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22250 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
22251 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22252 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22253 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
22254 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
22255 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
22256 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
22257 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22258 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22259 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22260 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22261 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22262 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22263 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22264 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22265 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
22267 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
22268 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22269 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22270 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
22271 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
22272 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
22273 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
22274 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22275 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22276 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22277 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22278 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22279 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22280 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22281 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22282 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22284 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
22285 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22286 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22287 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
22288 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
22289 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
22290 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
22292 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
22293 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22294 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22295 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
22296 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
22297 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
22298 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
22300 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
22301 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22302 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22303 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
22304 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
22305 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
22306 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
22308 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22309 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22310 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22311 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
22312 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
22313 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
22314 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
22316 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3, "__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
22317 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3, "__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22318 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3, "__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22319 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
22320 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
22321 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
22322 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
22324 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
22325 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22326 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22327 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
22328 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
22329 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
22330 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
22332 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
22333 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22334 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22335 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
22336 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
22337 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
22338 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
22340 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22341 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22342 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22343 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
22344 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
22345 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
22346 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
22348 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
22349 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
22350 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
22351 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
22352 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
22353 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
22354 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
22355 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
22357 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22358 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22359 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22360 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22361 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22362 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22363 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22364 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22366 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22367 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22368 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22369 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22370 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22371 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22372 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22373 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22376 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
22377 not in the current target ISA, so that the user can compile particular
22378 modules with target-specific options that differ from the command-line
22379 options. */
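/* A concrete illustration (a sketch, not part of this file): with
function-specific target support, a module compiled with plain -msse2
can still define

unsigned int crc (unsigned int c, unsigned char v)
__attribute__((target ("sse4.2")));

unsigned int crc (unsigned int c, unsigned char v)
{
return __builtin_ia32_crc32qi (c, v);
}

so the SSE4.2 builtin must already be registered here even though the
command-line ISA does not include it. */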
22380 static void
22381 ix86_init_mmx_sse_builtins (void)
22382 {
22383 const struct builtin_description * d;
22384 size_t i;
22386 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
22387 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
22388 tree V1DI_type_node
22389 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
22390 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
22391 tree V2DI_type_node
22392 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
22393 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
22394 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
22395 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
22396 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
22397 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
22398 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
22400 tree pchar_type_node = build_pointer_type (char_type_node);
22401 tree pcchar_type_node
22402 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
22403 tree pfloat_type_node = build_pointer_type (float_type_node);
22404 tree pcfloat_type_node
22405 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
22406 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
22407 tree pcv2sf_type_node
22408 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
22409 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
22410 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
22412 /* Comparisons. */
22413 tree int_ftype_v4sf_v4sf
22414 = build_function_type_list (integer_type_node,
22415 V4SF_type_node, V4SF_type_node, NULL_TREE);
22416 tree v4si_ftype_v4sf_v4sf
22417 = build_function_type_list (V4SI_type_node,
22418 V4SF_type_node, V4SF_type_node, NULL_TREE);
22419 /* MMX/SSE/integer conversions. */
22420 tree int_ftype_v4sf
22421 = build_function_type_list (integer_type_node,
22422 V4SF_type_node, NULL_TREE);
22423 tree int64_ftype_v4sf
22424 = build_function_type_list (long_long_integer_type_node,
22425 V4SF_type_node, NULL_TREE);
22426 tree int_ftype_v8qi
22427 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
22428 tree v4sf_ftype_v4sf_int
22429 = build_function_type_list (V4SF_type_node,
22430 V4SF_type_node, integer_type_node, NULL_TREE);
22431 tree v4sf_ftype_v4sf_int64
22432 = build_function_type_list (V4SF_type_node,
22433 V4SF_type_node, long_long_integer_type_node,
22434 NULL_TREE);
22435 tree v4sf_ftype_v4sf_v2si
22436 = build_function_type_list (V4SF_type_node,
22437 V4SF_type_node, V2SI_type_node, NULL_TREE);
22439 /* Miscellaneous. */
22440 tree v8qi_ftype_v4hi_v4hi
22441 = build_function_type_list (V8QI_type_node,
22442 V4HI_type_node, V4HI_type_node, NULL_TREE);
22443 tree v4hi_ftype_v2si_v2si
22444 = build_function_type_list (V4HI_type_node,
22445 V2SI_type_node, V2SI_type_node, NULL_TREE);
22446 tree v4sf_ftype_v4sf_v4sf_int
22447 = build_function_type_list (V4SF_type_node,
22448 V4SF_type_node, V4SF_type_node,
22449 integer_type_node, NULL_TREE);
22450 tree v2si_ftype_v4hi_v4hi
22451 = build_function_type_list (V2SI_type_node,
22452 V4HI_type_node, V4HI_type_node, NULL_TREE);
22453 tree v4hi_ftype_v4hi_int
22454 = build_function_type_list (V4HI_type_node,
22455 V4HI_type_node, integer_type_node, NULL_TREE);
22456 tree v2si_ftype_v2si_int
22457 = build_function_type_list (V2SI_type_node,
22458 V2SI_type_node, integer_type_node, NULL_TREE);
22459 tree v1di_ftype_v1di_int
22460 = build_function_type_list (V1DI_type_node,
22461 V1DI_type_node, integer_type_node, NULL_TREE);
22463 tree void_ftype_void
22464 = build_function_type (void_type_node, void_list_node);
22465 tree void_ftype_unsigned
22466 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22467 tree void_ftype_unsigned_unsigned
22468 = build_function_type_list (void_type_node, unsigned_type_node,
22469 unsigned_type_node, NULL_TREE);
22470 tree void_ftype_pcvoid_unsigned_unsigned
22471 = build_function_type_list (void_type_node, const_ptr_type_node,
22472 unsigned_type_node, unsigned_type_node,
22473 NULL_TREE);
22474 tree unsigned_ftype_void
22475 = build_function_type (unsigned_type_node, void_list_node);
22476 tree v2si_ftype_v4sf
22477 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
22478 /* Loads/stores. */
22479 tree void_ftype_v8qi_v8qi_pchar
22480 = build_function_type_list (void_type_node,
22481 V8QI_type_node, V8QI_type_node,
22482 pchar_type_node, NULL_TREE);
22483 tree v4sf_ftype_pcfloat
22484 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
22485 tree v4sf_ftype_v4sf_pcv2sf
22486 = build_function_type_list (V4SF_type_node,
22487 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
22488 tree void_ftype_pv2sf_v4sf
22489 = build_function_type_list (void_type_node,
22490 pv2sf_type_node, V4SF_type_node, NULL_TREE);
22491 tree void_ftype_pfloat_v4sf
22492 = build_function_type_list (void_type_node,
22493 pfloat_type_node, V4SF_type_node, NULL_TREE);
22494 tree void_ftype_pdi_di
22495 = build_function_type_list (void_type_node,
22496 pdi_type_node, long_long_unsigned_type_node,
22497 NULL_TREE);
22498 tree void_ftype_pv2di_v2di
22499 = build_function_type_list (void_type_node,
22500 pv2di_type_node, V2DI_type_node, NULL_TREE);
22501 /* Normal vector unops. */
22502 tree v4sf_ftype_v4sf
22503 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
22504 tree v16qi_ftype_v16qi
22505 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
22506 tree v8hi_ftype_v8hi
22507 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
22508 tree v4si_ftype_v4si
22509 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
22510 tree v8qi_ftype_v8qi
22511 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
22512 tree v4hi_ftype_v4hi
22513 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
22515 /* Normal vector binops. */
22516 tree v4sf_ftype_v4sf_v4sf
22517 = build_function_type_list (V4SF_type_node,
22518 V4SF_type_node, V4SF_type_node, NULL_TREE);
22519 tree v8qi_ftype_v8qi_v8qi
22520 = build_function_type_list (V8QI_type_node,
22521 V8QI_type_node, V8QI_type_node, NULL_TREE);
22522 tree v4hi_ftype_v4hi_v4hi
22523 = build_function_type_list (V4HI_type_node,
22524 V4HI_type_node, V4HI_type_node, NULL_TREE);
22525 tree v2si_ftype_v2si_v2si
22526 = build_function_type_list (V2SI_type_node,
22527 V2SI_type_node, V2SI_type_node, NULL_TREE);
22528 tree v1di_ftype_v1di_v1di
22529 = build_function_type_list (V1DI_type_node,
22530 V1DI_type_node, V1DI_type_node, NULL_TREE);
22531 tree v1di_ftype_v1di_v1di_int
22532 = build_function_type_list (V1DI_type_node,
22533 V1DI_type_node, V1DI_type_node,
22534 integer_type_node, NULL_TREE);
22535 tree v2si_ftype_v2sf
22536 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
22537 tree v2sf_ftype_v2si
22538 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
22539 tree v2si_ftype_v2si
22540 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
22541 tree v2sf_ftype_v2sf
22542 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
22543 tree v2sf_ftype_v2sf_v2sf
22544 = build_function_type_list (V2SF_type_node,
22545 V2SF_type_node, V2SF_type_node, NULL_TREE);
22546 tree v2si_ftype_v2sf_v2sf
22547 = build_function_type_list (V2SI_type_node,
22548 V2SF_type_node, V2SF_type_node, NULL_TREE);
22549 tree pint_type_node = build_pointer_type (integer_type_node);
22550 tree pdouble_type_node = build_pointer_type (double_type_node);
22551 tree pcdouble_type_node = build_pointer_type (
22552 build_type_variant (double_type_node, 1, 0));
22553 tree int_ftype_v2df_v2df
22554 = build_function_type_list (integer_type_node,
22555 V2DF_type_node, V2DF_type_node, NULL_TREE);
22557 tree void_ftype_pcvoid
22558 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
22559 tree v4sf_ftype_v4si
22560 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
22561 tree v4si_ftype_v4sf
22562 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
22563 tree v2df_ftype_v4si
22564 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
22565 tree v4si_ftype_v2df
22566 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
22567 tree v4si_ftype_v2df_v2df
22568 = build_function_type_list (V4SI_type_node,
22569 V2DF_type_node, V2DF_type_node, NULL_TREE);
22570 tree v2si_ftype_v2df
22571 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
22572 tree v4sf_ftype_v2df
22573 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
22574 tree v2df_ftype_v2si
22575 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
22576 tree v2df_ftype_v4sf
22577 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
22578 tree int_ftype_v2df
22579 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
22580 tree int64_ftype_v2df
22581 = build_function_type_list (long_long_integer_type_node,
22582 V2DF_type_node, NULL_TREE);
22583 tree v2df_ftype_v2df_int
22584 = build_function_type_list (V2DF_type_node,
22585 V2DF_type_node, integer_type_node, NULL_TREE);
22586 tree v2df_ftype_v2df_int64
22587 = build_function_type_list (V2DF_type_node,
22588 V2DF_type_node, long_long_integer_type_node,
22589 NULL_TREE);
22590 tree v4sf_ftype_v4sf_v2df
22591 = build_function_type_list (V4SF_type_node,
22592 V4SF_type_node, V2DF_type_node, NULL_TREE);
22593 tree v2df_ftype_v2df_v4sf
22594 = build_function_type_list (V2DF_type_node,
22595 V2DF_type_node, V4SF_type_node, NULL_TREE);
22596 tree v2df_ftype_v2df_v2df_int
22597 = build_function_type_list (V2DF_type_node,
22598 V2DF_type_node, V2DF_type_node,
22599 integer_type_node,
22600 NULL_TREE);
22601 tree v2df_ftype_v2df_pcdouble
22602 = build_function_type_list (V2DF_type_node,
22603 V2DF_type_node, pcdouble_type_node, NULL_TREE);
22604 tree void_ftype_pdouble_v2df
22605 = build_function_type_list (void_type_node,
22606 pdouble_type_node, V2DF_type_node, NULL_TREE);
22607 tree void_ftype_pint_int
22608 = build_function_type_list (void_type_node,
22609 pint_type_node, integer_type_node, NULL_TREE);
22610 tree void_ftype_v16qi_v16qi_pchar
22611 = build_function_type_list (void_type_node,
22612 V16QI_type_node, V16QI_type_node,
22613 pchar_type_node, NULL_TREE);
22614 tree v2df_ftype_pcdouble
22615 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
22616 tree v2df_ftype_v2df_v2df
22617 = build_function_type_list (V2DF_type_node,
22618 V2DF_type_node, V2DF_type_node, NULL_TREE);
22619 tree v16qi_ftype_v16qi_v16qi
22620 = build_function_type_list (V16QI_type_node,
22621 V16QI_type_node, V16QI_type_node, NULL_TREE);
22622 tree v8hi_ftype_v8hi_v8hi
22623 = build_function_type_list (V8HI_type_node,
22624 V8HI_type_node, V8HI_type_node, NULL_TREE);
22625 tree v4si_ftype_v4si_v4si
22626 = build_function_type_list (V4SI_type_node,
22627 V4SI_type_node, V4SI_type_node, NULL_TREE);
22628 tree v2di_ftype_v2di_v2di
22629 = build_function_type_list (V2DI_type_node,
22630 V2DI_type_node, V2DI_type_node, NULL_TREE);
22631 tree v2di_ftype_v2df_v2df
22632 = build_function_type_list (V2DI_type_node,
22633 V2DF_type_node, V2DF_type_node, NULL_TREE);
22634 tree v2df_ftype_v2df
22635 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
22636 tree v2di_ftype_v2di_int
22637 = build_function_type_list (V2DI_type_node,
22638 V2DI_type_node, integer_type_node, NULL_TREE);
22639 tree v2di_ftype_v2di_v2di_int
22640 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22641 V2DI_type_node, integer_type_node, NULL_TREE);
22642 tree v4si_ftype_v4si_int
22643 = build_function_type_list (V4SI_type_node,
22644 V4SI_type_node, integer_type_node, NULL_TREE);
22645 tree v8hi_ftype_v8hi_int
22646 = build_function_type_list (V8HI_type_node,
22647 V8HI_type_node, integer_type_node, NULL_TREE);
22648 tree v4si_ftype_v8hi_v8hi
22649 = build_function_type_list (V4SI_type_node,
22650 V8HI_type_node, V8HI_type_node, NULL_TREE);
22651 tree v1di_ftype_v8qi_v8qi
22652 = build_function_type_list (V1DI_type_node,
22653 V8QI_type_node, V8QI_type_node, NULL_TREE);
22654 tree v1di_ftype_v2si_v2si
22655 = build_function_type_list (V1DI_type_node,
22656 V2SI_type_node, V2SI_type_node, NULL_TREE);
22657 tree v2di_ftype_v16qi_v16qi
22658 = build_function_type_list (V2DI_type_node,
22659 V16QI_type_node, V16QI_type_node, NULL_TREE);
22660 tree v2di_ftype_v4si_v4si
22661 = build_function_type_list (V2DI_type_node,
22662 V4SI_type_node, V4SI_type_node, NULL_TREE);
22663 tree int_ftype_v16qi
22664 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
22665 tree v16qi_ftype_pcchar
22666 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
22667 tree void_ftype_pchar_v16qi
22668 = build_function_type_list (void_type_node,
22669 pchar_type_node, V16QI_type_node, NULL_TREE);
22671 tree v2di_ftype_v2di_unsigned_unsigned
22672 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22673 unsigned_type_node, unsigned_type_node,
22674 NULL_TREE);
22675 tree v2di_ftype_v2di_v2di_unsigned_unsigned
22676 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
22677 unsigned_type_node, unsigned_type_node,
22678 NULL_TREE);
22679 tree v2di_ftype_v2di_v16qi
22680 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
22681 NULL_TREE);
22682 tree v2df_ftype_v2df_v2df_v2df
22683 = build_function_type_list (V2DF_type_node,
22684 V2DF_type_node, V2DF_type_node,
22685 V2DF_type_node, NULL_TREE);
22686 tree v4sf_ftype_v4sf_v4sf_v4sf
22687 = build_function_type_list (V4SF_type_node,
22688 V4SF_type_node, V4SF_type_node,
22689 V4SF_type_node, NULL_TREE);
22690 tree v8hi_ftype_v16qi
22691 = build_function_type_list (V8HI_type_node, V16QI_type_node,
22692 NULL_TREE);
22693 tree v4si_ftype_v16qi
22694 = build_function_type_list (V4SI_type_node, V16QI_type_node,
22695 NULL_TREE);
22696 tree v2di_ftype_v16qi
22697 = build_function_type_list (V2DI_type_node, V16QI_type_node,
22698 NULL_TREE);
22699 tree v4si_ftype_v8hi
22700 = build_function_type_list (V4SI_type_node, V8HI_type_node,
22701 NULL_TREE);
22702 tree v2di_ftype_v8hi
22703 = build_function_type_list (V2DI_type_node, V8HI_type_node,
22704 NULL_TREE);
22705 tree v2di_ftype_v4si
22706 = build_function_type_list (V2DI_type_node, V4SI_type_node,
22707 NULL_TREE);
22708 tree v2di_ftype_pv2di
22709 = build_function_type_list (V2DI_type_node, pv2di_type_node,
22710 NULL_TREE);
22711 tree v16qi_ftype_v16qi_v16qi_int
22712 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22713 V16QI_type_node, integer_type_node,
22714 NULL_TREE);
22715 tree v16qi_ftype_v16qi_v16qi_v16qi
22716 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22717 V16QI_type_node, V16QI_type_node,
22718 NULL_TREE);
22719 tree v8hi_ftype_v8hi_v8hi_int
22720 = build_function_type_list (V8HI_type_node, V8HI_type_node,
22721 V8HI_type_node, integer_type_node,
22722 NULL_TREE);
22723 tree v4si_ftype_v4si_v4si_int
22724 = build_function_type_list (V4SI_type_node, V4SI_type_node,
22725 V4SI_type_node, integer_type_node,
22726 NULL_TREE);
22727 tree int_ftype_v2di_v2di
22728 = build_function_type_list (integer_type_node,
22729 V2DI_type_node, V2DI_type_node,
22730 NULL_TREE);
22731 tree int_ftype_v16qi_int_v16qi_int_int
22732 = build_function_type_list (integer_type_node,
22733 V16QI_type_node,
22734 integer_type_node,
22735 V16QI_type_node,
22736 integer_type_node,
22737 integer_type_node,
22738 NULL_TREE);
22739 tree v16qi_ftype_v16qi_int_v16qi_int_int
22740 = build_function_type_list (V16QI_type_node,
22741 V16QI_type_node,
22742 integer_type_node,
22743 V16QI_type_node,
22744 integer_type_node,
22745 integer_type_node,
22746 NULL_TREE);
22747 tree int_ftype_v16qi_v16qi_int
22748 = build_function_type_list (integer_type_node,
22749 V16QI_type_node,
22750 V16QI_type_node,
22751 integer_type_node,
22752 NULL_TREE);
22754 /* SSE5 instructions. */
22755 tree v2di_ftype_v2di_v2di_v2di
22756 = build_function_type_list (V2DI_type_node,
22757 V2DI_type_node,
22758 V2DI_type_node,
22759 V2DI_type_node,
22760 NULL_TREE);
22762 tree v4si_ftype_v4si_v4si_v4si
22763 = build_function_type_list (V4SI_type_node,
22764 V4SI_type_node,
22765 V4SI_type_node,
22766 V4SI_type_node,
22767 NULL_TREE);
22769 tree v4si_ftype_v4si_v4si_v2di
22770 = build_function_type_list (V4SI_type_node,
22771 V4SI_type_node,
22772 V4SI_type_node,
22773 V2DI_type_node,
22774 NULL_TREE);
22776 tree v8hi_ftype_v8hi_v8hi_v8hi
22777 = build_function_type_list (V8HI_type_node,
22778 V8HI_type_node,
22779 V8HI_type_node,
22780 V8HI_type_node,
22781 NULL_TREE);
22783 tree v8hi_ftype_v8hi_v8hi_v4si
22784 = build_function_type_list (V8HI_type_node,
22785 V8HI_type_node,
22786 V8HI_type_node,
22787 V4SI_type_node,
22788 NULL_TREE);
22790 tree v2df_ftype_v2df_v2df_v16qi
22791 = build_function_type_list (V2DF_type_node,
22792 V2DF_type_node,
22793 V2DF_type_node,
22794 V16QI_type_node,
22795 NULL_TREE);
22797 tree v4sf_ftype_v4sf_v4sf_v16qi
22798 = build_function_type_list (V4SF_type_node,
22799 V4SF_type_node,
22800 V4SF_type_node,
22801 V16QI_type_node,
22802 NULL_TREE);
22804 tree v2di_ftype_v2di_si
22805 = build_function_type_list (V2DI_type_node,
22806 V2DI_type_node,
22807 integer_type_node,
22808 NULL_TREE);
22810 tree v4si_ftype_v4si_si
22811 = build_function_type_list (V4SI_type_node,
22812 V4SI_type_node,
22813 integer_type_node,
22814 NULL_TREE);
22816 tree v8hi_ftype_v8hi_si
22817 = build_function_type_list (V8HI_type_node,
22818 V8HI_type_node,
22819 integer_type_node,
22820 NULL_TREE);
22822 tree v16qi_ftype_v16qi_si
22823 = build_function_type_list (V16QI_type_node,
22824 V16QI_type_node,
22825 integer_type_node,
22826 NULL_TREE);
22827 tree v4sf_ftype_v4hi
22828 = build_function_type_list (V4SF_type_node,
22829 V4HI_type_node,
22830 NULL_TREE);
22832 tree v4hi_ftype_v4sf
22833 = build_function_type_list (V4HI_type_node,
22834 V4SF_type_node,
22835 NULL_TREE);
22837 tree v2di_ftype_v2di
22838 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
22840 tree v16qi_ftype_v8hi_v8hi
22841 = build_function_type_list (V16QI_type_node,
22842 V8HI_type_node, V8HI_type_node,
22843 NULL_TREE);
22844 tree v8hi_ftype_v4si_v4si
22845 = build_function_type_list (V8HI_type_node,
22846 V4SI_type_node, V4SI_type_node,
22847 NULL_TREE);
22848 tree v8hi_ftype_v16qi_v16qi
22849 = build_function_type_list (V8HI_type_node,
22850 V16QI_type_node, V16QI_type_node,
22851 NULL_TREE);
22852 tree v4hi_ftype_v8qi_v8qi
22853 = build_function_type_list (V4HI_type_node,
22854 V8QI_type_node, V8QI_type_node,
22855 NULL_TREE);
22856 tree unsigned_ftype_unsigned_uchar
22857 = build_function_type_list (unsigned_type_node,
22858 unsigned_type_node,
22859 unsigned_char_type_node,
22860 NULL_TREE);
22861 tree unsigned_ftype_unsigned_ushort
22862 = build_function_type_list (unsigned_type_node,
22863 unsigned_type_node,
22864 short_unsigned_type_node,
22865 NULL_TREE);
22866 tree unsigned_ftype_unsigned_unsigned
22867 = build_function_type_list (unsigned_type_node,
22868 unsigned_type_node,
22869 unsigned_type_node,
22870 NULL_TREE);
22871 tree uint64_ftype_uint64_uint64
22872 = build_function_type_list (long_long_unsigned_type_node,
22873 long_long_unsigned_type_node,
22874 long_long_unsigned_type_node,
22875 NULL_TREE);
22876 tree float_ftype_float
22877 = build_function_type_list (float_type_node,
22878 float_type_node,
22879 NULL_TREE);
22881 /* AVX builtins */
22882 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
22883 V32QImode);
22884 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
22885 V8SImode);
22886 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
22887 V8SFmode);
22888 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
22889 V4DImode);
22890 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
22891 V4DFmode);
22892 tree v8sf_ftype_v8sf
22893 = build_function_type_list (V8SF_type_node,
22894 V8SF_type_node,
22895 NULL_TREE);
22896 tree v8si_ftype_v8sf
22897 = build_function_type_list (V8SI_type_node,
22898 V8SF_type_node,
22899 NULL_TREE);
22900 tree v8sf_ftype_v8si
22901 = build_function_type_list (V8SF_type_node,
22902 V8SI_type_node,
22903 NULL_TREE);
22904 tree v4si_ftype_v4df
22905 = build_function_type_list (V4SI_type_node,
22906 V4DF_type_node,
22907 NULL_TREE);
22908 tree v4df_ftype_v4df
22909 = build_function_type_list (V4DF_type_node,
22910 V4DF_type_node,
22911 NULL_TREE);
22912 tree v4df_ftype_v4si
22913 = build_function_type_list (V4DF_type_node,
22914 V4SI_type_node,
22915 NULL_TREE);
22916 tree v4df_ftype_v4sf
22917 = build_function_type_list (V4DF_type_node,
22918 V4SF_type_node,
22919 NULL_TREE);
22920 tree v4sf_ftype_v4df
22921 = build_function_type_list (V4SF_type_node,
22922 V4DF_type_node,
22923 NULL_TREE);
22924 tree v8sf_ftype_v8sf_v8sf
22925 = build_function_type_list (V8SF_type_node,
22926 V8SF_type_node, V8SF_type_node,
22927 NULL_TREE);
22928 tree v4df_ftype_v4df_v4df
22929 = build_function_type_list (V4DF_type_node,
22930 V4DF_type_node, V4DF_type_node,
22931 NULL_TREE);
22932 tree v8sf_ftype_v8sf_int
22933 = build_function_type_list (V8SF_type_node,
22934 V8SF_type_node, integer_type_node,
22935 NULL_TREE);
22936 tree v4si_ftype_v8si_int
22937 = build_function_type_list (V4SI_type_node,
22938 V8SI_type_node, integer_type_node,
22939 NULL_TREE);
22940 tree v4df_ftype_v4df_int
22941 = build_function_type_list (V4DF_type_node,
22942 V4DF_type_node, integer_type_node,
22943 NULL_TREE);
22944 tree v4sf_ftype_v8sf_int
22945 = build_function_type_list (V4SF_type_node,
22946 V8SF_type_node, integer_type_node,
22947 NULL_TREE);
22948 tree v2df_ftype_v4df_int
22949 = build_function_type_list (V2DF_type_node,
22950 V4DF_type_node, integer_type_node,
22951 NULL_TREE);
22952 tree v8sf_ftype_v8sf_v8sf_int
22953 = build_function_type_list (V8SF_type_node,
22954 V8SF_type_node, V8SF_type_node,
22955 integer_type_node,
22956 NULL_TREE);
22957 tree v8sf_ftype_v8sf_v8sf_v8sf
22958 = build_function_type_list (V8SF_type_node,
22959 V8SF_type_node, V8SF_type_node,
22960 V8SF_type_node,
22961 NULL_TREE);
22962 tree v4df_ftype_v4df_v4df_v4df
22963 = build_function_type_list (V4DF_type_node,
22964 V4DF_type_node, V4DF_type_node,
22965 V4DF_type_node,
22966 NULL_TREE);
22967 tree v8si_ftype_v8si_v8si_int
22968 = build_function_type_list (V8SI_type_node,
22969 V8SI_type_node, V8SI_type_node,
22970 integer_type_node,
22971 NULL_TREE);
22972 tree v4df_ftype_v4df_v4df_int
22973 = build_function_type_list (V4DF_type_node,
22974 V4DF_type_node, V4DF_type_node,
22975 integer_type_node,
22976 NULL_TREE);
22977 tree v8sf_ftype_pcfloat
22978 = build_function_type_list (V8SF_type_node,
22979 pcfloat_type_node,
22980 NULL_TREE);
22981 tree v4df_ftype_pcdouble
22982 = build_function_type_list (V4DF_type_node,
22983 pcdouble_type_node,
22984 NULL_TREE);
22985 tree pcv4sf_type_node
22986 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
22987 tree pcv2df_type_node
22988 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
22989 tree v8sf_ftype_pcv4sf
22990 = build_function_type_list (V8SF_type_node,
22991 pcv4sf_type_node,
22992 NULL_TREE);
22993 tree v4df_ftype_pcv2df
22994 = build_function_type_list (V4DF_type_node,
22995 pcv2df_type_node,
22996 NULL_TREE);
22997 tree v32qi_ftype_pcchar
22998 = build_function_type_list (V32QI_type_node,
22999 pcchar_type_node,
23000 NULL_TREE);
23001 tree void_ftype_pchar_v32qi
23002 = build_function_type_list (void_type_node,
23003 pchar_type_node, V32QI_type_node,
23004 NULL_TREE);
23005 tree v8si_ftype_v8si_v4si_int
23006 = build_function_type_list (V8SI_type_node,
23007 V8SI_type_node, V4SI_type_node,
23008 integer_type_node,
23009 NULL_TREE);
23010 tree pv4di_type_node = build_pointer_type (V4DI_type_node);
23011 tree void_ftype_pv4di_v4di
23012 = build_function_type_list (void_type_node,
23013 pv4di_type_node, V4DI_type_node,
23014 NULL_TREE);
23015 tree v8sf_ftype_v8sf_v4sf_int
23016 = build_function_type_list (V8SF_type_node,
23017 V8SF_type_node, V4SF_type_node,
23018 integer_type_node,
23019 NULL_TREE);
23020 tree v4df_ftype_v4df_v2df_int
23021 = build_function_type_list (V4DF_type_node,
23022 V4DF_type_node, V2DF_type_node,
23023 integer_type_node,
23024 NULL_TREE);
23025 tree void_ftype_pfloat_v8sf
23026 = build_function_type_list (void_type_node,
23027 pfloat_type_node, V8SF_type_node,
23028 NULL_TREE);
23029 tree void_ftype_pdouble_v4df
23030 = build_function_type_list (void_type_node,
23031 pdouble_type_node, V4DF_type_node,
23032 NULL_TREE);
23033 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
23034 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
23035 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
23036 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
23037 tree pcv8sf_type_node
23038 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
23039 tree pcv4df_type_node
23040 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
23041 tree v8sf_ftype_pcv8sf_v8sf
23042 = build_function_type_list (V8SF_type_node,
23043 pcv8sf_type_node, V8SF_type_node,
23044 NULL_TREE);
23045 tree v4df_ftype_pcv4df_v4df
23046 = build_function_type_list (V4DF_type_node,
23047 pcv4df_type_node, V4DF_type_node,
23048 NULL_TREE);
23049 tree v4sf_ftype_pcv4sf_v4sf
23050 = build_function_type_list (V4SF_type_node,
23051 pcv4sf_type_node, V4SF_type_node,
23052 NULL_TREE);
23053 tree v2df_ftype_pcv2df_v2df
23054 = build_function_type_list (V2DF_type_node,
23055 pcv2df_type_node, V2DF_type_node,
23056 NULL_TREE);
23057 tree void_ftype_pv8sf_v8sf_v8sf
23058 = build_function_type_list (void_type_node,
23059 pv8sf_type_node, V8SF_type_node,
23060 V8SF_type_node,
23061 NULL_TREE);
23062 tree void_ftype_pv4df_v4df_v4df
23063 = build_function_type_list (void_type_node,
23064 pv4df_type_node, V4DF_type_node,
23065 V4DF_type_node,
23066 NULL_TREE);
23067 tree void_ftype_pv4sf_v4sf_v4sf
23068 = build_function_type_list (void_type_node,
23069 pv4sf_type_node, V4SF_type_node,
23070 V4SF_type_node,
23071 NULL_TREE);
23072 tree void_ftype_pv2df_v2df_v2df
23073 = build_function_type_list (void_type_node,
23074 pv2df_type_node, V2DF_type_node,
23075 V2DF_type_node,
23076 NULL_TREE);
23077 tree v4df_ftype_v2df
23078 = build_function_type_list (V4DF_type_node,
23079 V2DF_type_node,
23080 NULL_TREE);
23081 tree v8sf_ftype_v4sf
23082 = build_function_type_list (V8SF_type_node,
23083 V4SF_type_node,
23084 NULL_TREE);
23085 tree v8si_ftype_v4si
23086 = build_function_type_list (V8SI_type_node,
23087 V4SI_type_node,
23088 NULL_TREE);
23089 tree v2df_ftype_v4df
23090 = build_function_type_list (V2DF_type_node,
23091 V4DF_type_node,
23092 NULL_TREE);
23093 tree v4sf_ftype_v8sf
23094 = build_function_type_list (V4SF_type_node,
23095 V8SF_type_node,
23096 NULL_TREE);
23097 tree v4si_ftype_v8si
23098 = build_function_type_list (V4SI_type_node,
23099 V8SI_type_node,
23100 NULL_TREE);
23101 tree int_ftype_v4df
23102 = build_function_type_list (integer_type_node,
23103 V4DF_type_node,
23104 NULL_TREE);
23105 tree int_ftype_v8sf
23106 = build_function_type_list (integer_type_node,
23107 V8SF_type_node,
23108 NULL_TREE);
23109 tree int_ftype_v8sf_v8sf
23110 = build_function_type_list (integer_type_node,
23111 V8SF_type_node, V8SF_type_node,
23112 NULL_TREE);
23113 tree int_ftype_v4di_v4di
23114 = build_function_type_list (integer_type_node,
23115 V4DI_type_node, V4DI_type_node,
23116 NULL_TREE);
23117 tree int_ftype_v4df_v4df
23118 = build_function_type_list (integer_type_node,
23119 V4DF_type_node, V4DF_type_node,
23120 NULL_TREE);
23121 tree v8sf_ftype_v8sf_v8si
23122 = build_function_type_list (V8SF_type_node,
23123 V8SF_type_node, V8SI_type_node,
23124 NULL_TREE);
23125 tree v4df_ftype_v4df_v4di
23126 = build_function_type_list (V4DF_type_node,
23127 V4DF_type_node, V4DI_type_node,
23128 NULL_TREE);
23129 tree v4sf_ftype_v4sf_v4si
23130 = build_function_type_list (V4SF_type_node,
23131 V4SF_type_node, V4SI_type_node, NULL_TREE);
23132 tree v2df_ftype_v2df_v2di
23133 = build_function_type_list (V2DF_type_node,
23134 V2DF_type_node, V2DI_type_node, NULL_TREE);
23136 /* Integer intrinsics. */
23137 tree uint64_ftype_void
23138 = build_function_type (long_long_unsigned_type_node,
23139 void_list_node);
23140 tree int_ftype_int
23141 = build_function_type_list (integer_type_node,
23142 integer_type_node, NULL_TREE);
23143 tree int64_ftype_int64
23144 = build_function_type_list (long_long_integer_type_node,
23145 long_long_integer_type_node,
23146 NULL_TREE);
23147 tree uint64_ftype_int
23148 = build_function_type_list (long_long_unsigned_type_node,
23149 integer_type_node, NULL_TREE);
23150 tree punsigned_type_node = build_pointer_type (unsigned_type_node);
23151 tree uint64_ftype_punsigned
23152 = build_function_type_list (long_long_unsigned_type_node,
23153 punsigned_type_node, NULL_TREE);
23154 tree ushort_ftype_ushort_int
23155 = build_function_type_list (short_unsigned_type_node,
23156 short_unsigned_type_node,
23157 integer_type_node,
23158 NULL_TREE);
23159 tree uchar_ftype_uchar_int
23160 = build_function_type_list (unsigned_char_type_node,
23161 unsigned_char_type_node,
23162 integer_type_node,
23163 NULL_TREE);
23165 tree ftype;
23167 /* Add all special builtins with variable number of operands. */
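/* Each table entry's FLAG field holds an ix86_special_builtin_type
value naming one of the prototypes built above; the switch below maps
that enum to the matching tree, so a single table drives every
def_builtin call. */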
23168 for (i = 0, d = bdesc_special_args;
23169 i < ARRAY_SIZE (bdesc_special_args);
23170 i++, d++)
23171 {
23172 tree type;
23174 if (d->name == 0)
23175 continue;
23177 switch ((enum ix86_special_builtin_type) d->flag)
23178 {
23179 case VOID_FTYPE_VOID:
23180 type = void_ftype_void;
23181 break;
23182 case UINT64_FTYPE_VOID:
23183 type = uint64_ftype_void;
23184 break;
23185 case UINT64_FTYPE_PUNSIGNED:
23186 type = uint64_ftype_punsigned;
23187 break;
23188 case V32QI_FTYPE_PCCHAR:
23189 type = v32qi_ftype_pcchar;
23190 break;
23191 case V16QI_FTYPE_PCCHAR:
23192 type = v16qi_ftype_pcchar;
23193 break;
23194 case V8SF_FTYPE_PCV4SF:
23195 type = v8sf_ftype_pcv4sf;
23196 break;
23197 case V8SF_FTYPE_PCFLOAT:
23198 type = v8sf_ftype_pcfloat;
23199 break;
23200 case V4DF_FTYPE_PCV2DF:
23201 type = v4df_ftype_pcv2df;
23202 break;
23203 case V4DF_FTYPE_PCDOUBLE:
23204 type = v4df_ftype_pcdouble;
23205 break;
23206 case V4SF_FTYPE_PCFLOAT:
23207 type = v4sf_ftype_pcfloat;
23208 break;
23209 case V2DI_FTYPE_PV2DI:
23210 type = v2di_ftype_pv2di;
23211 break;
23212 case V2DF_FTYPE_PCDOUBLE:
23213 type = v2df_ftype_pcdouble;
23214 break;
23215 case V8SF_FTYPE_PCV8SF_V8SF:
23216 type = v8sf_ftype_pcv8sf_v8sf;
23217 break;
23218 case V4DF_FTYPE_PCV4DF_V4DF:
23219 type = v4df_ftype_pcv4df_v4df;
23220 break;
23221 case V4SF_FTYPE_V4SF_PCV2SF:
23222 type = v4sf_ftype_v4sf_pcv2sf;
23223 break;
23224 case V4SF_FTYPE_PCV4SF_V4SF:
23225 type = v4sf_ftype_pcv4sf_v4sf;
23226 break;
23227 case V2DF_FTYPE_V2DF_PCDOUBLE:
23228 type = v2df_ftype_v2df_pcdouble;
23229 break;
23230 case V2DF_FTYPE_PCV2DF_V2DF:
23231 type = v2df_ftype_pcv2df_v2df;
23232 break;
23233 case VOID_FTYPE_PV2SF_V4SF:
23234 type = void_ftype_pv2sf_v4sf;
23235 break;
23236 case VOID_FTYPE_PV4DI_V4DI:
23237 type = void_ftype_pv4di_v4di;
23238 break;
23239 case VOID_FTYPE_PV2DI_V2DI:
23240 type = void_ftype_pv2di_v2di;
23241 break;
23242 case VOID_FTYPE_PCHAR_V32QI:
23243 type = void_ftype_pchar_v32qi;
23244 break;
23245 case VOID_FTYPE_PCHAR_V16QI:
23246 type = void_ftype_pchar_v16qi;
23247 break;
23248 case VOID_FTYPE_PFLOAT_V8SF:
23249 type = void_ftype_pfloat_v8sf;
23250 break;
23251 case VOID_FTYPE_PFLOAT_V4SF:
23252 type = void_ftype_pfloat_v4sf;
23253 break;
23254 case VOID_FTYPE_PDOUBLE_V4DF:
23255 type = void_ftype_pdouble_v4df;
23256 break;
23257 case VOID_FTYPE_PDOUBLE_V2DF:
23258 type = void_ftype_pdouble_v2df;
23259 break;
23260 case VOID_FTYPE_PDI_DI:
23261 type = void_ftype_pdi_di;
23262 break;
23263 case VOID_FTYPE_PINT_INT:
23264 type = void_ftype_pint_int;
23265 break;
23266 case VOID_FTYPE_PV8SF_V8SF_V8SF:
23267 type = void_ftype_pv8sf_v8sf_v8sf;
23268 break;
23269 case VOID_FTYPE_PV4DF_V4DF_V4DF:
23270 type = void_ftype_pv4df_v4df_v4df;
23271 break;
23272 case VOID_FTYPE_PV4SF_V4SF_V4SF:
23273 type = void_ftype_pv4sf_v4sf_v4sf;
23274 break;
23275 case VOID_FTYPE_PV2DF_V2DF_V2DF:
23276 type = void_ftype_pv2df_v2df_v2df;
23277 break;
23278 default:
23279 gcc_unreachable ();
23280 }
23282 def_builtin (d->mask, d->name, type, d->code);
23283 }
23285 /* Add all builtins with variable number of operands. */
23286 for (i = 0, d = bdesc_args;
23287 i < ARRAY_SIZE (bdesc_args);
23288 i++, d++)
23289 {
23290 tree type;
23292 if (d->name == 0)
23293 continue;
23295 switch ((enum ix86_builtin_type) d->flag)
23296 {
23297 case FLOAT_FTYPE_FLOAT:
23298 type = float_ftype_float;
23299 break;
23300 case INT_FTYPE_V8SF_V8SF_PTEST:
23301 type = int_ftype_v8sf_v8sf;
23302 break;
23303 case INT_FTYPE_V4DI_V4DI_PTEST:
23304 type = int_ftype_v4di_v4di;
23305 break;
23306 case INT_FTYPE_V4DF_V4DF_PTEST:
23307 type = int_ftype_v4df_v4df;
23308 break;
23309 case INT_FTYPE_V4SF_V4SF_PTEST:
23310 type = int_ftype_v4sf_v4sf;
23311 break;
23312 case INT_FTYPE_V2DI_V2DI_PTEST:
23313 type = int_ftype_v2di_v2di;
23314 break;
23315 case INT_FTYPE_V2DF_V2DF_PTEST:
23316 type = int_ftype_v2df_v2df;
23317 break;
23318 case INT_FTYPE_INT:
23319 type = int_ftype_int;
23320 break;
23321 case UINT64_FTYPE_INT:
23322 type = uint64_ftype_int;
23323 break;
23324 case INT64_FTYPE_INT64:
23325 type = int64_ftype_int64;
23326 break;
23327 case INT64_FTYPE_V4SF:
23328 type = int64_ftype_v4sf;
23329 break;
23330 case INT64_FTYPE_V2DF:
23331 type = int64_ftype_v2df;
23332 break;
23333 case INT_FTYPE_V16QI:
23334 type = int_ftype_v16qi;
23335 break;
23336 case INT_FTYPE_V8QI:
23337 type = int_ftype_v8qi;
23338 break;
23339 case INT_FTYPE_V8SF:
23340 type = int_ftype_v8sf;
23341 break;
23342 case INT_FTYPE_V4DF:
23343 type = int_ftype_v4df;
23344 break;
23345 case INT_FTYPE_V4SF:
23346 type = int_ftype_v4sf;
23347 break;
23348 case INT_FTYPE_V2DF:
23349 type = int_ftype_v2df;
23350 break;
23351 case V16QI_FTYPE_V16QI:
23352 type = v16qi_ftype_v16qi;
23353 break;
23354 case V8SI_FTYPE_V8SF:
23355 type = v8si_ftype_v8sf;
23356 break;
23357 case V8SI_FTYPE_V4SI:
23358 type = v8si_ftype_v4si;
23359 break;
23360 case V8HI_FTYPE_V8HI:
23361 type = v8hi_ftype_v8hi;
23362 break;
23363 case V8HI_FTYPE_V16QI:
23364 type = v8hi_ftype_v16qi;
23365 break;
23366 case V8QI_FTYPE_V8QI:
23367 type = v8qi_ftype_v8qi;
23368 break;
23369 case V8SF_FTYPE_V8SF:
23370 type = v8sf_ftype_v8sf;
23371 break;
23372 case V8SF_FTYPE_V8SI:
23373 type = v8sf_ftype_v8si;
23374 break;
23375 case V8SF_FTYPE_V4SF:
23376 type = v8sf_ftype_v4sf;
23377 break;
23378 case V4SI_FTYPE_V4DF:
23379 type = v4si_ftype_v4df;
23380 break;
23381 case V4SI_FTYPE_V4SI:
23382 type = v4si_ftype_v4si;
23383 break;
23384 case V4SI_FTYPE_V16QI:
23385 type = v4si_ftype_v16qi;
23386 break;
23387 case V4SI_FTYPE_V8SI:
23388 type = v4si_ftype_v8si;
23389 break;
23390 case V4SI_FTYPE_V8HI:
23391 type = v4si_ftype_v8hi;
23392 break;
23393 case V4SI_FTYPE_V4SF:
23394 type = v4si_ftype_v4sf;
23395 break;
23396 case V4SI_FTYPE_V2DF:
23397 type = v4si_ftype_v2df;
23398 break;
23399 case V4HI_FTYPE_V4HI:
23400 type = v4hi_ftype_v4hi;
23401 break;
23402 case V4DF_FTYPE_V4DF:
23403 type = v4df_ftype_v4df;
23404 break;
23405 case V4DF_FTYPE_V4SI:
23406 type = v4df_ftype_v4si;
23407 break;
23408 case V4DF_FTYPE_V4SF:
23409 type = v4df_ftype_v4sf;
23410 break;
23411 case V4DF_FTYPE_V2DF:
23412 type = v4df_ftype_v2df;
23413 break;
23414 case V4SF_FTYPE_V4SF:
23415 case V4SF_FTYPE_V4SF_VEC_MERGE:
23416 type = v4sf_ftype_v4sf;
23417 break;
23418 case V4SF_FTYPE_V8SF:
23419 type = v4sf_ftype_v8sf;
23420 break;
23421 case V4SF_FTYPE_V4SI:
23422 type = v4sf_ftype_v4si;
23423 break;
23424 case V4SF_FTYPE_V4DF:
23425 type = v4sf_ftype_v4df;
23426 break;
23427 case V4SF_FTYPE_V2DF:
23428 type = v4sf_ftype_v2df;
23429 break;
23430 case V2DI_FTYPE_V2DI:
23431 type = v2di_ftype_v2di;
23432 break;
23433 case V2DI_FTYPE_V16QI:
23434 type = v2di_ftype_v16qi;
23435 break;
23436 case V2DI_FTYPE_V8HI:
23437 type = v2di_ftype_v8hi;
23438 break;
23439 case V2DI_FTYPE_V4SI:
23440 type = v2di_ftype_v4si;
23441 break;
23442 case V2SI_FTYPE_V2SI:
23443 type = v2si_ftype_v2si;
23444 break;
23445 case V2SI_FTYPE_V4SF:
23446 type = v2si_ftype_v4sf;
23447 break;
23448 case V2SI_FTYPE_V2DF:
23449 type = v2si_ftype_v2df;
23450 break;
23451 case V2SI_FTYPE_V2SF:
23452 type = v2si_ftype_v2sf;
23453 break;
23454 case V2DF_FTYPE_V4DF:
23455 type = v2df_ftype_v4df;
23456 break;
23457 case V2DF_FTYPE_V4SF:
23458 type = v2df_ftype_v4sf;
23459 break;
23460 case V2DF_FTYPE_V2DF:
23461 case V2DF_FTYPE_V2DF_VEC_MERGE:
23462 type = v2df_ftype_v2df;
23463 break;
23464 case V2DF_FTYPE_V2SI:
23465 type = v2df_ftype_v2si;
23466 break;
23467 case V2DF_FTYPE_V4SI:
23468 type = v2df_ftype_v4si;
23469 break;
23470 case V2SF_FTYPE_V2SF:
23471 type = v2sf_ftype_v2sf;
23472 break;
23473 case V2SF_FTYPE_V2SI:
23474 type = v2sf_ftype_v2si;
23475 break;
23476 case V16QI_FTYPE_V16QI_V16QI:
23477 type = v16qi_ftype_v16qi_v16qi;
23478 break;
23479 case V16QI_FTYPE_V8HI_V8HI:
23480 type = v16qi_ftype_v8hi_v8hi;
23481 break;
23482 case V8QI_FTYPE_V8QI_V8QI:
23483 type = v8qi_ftype_v8qi_v8qi;
23484 break;
23485 case V8QI_FTYPE_V4HI_V4HI:
23486 type = v8qi_ftype_v4hi_v4hi;
23487 break;
23488 case V8HI_FTYPE_V8HI_V8HI:
23489 case V8HI_FTYPE_V8HI_V8HI_COUNT:
23490 type = v8hi_ftype_v8hi_v8hi;
23491 break;
23492 case V8HI_FTYPE_V16QI_V16QI:
23493 type = v8hi_ftype_v16qi_v16qi;
23494 break;
23495 case V8HI_FTYPE_V4SI_V4SI:
23496 type = v8hi_ftype_v4si_v4si;
23497 break;
23498 case V8HI_FTYPE_V8HI_SI_COUNT:
23499 type = v8hi_ftype_v8hi_int;
23500 break;
23501 case V8SF_FTYPE_V8SF_V8SF:
23502 type = v8sf_ftype_v8sf_v8sf;
23503 break;
23504 case V8SF_FTYPE_V8SF_V8SI:
23505 type = v8sf_ftype_v8sf_v8si;
23506 break;
23507 case V4SI_FTYPE_V4SI_V4SI:
23508 case V4SI_FTYPE_V4SI_V4SI_COUNT:
23509 type = v4si_ftype_v4si_v4si;
23510 break;
23511 case V4SI_FTYPE_V8HI_V8HI:
23512 type = v4si_ftype_v8hi_v8hi;
23513 break;
23514 case V4SI_FTYPE_V4SF_V4SF:
23515 type = v4si_ftype_v4sf_v4sf;
23516 break;
23517 case V4SI_FTYPE_V2DF_V2DF:
23518 type = v4si_ftype_v2df_v2df;
23519 break;
23520 case V4SI_FTYPE_V4SI_SI_COUNT:
23521 type = v4si_ftype_v4si_int;
23522 break;
23523 case V4HI_FTYPE_V4HI_V4HI:
23524 case V4HI_FTYPE_V4HI_V4HI_COUNT:
23525 type = v4hi_ftype_v4hi_v4hi;
23526 break;
23527 case V4HI_FTYPE_V8QI_V8QI:
23528 type = v4hi_ftype_v8qi_v8qi;
23529 break;
23530 case V4HI_FTYPE_V2SI_V2SI:
23531 type = v4hi_ftype_v2si_v2si;
23532 break;
23533 case V4HI_FTYPE_V4HI_SI_COUNT:
23534 type = v4hi_ftype_v4hi_int;
23535 break;
23536 case V4DF_FTYPE_V4DF_V4DF:
23537 type = v4df_ftype_v4df_v4df;
23538 break;
23539 case V4DF_FTYPE_V4DF_V4DI:
23540 type = v4df_ftype_v4df_v4di;
23541 break;
23542 case V4SF_FTYPE_V4SF_V4SF:
23543 case V4SF_FTYPE_V4SF_V4SF_SWAP:
23544 type = v4sf_ftype_v4sf_v4sf;
23545 break;
23546 case V4SF_FTYPE_V4SF_V4SI:
23547 type = v4sf_ftype_v4sf_v4si;
23548 break;
23549 case V4SF_FTYPE_V4SF_V2SI:
23550 type = v4sf_ftype_v4sf_v2si;
23551 break;
23552 case V4SF_FTYPE_V4SF_V2DF:
23553 type = v4sf_ftype_v4sf_v2df;
23554 break;
23555 case V4SF_FTYPE_V4SF_DI:
23556 type = v4sf_ftype_v4sf_int64;
23557 break;
23558 case V4SF_FTYPE_V4SF_SI:
23559 type = v4sf_ftype_v4sf_int;
23560 break;
23561 case V2DI_FTYPE_V2DI_V2DI:
23562 case V2DI_FTYPE_V2DI_V2DI_COUNT:
23563 type = v2di_ftype_v2di_v2di;
23564 break;
23565 case V2DI_FTYPE_V16QI_V16QI:
23566 type = v2di_ftype_v16qi_v16qi;
23567 break;
23568 case V2DI_FTYPE_V4SI_V4SI:
23569 type = v2di_ftype_v4si_v4si;
23570 break;
23571 case V2DI_FTYPE_V2DI_V16QI:
23572 type = v2di_ftype_v2di_v16qi;
23573 break;
23574 case V2DI_FTYPE_V2DF_V2DF:
23575 type = v2di_ftype_v2df_v2df;
23576 break;
23577 case V2DI_FTYPE_V2DI_SI_COUNT:
23578 type = v2di_ftype_v2di_int;
23579 break;
23580 case V2SI_FTYPE_V2SI_V2SI:
23581 case V2SI_FTYPE_V2SI_V2SI_COUNT:
23582 type = v2si_ftype_v2si_v2si;
23583 break;
23584 case V2SI_FTYPE_V4HI_V4HI:
23585 type = v2si_ftype_v4hi_v4hi;
23586 break;
23587 case V2SI_FTYPE_V2SF_V2SF:
23588 type = v2si_ftype_v2sf_v2sf;
23589 break;
23590 case V2SI_FTYPE_V2SI_SI_COUNT:
23591 type = v2si_ftype_v2si_int;
23592 break;
23593 case V2DF_FTYPE_V2DF_V2DF:
23594 case V2DF_FTYPE_V2DF_V2DF_SWAP:
23595 type = v2df_ftype_v2df_v2df;
23596 break;
23597 case V2DF_FTYPE_V2DF_V4SF:
23598 type = v2df_ftype_v2df_v4sf;
23599 break;
23600 case V2DF_FTYPE_V2DF_V2DI:
23601 type = v2df_ftype_v2df_v2di;
23602 break;
23603 case V2DF_FTYPE_V2DF_DI:
23604 type = v2df_ftype_v2df_int64;
23605 break;
23606 case V2DF_FTYPE_V2DF_SI:
23607 type = v2df_ftype_v2df_int;
23608 break;
23609 case V2SF_FTYPE_V2SF_V2SF:
23610 type = v2sf_ftype_v2sf_v2sf;
23611 break;
23612 case V1DI_FTYPE_V1DI_V1DI:
23613 case V1DI_FTYPE_V1DI_V1DI_COUNT:
23614 type = v1di_ftype_v1di_v1di;
23615 break;
23616 case V1DI_FTYPE_V8QI_V8QI:
23617 type = v1di_ftype_v8qi_v8qi;
23618 break;
23619 case V1DI_FTYPE_V2SI_V2SI:
23620 type = v1di_ftype_v2si_v2si;
23621 break;
23622 case V1DI_FTYPE_V1DI_SI_COUNT:
23623 type = v1di_ftype_v1di_int;
23624 break;
23625 case UINT64_FTYPE_UINT64_UINT64:
23626 type = uint64_ftype_uint64_uint64;
23627 break;
23628 case UINT_FTYPE_UINT_UINT:
23629 type = unsigned_ftype_unsigned_unsigned;
23630 break;
23631 case UINT_FTYPE_UINT_USHORT:
23632 type = unsigned_ftype_unsigned_ushort;
23633 break;
23634 case UINT_FTYPE_UINT_UCHAR:
23635 type = unsigned_ftype_unsigned_uchar;
23636 break;
23637 case UINT16_FTYPE_UINT16_INT:
23638 type = ushort_ftype_ushort_int;
23639 break;
23640 case UINT8_FTYPE_UINT8_INT:
23641 type = uchar_ftype_uchar_int;
23642 break;
23643 case V8HI_FTYPE_V8HI_INT:
23644 type = v8hi_ftype_v8hi_int;
23645 break;
23646 case V8SF_FTYPE_V8SF_INT:
23647 type = v8sf_ftype_v8sf_int;
23648 break;
23649 case V4SI_FTYPE_V4SI_INT:
23650 type = v4si_ftype_v4si_int;
23651 break;
23652 case V4SI_FTYPE_V8SI_INT:
23653 type = v4si_ftype_v8si_int;
23654 break;
23655 case V4HI_FTYPE_V4HI_INT:
23656 type = v4hi_ftype_v4hi_int;
23657 break;
23658 case V4DF_FTYPE_V4DF_INT:
23659 type = v4df_ftype_v4df_int;
23660 break;
23661 case V4SF_FTYPE_V4SF_INT:
23662 type = v4sf_ftype_v4sf_int;
23663 break;
23664 case V4SF_FTYPE_V8SF_INT:
23665 type = v4sf_ftype_v8sf_int;
23666 break;
23667 case V2DI_FTYPE_V2DI_INT:
23668 case V2DI2TI_FTYPE_V2DI_INT:
23669 type = v2di_ftype_v2di_int;
23670 break;
23671 case V2DF_FTYPE_V2DF_INT:
23672 type = v2df_ftype_v2df_int;
23673 break;
23674 case V2DF_FTYPE_V4DF_INT:
23675 type = v2df_ftype_v4df_int;
23676 break;
23677 case V16QI_FTYPE_V16QI_V16QI_V16QI:
23678 type = v16qi_ftype_v16qi_v16qi_v16qi;
23679 break;
23680 case V8SF_FTYPE_V8SF_V8SF_V8SF:
23681 type = v8sf_ftype_v8sf_v8sf_v8sf;
23682 break;
23683 case V4DF_FTYPE_V4DF_V4DF_V4DF:
23684 type = v4df_ftype_v4df_v4df_v4df;
23685 break;
23686 case V4SF_FTYPE_V4SF_V4SF_V4SF:
23687 type = v4sf_ftype_v4sf_v4sf_v4sf;
23688 break;
23689 case V2DF_FTYPE_V2DF_V2DF_V2DF:
23690 type = v2df_ftype_v2df_v2df_v2df;
23691 break;
23692 case V16QI_FTYPE_V16QI_V16QI_INT:
23693 type = v16qi_ftype_v16qi_v16qi_int;
23694 break;
23695 case V8SI_FTYPE_V8SI_V8SI_INT:
23696 type = v8si_ftype_v8si_v8si_int;
23697 break;
23698 case V8SI_FTYPE_V8SI_V4SI_INT:
23699 type = v8si_ftype_v8si_v4si_int;
23700 break;
23701 case V8HI_FTYPE_V8HI_V8HI_INT:
23702 type = v8hi_ftype_v8hi_v8hi_int;
23703 break;
23704 case V8SF_FTYPE_V8SF_V8SF_INT:
23705 type = v8sf_ftype_v8sf_v8sf_int;
23706 break;
23707 case V8SF_FTYPE_V8SF_V4SF_INT:
23708 type = v8sf_ftype_v8sf_v4sf_int;
23709 break;
23710 case V4SI_FTYPE_V4SI_V4SI_INT:
23711 type = v4si_ftype_v4si_v4si_int;
23712 break;
23713 case V4DF_FTYPE_V4DF_V4DF_INT:
23714 type = v4df_ftype_v4df_v4df_int;
23715 break;
23716 case V4DF_FTYPE_V4DF_V2DF_INT:
23717 type = v4df_ftype_v4df_v2df_int;
23718 break;
23719 case V4SF_FTYPE_V4SF_V4SF_INT:
23720 type = v4sf_ftype_v4sf_v4sf_int;
23721 break;
23722 case V2DI_FTYPE_V2DI_V2DI_INT:
23723 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23724 type = v2di_ftype_v2di_v2di_int;
23725 break;
23726 case V2DF_FTYPE_V2DF_V2DF_INT:
23727 type = v2df_ftype_v2df_v2df_int;
23728 break;
23729 case V2DI_FTYPE_V2DI_UINT_UINT:
23730 type = v2di_ftype_v2di_unsigned_unsigned;
23731 break;
23732 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23733 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
23734 break;
23735 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23736 type = v1di_ftype_v1di_v1di_int;
23737 break;
23738 default:
23739 gcc_unreachable ();
23740 }
23742 def_builtin_const (d->mask, d->name, type, d->code);
23743 }
23745 /* pcmpestr[im] insns. */
23746 for (i = 0, d = bdesc_pcmpestr;
23747 i < ARRAY_SIZE (bdesc_pcmpestr);
23748 i++, d++)
23749 {
23750 if (d->code == IX86_BUILTIN_PCMPESTRM128)
23751 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
23752 else
23753 ftype = int_ftype_v16qi_int_v16qi_int_int;
23754 def_builtin_const (d->mask, d->name, ftype, d->code);
23755 }
23757 /* pcmpistr[im] insns. */
23758 for (i = 0, d = bdesc_pcmpistr;
23759 i < ARRAY_SIZE (bdesc_pcmpistr);
23760 i++, d++)
23761 {
23762 if (d->code == IX86_BUILTIN_PCMPISTRM128)
23763 ftype = v16qi_ftype_v16qi_v16qi_int;
23764 else
23765 ftype = int_ftype_v16qi_v16qi_int;
23766 def_builtin_const (d->mask, d->name, ftype, d->code);
23767 }
23769 /* comi/ucomi insns. */
23770 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23771 if (d->mask == OPTION_MASK_ISA_SSE2)
23772 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
23773 else
23774 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
23776 /* SSE */
23777 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
23778 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
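/* Usage sketch (illustrative only): __builtin_ia32_stmxcsr reads the
SSE control/status register and __builtin_ia32_ldmxcsr writes it
back, e.g.

unsigned int mxcsr = __builtin_ia32_stmxcsr ();
__builtin_ia32_ldmxcsr (mxcsr | 0x8040);

where 0x8040 sets the FTZ (bit 15) and DAZ (bit 6) bits. */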
23780 /* SSE or 3DNow!A */
23781 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
23783 /* SSE2 */
23784 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
23786 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
23787 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
23789 /* SSE3. */
23790 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
23791 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
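/* Sketch of intended use: __builtin_ia32_monitor (addr, 0, 0) arms
address-range monitoring and __builtin_ia32_mwait (0, 0) waits on it;
the _mm_monitor/_mm_mwait wrappers in pmmintrin.h are built on
these. */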
23793 /* AES */
23794 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
23795 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
23796 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
23797 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
23798 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
23799 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
23801 /* PCLMUL */
23802 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
23804 /* AVX */
23805 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
23806 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
23808 /* Access to the vec_init patterns. */
23809 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
23810 integer_type_node, NULL_TREE);
23811 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
23813 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
23814 short_integer_type_node,
23815 short_integer_type_node,
23816 short_integer_type_node, NULL_TREE);
23817 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
23819 ftype = build_function_type_list (V8QI_type_node, char_type_node,
23820 char_type_node, char_type_node,
23821 char_type_node, char_type_node,
23822 char_type_node, char_type_node,
23823 char_type_node, NULL_TREE);
23824 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
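/* E.g. (a sketch): __builtin_ia32_vec_init_v2si (1, 2) constructs a
V2SI vector from two scalars; the _mm_set_* wrappers in mmintrin.h
are implemented in terms of these builtins. */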
23826 /* Access to the vec_extract patterns. */
23827 ftype = build_function_type_list (double_type_node, V2DF_type_node,
23828 integer_type_node, NULL_TREE);
23829 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
23831 ftype = build_function_type_list (long_long_integer_type_node,
23832 V2DI_type_node, integer_type_node,
23833 NULL_TREE);
23834 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
23836 ftype = build_function_type_list (float_type_node, V4SF_type_node,
23837 integer_type_node, NULL_TREE);
23838 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
23840 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
23841 integer_type_node, NULL_TREE);
23842 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
23844 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
23845 integer_type_node, NULL_TREE);
23846 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
23848 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
23849 integer_type_node, NULL_TREE);
23850 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
23852 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
23853 integer_type_node, NULL_TREE);
23854 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
23856 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
23857 integer_type_node, NULL_TREE);
23858 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
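/* E.g. (a sketch): float f = __builtin_ia32_vec_ext_v4sf (x, 0);
extracts lane 0 of X; the lane selector is expected to be an integer
constant. */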
23860 /* Access to the vec_set patterns. */
23861 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
23862 intDI_type_node,
23863 integer_type_node, NULL_TREE);
23864 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
23866 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
23867 float_type_node,
23868 integer_type_node, NULL_TREE);
23869 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
23871 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
23872 intSI_type_node,
23873 integer_type_node, NULL_TREE);
23874 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
23876 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
23877 intHI_type_node,
23878 integer_type_node, NULL_TREE);
23879 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
23881 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
23882 intHI_type_node,
23883 integer_type_node, NULL_TREE);
23884 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
23886 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
23887 intQI_type_node,
23888 integer_type_node, NULL_TREE);
23889 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
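/* E.g. (a sketch): v = __builtin_ia32_vec_set_v8hi (v, 99, 3);
yields a copy of V with element 3 replaced by 99, as used by the
_mm_insert_epi16 wrapper. */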
23891 /* Add SSE5 multi-arg instructions. */
23892 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23893 {
23894 tree mtype = NULL_TREE;
23896 if (d->name == 0)
23897 continue;
23899 switch ((enum multi_arg_type)d->flag)
23900 {
23901 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
23902 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
23903 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
23904 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
23905 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
23906 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
23907 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
23908 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
23909 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
23910 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
23911 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
23912 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
23913 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
23914 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
23915 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
23916 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
23917 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
23918 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
23919 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
23920 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
23921 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
23922 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
23923 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
23924 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
23925 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
23926 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
23927 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
23928 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
23929 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
23930 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
23931 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
23932 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
23933 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
23934 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
23935 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
23936 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
23937 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
23938 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
23939 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
23940 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
23941 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
23942 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
23943 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
23944 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
23945 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
23946 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
23947 case MULTI_ARG_UNKNOWN:
23948 default:
23949 gcc_unreachable ();
23952 if (mtype)
23953 def_builtin_const (d->mask, d->name, mtype, d->code);
23957 /* Internal method for ix86_init_builtins. */
23959 static void
23960 ix86_init_builtins_va_builtins_abi (void)
23962 tree ms_va_ref, sysv_va_ref;
23963 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
23964 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
23965 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
23966 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
23968 if (!TARGET_64BIT)
23969 return;
23970 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
23971 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
23972 ms_va_ref = build_reference_type (ms_va_list_type_node);
23973 sysv_va_ref =
23974 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
23976 fnvoid_va_end_ms =
23977 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23978 fnvoid_va_start_ms =
23979 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23980 fnvoid_va_end_sysv =
23981 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
23982 fnvoid_va_start_sysv =
23983 build_varargs_function_type_list (void_type_node, sysv_va_ref,
23984 NULL_TREE);
23985 fnvoid_va_copy_ms =
23986 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
23987 NULL_TREE);
23988 fnvoid_va_copy_sysv =
23989 build_function_type_list (void_type_node, sysv_va_ref,
23990 sysv_va_ref, NULL_TREE);
23992 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
23993 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
23994 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
23995 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
23996 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
23997 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
23998 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
23999 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24000 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
24001 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24002 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
24003 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24006 static void
24007 ix86_init_builtins (void)
24009 tree float128_type_node = make_node (REAL_TYPE);
24010 tree ftype, decl;
24012 /* The __float80 type. */
24013 if (TYPE_MODE (long_double_type_node) == XFmode)
24014 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
24015 "__float80");
24016 else
24018 /* The __float80 type. */
24019 tree float80_type_node = make_node (REAL_TYPE);
24021 TYPE_PRECISION (float80_type_node) = 80;
24022 layout_type (float80_type_node);
24023 (*lang_hooks.types.register_builtin_type) (float80_type_node,
24024 "__float80");
24027 /* The __float128 type. */
24028 TYPE_PRECISION (float128_type_node) = 128;
24029 layout_type (float128_type_node);
24030 (*lang_hooks.types.register_builtin_type) (float128_type_node,
24031 "__float128");
24033 /* TFmode support builtins. */
24034 ftype = build_function_type (float128_type_node, void_list_node);
24035 decl = add_builtin_function ("__builtin_infq", ftype,
24036 IX86_BUILTIN_INFQ, BUILT_IN_MD,
24037 NULL, NULL_TREE);
24038 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
24040 decl = add_builtin_function ("__builtin_huge_valq", ftype,
24041 IX86_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
24042 NULL, NULL_TREE);
24043 ix86_builtins[(int) IX86_BUILTIN_HUGE_VALQ] = decl;
24045 /* We will expand them to normal calls if SSE2 isn't available, since
24046 they are used by libgcc. */
24047 ftype = build_function_type_list (float128_type_node,
24048 float128_type_node,
24049 NULL_TREE);
24050 decl = add_builtin_function ("__builtin_fabsq", ftype,
24051 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
24052 "__fabstf2", NULL_TREE);
24053 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
24054 TREE_READONLY (decl) = 1;
24056 ftype = build_function_type_list (float128_type_node,
24057 float128_type_node,
24058 float128_type_node,
24059 NULL_TREE);
24060 decl = add_builtin_function ("__builtin_copysignq", ftype,
24061 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
24062 "__copysigntf3", NULL_TREE);
24063 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
24064 TREE_READONLY (decl) = 1;
24066 ix86_init_mmx_sse_builtins ();
24067 if (TARGET_64BIT)
24068 ix86_init_builtins_va_builtins_abi ();
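/* Illustration (a minimal sketch, not part of this file): the TFmode
   builtins registered above are usable directly in C:

     __float128
     mag (__float128 x)
     {
       return __builtin_fabsq (x);
     }

   As the comment above notes, when SSE2 is unavailable the call is
   expanded as an ordinary call to the libgcc routine __fabstf2.  */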
24071 /* Errors in the source file can cause expand_expr to return const0_rtx
24072 where we expect a vector. To avoid crashing, use one of the vector
24073 clear instructions. */
24074 static rtx
24075 safe_vector_operand (rtx x, enum machine_mode mode)
24077 if (x == const0_rtx)
24078 x = CONST0_RTX (mode);
24079 return x;
24082 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
24084 static rtx
24085 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
24087 rtx pat;
24088 tree arg0 = CALL_EXPR_ARG (exp, 0);
24089 tree arg1 = CALL_EXPR_ARG (exp, 1);
24090 rtx op0 = expand_normal (arg0);
24091 rtx op1 = expand_normal (arg1);
24092 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24093 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24094 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24096 if (VECTOR_MODE_P (mode0))
24097 op0 = safe_vector_operand (op0, mode0);
24098 if (VECTOR_MODE_P (mode1))
24099 op1 = safe_vector_operand (op1, mode1);
24101 if (optimize || !target
24102 || GET_MODE (target) != tmode
24103 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24104 target = gen_reg_rtx (tmode);
24106 if (GET_MODE (op1) == SImode && mode1 == TImode)
24108 rtx x = gen_reg_rtx (V4SImode);
24109 emit_insn (gen_sse2_loadd (x, op1));
24110 op1 = gen_lowpart (TImode, x);
24113 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
24114 op0 = copy_to_mode_reg (mode0, op0);
24115 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
24116 op1 = copy_to_mode_reg (mode1, op1);
24118 pat = GEN_FCN (icode) (target, op0, op1);
24119 if (! pat)
24120 return 0;
24122 emit_insn (pat);
24124 return target;
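/* Illustration (a minimal sketch, not part of this file): a typical
   two-operand builtin routed through ix86_expand_binop_builtin is
   __builtin_ia32_paddw128, which emmintrin.h wraps as _mm_add_epi16:

     #include <emmintrin.h>

     __m128i
     add_words (__m128i a, __m128i b)
     {
       return _mm_add_epi16 (a, b);
     }

   Compiled with -msse2, the two CALL_EXPR arguments become operands 1
   and 2 of the V8HImode add pattern and a single paddw results.  */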
24127 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
24129 static rtx
24130 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
24131 enum multi_arg_type m_type,
24132 enum rtx_code sub_code)
24134 rtx pat;
24135 int i;
24136 int nargs;
24137 bool comparison_p = false;
24138 bool tf_p = false;
24139 bool last_arg_constant = false;
24140 int num_memory = 0;
24141 struct {
24142 rtx op;
24143 enum machine_mode mode;
24144 } args[4];
24146 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24148 switch (m_type)
24150 case MULTI_ARG_3_SF:
24151 case MULTI_ARG_3_DF:
24152 case MULTI_ARG_3_DI:
24153 case MULTI_ARG_3_SI:
24154 case MULTI_ARG_3_SI_DI:
24155 case MULTI_ARG_3_HI:
24156 case MULTI_ARG_3_HI_SI:
24157 case MULTI_ARG_3_QI:
24158 case MULTI_ARG_3_PERMPS:
24159 case MULTI_ARG_3_PERMPD:
24160 nargs = 3;
24161 break;
24163 case MULTI_ARG_2_SF:
24164 case MULTI_ARG_2_DF:
24165 case MULTI_ARG_2_DI:
24166 case MULTI_ARG_2_SI:
24167 case MULTI_ARG_2_HI:
24168 case MULTI_ARG_2_QI:
24169 nargs = 2;
24170 break;
24172 case MULTI_ARG_2_DI_IMM:
24173 case MULTI_ARG_2_SI_IMM:
24174 case MULTI_ARG_2_HI_IMM:
24175 case MULTI_ARG_2_QI_IMM:
24176 nargs = 2;
24177 last_arg_constant = true;
24178 break;
24180 case MULTI_ARG_1_SF:
24181 case MULTI_ARG_1_DF:
24182 case MULTI_ARG_1_DI:
24183 case MULTI_ARG_1_SI:
24184 case MULTI_ARG_1_HI:
24185 case MULTI_ARG_1_QI:
24186 case MULTI_ARG_1_SI_DI:
24187 case MULTI_ARG_1_HI_DI:
24188 case MULTI_ARG_1_HI_SI:
24189 case MULTI_ARG_1_QI_DI:
24190 case MULTI_ARG_1_QI_SI:
24191 case MULTI_ARG_1_QI_HI:
24192 case MULTI_ARG_1_PH2PS:
24193 case MULTI_ARG_1_PS2PH:
24194 nargs = 1;
24195 break;
24197 case MULTI_ARG_2_SF_CMP:
24198 case MULTI_ARG_2_DF_CMP:
24199 case MULTI_ARG_2_DI_CMP:
24200 case MULTI_ARG_2_SI_CMP:
24201 case MULTI_ARG_2_HI_CMP:
24202 case MULTI_ARG_2_QI_CMP:
24203 nargs = 2;
24204 comparison_p = true;
24205 break;
24207 case MULTI_ARG_2_SF_TF:
24208 case MULTI_ARG_2_DF_TF:
24209 case MULTI_ARG_2_DI_TF:
24210 case MULTI_ARG_2_SI_TF:
24211 case MULTI_ARG_2_HI_TF:
24212 case MULTI_ARG_2_QI_TF:
24213 nargs = 2;
24214 tf_p = true;
24215 break;
24217 case MULTI_ARG_UNKNOWN:
24218 default:
24219 gcc_unreachable ();
24222 if (optimize || !target
24223 || GET_MODE (target) != tmode
24224 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24225 target = gen_reg_rtx (tmode);
24227 gcc_assert (nargs <= 4);
24229 for (i = 0; i < nargs; i++)
24231 tree arg = CALL_EXPR_ARG (exp, i);
24232 rtx op = expand_normal (arg);
24233 int adjust = (comparison_p) ? 1 : 0;
24234 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
24236 if (last_arg_constant && i == nargs-1)
24238 if (!CONST_INT_P (op))
24240 error ("the last argument must be an immediate");
24241 return gen_reg_rtx (tmode);
24244 else
24246 if (VECTOR_MODE_P (mode))
24247 op = safe_vector_operand (op, mode);
24249 /* If we aren't optimizing, only allow one memory operand to be
24250 generated. */
24251 if (memory_operand (op, mode))
24252 num_memory++;
24254 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
24256 if (optimize
24257 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
24258 || num_memory > 1)
24259 op = force_reg (mode, op);
24262 args[i].op = op;
24263 args[i].mode = mode;
24266 switch (nargs)
24268 case 1:
24269 pat = GEN_FCN (icode) (target, args[0].op);
24270 break;
24272 case 2:
24273 if (tf_p)
24274 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
24275 GEN_INT ((int)sub_code));
24276 else if (! comparison_p)
24277 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24278 else
24280 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
24281 args[0].op,
24282 args[1].op);
24284 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
24286 break;
24288 case 3:
24289 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24290 break;
24292 default:
24293 gcc_unreachable ();
24296 if (! pat)
24297 return 0;
24299 emit_insn (pat);
24300 return target;
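/* Illustration (a hedged sketch; SSE5 hardware was never shipped, so
   this only exercises the -msse5 path of this compiler): a
   MULTI_ARG_3_HI entry such as __builtin_ia32_pmacssww can be called
   directly on generic vectors:

     typedef short v8hi __attribute__ ((vector_size (16)));

     v8hi
     mac (v8hi a, v8hi b, v8hi c)
     {
       return __builtin_ia32_pmacssww (a, b, c);
     }

   For the _CMP descriptors, sub_code carries the rtx comparison code
   that gen_rtx_fmt_ee above folds into the pattern's extra operand.  */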
24303 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
24304 insns with vec_merge. */
24306 static rtx
24307 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
24308 rtx target)
24310 rtx pat;
24311 tree arg0 = CALL_EXPR_ARG (exp, 0);
24312 rtx op1, op0 = expand_normal (arg0);
24313 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24314 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24316 if (optimize || !target
24317 || GET_MODE (target) != tmode
24318 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24319 target = gen_reg_rtx (tmode);
24321 if (VECTOR_MODE_P (mode0))
24322 op0 = safe_vector_operand (op0, mode0);
24324 if ((optimize && !register_operand (op0, mode0))
24325 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24326 op0 = copy_to_mode_reg (mode0, op0);
24328 op1 = op0;
24329 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
24330 op1 = copy_to_mode_reg (mode0, op1);
24332 pat = GEN_FCN (icode) (target, op0, op1);
24333 if (! pat)
24334 return 0;
24335 emit_insn (pat);
24336 return target;
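/* Illustration (a minimal sketch, not part of this file): the
   *_VEC_MERGE descriptors cover scalar math insns such as sqrtss,
   whose builtin xmmintrin.h wraps as _mm_sqrt_ss:

     #include <xmmintrin.h>

     __m128
     root_low (__m128 v)
     {
       return _mm_sqrt_ss (v);
     }

   Only element 0 is computed; passing the input again as operand 2
   lets the vec_merge pattern copy elements 1..3 through unchanged.  */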
24339 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
24341 static rtx
24342 ix86_expand_sse_compare (const struct builtin_description *d,
24343 tree exp, rtx target, bool swap)
24345 rtx pat;
24346 tree arg0 = CALL_EXPR_ARG (exp, 0);
24347 tree arg1 = CALL_EXPR_ARG (exp, 1);
24348 rtx op0 = expand_normal (arg0);
24349 rtx op1 = expand_normal (arg1);
24350 rtx op2;
24351 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
24352 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
24353 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
24354 enum rtx_code comparison = d->comparison;
24356 if (VECTOR_MODE_P (mode0))
24357 op0 = safe_vector_operand (op0, mode0);
24358 if (VECTOR_MODE_P (mode1))
24359 op1 = safe_vector_operand (op1, mode1);
24361 /* Swap operands if we have a comparison that isn't available in
24362 hardware. */
24363 if (swap)
24365 rtx tmp = gen_reg_rtx (mode1);
24366 emit_move_insn (tmp, op1);
24367 op1 = op0;
24368 op0 = tmp;
24371 if (optimize || !target
24372 || GET_MODE (target) != tmode
24373 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
24374 target = gen_reg_rtx (tmode);
24376 if ((optimize && !register_operand (op0, mode0))
24377 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
24378 op0 = copy_to_mode_reg (mode0, op0);
24379 if ((optimize && !register_operand (op1, mode1))
24380 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
24381 op1 = copy_to_mode_reg (mode1, op1);
24383 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
24384 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
24385 if (! pat)
24386 return 0;
24387 emit_insn (pat);
24388 return target;
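/* Illustration (a hedged sketch, not part of this file): cmpps/cmppd
   encode only a subset of predicates, so e.g. the "greater than"
   builtin is registered with the LT comparison plus the _SWAP
   descriptor and arrives here with swap set:

     #include <xmmintrin.h>

     __m128
     mask_gt (__m128 a, __m128 b)
     {
       return _mm_cmpgt_ps (a, b);
     }

   After the operand swap this emits the "less than" form with the
   arguments reversed, which computes the same mask.  */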
24391 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
24393 static rtx
24394 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
24395 rtx target)
24397 rtx pat;
24398 tree arg0 = CALL_EXPR_ARG (exp, 0);
24399 tree arg1 = CALL_EXPR_ARG (exp, 1);
24400 rtx op0 = expand_normal (arg0);
24401 rtx op1 = expand_normal (arg1);
24402 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24403 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24404 enum rtx_code comparison = d->comparison;
24406 if (VECTOR_MODE_P (mode0))
24407 op0 = safe_vector_operand (op0, mode0);
24408 if (VECTOR_MODE_P (mode1))
24409 op1 = safe_vector_operand (op1, mode1);
24411 /* Swap operands if we have a comparison that isn't available in
24412 hardware. */
24413 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
24415 rtx tmp = op1;
24416 op1 = op0;
24417 op0 = tmp;
24420 target = gen_reg_rtx (SImode);
24421 emit_move_insn (target, const0_rtx);
24422 target = gen_rtx_SUBREG (QImode, target, 0);
24424 if ((optimize && !register_operand (op0, mode0))
24425 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24426 op0 = copy_to_mode_reg (mode0, op0);
24427 if ((optimize && !register_operand (op1, mode1))
24428 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24429 op1 = copy_to_mode_reg (mode1, op1);
24431 pat = GEN_FCN (d->icode) (op0, op1);
24432 if (! pat)
24433 return 0;
24434 emit_insn (pat);
24435 emit_insn (gen_rtx_SET (VOIDmode,
24436 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24437 gen_rtx_fmt_ee (comparison, QImode,
24438 SET_DEST (pat),
24439 const0_rtx)));
24441 return SUBREG_REG (target);
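/* Illustration (a minimal sketch, not part of this file): comiss sets
   EFLAGS rather than producing a mask, so its builtins return int:

     #include <xmmintrin.h>

     int
     low_equal (__m128 a, __m128 b)
     {
       return _mm_comieq_ss (a, b);
     }

   The expander materializes that int by emitting the comparison insn
   and then a setcc-style store of the flag bit into the QImode low
   part of a zeroed SImode register.  */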
24444 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
24446 static rtx
24447 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24448 rtx target)
24450 rtx pat;
24451 tree arg0 = CALL_EXPR_ARG (exp, 0);
24452 tree arg1 = CALL_EXPR_ARG (exp, 1);
24453 rtx op0 = expand_normal (arg0);
24454 rtx op1 = expand_normal (arg1);
24455 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24456 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24457 enum rtx_code comparison = d->comparison;
24459 if (VECTOR_MODE_P (mode0))
24460 op0 = safe_vector_operand (op0, mode0);
24461 if (VECTOR_MODE_P (mode1))
24462 op1 = safe_vector_operand (op1, mode1);
24464 target = gen_reg_rtx (SImode);
24465 emit_move_insn (target, const0_rtx);
24466 target = gen_rtx_SUBREG (QImode, target, 0);
24468 if ((optimize && !register_operand (op0, mode0))
24469 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24470 op0 = copy_to_mode_reg (mode0, op0);
24471 if ((optimize && !register_operand (op1, mode1))
24472 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24473 op1 = copy_to_mode_reg (mode1, op1);
24475 pat = GEN_FCN (d->icode) (op0, op1);
24476 if (! pat)
24477 return 0;
24478 emit_insn (pat);
24479 emit_insn (gen_rtx_SET (VOIDmode,
24480 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24481 gen_rtx_fmt_ee (comparison, QImode,
24482 SET_DEST (pat),
24483 const0_rtx)));
24485 return SUBREG_REG (target);
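/* Illustration (a minimal sketch, not part of this file): the SSE4.1
   ptest builtins use the same flag-extraction scheme, e.g.

     #include <smmintrin.h>

     int
     all_zero (__m128i mask, __m128i v)
     {
       return _mm_testz_si128 (mask, v);
     }

   ptest sets ZF when (mask & v) is all zeroes; d->comparison selects
   which flag the setcc reads back.  */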
24488 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
24490 static rtx
24491 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24492 tree exp, rtx target)
24494 rtx pat;
24495 tree arg0 = CALL_EXPR_ARG (exp, 0);
24496 tree arg1 = CALL_EXPR_ARG (exp, 1);
24497 tree arg2 = CALL_EXPR_ARG (exp, 2);
24498 tree arg3 = CALL_EXPR_ARG (exp, 3);
24499 tree arg4 = CALL_EXPR_ARG (exp, 4);
24500 rtx scratch0, scratch1;
24501 rtx op0 = expand_normal (arg0);
24502 rtx op1 = expand_normal (arg1);
24503 rtx op2 = expand_normal (arg2);
24504 rtx op3 = expand_normal (arg3);
24505 rtx op4 = expand_normal (arg4);
24506 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24508 tmode0 = insn_data[d->icode].operand[0].mode;
24509 tmode1 = insn_data[d->icode].operand[1].mode;
24510 modev2 = insn_data[d->icode].operand[2].mode;
24511 modei3 = insn_data[d->icode].operand[3].mode;
24512 modev4 = insn_data[d->icode].operand[4].mode;
24513 modei5 = insn_data[d->icode].operand[5].mode;
24514 modeimm = insn_data[d->icode].operand[6].mode;
24516 if (VECTOR_MODE_P (modev2))
24517 op0 = safe_vector_operand (op0, modev2);
24518 if (VECTOR_MODE_P (modev4))
24519 op2 = safe_vector_operand (op2, modev4);
24521 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24522 op0 = copy_to_mode_reg (modev2, op0);
24523 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
24524 op1 = copy_to_mode_reg (modei3, op1);
24525 if ((optimize && !register_operand (op2, modev4))
24526 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
24527 op2 = copy_to_mode_reg (modev4, op2);
24528 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
24529 op3 = copy_to_mode_reg (modei5, op3);
24531 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
24533 error ("the fifth argument must be an 8-bit immediate");
24534 return const0_rtx;
24537 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24539 if (optimize || !target
24540 || GET_MODE (target) != tmode0
24541 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24542 target = gen_reg_rtx (tmode0);
24544 scratch1 = gen_reg_rtx (tmode1);
24546 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
24548 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24550 if (optimize || !target
24551 || GET_MODE (target) != tmode1
24552 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24553 target = gen_reg_rtx (tmode1);
24555 scratch0 = gen_reg_rtx (tmode0);
24557 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
24559 else
24561 gcc_assert (d->flag);
24563 scratch0 = gen_reg_rtx (tmode0);
24564 scratch1 = gen_reg_rtx (tmode1);
24566 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
24569 if (! pat)
24570 return 0;
24572 emit_insn (pat);
24574 if (d->flag)
24576 target = gen_reg_rtx (SImode);
24577 emit_move_insn (target, const0_rtx);
24578 target = gen_rtx_SUBREG (QImode, target, 0);
24580 emit_insn
24581 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24582 gen_rtx_fmt_ee (EQ, QImode,
24583 gen_rtx_REG ((enum machine_mode) d->flag,
24584 FLAGS_REG),
24585 const0_rtx)));
24586 return SUBREG_REG (target);
24588 else
24589 return target;
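/* Illustration (a hedged sketch, not part of this file): the five
   arguments are the two strings, their explicit lengths, and the mode
   immediate, e.g.

     #include <smmintrin.h>

     int
     find (__m128i a, int la, __m128i b, int lb)
     {
       return _mm_cmpestri (a, la, b, lb,
                            _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH);
     }

   The index variant returns ECX (tmode0), the mask variant returns
   XMM0 (tmode1), and the flag variants read a single EFLAGS bit via
   d->flag.  */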
24593 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
24595 static rtx
24596 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24597 tree exp, rtx target)
24599 rtx pat;
24600 tree arg0 = CALL_EXPR_ARG (exp, 0);
24601 tree arg1 = CALL_EXPR_ARG (exp, 1);
24602 tree arg2 = CALL_EXPR_ARG (exp, 2);
24603 rtx scratch0, scratch1;
24604 rtx op0 = expand_normal (arg0);
24605 rtx op1 = expand_normal (arg1);
24606 rtx op2 = expand_normal (arg2);
24607 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24609 tmode0 = insn_data[d->icode].operand[0].mode;
24610 tmode1 = insn_data[d->icode].operand[1].mode;
24611 modev2 = insn_data[d->icode].operand[2].mode;
24612 modev3 = insn_data[d->icode].operand[3].mode;
24613 modeimm = insn_data[d->icode].operand[4].mode;
24615 if (VECTOR_MODE_P (modev2))
24616 op0 = safe_vector_operand (op0, modev2);
24617 if (VECTOR_MODE_P (modev3))
24618 op1 = safe_vector_operand (op1, modev3);
24620 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24621 op0 = copy_to_mode_reg (modev2, op0);
24622 if ((optimize && !register_operand (op1, modev3))
24623 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
24624 op1 = copy_to_mode_reg (modev3, op1);
24626 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
24628 error ("the third argument must be an 8-bit immediate");
24629 return const0_rtx;
24632 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24634 if (optimize || !target
24635 || GET_MODE (target) != tmode0
24636 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24637 target = gen_reg_rtx (tmode0);
24639 scratch1 = gen_reg_rtx (tmode1);
24641 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
24643 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24645 if (optimize || !target
24646 || GET_MODE (target) != tmode1
24647 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24648 target = gen_reg_rtx (tmode1);
24650 scratch0 = gen_reg_rtx (tmode0);
24652 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
24654 else
24656 gcc_assert (d->flag);
24658 scratch0 = gen_reg_rtx (tmode0);
24659 scratch1 = gen_reg_rtx (tmode1);
24661 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
24664 if (! pat)
24665 return 0;
24667 emit_insn (pat);
24669 if (d->flag)
24671 target = gen_reg_rtx (SImode);
24672 emit_move_insn (target, const0_rtx);
24673 target = gen_rtx_SUBREG (QImode, target, 0);
24675 emit_insn
24676 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24677 gen_rtx_fmt_ee (EQ, QImode,
24678 gen_rtx_REG ((enum machine_mode) d->flag,
24679 FLAGS_REG),
24680 const0_rtx)));
24681 return SUBREG_REG (target);
24683 else
24684 return target;
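/* Likewise for the implicit-length forms (a minimal sketch):

     #include <smmintrin.h>

     int
     contains (__m128i a, __m128i b)
     {
       return _mm_cmpistrc (a, b, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY);
     }

   _mm_cmpistrc is one of the flag variants: d->flag supplies the
   flags-register mode from which the result bit is read back as 0 or
   1.  */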
24687 /* Subroutine of ix86_expand_builtin to take care of insns with
24688 variable number of operands. */
24690 static rtx
24691 ix86_expand_args_builtin (const struct builtin_description *d,
24692 tree exp, rtx target)
24694 rtx pat, real_target;
24695 unsigned int i, nargs;
24696 unsigned int nargs_constant = 0;
24697 int num_memory = 0;
24698 struct
24700 rtx op;
24701 enum machine_mode mode;
24702 } args[4];
24703 bool last_arg_count = false;
24704 enum insn_code icode = d->icode;
24705 const struct insn_data *insn_p = &insn_data[icode];
24706 enum machine_mode tmode = insn_p->operand[0].mode;
24707 enum machine_mode rmode = VOIDmode;
24708 bool swap = false;
24709 enum rtx_code comparison = d->comparison;
24711 switch ((enum ix86_builtin_type) d->flag)
24713 case INT_FTYPE_V8SF_V8SF_PTEST:
24714 case INT_FTYPE_V4DI_V4DI_PTEST:
24715 case INT_FTYPE_V4DF_V4DF_PTEST:
24716 case INT_FTYPE_V4SF_V4SF_PTEST:
24717 case INT_FTYPE_V2DI_V2DI_PTEST:
24718 case INT_FTYPE_V2DF_V2DF_PTEST:
24719 return ix86_expand_sse_ptest (d, exp, target);
24720 case FLOAT128_FTYPE_FLOAT128:
24721 case FLOAT_FTYPE_FLOAT:
24722 case INT_FTYPE_INT:
24723 case UINT64_FTYPE_INT:
24724 case INT64_FTYPE_INT64:
24725 case INT64_FTYPE_V4SF:
24726 case INT64_FTYPE_V2DF:
24727 case INT_FTYPE_V16QI:
24728 case INT_FTYPE_V8QI:
24729 case INT_FTYPE_V8SF:
24730 case INT_FTYPE_V4DF:
24731 case INT_FTYPE_V4SF:
24732 case INT_FTYPE_V2DF:
24733 case V16QI_FTYPE_V16QI:
24734 case V8SI_FTYPE_V8SF:
24735 case V8SI_FTYPE_V4SI:
24736 case V8HI_FTYPE_V8HI:
24737 case V8HI_FTYPE_V16QI:
24738 case V8QI_FTYPE_V8QI:
24739 case V8SF_FTYPE_V8SF:
24740 case V8SF_FTYPE_V8SI:
24741 case V8SF_FTYPE_V4SF:
24742 case V4SI_FTYPE_V4SI:
24743 case V4SI_FTYPE_V16QI:
24744 case V4SI_FTYPE_V4SF:
24745 case V4SI_FTYPE_V8SI:
24746 case V4SI_FTYPE_V8HI:
24747 case V4SI_FTYPE_V4DF:
24748 case V4SI_FTYPE_V2DF:
24749 case V4HI_FTYPE_V4HI:
24750 case V4DF_FTYPE_V4DF:
24751 case V4DF_FTYPE_V4SI:
24752 case V4DF_FTYPE_V4SF:
24753 case V4DF_FTYPE_V2DF:
24754 case V4SF_FTYPE_V4SF:
24755 case V4SF_FTYPE_V4SI:
24756 case V4SF_FTYPE_V8SF:
24757 case V4SF_FTYPE_V4DF:
24758 case V4SF_FTYPE_V2DF:
24759 case V2DI_FTYPE_V2DI:
24760 case V2DI_FTYPE_V16QI:
24761 case V2DI_FTYPE_V8HI:
24762 case V2DI_FTYPE_V4SI:
24763 case V2DF_FTYPE_V2DF:
24764 case V2DF_FTYPE_V4SI:
24765 case V2DF_FTYPE_V4DF:
24766 case V2DF_FTYPE_V4SF:
24767 case V2DF_FTYPE_V2SI:
24768 case V2SI_FTYPE_V2SI:
24769 case V2SI_FTYPE_V4SF:
24770 case V2SI_FTYPE_V2SF:
24771 case V2SI_FTYPE_V2DF:
24772 case V2SF_FTYPE_V2SF:
24773 case V2SF_FTYPE_V2SI:
24774 nargs = 1;
24775 break;
24776 case V4SF_FTYPE_V4SF_VEC_MERGE:
24777 case V2DF_FTYPE_V2DF_VEC_MERGE:
24778 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
24779 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24780 case V16QI_FTYPE_V16QI_V16QI:
24781 case V16QI_FTYPE_V8HI_V8HI:
24782 case V8QI_FTYPE_V8QI_V8QI:
24783 case V8QI_FTYPE_V4HI_V4HI:
24784 case V8HI_FTYPE_V8HI_V8HI:
24785 case V8HI_FTYPE_V16QI_V16QI:
24786 case V8HI_FTYPE_V4SI_V4SI:
24787 case V8SF_FTYPE_V8SF_V8SF:
24788 case V8SF_FTYPE_V8SF_V8SI:
24789 case V4SI_FTYPE_V4SI_V4SI:
24790 case V4SI_FTYPE_V8HI_V8HI:
24791 case V4SI_FTYPE_V4SF_V4SF:
24792 case V4SI_FTYPE_V2DF_V2DF:
24793 case V4HI_FTYPE_V4HI_V4HI:
24794 case V4HI_FTYPE_V8QI_V8QI:
24795 case V4HI_FTYPE_V2SI_V2SI:
24796 case V4DF_FTYPE_V4DF_V4DF:
24797 case V4DF_FTYPE_V4DF_V4DI:
24798 case V4SF_FTYPE_V4SF_V4SF:
24799 case V4SF_FTYPE_V4SF_V4SI:
24800 case V4SF_FTYPE_V4SF_V2SI:
24801 case V4SF_FTYPE_V4SF_V2DF:
24802 case V4SF_FTYPE_V4SF_DI:
24803 case V4SF_FTYPE_V4SF_SI:
24804 case V2DI_FTYPE_V2DI_V2DI:
24805 case V2DI_FTYPE_V16QI_V16QI:
24806 case V2DI_FTYPE_V4SI_V4SI:
24807 case V2DI_FTYPE_V2DI_V16QI:
24808 case V2DI_FTYPE_V2DF_V2DF:
24809 case V2SI_FTYPE_V2SI_V2SI:
24810 case V2SI_FTYPE_V4HI_V4HI:
24811 case V2SI_FTYPE_V2SF_V2SF:
24812 case V2DF_FTYPE_V2DF_V2DF:
24813 case V2DF_FTYPE_V2DF_V4SF:
24814 case V2DF_FTYPE_V2DF_V2DI:
24815 case V2DF_FTYPE_V2DF_DI:
24816 case V2DF_FTYPE_V2DF_SI:
24817 case V2SF_FTYPE_V2SF_V2SF:
24818 case V1DI_FTYPE_V1DI_V1DI:
24819 case V1DI_FTYPE_V8QI_V8QI:
24820 case V1DI_FTYPE_V2SI_V2SI:
24821 if (comparison == UNKNOWN)
24822 return ix86_expand_binop_builtin (icode, exp, target);
24823 nargs = 2;
24824 break;
24825 case V4SF_FTYPE_V4SF_V4SF_SWAP:
24826 case V2DF_FTYPE_V2DF_V2DF_SWAP:
24827 gcc_assert (comparison != UNKNOWN);
24828 nargs = 2;
24829 swap = true;
24830 break;
24831 case V8HI_FTYPE_V8HI_V8HI_COUNT:
24832 case V8HI_FTYPE_V8HI_SI_COUNT:
24833 case V4SI_FTYPE_V4SI_V4SI_COUNT:
24834 case V4SI_FTYPE_V4SI_SI_COUNT:
24835 case V4HI_FTYPE_V4HI_V4HI_COUNT:
24836 case V4HI_FTYPE_V4HI_SI_COUNT:
24837 case V2DI_FTYPE_V2DI_V2DI_COUNT:
24838 case V2DI_FTYPE_V2DI_SI_COUNT:
24839 case V2SI_FTYPE_V2SI_V2SI_COUNT:
24840 case V2SI_FTYPE_V2SI_SI_COUNT:
24841 case V1DI_FTYPE_V1DI_V1DI_COUNT:
24842 case V1DI_FTYPE_V1DI_SI_COUNT:
24843 nargs = 2;
24844 last_arg_count = true;
24845 break;
24846 case UINT64_FTYPE_UINT64_UINT64:
24847 case UINT_FTYPE_UINT_UINT:
24848 case UINT_FTYPE_UINT_USHORT:
24849 case UINT_FTYPE_UINT_UCHAR:
24850 case UINT16_FTYPE_UINT16_INT:
24851 case UINT8_FTYPE_UINT8_INT:
24852 nargs = 2;
24853 break;
24854 case V2DI2TI_FTYPE_V2DI_INT:
24855 nargs = 2;
24856 rmode = V2DImode;
24857 nargs_constant = 1;
24858 break;
24859 case V8HI_FTYPE_V8HI_INT:
24860 case V8SF_FTYPE_V8SF_INT:
24861 case V4SI_FTYPE_V4SI_INT:
24862 case V4SI_FTYPE_V8SI_INT:
24863 case V4HI_FTYPE_V4HI_INT:
24864 case V4DF_FTYPE_V4DF_INT:
24865 case V4SF_FTYPE_V4SF_INT:
24866 case V4SF_FTYPE_V8SF_INT:
24867 case V2DI_FTYPE_V2DI_INT:
24868 case V2DF_FTYPE_V2DF_INT:
24869 case V2DF_FTYPE_V4DF_INT:
24870 nargs = 2;
24871 nargs_constant = 1;
24872 break;
24873 case V16QI_FTYPE_V16QI_V16QI_V16QI:
24874 case V8SF_FTYPE_V8SF_V8SF_V8SF:
24875 case V4DF_FTYPE_V4DF_V4DF_V4DF:
24876 case V4SF_FTYPE_V4SF_V4SF_V4SF:
24877 case V2DF_FTYPE_V2DF_V2DF_V2DF:
24878 nargs = 3;
24879 break;
24880 case V16QI_FTYPE_V16QI_V16QI_INT:
24881 case V8HI_FTYPE_V8HI_V8HI_INT:
24882 case V8SI_FTYPE_V8SI_V8SI_INT:
24883 case V8SI_FTYPE_V8SI_V4SI_INT:
24884 case V8SF_FTYPE_V8SF_V8SF_INT:
24885 case V8SF_FTYPE_V8SF_V4SF_INT:
24886 case V4SI_FTYPE_V4SI_V4SI_INT:
24887 case V4DF_FTYPE_V4DF_V4DF_INT:
24888 case V4DF_FTYPE_V4DF_V2DF_INT:
24889 case V4SF_FTYPE_V4SF_V4SF_INT:
24890 case V2DI_FTYPE_V2DI_V2DI_INT:
24891 case V2DF_FTYPE_V2DF_V2DF_INT:
24892 nargs = 3;
24893 nargs_constant = 1;
24894 break;
24895 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
24896 nargs = 3;
24897 rmode = V2DImode;
24898 nargs_constant = 1;
24899 break;
24900 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
24901 nargs = 3;
24902 rmode = DImode;
24903 nargs_constant = 1;
24904 break;
24905 case V2DI_FTYPE_V2DI_UINT_UINT:
24906 nargs = 3;
24907 nargs_constant = 2;
24908 break;
24909 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24910 nargs = 4;
24911 nargs_constant = 2;
24912 break;
24913 default:
24914 gcc_unreachable ();
24917 gcc_assert (nargs <= ARRAY_SIZE (args));
24919 if (comparison != UNKNOWN)
24921 gcc_assert (nargs == 2);
24922 return ix86_expand_sse_compare (d, exp, target, swap);
24925 if (rmode == VOIDmode || rmode == tmode)
24927 if (optimize
24928 || target == 0
24929 || GET_MODE (target) != tmode
24930 || ! (*insn_p->operand[0].predicate) (target, tmode))
24931 target = gen_reg_rtx (tmode);
24932 real_target = target;
24934 else
24936 target = gen_reg_rtx (rmode);
24937 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
24940 for (i = 0; i < nargs; i++)
24942 tree arg = CALL_EXPR_ARG (exp, i);
24943 rtx op = expand_normal (arg);
24944 enum machine_mode mode = insn_p->operand[i + 1].mode;
24945 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
24947 if (last_arg_count && (i + 1) == nargs)
24949 /* SIMD shift insns take either an 8-bit immediate or a register
24950 as the count, but the builtin functions take an int. If the
24951 count doesn't match, we put it in a register. */
24952 if (!match)
24954 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
24955 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
24956 op = copy_to_reg (op);
24959 else if ((nargs - i) <= nargs_constant)
24961 if (!match)
24962 switch (icode)
24964 case CODE_FOR_sse4_1_roundpd:
24965 case CODE_FOR_sse4_1_roundps:
24966 case CODE_FOR_sse4_1_roundsd:
24967 case CODE_FOR_sse4_1_roundss:
24968 case CODE_FOR_sse4_1_blendps:
24969 case CODE_FOR_avx_blendpd256:
24970 case CODE_FOR_avx_vpermilv4df:
24971 case CODE_FOR_avx_roundpd256:
24972 case CODE_FOR_avx_roundps256:
24973 error ("the last argument must be a 4-bit immediate");
24974 return const0_rtx;
24976 case CODE_FOR_sse4_1_blendpd:
24977 case CODE_FOR_avx_vpermilv2df:
24978 error ("the last argument must be a 2-bit immediate");
24979 return const0_rtx;
24981 case CODE_FOR_avx_vextractf128v4df:
24982 case CODE_FOR_avx_vextractf128v8sf:
24983 case CODE_FOR_avx_vextractf128v8si:
24984 case CODE_FOR_avx_vinsertf128v4df:
24985 case CODE_FOR_avx_vinsertf128v8sf:
24986 case CODE_FOR_avx_vinsertf128v8si:
24987 error ("the last argument must be a 1-bit immediate");
24988 return const0_rtx;
24990 case CODE_FOR_avx_cmpsdv2df3:
24991 case CODE_FOR_avx_cmpssv4sf3:
24992 case CODE_FOR_avx_cmppdv2df3:
24993 case CODE_FOR_avx_cmppsv4sf3:
24994 case CODE_FOR_avx_cmppdv4df3:
24995 case CODE_FOR_avx_cmppsv8sf3:
24996 error ("the last argument must be a 5-bit immediate");
24997 return const0_rtx;
24999 default:
25000 switch (nargs_constant)
25002 case 2:
25003 if ((nargs - i) == nargs_constant)
25005 error ("the next to last argument must be an 8-bit immediate");
25006 break;
25008 case 1:
25009 error ("the last argument must be an 8-bit immediate");
25010 break;
25011 default:
25012 gcc_unreachable ();
25014 return const0_rtx;
25017 else
25019 if (VECTOR_MODE_P (mode))
25020 op = safe_vector_operand (op, mode);
25022 /* If we aren't optimizing, only allow one memory operand to
25023 be generated. */
25024 if (memory_operand (op, mode))
25025 num_memory++;
25027 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
25029 if (optimize || !match || num_memory > 1)
25030 op = copy_to_mode_reg (mode, op);
25032 else
25034 op = copy_to_reg (op);
25035 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
25039 args[i].op = op;
25040 args[i].mode = mode;
25043 switch (nargs)
25045 case 1:
25046 pat = GEN_FCN (icode) (real_target, args[0].op);
25047 break;
25048 case 2:
25049 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
25050 break;
25051 case 3:
25052 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25053 args[2].op);
25054 break;
25055 case 4:
25056 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25057 args[2].op, args[3].op);
25058 break;
25059 default:
25060 gcc_unreachable ();
25063 if (! pat)
25064 return 0;
25066 emit_insn (pat);
25067 return target;
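/* Illustration (a hedged sketch, not part of this file): the
   nargs_constant handling is what turns a non-constant immediate into
   a compile-time diagnostic. With -msse4.1:

     #include <smmintrin.h>

     __m128
     round_dyn (__m128 v, int mode)
     {
       return _mm_round_ps (v, mode);
     }

   CODE_FOR_sse4_1_roundps needs a 4-bit immediate, so the variable
   "mode" fails the operand predicate and reaches the "the last
   argument must be a 4-bit immediate" error above.  */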
25070 /* Subroutine of ix86_expand_builtin to take care of special insns
25071 with variable number of operands. */
25073 static rtx
25074 ix86_expand_special_args_builtin (const struct builtin_description *d,
25075 tree exp, rtx target)
25077 tree arg;
25078 rtx pat, op;
25079 unsigned int i, nargs, arg_adjust, memory;
25080 struct
25082 rtx op;
25083 enum machine_mode mode;
25084 } args[2];
25085 enum insn_code icode = d->icode;
25086 bool last_arg_constant = false;
25087 const struct insn_data *insn_p = &insn_data[icode];
25088 enum machine_mode tmode = insn_p->operand[0].mode;
25089 enum { load, store } klass;
25091 switch ((enum ix86_special_builtin_type) d->flag)
25093 case VOID_FTYPE_VOID:
25094 emit_insn (GEN_FCN (icode) (target));
25095 return 0;
25096 case UINT64_FTYPE_VOID:
25097 nargs = 0;
25098 klass = load;
25099 memory = 0;
25100 break;
25101 case UINT64_FTYPE_PUNSIGNED:
25102 case V2DI_FTYPE_PV2DI:
25103 case V32QI_FTYPE_PCCHAR:
25104 case V16QI_FTYPE_PCCHAR:
25105 case V8SF_FTYPE_PCV4SF:
25106 case V8SF_FTYPE_PCFLOAT:
25107 case V4SF_FTYPE_PCFLOAT:
25108 case V4DF_FTYPE_PCV2DF:
25109 case V4DF_FTYPE_PCDOUBLE:
25110 case V2DF_FTYPE_PCDOUBLE:
25111 nargs = 1;
25112 klass = load;
25113 memory = 0;
25114 break;
25115 case VOID_FTYPE_PV2SF_V4SF:
25116 case VOID_FTYPE_PV4DI_V4DI:
25117 case VOID_FTYPE_PV2DI_V2DI:
25118 case VOID_FTYPE_PCHAR_V32QI:
25119 case VOID_FTYPE_PCHAR_V16QI:
25120 case VOID_FTYPE_PFLOAT_V8SF:
25121 case VOID_FTYPE_PFLOAT_V4SF:
25122 case VOID_FTYPE_PDOUBLE_V4DF:
25123 case VOID_FTYPE_PDOUBLE_V2DF:
25124 case VOID_FTYPE_PDI_DI:
25125 case VOID_FTYPE_PINT_INT:
25126 nargs = 1;
25127 klass = store;
25128 /* Reserve memory operand for target. */
25129 memory = ARRAY_SIZE (args);
25130 break;
25131 case V4SF_FTYPE_V4SF_PCV2SF:
25132 case V2DF_FTYPE_V2DF_PCDOUBLE:
25133 nargs = 2;
25134 klass = load;
25135 memory = 1;
25136 break;
25137 case V8SF_FTYPE_PCV8SF_V8SF:
25138 case V4DF_FTYPE_PCV4DF_V4DF:
25139 case V4SF_FTYPE_PCV4SF_V4SF:
25140 case V2DF_FTYPE_PCV2DF_V2DF:
25141 nargs = 2;
25142 klass = load;
25143 memory = 0;
25144 break;
25145 case VOID_FTYPE_PV8SF_V8SF_V8SF:
25146 case VOID_FTYPE_PV4DF_V4DF_V4DF:
25147 case VOID_FTYPE_PV4SF_V4SF_V4SF:
25148 case VOID_FTYPE_PV2DF_V2DF_V2DF:
25149 nargs = 2;
25150 klass = store;
25151 /* Reserve memory operand for target. */
25152 memory = ARRAY_SIZE (args);
25153 break;
25154 default:
25155 gcc_unreachable ();
25158 gcc_assert (nargs <= ARRAY_SIZE (args));
25160 if (klass == store)
25162 arg = CALL_EXPR_ARG (exp, 0);
25163 op = expand_normal (arg);
25164 gcc_assert (target == 0);
25165 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
25166 arg_adjust = 1;
25168 else
25170 arg_adjust = 0;
25171 if (optimize
25172 || target == 0
25173 || GET_MODE (target) != tmode
25174 || ! (*insn_p->operand[0].predicate) (target, tmode))
25175 target = gen_reg_rtx (tmode);
25178 for (i = 0; i < nargs; i++)
25180 enum machine_mode mode = insn_p->operand[i + 1].mode;
25181 bool match;
25183 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
25184 op = expand_normal (arg);
25185 match = (*insn_p->operand[i + 1].predicate) (op, mode);
25187 if (last_arg_constant && (i + 1) == nargs)
25189 if (!match)
25190 switch (icode)
25192 default:
25193 error ("the last argument must be an 8-bit immediate");
25194 return const0_rtx;
25197 else
25199 if (i == memory)
25201 /* This must be the memory operand. */
25202 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
25203 gcc_assert (GET_MODE (op) == mode
25204 || GET_MODE (op) == VOIDmode);
25206 else
25208 /* This must be a register. */
25209 if (VECTOR_MODE_P (mode))
25210 op = safe_vector_operand (op, mode);
25212 gcc_assert (GET_MODE (op) == mode
25213 || GET_MODE (op) == VOIDmode);
25214 op = copy_to_mode_reg (mode, op);
25218 args[i].op = op;
25219 args[i].mode = mode;
25222 switch (nargs)
25224 case 0:
25225 pat = GEN_FCN (icode) (target);
25226 break;
25227 case 1:
25228 pat = GEN_FCN (icode) (target, args[0].op);
25229 break;
25230 case 2:
25231 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25232 break;
25233 default:
25234 gcc_unreachable ();
25237 if (! pat)
25238 return 0;
25239 emit_insn (pat);
25240 return klass == store ? 0 : target;
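/* Illustration (a minimal sketch, not part of this file): the load and
   store classes correspond to builtins such as the unaligned moves:

     #include <xmmintrin.h>

     void
     copy4 (float *dst, const float *src)
     {
       _mm_storeu_ps (dst, _mm_loadu_ps (src));
     }

   _mm_loadu_ps is V4SF_FTYPE_PCFLOAT (klass == load; the pointer
   argument becomes the MEM), while _mm_storeu_ps is
   VOID_FTYPE_PFLOAT_V4SF (klass == store; the MEM becomes the target
   and the builtin returns nothing).  */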
25243 /* Return the integer constant in ARG. Constrain it to be in the range
25244 of the subparts of VEC_TYPE; issue an error if not. */
25246 static int
25247 get_element_number (tree vec_type, tree arg)
25249 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
25251 if (!host_integerp (arg, 1)
25252 || (elt = tree_low_cst (arg, 1), elt > max))
25254 error ("selector must be an integer constant in the range 0..%wi", max);
25255 return 0;
25258 return elt;
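/* For example (a minimal sketch): __builtin_ia32_vec_ext_v8hi operates
   on a vector with subparts 0..7, so

     _mm_extract_epi16 (v, 8)

   is rejected at compile time with "selector must be an integer
   constant in the range 0..7".  */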
25261 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25262 ix86_expand_vector_init. We DO have language-level syntax for this, in
25263 the form of (type){ init-list }. Except that since we can't place emms
25264 instructions from inside the compiler, we can't allow the use of MMX
25265 registers unless the user explicitly asks for it. So we do *not* define
25266 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25267 we have builtins invoked by mmintrin.h that give us license to emit
25268 these sorts of instructions. */
25270 static rtx
25271 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25273 enum machine_mode tmode = TYPE_MODE (type);
25274 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25275 int i, n_elt = GET_MODE_NUNITS (tmode);
25276 rtvec v = rtvec_alloc (n_elt);
25278 gcc_assert (VECTOR_MODE_P (tmode));
25279 gcc_assert (call_expr_nargs (exp) == n_elt);
25281 for (i = 0; i < n_elt; ++i)
25283 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25284 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
25287 if (!target || !register_operand (target, tmode))
25288 target = gen_reg_rtx (tmode);
25290 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
25291 return target;
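/* Illustration (a minimal sketch, not part of this file): mmintrin.h
   builds MMX vectors through these builtins, e.g.

     #include <mmintrin.h>

     __m64
     make4 (short a, short b, short c, short d)
     {
       return _mm_set_pi16 (d, c, b, a);
     }

   which reaches ix86_expand_vec_init_builtin with four scalar
   arguments, one per V4HI element.  */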
25294 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25295 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25296 had a language-level syntax for referencing vector elements. */
25298 static rtx
25299 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25301 enum machine_mode tmode, mode0;
25302 tree arg0, arg1;
25303 int elt;
25304 rtx op0;
25306 arg0 = CALL_EXPR_ARG (exp, 0);
25307 arg1 = CALL_EXPR_ARG (exp, 1);
25309 op0 = expand_normal (arg0);
25310 elt = get_element_number (TREE_TYPE (arg0), arg1);
25312 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25313 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25314 gcc_assert (VECTOR_MODE_P (mode0));
25316 op0 = force_reg (mode0, op0);
25318 if (optimize || !target || !register_operand (target, tmode))
25319 target = gen_reg_rtx (tmode);
25321 ix86_expand_vector_extract (true, target, op0, elt);
25323 return target;
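/* Illustration (a minimal sketch, not part of this file):

     #include <emmintrin.h>

     int
     third_word (__m128i v)
     {
       return _mm_extract_epi16 (v, 2);
     }

   expands through IX86_BUILTIN_VEC_EXT_V8HI; with SSE2 the whole
   function reduces to a single pextrw instruction.  */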
25326 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25327 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25328 a language-level syntax for referencing vector elements. */
25330 static rtx
25331 ix86_expand_vec_set_builtin (tree exp)
25333 enum machine_mode tmode, mode1;
25334 tree arg0, arg1, arg2;
25335 int elt;
25336 rtx op0, op1, target;
25338 arg0 = CALL_EXPR_ARG (exp, 0);
25339 arg1 = CALL_EXPR_ARG (exp, 1);
25340 arg2 = CALL_EXPR_ARG (exp, 2);
25342 tmode = TYPE_MODE (TREE_TYPE (arg0));
25343 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25344 gcc_assert (VECTOR_MODE_P (tmode));
25346 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25347 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
25348 elt = get_element_number (TREE_TYPE (arg0), arg2);
25350 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25351 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25353 op0 = force_reg (tmode, op0);
25354 op1 = force_reg (mode1, op1);
25356 /* OP0 is the source of these builtin functions and shouldn't be
25357 modified. Create a copy, use it, and return it as the target. */
25358 target = gen_reg_rtx (tmode);
25359 emit_move_insn (target, op0);
25360 ix86_expand_vector_set (true, target, op1, elt);
25362 return target;
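/* Illustration (a minimal sketch, not part of this file): with
   -msse4.1, smmintrin.h maps _mm_insert_epi32 onto
   __builtin_ia32_vec_set_v4si:

     #include <smmintrin.h>

     __m128i
     set_elt1 (__m128i v, int x)
     {
       return _mm_insert_epi32 (v, x, 1);
     }

   Note the copy the expander makes: the source vector is left
   unmodified and the updated copy is what gets returned.  */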
25365 /* Expand an expression EXP that calls a built-in function,
25366 with result going to TARGET if that's convenient
25367 (and in mode MODE if that's convenient).
25368 SUBTARGET may be used as the target for computing one of EXP's operands.
25369 IGNORE is nonzero if the value is to be ignored. */
25371 static rtx
25372 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25373 enum machine_mode mode ATTRIBUTE_UNUSED,
25374 int ignore ATTRIBUTE_UNUSED)
25376 const struct builtin_description *d;
25377 size_t i;
25378 enum insn_code icode;
25379 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25380 tree arg0, arg1, arg2;
25381 rtx op0, op1, op2, pat;
25382 enum machine_mode mode0, mode1, mode2;
25383 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25385 /* Determine whether the builtin function is available under the current ISA.
25386 Originally the builtin was not created if it wasn't applicable to the
25387 current ISA based on the command-line switches. With function-specific
25388 options, we need to check, in the context of the function making the call,
25389 whether it is supported. */
25390 if (ix86_builtins_isa[fcode].isa
25391 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
25393 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25394 NULL, NULL, false);
25396 if (!opts)
25397 error ("%qE needs unknown isa option", fndecl);
25398 else
25400 gcc_assert (opts != NULL);
25401 error ("%qE needs isa option %s", fndecl, opts);
25402 free (opts);
25404 return const0_rtx;
25407 switch (fcode)
25409 case IX86_BUILTIN_MASKMOVQ:
25410 case IX86_BUILTIN_MASKMOVDQU:
25411 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25412 ? CODE_FOR_mmx_maskmovq
25413 : CODE_FOR_sse2_maskmovdqu);
25414 /* Note the arg order is different from the operand order. */
25415 arg1 = CALL_EXPR_ARG (exp, 0);
25416 arg2 = CALL_EXPR_ARG (exp, 1);
25417 arg0 = CALL_EXPR_ARG (exp, 2);
25418 op0 = expand_normal (arg0);
25419 op1 = expand_normal (arg1);
25420 op2 = expand_normal (arg2);
25421 mode0 = insn_data[icode].operand[0].mode;
25422 mode1 = insn_data[icode].operand[1].mode;
25423 mode2 = insn_data[icode].operand[2].mode;
25425 op0 = force_reg (Pmode, op0);
25426 op0 = gen_rtx_MEM (mode1, op0);
25428 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
25429 op0 = copy_to_mode_reg (mode0, op0);
25430 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
25431 op1 = copy_to_mode_reg (mode1, op1);
25432 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
25433 op2 = copy_to_mode_reg (mode2, op2);
25434 pat = GEN_FCN (icode) (op0, op1, op2);
25435 if (! pat)
25436 return 0;
25437 emit_insn (pat);
25438 return 0;
25440 case IX86_BUILTIN_LDMXCSR:
25441 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25442 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25443 emit_move_insn (target, op0);
25444 emit_insn (gen_sse_ldmxcsr (target));
25445 return 0;
25447 case IX86_BUILTIN_STMXCSR:
25448 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25449 emit_insn (gen_sse_stmxcsr (target));
25450 return copy_to_mode_reg (SImode, target);
25452 case IX86_BUILTIN_CLFLUSH:
25453 arg0 = CALL_EXPR_ARG (exp, 0);
25454 op0 = expand_normal (arg0);
25455 icode = CODE_FOR_sse2_clflush;
25456 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
25457 op0 = copy_to_mode_reg (Pmode, op0);
25459 emit_insn (gen_sse2_clflush (op0));
25460 return 0;
25462 case IX86_BUILTIN_MONITOR:
25463 arg0 = CALL_EXPR_ARG (exp, 0);
25464 arg1 = CALL_EXPR_ARG (exp, 1);
25465 arg2 = CALL_EXPR_ARG (exp, 2);
25466 op0 = expand_normal (arg0);
25467 op1 = expand_normal (arg1);
25468 op2 = expand_normal (arg2);
25469 if (!REG_P (op0))
25470 op0 = copy_to_mode_reg (Pmode, op0);
25471 if (!REG_P (op1))
25472 op1 = copy_to_mode_reg (SImode, op1);
25473 if (!REG_P (op2))
25474 op2 = copy_to_mode_reg (SImode, op2);
25475 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
25476 return 0;
25478 case IX86_BUILTIN_MWAIT:
25479 arg0 = CALL_EXPR_ARG (exp, 0);
25480 arg1 = CALL_EXPR_ARG (exp, 1);
25481 op0 = expand_normal (arg0);
25482 op1 = expand_normal (arg1);
25483 if (!REG_P (op0))
25484 op0 = copy_to_mode_reg (SImode, op0);
25485 if (!REG_P (op1))
25486 op1 = copy_to_mode_reg (SImode, op1);
25487 emit_insn (gen_sse3_mwait (op0, op1));
25488 return 0;
25490 case IX86_BUILTIN_VEC_INIT_V2SI:
25491 case IX86_BUILTIN_VEC_INIT_V4HI:
25492 case IX86_BUILTIN_VEC_INIT_V8QI:
25493 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25495 case IX86_BUILTIN_VEC_EXT_V2DF:
25496 case IX86_BUILTIN_VEC_EXT_V2DI:
25497 case IX86_BUILTIN_VEC_EXT_V4SF:
25498 case IX86_BUILTIN_VEC_EXT_V4SI:
25499 case IX86_BUILTIN_VEC_EXT_V8HI:
25500 case IX86_BUILTIN_VEC_EXT_V2SI:
25501 case IX86_BUILTIN_VEC_EXT_V4HI:
25502 case IX86_BUILTIN_VEC_EXT_V16QI:
25503 return ix86_expand_vec_ext_builtin (exp, target);
25505 case IX86_BUILTIN_VEC_SET_V2DI:
25506 case IX86_BUILTIN_VEC_SET_V4SF:
25507 case IX86_BUILTIN_VEC_SET_V4SI:
25508 case IX86_BUILTIN_VEC_SET_V8HI:
25509 case IX86_BUILTIN_VEC_SET_V4HI:
25510 case IX86_BUILTIN_VEC_SET_V16QI:
25511 return ix86_expand_vec_set_builtin (exp);
25513 case IX86_BUILTIN_INFQ:
25514 case IX86_BUILTIN_HUGE_VALQ:
25516 REAL_VALUE_TYPE inf;
25517 rtx tmp;
25519 real_inf (&inf);
25520 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25522 tmp = validize_mem (force_const_mem (mode, tmp));
25524 if (target == 0)
25525 target = gen_reg_rtx (mode);
25527 emit_move_insn (target, tmp);
25528 return target;
25531 default:
25532 break;
25535 for (i = 0, d = bdesc_special_args;
25536 i < ARRAY_SIZE (bdesc_special_args);
25537 i++, d++)
25538 if (d->code == fcode)
25539 return ix86_expand_special_args_builtin (d, exp, target);
25541 for (i = 0, d = bdesc_args;
25542 i < ARRAY_SIZE (bdesc_args);
25543 i++, d++)
25544 if (d->code == fcode)
25545 switch (fcode)
25547 case IX86_BUILTIN_FABSQ:
25548 case IX86_BUILTIN_COPYSIGNQ:
25549 if (!TARGET_SSE2)
25550 /* Emit a normal call if SSE2 isn't available. */
25551 return expand_call (exp, target, ignore);
25552 default:
25553 return ix86_expand_args_builtin (d, exp, target);
25556 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25557 if (d->code == fcode)
25558 return ix86_expand_sse_comi (d, exp, target);
25560 for (i = 0, d = bdesc_pcmpestr;
25561 i < ARRAY_SIZE (bdesc_pcmpestr);
25562 i++, d++)
25563 if (d->code == fcode)
25564 return ix86_expand_sse_pcmpestr (d, exp, target);
25566 for (i = 0, d = bdesc_pcmpistr;
25567 i < ARRAY_SIZE (bdesc_pcmpistr);
25568 i++, d++)
25569 if (d->code == fcode)
25570 return ix86_expand_sse_pcmpistr (d, exp, target);
25572 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25573 if (d->code == fcode)
25574 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25575 (enum multi_arg_type)d->flag,
25576 d->comparison);
25578 gcc_unreachable ();
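/* Illustration (a hedged sketch, not part of this file) of the ISA
   check at the top of ix86_expand_builtin: with function-specific
   options a builtin can exist yet be unusable in a given function.
   Compiling without -msse4.1:

     typedef int v4si __attribute__ ((vector_size (16)));

     __attribute__ ((target ("sse4.1"))) v4si
     ok (v4si a, v4si b) { return __builtin_ia32_pmaxsd128 (a, b); }

     v4si
     bad (v4si a, v4si b) { return __builtin_ia32_pmaxsd128 (a, b); }

   "ok" expands normally; "bad" is diagnosed with something like
   "__builtin_ia32_pmaxsd128 needs isa option -msse4.1".  */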
25581 /* Returns a function decl for a vectorized version of the builtin function
25582 with builtin function code FN and result vector type TYPE_OUT, or NULL_TREE
25583 if it is not available. */
25585 static tree
25586 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
25587 tree type_in)
25589 enum machine_mode in_mode, out_mode;
25590 int in_n, out_n;
25592 if (TREE_CODE (type_out) != VECTOR_TYPE
25593 || TREE_CODE (type_in) != VECTOR_TYPE)
25594 return NULL_TREE;
25596 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25597 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25598 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25599 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25601 switch (fn)
25603 case BUILT_IN_SQRT:
25604 if (out_mode == DFmode && out_n == 2
25605 && in_mode == DFmode && in_n == 2)
25606 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25607 break;
25609 case BUILT_IN_SQRTF:
25610 if (out_mode == SFmode && out_n == 4
25611 && in_mode == SFmode && in_n == 4)
25612 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25613 break;
25615 case BUILT_IN_LRINT:
25616 if (out_mode == SImode && out_n == 4
25617 && in_mode == DFmode && in_n == 2)
25618 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25619 break;
25621 case BUILT_IN_LRINTF:
25622 if (out_mode == SImode && out_n == 4
25623 && in_mode == SFmode && in_n == 4)
25624 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25625 break;
25627 default:
25628 break;
25631 /* Dispatch to a handler for a vectorization library. */
25632 if (ix86_veclib_handler)
25633 return (*ix86_veclib_handler) ((enum built_in_function) fn, type_out,
25634 type_in);
25636 return NULL_TREE;
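/* Illustration (a minimal sketch, not part of this file): given

     #include <math.h>

     void
     roots (double *a, const double *b, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         a[i] = sqrt (b[i]);
     }

   at -O3 the vectorizer queries the hook above with BUILT_IN_SQRT and
   V2DF types on both sides, receives IX86_BUILTIN_SQRTPD, and the
   vectorized loop body uses sqrtpd.  */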
25639 /* Handler for an SVML-style interface to
25640 a library with vectorized intrinsics. */
25642 static tree
25643 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25645 char name[20];
25646 tree fntype, new_fndecl, args;
25647 unsigned arity;
25648 const char *bname;
25649 enum machine_mode el_mode, in_mode;
25650 int n, in_n;
25652 /* SVML is suitable only for unsafe math. */
25653 if (!flag_unsafe_math_optimizations)
25654 return NULL_TREE;
25656 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25657 n = TYPE_VECTOR_SUBPARTS (type_out);
25658 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25659 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25660 if (el_mode != in_mode
25661 || n != in_n)
25662 return NULL_TREE;
25664 switch (fn)
25666 case BUILT_IN_EXP:
25667 case BUILT_IN_LOG:
25668 case BUILT_IN_LOG10:
25669 case BUILT_IN_POW:
25670 case BUILT_IN_TANH:
25671 case BUILT_IN_TAN:
25672 case BUILT_IN_ATAN:
25673 case BUILT_IN_ATAN2:
25674 case BUILT_IN_ATANH:
25675 case BUILT_IN_CBRT:
25676 case BUILT_IN_SINH:
25677 case BUILT_IN_SIN:
25678 case BUILT_IN_ASINH:
25679 case BUILT_IN_ASIN:
25680 case BUILT_IN_COSH:
25681 case BUILT_IN_COS:
25682 case BUILT_IN_ACOSH:
25683 case BUILT_IN_ACOS:
25684 if (el_mode != DFmode || n != 2)
25685 return NULL_TREE;
25686 break;
25688 case BUILT_IN_EXPF:
25689 case BUILT_IN_LOGF:
25690 case BUILT_IN_LOG10F:
25691 case BUILT_IN_POWF:
25692 case BUILT_IN_TANHF:
25693 case BUILT_IN_TANF:
25694 case BUILT_IN_ATANF:
25695 case BUILT_IN_ATAN2F:
25696 case BUILT_IN_ATANHF:
25697 case BUILT_IN_CBRTF:
25698 case BUILT_IN_SINHF:
25699 case BUILT_IN_SINF:
25700 case BUILT_IN_ASINHF:
25701 case BUILT_IN_ASINF:
25702 case BUILT_IN_COSHF:
25703 case BUILT_IN_COSF:
25704 case BUILT_IN_ACOSHF:
25705 case BUILT_IN_ACOSF:
25706 if (el_mode != SFmode || n != 4)
25707 return NULL_TREE;
25708 break;
25710 default:
25711 return NULL_TREE;
25714 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25716 if (fn == BUILT_IN_LOGF)
25717 strcpy (name, "vmlsLn4");
25718 else if (fn == BUILT_IN_LOG)
25719 strcpy (name, "vmldLn2");
25720 else if (n == 4)
25722 sprintf (name, "vmls%s", bname+10);
25723 name[strlen (name)-1] = '4';
25725 else
25726 sprintf (name, "vmld%s2", bname+10);
25728 /* Convert to uppercase. */
25729 name[4] &= ~0x20;
25731 arity = 0;
25732 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25733 args = TREE_CHAIN (args))
25734 arity++;
25736 if (arity == 1)
25737 fntype = build_function_type_list (type_out, type_in, NULL);
25738 else
25739 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25741 /* Build a function declaration for the vectorized function. */
25742 new_fndecl = build_decl (BUILTINS_LOCATION,
25743 FUNCTION_DECL, get_identifier (name), fntype);
25744 TREE_PUBLIC (new_fndecl) = 1;
25745 DECL_EXTERNAL (new_fndecl) = 1;
25746 DECL_IS_NOVOPS (new_fndecl) = 1;
25747 TREE_READONLY (new_fndecl) = 1;
25749 return new_fndecl;
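/* A worked trace of the mangling above (a sketch, assuming
   -mveclibabi=svml and -funsafe-math-optimizations): for BUILT_IN_SINF,
   bname is "__builtin_sinf", so bname+10 is "sinf"; sprintf produces
   "vmlssinf"; overwriting the final character with the vector width
   gives "vmlssin4"; and clearing bit 0x20 of name[4] capitalizes the
   second 's', yielding the SVML entry point "vmlsSin4".  */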
25752 /* Handler for an ACML-style interface to
25753 a library with vectorized intrinsics. */
25755 static tree
25756 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
25758 char name[20] = "__vr.._";
25759 tree fntype, new_fndecl, args;
25760 unsigned arity;
25761 const char *bname;
25762 enum machine_mode el_mode, in_mode;
25763 int n, in_n;
25765 /* ACML is 64-bit only and suitable only for unsafe math, as it does
25766 not correctly support parts of IEEE arithmetic with the required
25767 precision, such as denormals. */
25768 if (!TARGET_64BIT
25769 || !flag_unsafe_math_optimizations)
25770 return NULL_TREE;
25772 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25773 n = TYPE_VECTOR_SUBPARTS (type_out);
25774 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25775 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25776 if (el_mode != in_mode
25777 || n != in_n)
25778 return NULL_TREE;
25780 switch (fn)
25782 case BUILT_IN_SIN:
25783 case BUILT_IN_COS:
25784 case BUILT_IN_EXP:
25785 case BUILT_IN_LOG:
25786 case BUILT_IN_LOG2:
25787 case BUILT_IN_LOG10:
25788 name[4] = 'd';
25789 name[5] = '2';
25790 if (el_mode != DFmode
25791 || n != 2)
25792 return NULL_TREE;
25793 break;
25795 case BUILT_IN_SINF:
25796 case BUILT_IN_COSF:
25797 case BUILT_IN_EXPF:
25798 case BUILT_IN_POWF:
25799 case BUILT_IN_LOGF:
25800 case BUILT_IN_LOG2F:
25801 case BUILT_IN_LOG10F:
25802 name[4] = 's';
25803 name[5] = '4';
25804 if (el_mode != SFmode
25805 || n != 4)
25806 return NULL_TREE;
25807 break;
25809 default:
25810 return NULL_TREE;
25813 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25814 sprintf (name + 7, "%s", bname+10);
25816 arity = 0;
25817 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25818 args = TREE_CHAIN (args))
25819 arity++;
25821 if (arity == 1)
25822 fntype = build_function_type_list (type_out, type_in, NULL);
25823 else
25824 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25826 /* Build a function declaration for the vectorized function. */
25827 new_fndecl = build_decl (BUILTINS_LOCATION,
25828 FUNCTION_DECL, get_identifier (name), fntype);
25829 TREE_PUBLIC (new_fndecl) = 1;
25830 DECL_EXTERNAL (new_fndecl) = 1;
25831 DECL_IS_NOVOPS (new_fndecl) = 1;
25832 TREE_READONLY (new_fndecl) = 1;
25834 return new_fndecl;
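/* A worked trace of the mangling above (a sketch, assuming
   -mveclibabi=acml): for BUILT_IN_SIN the template "__vr.._" becomes
   "__vrd2_" (double elements, two lanes), and appending bname+10
   ("sin" from "__builtin_sin") yields the ACML vector entry point
   "__vrd2_sin".  */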
25838 /* Returns a decl of a function that implements conversion of an integer vector
25839 into a floating-point vector, or vice-versa. TYPE is the type of the integer
25840 side of the conversion.
25841 Return NULL_TREE if it is not available. */
25843 static tree
25844 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
25846 if (TREE_CODE (type) != VECTOR_TYPE
25847 /* There are only conversions from/to signed integers. */
25848 || TYPE_UNSIGNED (TREE_TYPE (type)))
25849 return NULL_TREE;
25851 switch (code)
25853 case FLOAT_EXPR:
25854 switch (TYPE_MODE (type))
25856 case V4SImode:
25857 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
25858 default:
25859 return NULL_TREE;
25862 case FIX_TRUNC_EXPR:
25863 switch (TYPE_MODE (type))
25865 case V4SImode:
25866 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
25867 default:
25868 return NULL_TREE;
25870 default:
25871 return NULL_TREE;
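/* Example (annotation): given a signed int -> float loop such as

     int i, n[4]; float f[4];
     for (i = 0; i < 4; i++)
       f[i] = (float) n[i];

   the vectorizer asks for FLOAT_EXPR on a V4SImode vector type and
   gets the CVTDQ2PS builtin, i.e. a single cvtdq2ps insn; the
   reverse FIX_TRUNC_EXPR direction maps to cvttps2dq. */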
25876 /* Return a decl of a target-specific builtin that implements the
25877 reciprocal of the function, or NULL_TREE if not available. */
25879 static tree
25880 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25881 bool sqrt ATTRIBUTE_UNUSED)
25883 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
25884 && flag_finite_math_only && !flag_trapping_math
25885 && flag_unsafe_math_optimizations))
25886 return NULL_TREE;
25888 if (md_fn)
25889 /* Machine dependent builtins. */
25890 switch (fn)
25892 /* Vectorized version of sqrt to rsqrt conversion. */
25893 case IX86_BUILTIN_SQRTPS_NR:
25894 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25896 default:
25897 return NULL_TREE;
25899 else
25900 /* Normal builtins. */
25901 switch (fn)
25903 /* Sqrt to rsqrt conversion. */
25904 case BUILT_IN_SQRTF:
25905 return ix86_builtins[IX86_BUILTIN_RSQRTF];
25907 default:
25908 return NULL_TREE;
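/* Sketch of the intent (annotation): with -mrecip and the unsafe,
   finite, non-trapping math flags checked above, the reciprocal
   pass elsewhere in the compiler is expected to rewrite, e.g.,

     float r = 1.0f / sqrtf (x);

   using the RSQRTF builtin (rsqrtss), with IX86_BUILTIN_SQRTPS_NR
   mapping to the vector RSQRTPS_NR variant (the _NR suffix
   presumably denoting Newton-Raphson refinement). This hook only
   reports which builtin to use. */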
25912 /* Store OPERAND to memory after reload is completed. This means
25913 that we can't easily use assign_stack_local. */
25914 rtx
25915 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25917 rtx result;
25919 gcc_assert (reload_completed);
25920 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
25922 result = gen_rtx_MEM (mode,
25923 gen_rtx_PLUS (Pmode,
25924 stack_pointer_rtx,
25925 GEN_INT (-RED_ZONE_SIZE)));
25926 emit_move_insn (result, operand);
25928 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
25930 switch (mode)
25932 case HImode:
25933 case SImode:
25934 operand = gen_lowpart (DImode, operand);
25935 /* FALLTHRU */
25936 case DImode:
25937 emit_insn (
25938 gen_rtx_SET (VOIDmode,
25939 gen_rtx_MEM (DImode,
25940 gen_rtx_PRE_DEC (DImode,
25941 stack_pointer_rtx)),
25942 operand));
25943 break;
25944 default:
25945 gcc_unreachable ();
25947 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25949 else
25951 switch (mode)
25953 case DImode:
25955 rtx operands[2];
25956 split_di (&operand, 1, operands, operands + 1);
25957 emit_insn (
25958 gen_rtx_SET (VOIDmode,
25959 gen_rtx_MEM (SImode,
25960 gen_rtx_PRE_DEC (Pmode,
25961 stack_pointer_rtx)),
25962 operands[1]));
25963 emit_insn (
25964 gen_rtx_SET (VOIDmode,
25965 gen_rtx_MEM (SImode,
25966 gen_rtx_PRE_DEC (Pmode,
25967 stack_pointer_rtx)),
25968 operands[0]));
25970 break;
25971 case HImode:
25972 /* Store HImodes as SImodes. */
25973 operand = gen_lowpart (SImode, operand);
25974 /* FALLTHRU */
25975 case SImode:
25976 emit_insn (
25977 gen_rtx_SET (VOIDmode,
25978 gen_rtx_MEM (GET_MODE (operand),
25979 gen_rtx_PRE_DEC (SImode,
25980 stack_pointer_rtx)),
25981 operand));
25982 break;
25983 default:
25984 gcc_unreachable ();
25986 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25988 return result;
25991 /* Free operand from the memory. */
25992 void
25993 ix86_free_from_memory (enum machine_mode mode)
25995 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
25997 int size;
25999 if (mode == DImode || TARGET_64BIT)
26000 size = 8;
26001 else
26002 size = 4;
26003 /* Use LEA to deallocate stack space. In peephole2 it will be converted
26004 to a pop or add instruction if registers are available. */
26005 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26006 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
26007 GEN_INT (size))));
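/* Usage sketch (annotation; hypothetical post-reload context) of
   pairing the two helpers:

     rtx mem = ix86_force_to_memory (DImode, operand);
     ... emit insns that use MEM ...
     ix86_free_from_memory (DImode);

   On red-zone targets the store goes below the stack pointer and
   nothing needs freeing; otherwise the pushes emitted by
   ix86_force_to_memory are undone by the single stack-pointer
   adjustment here. */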
26011 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
26012 QImode must go into class Q_REGS.
26013 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
26014 movdf to do mem-to-mem moves through integer regs. */
26015 enum reg_class
26016 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
26018 enum machine_mode mode = GET_MODE (x);
26020 /* We're only allowed to return a subclass of CLASS. Many of the
26021 following checks fail for NO_REGS, so eliminate that early. */
26022 if (regclass == NO_REGS)
26023 return NO_REGS;
26025 /* All classes can load zeros. */
26026 if (x == CONST0_RTX (mode))
26027 return regclass;
26029 /* Force constants into memory if we are loading a (nonzero) constant into
26030 an MMX or SSE register. This is because there are no MMX/SSE instructions
26031 to load from a constant. */
26032 if (CONSTANT_P (x)
26033 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
26034 return NO_REGS;
26036 /* Prefer SSE regs only, if we can use them for math. */
26037 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
26038 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
26040 /* Floating-point constants need more complex checks. */
26041 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
26043 /* General regs can load everything. */
26044 if (reg_class_subset_p (regclass, GENERAL_REGS))
26045 return regclass;
26047 /* Floats can load 0 and 1 plus some others. Note that we eliminated
26048 zero above. We only want to wind up preferring 80387 registers if
26049 we plan on doing computation with them. */
26050 if (TARGET_80387
26051 && standard_80387_constant_p (x))
26053 /* Limit class to non-sse. */
26054 if (regclass == FLOAT_SSE_REGS)
26055 return FLOAT_REGS;
26056 if (regclass == FP_TOP_SSE_REGS)
26057 return FP_TOP_REG;
26058 if (regclass == FP_SECOND_SSE_REGS)
26059 return FP_SECOND_REG;
26060 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
26061 return regclass;
26064 return NO_REGS;
26067 /* Generally when we see PLUS here, it's the function invariant
26068 (plus soft-fp const_int). Which can only be computed into general
26069 regs. */
26070 if (GET_CODE (x) == PLUS)
26071 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
26073 /* QImode constants are easy to load, but non-constant QImode data
26074 must go into Q_REGS. */
26075 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
26077 if (reg_class_subset_p (regclass, Q_REGS))
26078 return regclass;
26079 if (reg_class_subset_p (Q_REGS, regclass))
26080 return Q_REGS;
26081 return NO_REGS;
26084 return regclass;
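/* Examples (annotation): reloading a nonzero FP constant such as
   1.0 into an SSE class returns NO_REGS above, so the constant is
   left in memory; there is no SSE load-immediate. A non-constant
   QImode value in GENERAL_REGS is narrowed to Q_REGS so that byte
   access is possible on 32-bit targets. */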
26087 /* Discourage putting floating-point values in SSE registers unless
26088 SSE math is being used, and likewise for the 387 registers. */
26089 enum reg_class
26090 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
26092 enum machine_mode mode = GET_MODE (x);
26094 /* Restrict the output reload class to the register bank that we are doing
26095 math on. If we would like not to return a subset of CLASS, reject this
26096 alternative: if reload cannot do this, it will still use its choice. */
26097 mode = GET_MODE (x);
26098 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
26099 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
26101 if (X87_FLOAT_MODE_P (mode))
26103 if (regclass == FP_TOP_SSE_REGS)
26104 return FP_TOP_REG;
26105 else if (regclass == FP_SECOND_SSE_REGS)
26106 return FP_SECOND_REG;
26107 else
26108 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
26111 return regclass;
26114 static enum reg_class
26115 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
26116 enum machine_mode mode,
26117 secondary_reload_info *sri ATTRIBUTE_UNUSED)
26119 /* QImode spills from non-QI registers require an
26120 intermediate register on 32-bit targets. */
26121 if (!in_p && mode == QImode && !TARGET_64BIT
26122 && (rclass == GENERAL_REGS
26123 || rclass == LEGACY_REGS
26124 || rclass == INDEX_REGS))
26126 int regno;
26128 if (REG_P (x))
26129 regno = REGNO (x);
26130 else
26131 regno = -1;
26133 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
26134 regno = true_regnum (x);
26136 /* Return Q_REGS if the operand is in memory. */
26137 if (regno == -1)
26138 return Q_REGS;
26141 return NO_REGS;
26144 /* If we are copying between general and FP registers, we need a memory
26145 location. The same is true for SSE and MMX registers.
26147 To optimize register_move_cost performance, allow an inline variant.
26149 The macro can't work reliably when one of the CLASSES is a class containing
26150 registers from multiple units (SSE, MMX, integer). We avoid this by never
26151 combining those units in a single alternative in the machine description.
26152 Ensure that this constraint holds to avoid unexpected surprises.
26154 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
26155 enforce these sanity checks. */
26157 static inline int
26158 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26159 enum machine_mode mode, int strict)
26161 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
26162 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
26163 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
26164 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
26165 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
26166 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
26168 gcc_assert (!strict);
26169 return true;
26172 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
26173 return true;
26175 /* ??? This is a lie. We do have moves between mmx/general and between
26176 mmx/sse2. But by saying we need secondary memory we discourage the
26177 register allocator from using the mmx registers unless needed. */
26178 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
26179 return true;
26181 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26183 /* SSE1 doesn't have any direct moves from other classes. */
26184 if (!TARGET_SSE2)
26185 return true;
26187 /* If the target says that inter-unit moves are more expensive
26188 than moving through memory, then don't generate them. */
26189 if (!TARGET_INTER_UNIT_MOVES)
26190 return true;
26192 /* Between SSE and general, we have moves no larger than word size. */
26193 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
26194 return true;
26197 return false;
26200 int
26201 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26202 enum machine_mode mode, int strict)
26204 return inline_secondary_memory_needed (class1, class2, mode, strict);
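/* Example (annotation): a DFmode copy between FLOAT_REGS (x87) and
   SSE_REGS always needs secondary memory, so reload bounces the
   value through a stack slot. The same holds for SSE <-> general
   moves on SSE1-only targets, when TARGET_INTER_UNIT_MOVES is
   disabled, or when the mode is wider than a word. */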
26207 /* Return true if the registers in CLASS cannot represent the change from
26208 modes FROM to TO. */
26210 bool
26211 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26212 enum reg_class regclass)
26214 if (from == to)
26215 return false;
26217 /* x87 registers can't do subreg at all, as all values are reformatted
26218 to extended precision. */
26219 if (MAYBE_FLOAT_CLASS_P (regclass))
26220 return true;
26222 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26224 /* Vector registers do not support QI or HImode loads. If we don't
26225 disallow a change to these modes, reload will assume it's ok to
26226 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26227 the vec_dupv4hi pattern. */
26228 if (GET_MODE_SIZE (from) < 4)
26229 return true;
26231 /* Vector registers do not support subreg with nonzero offsets, which
26232 are otherwise valid for integer registers. Since we can't see
26233 whether we have a nonzero offset from here, prohibit all
26234 nonparadoxical subregs changing size. */
26235 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26236 return true;
26239 return false;
26242 /* Return the cost of moving data of mode M between a
26243 register and memory. A value of 2 is the default; this cost is
26244 relative to those in `REGISTER_MOVE_COST'.
26246 This function is used extensively by register_move_cost, which is used to
26247 build tables at startup, so make it inline in this case.
26248 When IN is 2, return the maximum of the in and out move costs.
26250 If moving between registers and memory is more expensive than
26251 between two registers, you should define this macro to express the
26252 relative cost.
26254 Also model the increased cost of moving QImode registers in non
26255 Q_REGS classes. */
26257 static inline int
26258 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
26259 int in)
26261 int cost;
26262 if (FLOAT_CLASS_P (regclass))
26264 int index;
26265 switch (mode)
26267 case SFmode:
26268 index = 0;
26269 break;
26270 case DFmode:
26271 index = 1;
26272 break;
26273 case XFmode:
26274 index = 2;
26275 break;
26276 default:
26277 return 100;
26279 if (in == 2)
26280 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26281 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
26283 if (SSE_CLASS_P (regclass))
26285 int index;
26286 switch (GET_MODE_SIZE (mode))
26288 case 4:
26289 index = 0;
26290 break;
26291 case 8:
26292 index = 1;
26293 break;
26294 case 16:
26295 index = 2;
26296 break;
26297 default:
26298 return 100;
26300 if (in == 2)
26301 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26302 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
26304 if (MMX_CLASS_P (regclass))
26306 int index;
26307 switch (GET_MODE_SIZE (mode))
26309 case 4:
26310 index = 0;
26311 break;
26312 case 8:
26313 index = 1;
26314 break;
26315 default:
26316 return 100;
26318 if (in == 2)
26319 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26320 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
26322 switch (GET_MODE_SIZE (mode))
26324 case 1:
26325 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26327 if (!in)
26328 return ix86_cost->int_store[0];
26329 if (TARGET_PARTIAL_REG_DEPENDENCY
26330 && optimize_function_for_speed_p (cfun))
26331 cost = ix86_cost->movzbl_load;
26332 else
26333 cost = ix86_cost->int_load[0];
26334 if (in == 2)
26335 return MAX (cost, ix86_cost->int_store[0]);
26336 return cost;
26338 else
26340 if (in == 2)
26341 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26342 if (in)
26343 return ix86_cost->movzbl_load;
26344 else
26345 return ix86_cost->int_store[0] + 4;
26347 break;
26348 case 2:
26349 if (in == 2)
26350 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26351 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26352 default:
26353 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
26354 if (mode == TFmode)
26355 mode = XFmode;
26356 if (in == 2)
26357 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26358 else if (in)
26359 cost = ix86_cost->int_load[2];
26360 else
26361 cost = ix86_cost->int_store[2];
26362 return (cost * (((int) GET_MODE_SIZE (mode)
26363 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
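/* Worked example (annotation): on a 32-bit target (UNITS_PER_WORD
   == 4) a DImode load into GENERAL_REGS takes the default case
   above and costs

     ix86_cost->int_load[2] * ((8 + 4 - 1) / 4)

   i.e. two word loads, while an SFmode load into SSE_REGS is just
   sse_load[0]. */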
26367 int
26368 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
26370 return inline_memory_move_cost (mode, regclass, in);
26374 /* Return the cost of moving data from a register in class CLASS1 to
26375 one in class CLASS2.
26377 It is not required that the cost always equal 2 when FROM is the same as TO;
26378 on some machines it is expensive to move between registers if they are not
26379 general registers. */
26381 int
26382 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
26383 enum reg_class class2)
26385 /* In case we require secondary memory, compute cost of the store followed
26386 by load. In order to avoid bad register allocation choices, we need
26387 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26389 if (inline_secondary_memory_needed (class1, class2, mode, 0))
26391 int cost = 1;
26393 cost += inline_memory_move_cost (mode, class1, 2);
26394 cost += inline_memory_move_cost (mode, class2, 2);
26396 /* In case of copying from a general purpose register we may emit multiple
26397 stores followed by a single load, causing a memory size mismatch stall.
26398 Count this as an arbitrarily high cost of 20. */
26399 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
26400 cost += 20;
26402 /* In the case of FP/MMX moves, the registers actually overlap, and we
26403 have to switch modes in order to treat them differently. */
26404 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
26405 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
26406 cost += 20;
26408 return cost;
26411 /* Moves between SSE/MMX and integer unit are expensive. */
26412 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
26413 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26415 /* ??? By keeping returned value relatively high, we limit the number
26416 of moves between integer and MMX/SSE registers for all targets.
26417 Additionally, high value prevents problem with x86_modes_tieable_p(),
26418 where integer modes in MMX/SSE registers are not tieable
26419 because of missing QImode and HImode moves to, from or between
26420 MMX/SSE registers. */
26421 return MAX (8, ix86_cost->mmxsse_to_integer);
26423 if (MAYBE_FLOAT_CLASS_P (class1))
26424 return ix86_cost->fp_move;
26425 if (MAYBE_SSE_CLASS_P (class1))
26426 return ix86_cost->sse_move;
26427 if (MAYBE_MMX_CLASS_P (class1))
26428 return ix86_cost->mmx_move;
26429 return 2;
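/* Example (annotation): when inline_secondary_memory_needed holds,
   the move is priced as a round trip through memory,

     1 + inline_memory_move_cost (mode, class1, 2)
       + inline_memory_move_cost (mode, class2, 2)

   plus the 20-point penalties above; a direct SSE/MMX <-> integer
   move is instead MAX (8, ix86_cost->mmxsse_to_integer). */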
26432 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
26434 bool
26435 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26437 /* Flags, and only flags, can hold CCmode values. */
26438 if (CC_REGNO_P (regno))
26439 return GET_MODE_CLASS (mode) == MODE_CC;
26440 if (GET_MODE_CLASS (mode) == MODE_CC
26441 || GET_MODE_CLASS (mode) == MODE_RANDOM
26442 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26443 return 0;
26444 if (FP_REGNO_P (regno))
26445 return VALID_FP_MODE_P (mode);
26446 if (SSE_REGNO_P (regno))
26448 /* We implement the move patterns for all vector modes into and
26449 out of SSE registers, even when no operation instructions
26450 are available. OImode move is available only when AVX is
26451 enabled. */
26452 return ((TARGET_AVX && mode == OImode)
26453 || VALID_AVX256_REG_MODE (mode)
26454 || VALID_SSE_REG_MODE (mode)
26455 || VALID_SSE2_REG_MODE (mode)
26456 || VALID_MMX_REG_MODE (mode)
26457 || VALID_MMX_REG_MODE_3DNOW (mode));
26459 if (MMX_REGNO_P (regno))
26461 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26462 so if the register is available at all, then we can move data of
26463 the given mode into or out of it. */
26464 return (VALID_MMX_REG_MODE (mode)
26465 || VALID_MMX_REG_MODE_3DNOW (mode));
26468 if (mode == QImode)
26470 /* Take care for QImode values - they can be in non-QI regs,
26471 but then they do cause partial register stalls. */
26472 if (regno <= BX_REG || TARGET_64BIT)
26473 return 1;
26474 if (!TARGET_PARTIAL_REG_STALL)
26475 return 1;
26476 return reload_in_progress || reload_completed;
26478 /* We handle both integer and floats in the general purpose registers. */
26479 else if (VALID_INT_MODE_P (mode))
26480 return 1;
26481 else if (VALID_FP_MODE_P (mode))
26482 return 1;
26483 else if (VALID_DFP_MODE_P (mode))
26484 return 1;
26485 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26486 on to use that value in smaller contexts, this can easily force a
26487 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26488 supporting DImode, allow it. */
26489 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26490 return 1;
26492 return 0;
26495 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26496 tieable integer mode. */
26498 static bool
26499 ix86_tieable_integer_mode_p (enum machine_mode mode)
26501 switch (mode)
26503 case HImode:
26504 case SImode:
26505 return true;
26507 case QImode:
26508 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
26510 case DImode:
26511 return TARGET_64BIT;
26513 default:
26514 return false;
26518 /* Return true if MODE1 is accessible in a register that can hold MODE2
26519 without copying. That is, all register classes that can hold MODE2
26520 can also hold MODE1. */
26522 bool
26523 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26525 if (mode1 == mode2)
26526 return true;
26528 if (ix86_tieable_integer_mode_p (mode1)
26529 && ix86_tieable_integer_mode_p (mode2))
26530 return true;
26532 /* MODE2 being XFmode implies fp stack or general regs, which means we
26533 can tie any smaller floating point modes to it. Note that we do not
26534 tie this with TFmode. */
26535 if (mode2 == XFmode)
26536 return mode1 == SFmode || mode1 == DFmode;
26538 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26539 that we can tie it with SFmode. */
26540 if (mode2 == DFmode)
26541 return mode1 == SFmode;
26543 /* If MODE2 is only appropriate for an SSE register, then tie with
26544 any other mode acceptable to SSE registers. */
26545 if (GET_MODE_SIZE (mode2) == 16
26546 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26547 return (GET_MODE_SIZE (mode1) == 16
26548 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26550 /* If MODE2 is appropriate for an MMX register, then tie
26551 with any other mode acceptable to MMX registers. */
26552 if (GET_MODE_SIZE (mode2) == 8
26553 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26554 return (GET_MODE_SIZE (mode1) == 8
26555 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26557 return false;
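/* Examples (annotation): SFmode ties with DFmode and XFmode, since
   any 80387 register that can hold the wider mode can hold the
   narrower one, so (subreg:SF (reg:DF ...)) needs no copy. The
   16-byte SSE modes (V4SF, V2DI, ...) all tie with each other, but
   SFmode does not tie with TFmode, which is SSE-only. */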
26560 /* Compute a (partial) cost for rtx X. Return true if the complete
26561 cost has been computed, and false if subexpressions should be
26562 scanned. In either case, *TOTAL contains the cost result. */
26564 static bool
26565 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26567 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26568 enum machine_mode mode = GET_MODE (x);
26569 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
26571 switch (code)
26573 case CONST_INT:
26574 case CONST:
26575 case LABEL_REF:
26576 case SYMBOL_REF:
26577 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26578 *total = 3;
26579 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26580 *total = 2;
26581 else if (flag_pic && SYMBOLIC_CONST (x)
26582 && (!TARGET_64BIT
26583 || (GET_CODE (x) != LABEL_REF
26584 && (GET_CODE (x) != SYMBOL_REF
26585 || !SYMBOL_REF_LOCAL_P (x)))))
26586 *total = 1;
26587 else
26588 *total = 0;
26589 return true;
26591 case CONST_DOUBLE:
26592 if (mode == VOIDmode)
26593 *total = 0;
26594 else
26595 switch (standard_80387_constant_p (x))
26597 case 1: /* 0.0 */
26598 *total = 1;
26599 break;
26600 default: /* Other constants */
26601 *total = 2;
26602 break;
26603 case 0:
26604 case -1:
26605 /* Start with (MEM (SYMBOL_REF)), since that's where
26606 it'll probably end up. Add a penalty for size. */
26607 *total = (COSTS_N_INSNS (1)
26608 + (flag_pic != 0 && !TARGET_64BIT)
26609 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26610 break;
26612 return true;
26614 case ZERO_EXTEND:
26615 /* Zero extension is often completely free on x86_64, so make
26616 it as cheap as possible. */
26617 if (TARGET_64BIT && mode == DImode
26618 && GET_MODE (XEXP (x, 0)) == SImode)
26619 *total = 1;
26620 else if (TARGET_ZERO_EXTEND_WITH_AND)
26621 *total = cost->add;
26622 else
26623 *total = cost->movzx;
26624 return false;
26626 case SIGN_EXTEND:
26627 *total = cost->movsx;
26628 return false;
26630 case ASHIFT:
26631 if (CONST_INT_P (XEXP (x, 1))
26632 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26634 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26635 if (value == 1)
26637 *total = cost->add;
26638 return false;
26640 if ((value == 2 || value == 3)
26641 && cost->lea <= cost->shift_const)
26643 *total = cost->lea;
26644 return false;
26647 /* FALLTHRU */
26649 case ROTATE:
26650 case ASHIFTRT:
26651 case LSHIFTRT:
26652 case ROTATERT:
26653 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26655 if (CONST_INT_P (XEXP (x, 1)))
26657 if (INTVAL (XEXP (x, 1)) > 32)
26658 *total = cost->shift_const + COSTS_N_INSNS (2);
26659 else
26660 *total = cost->shift_const * 2;
26662 else
26664 if (GET_CODE (XEXP (x, 1)) == AND)
26665 *total = cost->shift_var * 2;
26666 else
26667 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26670 else
26672 if (CONST_INT_P (XEXP (x, 1)))
26673 *total = cost->shift_const;
26674 else
26675 *total = cost->shift_var;
26677 return false;
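/* Example (annotation): "x << 1" is priced as an add (add reg,reg),
   and "x << 2" or "x << 3" as an lea when that is no more expensive
   than a constant shift, since e.g.

     leal 0(,%eax,4), %edx        # x << 2, with the move for free

   performs the shift and the register move in one insn. */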
26679 case MULT:
26680 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26682 /* ??? SSE scalar cost should be used here. */
26683 *total = cost->fmul;
26684 return false;
26686 else if (X87_FLOAT_MODE_P (mode))
26688 *total = cost->fmul;
26689 return false;
26691 else if (FLOAT_MODE_P (mode))
26693 /* ??? SSE vector cost should be used here. */
26694 *total = cost->fmul;
26695 return false;
26697 else
26699 rtx op0 = XEXP (x, 0);
26700 rtx op1 = XEXP (x, 1);
26701 int nbits;
26702 if (CONST_INT_P (XEXP (x, 1)))
26704 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26705 for (nbits = 0; value != 0; value &= value - 1)
26706 nbits++;
26708 else
26709 /* This is arbitrary. */
26710 nbits = 7;
26712 /* Compute costs correctly for widening multiplication. */
26713 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26714 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26715 == GET_MODE_SIZE (mode))
26717 int is_mulwiden = 0;
26718 enum machine_mode inner_mode = GET_MODE (op0);
26720 if (GET_CODE (op0) == GET_CODE (op1))
26721 is_mulwiden = 1, op1 = XEXP (op1, 0);
26722 else if (CONST_INT_P (op1))
26724 if (GET_CODE (op0) == SIGN_EXTEND)
26725 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26726 == INTVAL (op1);
26727 else
26728 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26731 if (is_mulwiden)
26732 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26735 *total = (cost->mult_init[MODE_INDEX (mode)]
26736 + nbits * cost->mult_bit
26737 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
26739 return true;
26742 case DIV:
26743 case UDIV:
26744 case MOD:
26745 case UMOD:
26746 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26747 /* ??? SSE cost should be used here. */
26748 *total = cost->fdiv;
26749 else if (X87_FLOAT_MODE_P (mode))
26750 *total = cost->fdiv;
26751 else if (FLOAT_MODE_P (mode))
26752 /* ??? SSE vector cost should be used here. */
26753 *total = cost->fdiv;
26754 else
26755 *total = cost->divide[MODE_INDEX (mode)];
26756 return false;
26758 case PLUS:
26759 if (GET_MODE_CLASS (mode) == MODE_INT
26760 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26762 if (GET_CODE (XEXP (x, 0)) == PLUS
26763 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26764 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26765 && CONSTANT_P (XEXP (x, 1)))
26767 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26768 if (val == 2 || val == 4 || val == 8)
26770 *total = cost->lea;
26771 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26772 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26773 outer_code, speed);
26774 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26775 return true;
26778 else if (GET_CODE (XEXP (x, 0)) == MULT
26779 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26781 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26782 if (val == 2 || val == 4 || val == 8)
26784 *total = cost->lea;
26785 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26786 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26787 return true;
26790 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26792 *total = cost->lea;
26793 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26794 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26795 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26796 return true;
26799 /* FALLTHRU */
26801 case MINUS:
26802 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26804 /* ??? SSE cost should be used here. */
26805 *total = cost->fadd;
26806 return false;
26808 else if (X87_FLOAT_MODE_P (mode))
26810 *total = cost->fadd;
26811 return false;
26813 else if (FLOAT_MODE_P (mode))
26815 /* ??? SSE vector cost should be used here. */
26816 *total = cost->fadd;
26817 return false;
26819 /* FALLTHRU */
26821 case AND:
26822 case IOR:
26823 case XOR:
26824 if (!TARGET_64BIT && mode == DImode)
26826 *total = (cost->add * 2
26827 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26828 << (GET_MODE (XEXP (x, 0)) != DImode))
26829 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26830 << (GET_MODE (XEXP (x, 1)) != DImode)));
26831 return true;
26833 /* FALLTHRU */
26835 case NEG:
26836 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26838 /* ??? SSE cost should be used here. */
26839 *total = cost->fchs;
26840 return false;
26842 else if (X87_FLOAT_MODE_P (mode))
26844 *total = cost->fchs;
26845 return false;
26847 else if (FLOAT_MODE_P (mode))
26849 /* ??? SSE vector cost should be used here. */
26850 *total = cost->fchs;
26851 return false;
26853 /* FALLTHRU */
26855 case NOT:
26856 if (!TARGET_64BIT && mode == DImode)
26857 *total = cost->add * 2;
26858 else
26859 *total = cost->add;
26860 return false;
26862 case COMPARE:
26863 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26864 && XEXP (XEXP (x, 0), 1) == const1_rtx
26865 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26866 && XEXP (x, 1) == const0_rtx)
26868 /* This kind of construct is implemented using test[bwl].
26869 Treat it as if we had an AND. */
26870 *total = (cost->add
26871 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26872 + rtx_cost (const1_rtx, outer_code, speed));
26873 return true;
26875 return false;
26877 case FLOAT_EXTEND:
26878 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
26879 *total = 0;
26880 return false;
26882 case ABS:
26883 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26884 /* ??? SSE cost should be used here. */
26885 *total = cost->fabs;
26886 else if (X87_FLOAT_MODE_P (mode))
26887 *total = cost->fabs;
26888 else if (FLOAT_MODE_P (mode))
26889 /* ??? SSE vector cost should be used here. */
26890 *total = cost->fabs;
26891 return false;
26893 case SQRT:
26894 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26895 /* ??? SSE cost should be used here. */
26896 *total = cost->fsqrt;
26897 else if (X87_FLOAT_MODE_P (mode))
26898 *total = cost->fsqrt;
26899 else if (FLOAT_MODE_P (mode))
26900 /* ??? SSE vector cost should be used here. */
26901 *total = cost->fsqrt;
26902 return false;
26904 case UNSPEC:
26905 if (XINT (x, 1) == UNSPEC_TP)
26906 *total = 0;
26907 return false;
26909 default:
26910 return false;
26914 #if TARGET_MACHO
26916 static int current_machopic_label_num;
26918 /* Given a symbol name and its associated stub, write out the
26919 definition of the stub. */
26921 void
26922 machopic_output_stub (FILE *file, const char *symb, const char *stub)
26924 unsigned int length;
26925 char *binder_name, *symbol_name, lazy_ptr_name[32];
26926 int label = ++current_machopic_label_num;
26928 /* For 64-bit we shouldn't get here. */
26929 gcc_assert (!TARGET_64BIT);
26931 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
26932 symb = (*targetm.strip_name_encoding) (symb);
26934 length = strlen (stub);
26935 binder_name = XALLOCAVEC (char, length + 32);
26936 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
26938 length = strlen (symb);
26939 symbol_name = XALLOCAVEC (char, length + 32);
26940 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
26942 sprintf (lazy_ptr_name, "L%d$lz", label);
26944 if (MACHOPIC_PURE)
26945 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
26946 else
26947 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
26949 fprintf (file, "%s:\n", stub);
26950 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26952 if (MACHOPIC_PURE)
26954 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
26955 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
26956 fprintf (file, "\tjmp\t*%%edx\n");
26958 else
26959 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
26961 fprintf (file, "%s:\n", binder_name);
26963 if (MACHOPIC_PURE)
26965 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
26966 fprintf (file, "\tpushl\t%%eax\n");
26968 else
26969 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
26971 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
26973 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
26974 fprintf (file, "%s:\n", lazy_ptr_name);
26975 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26976 fprintf (file, "\t.long %s\n", binder_name);
26979 void
26980 darwin_x86_file_end (void)
26982 darwin_file_end ();
26983 ix86_file_end ();
26985 #endif /* TARGET_MACHO */
26987 /* Order the registers for register allocator. */
26989 void
26990 x86_order_regs_for_local_alloc (void)
26992 int pos = 0;
26993 int i;
26995 /* First allocate the local general purpose registers. */
26996 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26997 if (GENERAL_REGNO_P (i) && call_used_regs[i])
26998 reg_alloc_order [pos++] = i;
27000 /* Global general purpose registers. */
27001 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27002 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
27003 reg_alloc_order [pos++] = i;
27005 /* x87 registers come first in case we are doing FP math
27006 using them. */
27007 if (!TARGET_SSE_MATH)
27008 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27009 reg_alloc_order [pos++] = i;
27011 /* SSE registers. */
27012 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
27013 reg_alloc_order [pos++] = i;
27014 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
27015 reg_alloc_order [pos++] = i;
27017 /* x87 registers. */
27018 if (TARGET_SSE_MATH)
27019 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27020 reg_alloc_order [pos++] = i;
27022 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
27023 reg_alloc_order [pos++] = i;
27025 /* Initialize the rest of the array, as we do not allocate some registers
27026 at all. */
27027 while (pos < FIRST_PSEUDO_REGISTER)
27028 reg_alloc_order [pos++] = 0;
27031 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
27032 struct attribute_spec.handler. */
27033 static tree
27034 ix86_handle_abi_attribute (tree *node, tree name,
27035 tree args ATTRIBUTE_UNUSED,
27036 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27038 if (TREE_CODE (*node) != FUNCTION_TYPE
27039 && TREE_CODE (*node) != METHOD_TYPE
27040 && TREE_CODE (*node) != FIELD_DECL
27041 && TREE_CODE (*node) != TYPE_DECL)
27043 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27044 name);
27045 *no_add_attrs = true;
27046 return NULL_TREE;
27048 if (!TARGET_64BIT)
27050 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
27051 name);
27052 *no_add_attrs = true;
27053 return NULL_TREE;
27056 /* The ms_abi and sysv_abi attributes are mutually exclusive. */
27057 if (is_attribute_p ("ms_abi", name))
27059 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
27061 error ("ms_abi and sysv_abi attributes are not compatible");
27064 return NULL_TREE;
27066 else if (is_attribute_p ("sysv_abi", name))
27068 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
27070 error ("ms_abi and sysv_abi attributes are not compatible");
27073 return NULL_TREE;
27076 return NULL_TREE;
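/* Usage example (annotation): on x86_64 these attributes select the
   calling convention per function:

     void f (int) __attribute__ ((ms_abi));
     void g (int) __attribute__ ((sysv_abi));

   Combining both on the same function is diagnosed above. */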
27079 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
27080 struct attribute_spec.handler. */
27081 static tree
27082 ix86_handle_struct_attribute (tree *node, tree name,
27083 tree args ATTRIBUTE_UNUSED,
27084 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27086 tree *type = NULL;
27087 if (DECL_P (*node))
27089 if (TREE_CODE (*node) == TYPE_DECL)
27090 type = &TREE_TYPE (*node);
27092 else
27093 type = node;
27095 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
27096 || TREE_CODE (*type) == UNION_TYPE)))
27098 warning (OPT_Wattributes, "%qE attribute ignored",
27099 name);
27100 *no_add_attrs = true;
27103 else if ((is_attribute_p ("ms_struct", name)
27104 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
27105 || ((is_attribute_p ("gcc_struct", name)
27106 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
27108 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
27109 name);
27110 *no_add_attrs = true;
27113 return NULL_TREE;
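/* Usage example (annotation): the layout of an individual struct
   can be switched between the MSVC and GCC bitfield rules:

     struct __attribute__ ((ms_struct)) S { int a : 3; int b : 29; };

   Mixing ms_struct and gcc_struct on one type is warned about and
   the second attribute ignored, as handled above. */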
27116 static bool
27117 ix86_ms_bitfield_layout_p (const_tree record_type)
27119 return (TARGET_MS_BITFIELD_LAYOUT &&
27120 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27121 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
27124 /* Returns an expression indicating where the this parameter is
27125 located on entry to the FUNCTION. */
27127 static rtx
27128 x86_this_parameter (tree function)
27130 tree type = TREE_TYPE (function);
27131 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
27132 int nregs;
27134 if (TARGET_64BIT)
27136 const int *parm_regs;
27138 if (ix86_function_type_abi (type) == MS_ABI)
27139 parm_regs = x86_64_ms_abi_int_parameter_registers;
27140 else
27141 parm_regs = x86_64_int_parameter_registers;
27142 return gen_rtx_REG (DImode, parm_regs[aggr]);
27145 nregs = ix86_function_regparm (type, function);
27147 if (nregs > 0 && !stdarg_p (type))
27149 int regno;
27151 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
27152 regno = aggr ? DX_REG : CX_REG;
27153 else
27155 regno = AX_REG;
27156 if (aggr)
27158 regno = DX_REG;
27159 if (nregs == 1)
27160 return gen_rtx_MEM (SImode,
27161 plus_constant (stack_pointer_rtx, 4));
27164 return gen_rtx_REG (SImode, regno);
27167 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
27170 /* Determine whether x86_output_mi_thunk can succeed. */
27172 static bool
27173 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
27174 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
27175 HOST_WIDE_INT vcall_offset, const_tree function)
27177 /* 64-bit can handle anything. */
27178 if (TARGET_64BIT)
27179 return true;
27181 /* For 32-bit, everything's fine if we have one free register. */
27182 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
27183 return true;
27185 /* Need a free register for vcall_offset. */
27186 if (vcall_offset)
27187 return false;
27189 /* Need a free register for GOT references. */
27190 if (flag_pic && !(*targetm.binds_local_p) (function))
27191 return false;
27193 /* Otherwise ok. */
27194 return true;
27197 /* Output the assembler code for a thunk function. THUNK_DECL is the
27198 declaration for the thunk function itself, FUNCTION is the decl for
27199 the target function. DELTA is an immediate constant offset to be
27200 added to THIS. If VCALL_OFFSET is nonzero, the word at
27201 *(*this + vcall_offset) should be added to THIS. */
27203 static void
27204 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
27205 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27206 HOST_WIDE_INT vcall_offset, tree function)
27208 rtx xops[3];
27209 rtx this_param = x86_this_parameter (function);
27210 rtx this_reg, tmp;
27212 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27213 pull it in now and let DELTA benefit. */
27214 if (REG_P (this_param))
27215 this_reg = this_param;
27216 else if (vcall_offset)
27218 /* Put the this parameter into %eax. */
27219 xops[0] = this_param;
27220 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27221 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27223 else
27224 this_reg = NULL_RTX;
27226 /* Adjust the this parameter by a fixed constant. */
27227 if (delta)
27229 xops[0] = GEN_INT (delta);
27230 xops[1] = this_reg ? this_reg : this_param;
27231 if (TARGET_64BIT)
27233 if (!x86_64_general_operand (xops[0], DImode))
27235 tmp = gen_rtx_REG (DImode, R10_REG);
27236 xops[1] = tmp;
27237 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27238 xops[0] = tmp;
27239 xops[1] = this_param;
27241 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27243 else
27244 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27247 /* Adjust the this parameter by a value stored in the vtable. */
27248 if (vcall_offset)
27250 if (TARGET_64BIT)
27251 tmp = gen_rtx_REG (DImode, R10_REG);
27252 else
27254 int tmp_regno = CX_REG;
27255 if (lookup_attribute ("fastcall",
27256 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27257 tmp_regno = AX_REG;
27258 tmp = gen_rtx_REG (SImode, tmp_regno);
27261 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27262 xops[1] = tmp;
27263 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27265 /* Adjust the this parameter. */
27266 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27267 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27269 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27270 xops[0] = GEN_INT (vcall_offset);
27271 xops[1] = tmp2;
27272 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27273 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27275 xops[1] = this_reg;
27276 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27279 /* If necessary, drop THIS back to its stack slot. */
27280 if (this_reg && this_reg != this_param)
27282 xops[0] = this_reg;
27283 xops[1] = this_param;
27284 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27287 xops[0] = XEXP (DECL_RTL (function), 0);
27288 if (TARGET_64BIT)
27290 if (!flag_pic || (*targetm.binds_local_p) (function))
27291 output_asm_insn ("jmp\t%P0", xops);
27292 /* All thunks should be in the same object as their target,
27293 and thus binds_local_p should be true. */
27294 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27295 gcc_unreachable ();
27296 else
27298 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27299 tmp = gen_rtx_CONST (Pmode, tmp);
27300 tmp = gen_rtx_MEM (QImode, tmp);
27301 xops[0] = tmp;
27302 output_asm_insn ("jmp\t%A0", xops);
27305 else
27307 if (!flag_pic || (*targetm.binds_local_p) (function))
27308 output_asm_insn ("jmp\t%P0", xops);
27309 else
27310 #if TARGET_MACHO
27311 if (TARGET_MACHO)
27313 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27314 tmp = (gen_rtx_SYMBOL_REF
27315 (Pmode,
27316 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27317 tmp = gen_rtx_MEM (QImode, tmp);
27318 xops[0] = tmp;
27319 output_asm_insn ("jmp\t%0", xops);
27321 else
27322 #endif /* TARGET_MACHO */
27324 tmp = gen_rtx_REG (SImode, CX_REG);
27325 output_set_got (tmp, NULL_RTX);
27327 xops[1] = tmp;
27328 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27329 output_asm_insn ("jmp\t{*}%1", xops);
27334 static void
27335 x86_file_start (void)
27337 default_file_start ();
27338 #if TARGET_MACHO
27339 darwin_file_start ();
27340 #endif
27341 if (X86_FILE_START_VERSION_DIRECTIVE)
27342 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27343 if (X86_FILE_START_FLTUSED)
27344 fputs ("\t.global\t__fltused\n", asm_out_file);
27345 if (ix86_asm_dialect == ASM_INTEL)
27346 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
27349 int
27350 x86_field_alignment (tree field, int computed)
27352 enum machine_mode mode;
27353 tree type = TREE_TYPE (field);
27355 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
27356 return computed;
27357 mode = TYPE_MODE (strip_array_types (type));
27358 if (mode == DFmode || mode == DCmode
27359 || GET_MODE_CLASS (mode) == MODE_INT
27360 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
27361 return MIN (32, computed);
27362 return computed;
27365 /* Output assembler code to FILE to increment profiler label # LABELNO
27366 for profiling a function entry. */
27367 void
27368 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
27370 if (TARGET_64BIT)
27372 #ifndef NO_PROFILE_COUNTERS
27373 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
27374 #endif
27376 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
27377 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
27378 else
27379 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
27381 else if (flag_pic)
27383 #ifndef NO_PROFILE_COUNTERS
27384 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
27385 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
27386 #endif
27387 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
27389 else
27391 #ifndef NO_PROFILE_COUNTERS
27392 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
27393 PROFILE_COUNT_REGISTER);
27394 #endif
27395 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
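/* Example of the output (annotation; MCOUNT_NAME, LPREFIX and
   PROFILE_COUNT_REGISTER are target-configured, assumed here to be
   "mcount", ".L" and "edx" as on typical 32-bit ELF targets). The
   32-bit PIC case above then prints something like

     leal .LP0@GOTOFF(%ebx), %edx
     call *mcount@GOT(%ebx)

   matching the fprintf templates. */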
27399 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27400 /* We don't have exact information about the insn sizes, but we may assume
27401 quite safely that we are informed about all 1-byte insns and memory
27402 address sizes. This is enough to eliminate unnecessary padding in
27403 99% of cases. */
27405 static int
27406 min_insn_size (rtx insn)
27408 int l = 0, len;
27410 if (!INSN_P (insn) || !active_insn_p (insn))
27411 return 0;
27413 /* Discard alignments we've emitted, as well as jump table data. */
27414 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
27415 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
27416 return 0;
27417 if (JUMP_TABLE_DATA_P (insn))
27418 return 0;
27420 /* Important case - calls are always 5 bytes.
27421 It is common to have many calls in a row. */
27422 if (CALL_P (insn)
27423 && symbolic_reference_mentioned_p (PATTERN (insn))
27424 && !SIBLING_CALL_P (insn))
27425 return 5;
27426 len = get_attr_length (insn);
27427 if (len <= 1)
27428 return 1;
27430 /* For normal instructions we rely on get_attr_length being exact,
27431 with a few exceptions. */
27432 if (!JUMP_P (insn))
27434 enum attr_type type = get_attr_type (insn);
27436 switch (type)
27438 case TYPE_MULTI:
27439 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
27440 || asm_noperands (PATTERN (insn)) >= 0)
27441 return 0;
27442 break;
27443 case TYPE_OTHER:
27444 case TYPE_FCMP:
27445 break;
27446 default:
27447 /* Otherwise trust get_attr_length. */
27448 return len;
27451 l = get_attr_length_address (insn);
27452 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27453 l = 4;
27455 if (l)
27456 return 1+l;
27457 else
27458 return 2;
27461 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
27462 16-byte window. */
27464 static void
27465 ix86_avoid_jump_mispredicts (void)
27467 rtx insn, start = get_insns ();
27468 int nbytes = 0, njumps = 0;
27469 int isjump = 0;
27471 /* Look for all minimal intervals of instructions containing 4 jumps.
27472 The intervals are bounded by START and INSN. NBYTES is the total
27473 size of the instructions in the interval, including INSN but not
27474 START. When NBYTES is smaller than 16, it is possible that the
27475 ends of START and INSN fall in the same 16-byte page.
27477 The smallest offset in the page at which INSN can start is the case
27478 where START ends at offset 0. The offset of INSN is then NBYTES -
27479 sizeof (INSN). We add a p2align to the 16-byte window with max_skip
27480 15 - NBYTES + sizeof (INSN). */
27481 for (insn = start; insn; insn = NEXT_INSN (insn))
27483 int min_size;
27485 if (LABEL_P (insn))
27487 int align = label_to_alignment (insn);
27488 int max_skip = label_to_max_skip (insn);
27490 if (max_skip > 15)
27491 max_skip = 15;
27492 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
27493 already in the current 16 byte page, because otherwise
27494 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
27495 bytes to reach 16 byte boundary. */
27496 if (align <= 0
27497 || (align <= 3 && max_skip != (1 << align) - 1))
27498 max_skip = 0;
27499 if (dump_file)
27500 fprintf (dump_file, "Label %i with max_skip %i\n",
27501 INSN_UID (insn), max_skip);
27502 if (max_skip)
27504 while (nbytes + max_skip >= 16)
27506 start = NEXT_INSN (start);
27507 if ((JUMP_P (start)
27508 && GET_CODE (PATTERN (start)) != ADDR_VEC
27509 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27510 || CALL_P (start))
27511 njumps--, isjump = 1;
27512 else
27513 isjump = 0;
27514 nbytes -= min_insn_size (start);
27517 continue;
27520 min_size = min_insn_size (insn);
27521 nbytes += min_size;
27522 if (dump_file)
27523 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
27524 INSN_UID (insn), min_size);
27525 if ((JUMP_P (insn)
27526 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27527 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
27528 || CALL_P (insn))
27529 njumps++;
27530 else
27531 continue;
27533 while (njumps > 3)
27535 start = NEXT_INSN (start);
27536 if ((JUMP_P (start)
27537 && GET_CODE (PATTERN (start)) != ADDR_VEC
27538 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27539 || CALL_P (start))
27540 njumps--, isjump = 1;
27541 else
27542 isjump = 0;
27543 nbytes -= min_insn_size (start);
27545 gcc_assert (njumps >= 0);
27546 if (dump_file)
27547 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27548 INSN_UID (start), INSN_UID (insn), nbytes);
27550 if (njumps == 3 && isjump && nbytes < 16)
27552 int padsize = 15 - nbytes + min_insn_size (insn);
27554 if (dump_file)
27555 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27556 INSN_UID (insn), padsize);
27557 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
27561 #endif
27563 /* AMD Athlon works faster
27564 when RET is not the destination of a conditional jump or directly preceded
27565 by another jump instruction. We avoid the penalty by inserting a NOP just
27566 before the RET instructions in such cases. */
27567 static void
27568 ix86_pad_returns (void)
27570 edge e;
27571 edge_iterator ei;
27573 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27575 basic_block bb = e->src;
27576 rtx ret = BB_END (bb);
27577 rtx prev;
27578 bool replace = false;
27580 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27581 || optimize_bb_for_size_p (bb))
27582 continue;
27583 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27584 if (active_insn_p (prev) || LABEL_P (prev))
27585 break;
27586 if (prev && LABEL_P (prev))
27588 edge e;
27589 edge_iterator ei;
27591 FOR_EACH_EDGE (e, ei, bb->preds)
27592 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27593 && !(e->flags & EDGE_FALLTHRU))
27594 replace = true;
27596 if (!replace)
27598 prev = prev_active_insn (ret);
27599 if (prev
27600 && ((JUMP_P (prev) && any_condjump_p (prev))
27601 || CALL_P (prev)))
27602 replace = true;
27603 /* Empty functions get a branch mispredict even when the jump destination
27604 is not visible to us. */
27605 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
27606 replace = true;
27608 if (replace)
27610 emit_jump_insn_before (gen_return_internal_long (), ret);
27611 delete_insn (ret);
27616 /* Implement machine specific optimizations. We implement padding of returns
27617 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
27618 static void
27619 ix86_reorg (void)
27621 if (optimize && optimize_function_for_speed_p (cfun))
27623 if (TARGET_PAD_RETURNS)
27624 ix86_pad_returns ();
27625 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27626 if (TARGET_FOUR_JUMP_LIMIT)
27627 ix86_avoid_jump_mispredicts ();
27628 #endif
27632 /* Return nonzero when a QImode register that must be represented via a REX
27633 prefix is used. */
27634 bool
27635 x86_extended_QIreg_mentioned_p (rtx insn)
27637 int i;
27638 extract_insn_cached (insn);
27639 for (i = 0; i < recog_data.n_operands; i++)
27640 if (REG_P (recog_data.operand[i])
27641 && REGNO (recog_data.operand[i]) > BX_REG)
27642 return true;
27643 return false;
27646 /* Return nonzero when P points to a register encoded via a REX prefix.
27647 Called via for_each_rtx. */
27648 static int
27649 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27651 unsigned int regno;
27652 if (!REG_P (*p))
27653 return 0;
27654 regno = REGNO (*p);
27655 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27658 /* Return true when INSN mentions register that must be encoded using REX
27659 prefix. */
27660 bool
27661 x86_extended_reg_mentioned_p (rtx insn)
27663 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27664 extended_reg_mentioned_1, NULL);
27667 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27668 optabs would emit if we didn't have TFmode patterns. */
27670 void
27671 x86_emit_floatuns (rtx operands[2])
27673 rtx neglab, donelab, i0, i1, f0, in, out;
27674 enum machine_mode mode, inmode;
27676 inmode = GET_MODE (operands[1]);
27677 gcc_assert (inmode == SImode || inmode == DImode);
27679 out = operands[0];
27680 in = force_reg (inmode, operands[1]);
27681 mode = GET_MODE (out);
27682 neglab = gen_label_rtx ();
27683 donelab = gen_label_rtx ();
27684 f0 = gen_reg_rtx (mode);
27686 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27688 expand_float (out, in, 0);
27690 emit_jump_insn (gen_jump (donelab));
27691 emit_barrier ();
27693 emit_label (neglab);
27695 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27696 1, OPTAB_DIRECT);
27697 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27698 1, OPTAB_DIRECT);
27699 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27701 expand_float (f0, i0, 0);
27703 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27705 emit_label (donelab);
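/* The math behind the negative branch (annotation): when IN has its
   top bit set it would read as negative in a signed conversion, so
   the code instead computes

     i0 = (in >> 1) | (in & 1);    - halve, keeping the low bit sticky
     f0 = (FP) i0;
     out = f0 + f0;                - scale back to the full value

   Or-ing the shifted-out bit back in preserves correct rounding:
   the final addition rounds once, exactly as a direct conversion
   would. */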
27708 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27709 with all elements equal to VAR. Return true if successful. */
27711 static bool
27712 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27713 rtx target, rtx val)
27715 enum machine_mode hmode, smode, wsmode, wvmode;
27716 rtx x;
27718 switch (mode)
27720 case V2SImode:
27721 case V2SFmode:
27722 if (!mmx_ok)
27723 return false;
27724 /* FALLTHRU */
27726 case V2DFmode:
27727 case V2DImode:
27728 case V4SFmode:
27729 case V4SImode:
27730 val = force_reg (GET_MODE_INNER (mode), val);
27731 x = gen_rtx_VEC_DUPLICATE (mode, val);
27732 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27733 return true;
27735 case V4HImode:
27736 if (!mmx_ok)
27737 return false;
27738 if (TARGET_SSE || TARGET_3DNOW_A)
27740 val = gen_lowpart (SImode, val);
27741 x = gen_rtx_TRUNCATE (HImode, val);
27742 x = gen_rtx_VEC_DUPLICATE (mode, x);
27743 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27744 return true;
27746 else
27748 smode = HImode;
27749 wsmode = SImode;
27750 wvmode = V2SImode;
27751 goto widen;
27754 case V8QImode:
27755 if (!mmx_ok)
27756 return false;
27757 smode = QImode;
27758 wsmode = HImode;
27759 wvmode = V4HImode;
27760 goto widen;
27761 case V8HImode:
27762 if (TARGET_SSE2)
27764 rtx tmp1, tmp2;
27765 /* Extend HImode to SImode using a paradoxical SUBREG. */
27766 tmp1 = gen_reg_rtx (SImode);
27767 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27768 /* Insert the SImode value as low element of V4SImode vector. */
27769 tmp2 = gen_reg_rtx (V4SImode);
27770 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27771 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27772 CONST0_RTX (V4SImode),
27773 const1_rtx);
27774 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27775 /* Cast the V4SImode vector back to a V8HImode vector. */
27776 tmp1 = gen_reg_rtx (V8HImode);
27777 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
27778 /* Duplicate the low short through the whole low SImode word. */
27779 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
27780 /* Cast the V8HImode vector back to a V4SImode vector. */
27781 tmp2 = gen_reg_rtx (V4SImode);
27782 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27783 /* Replicate the low element of the V4SImode vector. */
27784 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27785 /* Cast the V4SImode vector back to V8HImode, and store in target. */
27786 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
27787 return true;
27789 smode = HImode;
27790 wsmode = SImode;
27791 wvmode = V4SImode;
27792 goto widen;
27793 case V16QImode:
27794 if (TARGET_SSE2)
27796 rtx tmp1, tmp2;
27797 /* Extend QImode to SImode using a paradoxical SUBREG. */
27798 tmp1 = gen_reg_rtx (SImode);
27799 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27800 /* Insert the SImode value as low element of V4SImode vector. */
27801 tmp2 = gen_reg_rtx (V4SImode);
27802 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27803 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27804 CONST0_RTX (V4SImode),
27805 const1_rtx);
27806 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27807 /* Cast the V4SImode vector back to a V16QImode vector. */
27808 tmp1 = gen_reg_rtx (V16QImode);
27809 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
27810 /* Duplicate the low byte through the whole low SImode word. */
27811 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27812 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27813 /* Cast the V16QImode vector back to a V4SImode vector. */
27814 tmp2 = gen_reg_rtx (V4SImode);
27815 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27816 /* Replicate the low element of the V4SImode vector. */
27817 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27818 /* Cast the V4SImode vector back to V16QImode, and store in target. */
27819 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
27820 return true;
27822 smode = QImode;
27823 wsmode = HImode;
27824 wvmode = V8HImode;
27825 goto widen;
27826 widen:
27827 /* Replicate the value once into the next wider mode and recurse. */
27828 val = convert_modes (wsmode, smode, val, true);
27829 x = expand_simple_binop (wsmode, ASHIFT, val,
27830 GEN_INT (GET_MODE_BITSIZE (smode)),
27831 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27832 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
27834 x = gen_reg_rtx (wvmode);
27835 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
27836 gcc_unreachable ();
27837 emit_move_insn (target, gen_lowpart (mode, x));
27838 return true;
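/* Illustrative sketch: the widening step above is shift-and-IOR
   replication into the next wider integer mode,

     static uint32_t
     replicate_byte (uint8_t b)
     {
       uint16_t h = (uint16_t) ((b << 8) | b);
       return ((uint32_t) h << 16) | h;
     }

   turning 0xAB into 0xABAB and then 0xABABABAB; each recursion level
   doubles the element width until a mode with a native VEC_DUPLICATE
   pattern is reached.  */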
27840 case V4DFmode:
27841 hmode = V2DFmode;
27842 goto half;
27843 case V4DImode:
27844 hmode = V2DImode;
27845 goto half;
27846 case V8SFmode:
27847 hmode = V4SFmode;
27848 goto half;
27849 case V8SImode:
27850 hmode = V4SImode;
27851 goto half;
27852 case V16HImode:
27853 hmode = V8HImode;
27854 goto half;
27855 case V32QImode:
27856 hmode = V16QImode;
27857 goto half;
27858 half:
27860 rtx tmp = gen_reg_rtx (hmode);
27861 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
27862 emit_insn (gen_rtx_SET (VOIDmode, target,
27863 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
27865 return true;
27867 default:
27868 return false;
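/* A minimal C model of the SSE2 broadcast used above for V8HImode and
   V16QImode (illustrative only): punpcklwd of a register with itself
   pairs up the low word, and pshufd with immediate 0 then replicates
   32-bit element 0 across all four dword lanes:

     static void
     punpcklwd (uint16_t d[8], const uint16_t a[8], const uint16_t b[8])
     {
       int i;
       for (i = 3; i >= 0; i--)
         {
           d[2 * i + 1] = b[i];
           d[2 * i] = a[i];
         }
     }

     static void
     pshufd_imm0 (uint32_t d[4], const uint32_t s[4])
     {
       int i;
       for (i = 0; i < 4; i++)
         d[i] = s[0];
     }

   The descending loop makes the model safe for the in-place
   punpcklwd (tmp1, tmp1, tmp1) form emitted above.  */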
27872 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27873 whose ONE_VAR element is VAR, and other elements are zero. Return true
27874 if successful. */
27876 static bool
27877 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
27878 rtx target, rtx var, int one_var)
27880 enum machine_mode vsimode;
27881 rtx new_target;
27882 rtx x, tmp;
27883 bool use_vector_set = false;
27885 switch (mode)
27887 case V2DImode:
27888 /* For SSE4.1, we normally use vector set. But if the second
27889 element is zero and inter-unit moves are OK, we use movq
27890 instead. */
27891 use_vector_set = (TARGET_64BIT
27892 && TARGET_SSE4_1
27893 && !(TARGET_INTER_UNIT_MOVES
27894 && one_var == 0));
27895 break;
27896 case V16QImode:
27897 case V4SImode:
27898 case V4SFmode:
27899 use_vector_set = TARGET_SSE4_1;
27900 break;
27901 case V8HImode:
27902 use_vector_set = TARGET_SSE2;
27903 break;
27904 case V4HImode:
27905 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27906 break;
27907 case V32QImode:
27908 case V16HImode:
27909 case V8SImode:
27910 case V8SFmode:
27911 case V4DFmode:
27912 use_vector_set = TARGET_AVX;
27913 break;
27914 case V4DImode:
27915 /* Use ix86_expand_vector_set in 64bit mode only. */
27916 use_vector_set = TARGET_AVX && TARGET_64BIT;
27917 break;
27918 default:
27919 break;
27922 if (use_vector_set)
27924 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27925 var = force_reg (GET_MODE_INNER (mode), var);
27926 ix86_expand_vector_set (mmx_ok, target, var, one_var);
27927 return true;
27930 switch (mode)
27932 case V2SFmode:
27933 case V2SImode:
27934 if (!mmx_ok)
27935 return false;
27936 /* FALLTHRU */
27938 case V2DFmode:
27939 case V2DImode:
27940 if (one_var != 0)
27941 return false;
27942 var = force_reg (GET_MODE_INNER (mode), var);
27943 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
27944 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27945 return true;
27947 case V4SFmode:
27948 case V4SImode:
27949 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
27950 new_target = gen_reg_rtx (mode);
27951 else
27952 new_target = target;
27953 var = force_reg (GET_MODE_INNER (mode), var);
27954 x = gen_rtx_VEC_DUPLICATE (mode, var);
27955 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
27956 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
27957 if (one_var != 0)
27959 /* We need to shuffle the value to the correct position, so
27960 create a new pseudo to store the intermediate result. */
27962 /* With SSE2, we can use the integer shuffle insns. */
27963 if (mode != V4SFmode && TARGET_SSE2)
27965 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
27966 GEN_INT (1),
27967 GEN_INT (one_var == 1 ? 0 : 1),
27968 GEN_INT (one_var == 2 ? 0 : 1),
27969 GEN_INT (one_var == 3 ? 0 : 1)));
27970 if (target != new_target)
27971 emit_move_insn (target, new_target);
27972 return true;
27975 /* Otherwise convert the intermediate result to V4SFmode and
27976 use the SSE1 shuffle instructions. */
27977 if (mode != V4SFmode)
27979 tmp = gen_reg_rtx (V4SFmode);
27980 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
27982 else
27983 tmp = new_target;
27985 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
27986 GEN_INT (1),
27987 GEN_INT (one_var == 1 ? 0 : 1),
27988 GEN_INT (one_var == 2 ? 0+4 : 1+4),
27989 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
27991 if (mode != V4SFmode)
27992 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
27993 else if (tmp != target)
27994 emit_move_insn (target, tmp);
27996 else if (target != new_target)
27997 emit_move_insn (target, new_target);
27998 return true;
28000 case V8HImode:
28001 case V16QImode:
28002 vsimode = V4SImode;
28003 goto widen;
28004 case V4HImode:
28005 case V8QImode:
28006 if (!mmx_ok)
28007 return false;
28008 vsimode = V2SImode;
28009 goto widen;
28010 widen:
28011 if (one_var != 0)
28012 return false;
28014 /* Zero extend the variable element to SImode and recurse. */
28015 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
28017 x = gen_reg_rtx (vsimode);
28018 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
28019 var, one_var))
28020 gcc_unreachable ();
28022 emit_move_insn (target, gen_lowpart (mode, x));
28023 return true;
28025 default:
28026 return false;
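/* The shufps immediates used above pick one source lane per destination
   lane, two bits each; a minimal C model (illustrative only):

     static void
     shufps (float d[4], const float a[4], const float b[4], unsigned imm8)
     {
       float t0 = a[imm8 & 3];
       float t1 = a[(imm8 >> 2) & 3];
       float t2 = b[(imm8 >> 4) & 3];
       float t3 = b[(imm8 >> 6) & 3];
       d[0] = t0; d[1] = t1; d[2] = t2; d[3] = t3;
     }

   The "0+4"/"1+4" style selectors in the calls above are the expander's
   way of saying "lane 0/1 of the second source operand".  */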
28030 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28031 consisting of the values in VALS. It is known that all elements
28032 except ONE_VAR are constants. Return true if successful. */
28034 static bool
28035 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
28036 rtx target, rtx vals, int one_var)
28038 rtx var = XVECEXP (vals, 0, one_var);
28039 enum machine_mode wmode;
28040 rtx const_vec, x;
28042 const_vec = copy_rtx (vals);
28043 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
28044 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
28046 switch (mode)
28048 case V2DFmode:
28049 case V2DImode:
28050 case V2SFmode:
28051 case V2SImode:
28052 /* For the two element vectors, it's just as easy to use
28053 the general case. */
28054 return false;
28056 case V4DImode:
28057 /* Use ix86_expand_vector_set in 64bit mode only. */
28058 if (!TARGET_64BIT)
28059 return false;
28060 case V4DFmode:
28061 case V8SFmode:
28062 case V8SImode:
28063 case V16HImode:
28064 case V32QImode:
28065 case V4SFmode:
28066 case V4SImode:
28067 case V8HImode:
28068 case V4HImode:
28069 break;
28071 case V16QImode:
28072 if (TARGET_SSE4_1)
28073 break;
28074 wmode = V8HImode;
28075 goto widen;
28076 case V8QImode:
28077 wmode = V4HImode;
28078 goto widen;
28079 widen:
28080 /* There's no way to set one QImode entry easily. Combine
28081 the variable value with its adjacent constant value, and
28082 promote to an HImode set. */
28083 x = XVECEXP (vals, 0, one_var ^ 1);
28084 if (one_var & 1)
28086 var = convert_modes (HImode, QImode, var, true);
28087 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
28088 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28089 x = GEN_INT (INTVAL (x) & 0xff);
28091 else
28093 var = convert_modes (HImode, QImode, var, true);
28094 x = gen_int_mode (INTVAL (x) << 8, HImode);
28096 if (x != const0_rtx)
28097 var = expand_simple_binop (HImode, IOR, var, x, var,
28098 1, OPTAB_LIB_WIDEN);
28100 x = gen_reg_rtx (wmode);
28101 emit_move_insn (x, gen_lowpart (wmode, const_vec));
28102 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
28104 emit_move_insn (target, gen_lowpart (mode, x));
28105 return true;
28107 default:
28108 return false;
28111 emit_move_insn (target, const_vec);
28112 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28113 return true;
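/* A minimal model of the QImode pairing above (illustrative only): the
   variable byte and its constant neighbour are fused into one HImode
   element, which a single vector-set insn can then store:

     static uint16_t
     fuse_pair (uint8_t var, uint8_t cst, int var_is_odd)
     {
       return var_is_odd ? (uint16_t) ((var << 8) | cst)
                         : (uint16_t) ((cst << 8) | var);
     }

   On little-endian x86 the odd byte of a pair is the high half of the
   containing halfword, hence the shift when (one_var & 1) is set.  */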
28116 /* A subroutine of ix86_expand_vector_init_general. Use vector
28117 concatenate to handle the most general case: all values variable,
28118 and none identical. */
28120 static void
28121 ix86_expand_vector_init_concat (enum machine_mode mode,
28122 rtx target, rtx *ops, int n)
28124 enum machine_mode cmode, hmode = VOIDmode;
28125 rtx first[8], second[4];
28126 rtvec v;
28127 int i, j;
28129 switch (n)
28131 case 2:
28132 switch (mode)
28134 case V8SImode:
28135 cmode = V4SImode;
28136 break;
28137 case V8SFmode:
28138 cmode = V4SFmode;
28139 break;
28140 case V4DImode:
28141 cmode = V2DImode;
28142 break;
28143 case V4DFmode:
28144 cmode = V2DFmode;
28145 break;
28146 case V4SImode:
28147 cmode = V2SImode;
28148 break;
28149 case V4SFmode:
28150 cmode = V2SFmode;
28151 break;
28152 case V2DImode:
28153 cmode = DImode;
28154 break;
28155 case V2SImode:
28156 cmode = SImode;
28157 break;
28158 case V2DFmode:
28159 cmode = DFmode;
28160 break;
28161 case V2SFmode:
28162 cmode = SFmode;
28163 break;
28164 default:
28165 gcc_unreachable ();
28168 if (!register_operand (ops[1], cmode))
28169 ops[1] = force_reg (cmode, ops[1]);
28170 if (!register_operand (ops[0], cmode))
28171 ops[0] = force_reg (cmode, ops[0]);
28172 emit_insn (gen_rtx_SET (VOIDmode, target,
28173 gen_rtx_VEC_CONCAT (mode, ops[0],
28174 ops[1])));
28175 break;
28177 case 4:
28178 switch (mode)
28180 case V4DImode:
28181 cmode = V2DImode;
28182 break;
28183 case V4DFmode:
28184 cmode = V2DFmode;
28185 break;
28186 case V4SImode:
28187 cmode = V2SImode;
28188 break;
28189 case V4SFmode:
28190 cmode = V2SFmode;
28191 break;
28192 default:
28193 gcc_unreachable ();
28195 goto half;
28197 case 8:
28198 switch (mode)
28200 case V8SImode:
28201 cmode = V2SImode;
28202 hmode = V4SImode;
28203 break;
28204 case V8SFmode:
28205 cmode = V2SFmode;
28206 hmode = V4SFmode;
28207 break;
28208 default:
28209 gcc_unreachable ();
28211 goto half;
28213 half:
28214 /* FIXME: We process inputs backward to help RA. PR 36222. */
28215 i = n - 1;
28216 j = (n >> 1) - 1;
28217 for (; i > 0; i -= 2, j--)
28219 first[j] = gen_reg_rtx (cmode);
28220 v = gen_rtvec (2, ops[i - 1], ops[i]);
28221 ix86_expand_vector_init (false, first[j],
28222 gen_rtx_PARALLEL (cmode, v));
28225 n >>= 1;
28226 if (n > 2)
28228 gcc_assert (hmode != VOIDmode);
28229 for (i = j = 0; i < n; i += 2, j++)
28231 second[j] = gen_reg_rtx (hmode);
28232 ix86_expand_vector_init_concat (hmode, second [j],
28233 &first [i], 2);
28235 n >>= 1;
28236 ix86_expand_vector_init_concat (mode, target, second, n);
28238 else
28239 ix86_expand_vector_init_concat (mode, target, first, n);
28240 break;
28242 default:
28243 gcc_unreachable ();
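/* A minimal sketch of the recursion above (illustrative only, with a
   hypothetical vec type and a hypothetical concat2() standing in for
   the VEC_CONCAT pattern): pairwise concatenation builds the result as
   a balanced tree,

     static void
     concat_tree (vec *dst, vec *ops, int n)
     {
       vec half[4];
       int i, j;
       if (n == 2)
         {
           *dst = concat2 (ops[0], ops[1]);
           return;
         }
       for (i = n - 1, j = n / 2 - 1; i > 0; i -= 2, j--)
         half[j] = concat2 (ops[i - 1], ops[i]);
       concat_tree (dst, half, n / 2);
     }

   so 8 scalars become 4 two-element vectors, then 2 four-element
   vectors, then the target; the back-to-front pairing matches the
   PR 36222 workaround above.  */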
28247 /* A subroutine of ix86_expand_vector_init_general. Use vector
28248 interleave to handle the most general case: all values variable,
28249 and none identical. */
28251 static void
28252 ix86_expand_vector_init_interleave (enum machine_mode mode,
28253 rtx target, rtx *ops, int n)
28255 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
28256 int i, j;
28257 rtx op0, op1;
28258 rtx (*gen_load_even) (rtx, rtx, rtx);
28259 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
28260 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
28262 switch (mode)
28264 case V8HImode:
28265 gen_load_even = gen_vec_setv8hi;
28266 gen_interleave_first_low = gen_vec_interleave_lowv4si;
28267 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28268 inner_mode = HImode;
28269 first_imode = V4SImode;
28270 second_imode = V2DImode;
28271 third_imode = VOIDmode;
28272 break;
28273 case V16QImode:
28274 gen_load_even = gen_vec_setv16qi;
28275 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
28276 gen_interleave_second_low = gen_vec_interleave_lowv4si;
28277 inner_mode = QImode;
28278 first_imode = V8HImode;
28279 second_imode = V4SImode;
28280 third_imode = V2DImode;
28281 break;
28282 default:
28283 gcc_unreachable ();
28286 for (i = 0; i < n; i++)
28288 /* Extend the odd element to SImode using a paradoxical SUBREG. */
28289 op0 = gen_reg_rtx (SImode);
28290 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
28292 /* Insert the SImode value as low element of V4SImode vector. */
28293 op1 = gen_reg_rtx (V4SImode);
28294 op0 = gen_rtx_VEC_MERGE (V4SImode,
28295 gen_rtx_VEC_DUPLICATE (V4SImode,
28296 op0),
28297 CONST0_RTX (V4SImode),
28298 const1_rtx);
28299 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
28301 /* Cast the V4SImode vector back to a vector in the original mode. */
28302 op0 = gen_reg_rtx (mode);
28303 emit_move_insn (op0, gen_lowpart (mode, op1));
28305 /* Load even elements into the second position. */
28306 emit_insn ((*gen_load_even) (op0,
28307 force_reg (inner_mode,
28308 ops [i + i + 1]),
28309 const1_rtx));
28311 /* Cast vector to FIRST_IMODE vector. */
28312 ops[i] = gen_reg_rtx (first_imode);
28313 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
28316 /* Interleave low FIRST_IMODE vectors. */
28317 for (i = j = 0; i < n; i += 2, j++)
28319 op0 = gen_reg_rtx (first_imode);
28320 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
28322 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
28323 ops[j] = gen_reg_rtx (second_imode);
28324 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
28327 /* Interleave low SECOND_IMODE vectors. */
28328 switch (second_imode)
28330 case V4SImode:
28331 for (i = j = 0; i < n / 2; i += 2, j++)
28333 op0 = gen_reg_rtx (second_imode);
28334 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
28335 ops[i + 1]));
28337 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
28338 vector. */
28339 ops[j] = gen_reg_rtx (third_imode);
28340 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
28342 second_imode = V2DImode;
28343 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28344 /* FALLTHRU */
28346 case V2DImode:
28347 op0 = gen_reg_rtx (second_imode);
28348 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
28349 ops[1]));
28351 /* Cast the SECOND_IMODE vector back to a vector in the original
28352 mode. */
28353 emit_insn (gen_rtx_SET (VOIDmode, target,
28354 gen_lowpart (mode, op0)));
28355 break;
28357 default:
28358 gcc_unreachable ();
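/* Worked example of the interleave above for V8HImode (illustrative
   only): after the loads, each intermediate vector holds one adjacent
   element pair in its low 32 bits,

     { e0 e1 __ __ ... }   { e2 e3 __ __ ... }
     { e4 e5 __ __ ... }   { e6 e7 __ __ ... }

   one round of "interleave low" as V4SImode zips neighbouring pairs,

     { e0 e1 e2 e3 __ __ __ __ }   { e4 e5 e6 e7 __ __ __ __ }

   and a final "interleave low" as V2DImode produces

     { e0 e1 e2 e3 e4 e5 e6 e7 }.  */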
28362 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
28363 all values variable, and none identical. */
28365 static void
28366 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
28367 rtx target, rtx vals)
28369 rtx ops[32], op0, op1;
28370 enum machine_mode half_mode = VOIDmode;
28371 int n, i;
28373 switch (mode)
28375 case V2SFmode:
28376 case V2SImode:
28377 if (!mmx_ok && !TARGET_SSE)
28378 break;
28379 /* FALLTHRU */
28381 case V8SFmode:
28382 case V8SImode:
28383 case V4DFmode:
28384 case V4DImode:
28385 case V4SFmode:
28386 case V4SImode:
28387 case V2DFmode:
28388 case V2DImode:
28389 n = GET_MODE_NUNITS (mode);
28390 for (i = 0; i < n; i++)
28391 ops[i] = XVECEXP (vals, 0, i);
28392 ix86_expand_vector_init_concat (mode, target, ops, n);
28393 return;
28395 case V32QImode:
28396 half_mode = V16QImode;
28397 goto half;
28399 case V16HImode:
28400 half_mode = V8HImode;
28401 goto half;
28403 half:
28404 n = GET_MODE_NUNITS (mode);
28405 for (i = 0; i < n; i++)
28406 ops[i] = XVECEXP (vals, 0, i);
28407 op0 = gen_reg_rtx (half_mode);
28408 op1 = gen_reg_rtx (half_mode);
28409 ix86_expand_vector_init_interleave (half_mode, op0, ops,
28410 n >> 2);
28411 ix86_expand_vector_init_interleave (half_mode, op1,
28412 &ops [n >> 1], n >> 2);
28413 emit_insn (gen_rtx_SET (VOIDmode, target,
28414 gen_rtx_VEC_CONCAT (mode, op0, op1)));
28415 return;
28417 case V16QImode:
28418 if (!TARGET_SSE4_1)
28419 break;
28420 /* FALLTHRU */
28422 case V8HImode:
28423 if (!TARGET_SSE2)
28424 break;
28426 /* Don't use ix86_expand_vector_init_interleave if we can't
28427 move from GPR to SSE register directly. */
28428 if (!TARGET_INTER_UNIT_MOVES)
28429 break;
28431 n = GET_MODE_NUNITS (mode);
28432 for (i = 0; i < n; i++)
28433 ops[i] = XVECEXP (vals, 0, i);
28434 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
28435 return;
28437 case V4HImode:
28438 case V8QImode:
28439 break;
28441 default:
28442 gcc_unreachable ();
28446 int i, j, n_elts, n_words, n_elt_per_word;
28447 enum machine_mode inner_mode;
28448 rtx words[4], shift;
28450 inner_mode = GET_MODE_INNER (mode);
28451 n_elts = GET_MODE_NUNITS (mode);
28452 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
28453 n_elt_per_word = n_elts / n_words;
28454 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
28456 for (i = 0; i < n_words; ++i)
28458 rtx word = NULL_RTX;
28460 for (j = 0; j < n_elt_per_word; ++j)
28462 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
28463 elt = convert_modes (word_mode, inner_mode, elt, true);
28465 if (j == 0)
28466 word = elt;
28467 else
28469 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
28470 word, 1, OPTAB_LIB_WIDEN);
28471 word = expand_simple_binop (word_mode, IOR, word, elt,
28472 word, 1, OPTAB_LIB_WIDEN);
28476 words[i] = word;
28479 if (n_words == 1)
28480 emit_move_insn (target, gen_lowpart (mode, words[0]));
28481 else if (n_words == 2)
28483 rtx tmp = gen_reg_rtx (mode);
28484 emit_clobber (tmp);
28485 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
28486 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
28487 emit_move_insn (target, tmp);
28489 else if (n_words == 4)
28491 rtx tmp = gen_reg_rtx (V4SImode);
28492 gcc_assert (word_mode == SImode);
28493 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28494 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28495 emit_move_insn (target, gen_lowpart (mode, tmp));
28497 else
28498 gcc_unreachable ();
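/* Worked example for the packing loop above (illustrative only): for
   V4HImode with a 32-bit word_mode, n_words == 2 and n_elt_per_word
   == 2, and word 0 is assembled highest element first:

     j == 0:  word = e1
     j == 1:  word = (e1 << 16) | e0

   leaving e0 in the low halfword, as the little-endian layout requires;
   word 1 is built the same way from e3 and e2.  */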
28502 /* Initialize vector TARGET via VALS. Suppress the use of MMX
28503 instructions unless MMX_OK is true. */
28505 void
28506 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28508 enum machine_mode mode = GET_MODE (target);
28509 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28510 int n_elts = GET_MODE_NUNITS (mode);
28511 int n_var = 0, one_var = -1;
28512 bool all_same = true, all_const_zero = true;
28513 int i;
28514 rtx x;
28516 for (i = 0; i < n_elts; ++i)
28518 x = XVECEXP (vals, 0, i);
28519 if (!(CONST_INT_P (x)
28520 || GET_CODE (x) == CONST_DOUBLE
28521 || GET_CODE (x) == CONST_FIXED))
28522 n_var++, one_var = i;
28523 else if (x != CONST0_RTX (inner_mode))
28524 all_const_zero = false;
28525 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28526 all_same = false;
28529 /* Constants are best loaded from the constant pool. */
28530 if (n_var == 0)
28532 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28533 return;
28536 /* If all values are identical, broadcast the value. */
28537 if (all_same
28538 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28539 XVECEXP (vals, 0, 0)))
28540 return;
28542 /* Values where only one field is non-constant are best loaded from
28543 the pool and overwritten via move later. */
28544 if (n_var == 1)
28546 if (all_const_zero
28547 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28548 XVECEXP (vals, 0, one_var),
28549 one_var))
28550 return;
28552 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
28553 return;
28556 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
28559 void
28560 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28562 enum machine_mode mode = GET_MODE (target);
28563 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28564 enum machine_mode half_mode;
28565 bool use_vec_merge = false;
28566 rtx tmp;
28567 static rtx (*gen_extract[6][2]) (rtx, rtx)
28569 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28570 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28571 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28572 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28573 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28574 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28576 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28578 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28579 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28580 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28581 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28582 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28583 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
28585 int i, j, n;
28587 switch (mode)
28589 case V2SFmode:
28590 case V2SImode:
28591 if (mmx_ok)
28593 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28594 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28595 if (elt == 0)
28596 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28597 else
28598 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28599 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28600 return;
28602 break;
28604 case V2DImode:
28605 use_vec_merge = TARGET_SSE4_1;
28606 if (use_vec_merge)
28607 break;
28609 case V2DFmode:
28611 rtx op0, op1;
28613 /* For the two element vectors, we implement a VEC_CONCAT with
28614 the extraction of the other element. */
28616 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28617 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28619 if (elt == 0)
28620 op0 = val, op1 = tmp;
28621 else
28622 op0 = tmp, op1 = val;
28624 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28625 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28627 return;
28629 case V4SFmode:
28630 use_vec_merge = TARGET_SSE4_1;
28631 if (use_vec_merge)
28632 break;
28634 switch (elt)
28636 case 0:
28637 use_vec_merge = true;
28638 break;
28640 case 1:
28641 /* tmp = target = A B C D */
28642 tmp = copy_to_reg (target);
28643 /* target = A A B B */
28644 emit_insn (gen_sse_unpcklps (target, target, target));
28645 /* target = X A B B */
28646 ix86_expand_vector_set (false, target, val, 0);
28647 /* target = A X C D */
28648 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28649 GEN_INT (1), GEN_INT (0),
28650 GEN_INT (2+4), GEN_INT (3+4)));
28651 return;
28653 case 2:
28654 /* tmp = target = A B C D */
28655 tmp = copy_to_reg (target);
28656 /* tmp = X B C D */
28657 ix86_expand_vector_set (false, tmp, val, 0);
28658 /* target = A B X D */
28659 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28660 GEN_INT (0), GEN_INT (1),
28661 GEN_INT (0+4), GEN_INT (3+4)));
28662 return;
28664 case 3:
28665 /* tmp = target = A B C D */
28666 tmp = copy_to_reg (target);
28667 /* tmp = X B C D */
28668 ix86_expand_vector_set (false, tmp, val, 0);
28669 /* target = A B C X */
28670 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28671 GEN_INT (0), GEN_INT (1),
28672 GEN_INT (2+4), GEN_INT (0+4)));
28673 return;
28675 default:
28676 gcc_unreachable ();
28678 break;
28680 case V4SImode:
28681 use_vec_merge = TARGET_SSE4_1;
28682 if (use_vec_merge)
28683 break;
28685 /* Element 0 handled by vec_merge below. */
28686 if (elt == 0)
28688 use_vec_merge = true;
28689 break;
28692 if (TARGET_SSE2)
28694 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28695 store into element 0, then shuffle them back. */
28697 rtx order[4];
28699 order[0] = GEN_INT (elt);
28700 order[1] = const1_rtx;
28701 order[2] = const2_rtx;
28702 order[3] = GEN_INT (3);
28703 order[elt] = const0_rtx;
28705 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28706 order[1], order[2], order[3]));
28708 ix86_expand_vector_set (false, target, val, 0);
28710 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28711 order[1], order[2], order[3]));
28713 else
28715 /* For SSE1, we have to reuse the V4SF code. */
28716 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28717 gen_lowpart (SFmode, val), elt);
28719 return;
28721 case V8HImode:
28722 use_vec_merge = TARGET_SSE2;
28723 break;
28724 case V4HImode:
28725 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28726 break;
28728 case V16QImode:
28729 use_vec_merge = TARGET_SSE4_1;
28730 break;
28732 case V8QImode:
28733 break;
28735 case V32QImode:
28736 half_mode = V16QImode;
28737 j = 0;
28738 n = 16;
28739 goto half;
28741 case V16HImode:
28742 half_mode = V8HImode;
28743 j = 1;
28744 n = 8;
28745 goto half;
28747 case V8SImode:
28748 half_mode = V4SImode;
28749 j = 2;
28750 n = 4;
28751 goto half;
28753 case V4DImode:
28754 half_mode = V2DImode;
28755 j = 3;
28756 n = 2;
28757 goto half;
28759 case V8SFmode:
28760 half_mode = V4SFmode;
28761 j = 4;
28762 n = 4;
28763 goto half;
28765 case V4DFmode:
28766 half_mode = V2DFmode;
28767 j = 5;
28768 n = 2;
28769 goto half;
28771 half:
28772 /* Compute offset. */
28773 i = elt / n;
28774 elt %= n;
28776 gcc_assert (i <= 1);
28778 /* Extract the half. */
28779 tmp = gen_reg_rtx (half_mode);
28780 emit_insn ((*gen_extract[j][i]) (tmp, target));
28782 /* Put val in tmp at elt. */
28783 ix86_expand_vector_set (false, tmp, val, elt);
28785 /* Put it back. */
28786 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
28787 return;
28789 default:
28790 break;
28793 if (use_vec_merge)
28795 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28796 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28797 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28799 else
28801 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28803 emit_move_insn (mem, target);
28805 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28806 emit_move_insn (tmp, val);
28808 emit_move_insn (target, mem);
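/* The fallback above is a plain memory round-trip: spill the vector to
   a stack slot, overwrite one element in memory, and reload.  As a
   sketch for a V4SImode target (illustrative only, using memcpy from
   <string.h>):

     static void
     set_via_stack (int dst[4], int val, int elt)
     {
       int tmp[4];
       memcpy (tmp, dst, sizeof tmp);
       tmp[elt] = val;
       memcpy (dst, tmp, sizeof tmp);
     }

   This works for every mode, at the cost of a store-forwarding stall
   when the reload hits the just-written slot.  */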
28812 void
28813 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28815 enum machine_mode mode = GET_MODE (vec);
28816 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28817 bool use_vec_extr = false;
28818 rtx tmp;
28820 switch (mode)
28822 case V2SImode:
28823 case V2SFmode:
28824 if (!mmx_ok)
28825 break;
28826 /* FALLTHRU */
28828 case V2DFmode:
28829 case V2DImode:
28830 use_vec_extr = true;
28831 break;
28833 case V4SFmode:
28834 use_vec_extr = TARGET_SSE4_1;
28835 if (use_vec_extr)
28836 break;
28838 switch (elt)
28840 case 0:
28841 tmp = vec;
28842 break;
28844 case 1:
28845 case 3:
28846 tmp = gen_reg_rtx (mode);
28847 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
28848 GEN_INT (elt), GEN_INT (elt),
28849 GEN_INT (elt+4), GEN_INT (elt+4)));
28850 break;
28852 case 2:
28853 tmp = gen_reg_rtx (mode);
28854 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
28855 break;
28857 default:
28858 gcc_unreachable ();
28860 vec = tmp;
28861 use_vec_extr = true;
28862 elt = 0;
28863 break;
28865 case V4SImode:
28866 use_vec_extr = TARGET_SSE4_1;
28867 if (use_vec_extr)
28868 break;
28870 if (TARGET_SSE2)
28872 switch (elt)
28874 case 0:
28875 tmp = vec;
28876 break;
28878 case 1:
28879 case 3:
28880 tmp = gen_reg_rtx (mode);
28881 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
28882 GEN_INT (elt), GEN_INT (elt),
28883 GEN_INT (elt), GEN_INT (elt)));
28884 break;
28886 case 2:
28887 tmp = gen_reg_rtx (mode);
28888 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
28889 break;
28891 default:
28892 gcc_unreachable ();
28894 vec = tmp;
28895 use_vec_extr = true;
28896 elt = 0;
28898 else
28900 /* For SSE1, we have to reuse the V4SF code. */
28901 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28902 gen_lowpart (V4SFmode, vec), elt);
28903 return;
28905 break;
28907 case V8HImode:
28908 use_vec_extr = TARGET_SSE2;
28909 break;
28910 case V4HImode:
28911 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28912 break;
28914 case V16QImode:
28915 use_vec_extr = TARGET_SSE4_1;
28916 break;
28918 case V8QImode:
28919 /* ??? Could extract the appropriate HImode element and shift. */
28920 default:
28921 break;
28924 if (use_vec_extr)
28926 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28927 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28929 /* Let the rtl optimizers know about the zero extension performed. */
28930 if (inner_mode == QImode || inner_mode == HImode)
28932 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
28933 target = gen_lowpart (SImode, target);
28936 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28938 else
28940 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28942 emit_move_insn (mem, vec);
28944 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28945 emit_move_insn (target, tmp);
28949 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
28950 pattern to reduce; DEST is the destination; IN is the input vector. */
28952 void
28953 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
28955 rtx tmp1, tmp2, tmp3;
28957 tmp1 = gen_reg_rtx (V4SFmode);
28958 tmp2 = gen_reg_rtx (V4SFmode);
28959 tmp3 = gen_reg_rtx (V4SFmode);
28961 emit_insn (gen_sse_movhlps (tmp1, in, in));
28962 emit_insn (fn (tmp2, tmp1, in));
28964 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
28965 GEN_INT (1), GEN_INT (1),
28966 GEN_INT (1+4), GEN_INT (1+4)));
28967 emit_insn (fn (dest, tmp2, tmp3));
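/* Data flow of the reduction above (illustrative only), with fn an
   associative elementwise operation such as smin or smax on
   in = { in0 in1 in2 in3 }:

     tmp1 = movhlps (in, in)            = { in2 in3 in2 in3 }
     tmp2 = fn (tmp1, in)               = { fn(in2,in0) fn(in3,in1) .. .. }
     tmp3 = shufps (tmp2, tmp2, lane 1) = { fn(in3,in1) ... }
     dest = fn (tmp2, tmp3)

   so element 0 of DEST is fn (fn (in2, in0), fn (in3, in1)): a
   two-level tree reducing four lanes with two fn insns.  */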
28970 /* Target hook for scalar_mode_supported_p. */
28971 static bool
28972 ix86_scalar_mode_supported_p (enum machine_mode mode)
28974 if (DECIMAL_FLOAT_MODE_P (mode))
28975 return true;
28976 else if (mode == TFmode)
28977 return true;
28978 else
28979 return default_scalar_mode_supported_p (mode);
28982 /* Implements target hook vector_mode_supported_p. */
28983 static bool
28984 ix86_vector_mode_supported_p (enum machine_mode mode)
28986 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
28987 return true;
28988 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
28989 return true;
28990 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
28991 return true;
28992 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
28993 return true;
28994 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
28995 return true;
28996 return false;
28999 /* Target hook for c_mode_for_suffix. */
29000 static enum machine_mode
29001 ix86_c_mode_for_suffix (char suffix)
29003 if (suffix == 'q')
29004 return TFmode;
29005 if (suffix == 'w')
29006 return XFmode;
29008 return VOIDmode;
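/* For example (illustrative only), with these mappings GCC accepts the
   GNU literal suffixes

     __float128 q = 1.0q;     'q' selects TFmode
     long double w = 1.0w;    'w' selects XFmode

   on targets where those modes are supported.  */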
29011 /* Worker function for TARGET_MD_ASM_CLOBBERS.
29013 We do this in the new i386 backend to maintain source compatibility
29014 with the old cc0-based compiler. */
29016 static tree
29017 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
29018 tree inputs ATTRIBUTE_UNUSED,
29019 tree clobbers)
29021 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
29022 clobbers);
29023 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
29024 clobbers);
29025 return clobbers;
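/* The effect (illustrative only) is that a bare

     asm ("fninit");

   is treated as if it had been written

     asm ("fninit" : : : "flags", "fpsr");

   which is what the old cc0-based backend implicitly assumed.  */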
29028 /* Implements the target hook targetm.encode_section_info. This
29029 hook is not used by NetWare. */
29031 static void ATTRIBUTE_UNUSED
29032 ix86_encode_section_info (tree decl, rtx rtl, int first)
29034 default_encode_section_info (decl, rtl, first);
29036 if (TREE_CODE (decl) == VAR_DECL
29037 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
29038 && ix86_in_large_data_p (decl))
29039 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
29042 /* Worker function for REVERSE_CONDITION. */
29044 enum rtx_code
29045 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
29047 return (mode != CCFPmode && mode != CCFPUmode
29048 ? reverse_condition (code)
29049 : reverse_condition_maybe_unordered (code));
29052 /* Output code to perform an x87 FP register move, from OPERANDS[1]
29053 to OPERANDS[0]. */
29055 const char *
29056 output_387_reg_move (rtx insn, rtx *operands)
29058 if (REG_P (operands[0]))
29060 if (REG_P (operands[1])
29061 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29063 if (REGNO (operands[0]) == FIRST_STACK_REG)
29064 return output_387_ffreep (operands, 0);
29065 return "fstp\t%y0";
29067 if (STACK_TOP_P (operands[0]))
29068 return "fld%Z1\t%y1";
29069 return "fst\t%y0";
29071 else if (MEM_P (operands[0]))
29073 gcc_assert (REG_P (operands[1]));
29074 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29075 return "fstp%Z0\t%y0";
29076 else
29078 /* There is no non-popping store to memory for XFmode.
29079 So if we need one, follow the store with a load. */
29080 if (GET_MODE (operands[0]) == XFmode)
29081 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
29082 else
29083 return "fst%Z0\t%y0";
29086 else
29087 gcc_unreachable ();
29090 /* Output code to perform a conditional jump to LABEL, if C2 flag in
29091 FP status register is set. */
29093 void
29094 ix86_emit_fp_unordered_jump (rtx label)
29096 rtx reg = gen_reg_rtx (HImode);
29097 rtx temp;
29099 emit_insn (gen_x86_fnstsw_1 (reg));
29101 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
29103 emit_insn (gen_x86_sahf_1 (reg));
29105 temp = gen_rtx_REG (CCmode, FLAGS_REG);
29106 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
29108 else
29110 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
29112 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
29113 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
29116 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
29117 gen_rtx_LABEL_REF (VOIDmode, label),
29118 pc_rtx);
29119 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
29121 emit_jump_insn (temp);
29122 predict_jump (REG_BR_PROB_BASE * 10 / 100);
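/* User-level model of the test above (illustrative only): C2 is bit 10
   of the x87 status word, which is bit 2 of the high byte fetched by
   fnstsw, hence the 0x04 mask:

     static int
     fp_unordered (void)
     {
       unsigned short sw;
       __asm__ __volatile__ ("fnstsw %0" : "=a" (sw));
       return (sw & 0x0400) != 0;
     }

   In the sahf path the high byte goes to EFLAGS instead, where C2
   lands in PF and is caught by the UNORDERED test.  */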
29125 /* Output code to perform a log1p XFmode calculation. */
29127 void ix86_emit_i387_log1p (rtx op0, rtx op1)
29129 rtx label1 = gen_label_rtx ();
29130 rtx label2 = gen_label_rtx ();
29132 rtx tmp = gen_reg_rtx (XFmode);
29133 rtx tmp2 = gen_reg_rtx (XFmode);
29134 rtx test;
29136 emit_insn (gen_absxf2 (tmp, op1));
29137 test = gen_rtx_GE (VOIDmode, tmp,
29138 CONST_DOUBLE_FROM_REAL_VALUE (
29139 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
29140 XFmode));
29141 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
29143 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29144 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
29145 emit_jump (label2);
29147 emit_label (label1);
29148 emit_move_insn (tmp, CONST1_RTX (XFmode));
29149 emit_insn (gen_addxf3 (tmp, op1, tmp));
29150 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29151 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
29153 emit_label (label2);
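/* Worked identities behind the split above (illustrative note):

     log1p (x) = ln (1 + x) = ln (2) * log2 (1 + x)

   fyl2xp1 computes y * log2 (x + 1) directly but is only specified for
   |x| < 1 - sqrt (2) / 2 ~= 0.29289321881..., exactly the threshold
   tested above; outside that range 1 + x is formed explicitly and
   fyl2x computes ln (2) * log2 (1 + x) instead.  */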
29156 /* Output code to perform a Newton-Raphson approximation of a single precision
29157 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
29159 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
29161 rtx x0, x1, e0, e1, two;
29163 x0 = gen_reg_rtx (mode);
29164 e0 = gen_reg_rtx (mode);
29165 e1 = gen_reg_rtx (mode);
29166 x1 = gen_reg_rtx (mode);
29168 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
29170 if (VECTOR_MODE_P (mode))
29171 two = ix86_build_const_vector (SFmode, true, two);
29173 two = force_reg (mode, two);
29175 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
29177 /* x0 = rcp(b) estimate */
29178 emit_insn (gen_rtx_SET (VOIDmode, x0,
29179 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
29180 UNSPEC_RCP)));
29181 /* e0 = x0 * b */
29182 emit_insn (gen_rtx_SET (VOIDmode, e0,
29183 gen_rtx_MULT (mode, x0, b)));
29184 /* e1 = 2. - e0 */
29185 emit_insn (gen_rtx_SET (VOIDmode, e1,
29186 gen_rtx_MINUS (mode, two, e0)));
29187 /* x1 = x0 * e1 */
29188 emit_insn (gen_rtx_SET (VOIDmode, x1,
29189 gen_rtx_MULT (mode, x0, e1)));
29190 /* res = a * x1 */
29191 emit_insn (gen_rtx_SET (VOIDmode, res,
29192 gen_rtx_MULT (mode, a, x1)));
29195 /* Output code to perform a Newton-Raphson approximation of a
29196 single precision floating point [reciprocal] square root. */
29198 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
29199 bool recip)
29201 rtx x0, e0, e1, e2, e3, mthree, mhalf;
29202 REAL_VALUE_TYPE r;
29204 x0 = gen_reg_rtx (mode);
29205 e0 = gen_reg_rtx (mode);
29206 e1 = gen_reg_rtx (mode);
29207 e2 = gen_reg_rtx (mode);
29208 e3 = gen_reg_rtx (mode);
29210 real_from_integer (&r, VOIDmode, -3, -1, 0);
29211 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29213 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
29214 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29216 if (VECTOR_MODE_P (mode))
29218 mthree = ix86_build_const_vector (SFmode, true, mthree);
29219 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
29222 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
29223 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
29225 /* x0 = rsqrt(a) estimate */
29226 emit_insn (gen_rtx_SET (VOIDmode, x0,
29227 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
29228 UNSPEC_RSQRT)));
29230 /* If a == 0.0, filter out the infinite rsqrt estimate so that sqrt (0.0) does not become 0 * inf = NaN. */
29231 if (!recip)
29233 rtx zero, mask;
29236 mask = gen_reg_rtx (mode);
29238 zero = force_reg (mode, CONST0_RTX (mode));
29239 emit_insn (gen_rtx_SET (VOIDmode, mask,
29240 gen_rtx_NE (mode, zero, a)));
29242 emit_insn (gen_rtx_SET (VOIDmode, x0,
29243 gen_rtx_AND (mode, x0, mask)));
29246 /* e0 = x0 * a */
29247 emit_insn (gen_rtx_SET (VOIDmode, e0,
29248 gen_rtx_MULT (mode, x0, a)));
29249 /* e1 = e0 * x0 */
29250 emit_insn (gen_rtx_SET (VOIDmode, e1,
29251 gen_rtx_MULT (mode, e0, x0)));
29253 /* e2 = e1 - 3. */
29254 mthree = force_reg (mode, mthree);
29255 emit_insn (gen_rtx_SET (VOIDmode, e2,
29256 gen_rtx_PLUS (mode, e1, mthree)));
29258 mhalf = force_reg (mode, mhalf);
29259 if (recip)
29260 /* e3 = -.5 * x0 */
29261 emit_insn (gen_rtx_SET (VOIDmode, e3,
29262 gen_rtx_MULT (mode, x0, mhalf)));
29263 else
29264 /* e3 = -.5 * e0 */
29265 emit_insn (gen_rtx_SET (VOIDmode, e3,
29266 gen_rtx_MULT (mode, e0, mhalf)));
29267 /* ret = e2 * e3 */
29268 emit_insn (gen_rtx_SET (VOIDmode, res,
29269 gen_rtx_MULT (mode, e2, e3)));
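/* Scalar model of the sequence above (illustrative only, with a
   hypothetical rsqrt_estimate() standing in for RSQRTSS):

     static float
     swrsqrt (float a, int recip)
     {
       float x0 = rsqrt_estimate (a);
       float e0 = x0 * a;
       float e1 = e0 * x0;
       float e2 = e1 - 3.0f;
       float e3 = (recip ? x0 : e0) * -0.5f;
       return e2 * e3;
     }

   For the recip case, -0.5 * x0 * (a*x0*x0 - 3) is the standard
   Newton-Raphson refinement 0.5 * x0 * (3 - a*x0*x0) of 1/sqrt(a);
   multiplying by e0 == a*x0 instead yields sqrt(a).  */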
29272 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
29274 static void ATTRIBUTE_UNUSED
29275 i386_solaris_elf_named_section (const char *name, unsigned int flags,
29276 tree decl)
29278 /* With Binutils 2.15, the "@unwind" marker must be specified on
29279 every occurrence of the ".eh_frame" section, not just the first
29280 one. */
29281 if (TARGET_64BIT
29282 && strcmp (name, ".eh_frame") == 0)
29284 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
29285 flags & SECTION_WRITE ? "aw" : "a");
29286 return;
29288 default_elf_asm_named_section (name, flags, decl);
29291 /* Return the mangling of TYPE if it is an extended fundamental type. */
29293 static const char *
29294 ix86_mangle_type (const_tree type)
29296 type = TYPE_MAIN_VARIANT (type);
29298 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
29299 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
29300 return NULL;
29302 switch (TYPE_MODE (type))
29304 case TFmode:
29305 /* __float128 is "g". */
29306 return "g";
29307 case XFmode:
29308 /* "long double" or __float80 is "e". */
29309 return "e";
29310 default:
29311 return NULL;
29315 /* For 32-bit code we can save PIC register setup by using
29316 __stack_chk_fail_local hidden function instead of calling
29317 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
29318 register, so it is better to call __stack_chk_fail directly. */
29320 static tree
29321 ix86_stack_protect_fail (void)
29323 return TARGET_64BIT
29324 ? default_external_stack_protect_fail ()
29325 : default_hidden_stack_protect_fail ();
29328 /* Select a format to encode pointers in exception handling data. CODE
29329 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
29330 true if the symbol may be affected by dynamic relocations.
29332 ??? All x86 object file formats are capable of representing this.
29333 After all, the relocation needed is the same as for the call insn.
29334 Whether or not a particular assembler allows us to enter such, I
29335 guess we'll have to see. */
29336 int
29337 asm_preferred_eh_data_format (int code, int global)
29339 if (flag_pic)
29341 int type = DW_EH_PE_sdata8;
29342 if (!TARGET_64BIT
29343 || ix86_cmodel == CM_SMALL_PIC
29344 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
29345 type = DW_EH_PE_sdata4;
29346 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
29348 if (ix86_cmodel == CM_SMALL
29349 || (ix86_cmodel == CM_MEDIUM && code))
29350 return DW_EH_PE_udata4;
29351 return DW_EH_PE_absptr;
29354 /* Expand copysign from SIGN to the positive value ABS_VALUE
29355 storing in RESULT. If MASK is non-null, it must be a mask that
29356 clears the sign bit (the complement of the sign-bit mask). */
29357 static void
29358 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
29360 enum machine_mode mode = GET_MODE (sign);
29361 rtx sgn = gen_reg_rtx (mode);
29362 if (mask == NULL_RTX)
29364 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
29365 if (!VECTOR_MODE_P (mode))
29367 /* We need to generate a scalar mode mask in this case. */
29368 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29369 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29370 mask = gen_reg_rtx (mode);
29371 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29374 else
29375 mask = gen_rtx_NOT (mode, mask);
29376 emit_insn (gen_rtx_SET (VOIDmode, sgn,
29377 gen_rtx_AND (mode, mask, sign)));
29378 emit_insn (gen_rtx_SET (VOIDmode, result,
29379 gen_rtx_IOR (mode, abs_value, sgn)));
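/* Bit-level model of the AND/IOR pair above (illustrative only, using
   memcpy type punning from <string.h>):

     static float
     copysign_to_positive (float abs_value, float sign)
     {
       uint32_t a, s;
       memcpy (&a, &abs_value, sizeof a);
       memcpy (&s, &sign, sizeof s);
       a |= s & 0x80000000u;
       memcpy (&abs_value, &a, sizeof a);
       return abs_value;
     }

   Since ABS_VALUE is known to be positive, its sign bit is clear and a
   single AND plus IOR transplants the sign of SIGN onto it.  */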
29382 /* Expand fabs (OP0) and return a new rtx that holds the result. The
29383 mask for masking out the sign-bit is stored in *SMASK, if that is
29384 non-null. */
29385 static rtx
29386 ix86_expand_sse_fabs (rtx op0, rtx *smask)
29388 enum machine_mode mode = GET_MODE (op0);
29389 rtx xa, mask;
29391 xa = gen_reg_rtx (mode);
29392 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
29393 if (!VECTOR_MODE_P (mode))
29395 /* We need to generate a scalar mode mask in this case. */
29396 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29397 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29398 mask = gen_reg_rtx (mode);
29399 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29401 emit_insn (gen_rtx_SET (VOIDmode, xa,
29402 gen_rtx_AND (mode, op0, mask)));
29404 if (smask)
29405 *smask = mask;
29407 return xa;
29410 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
29411 swapping the operands if SWAP_OPERANDS is true. The expanded
29412 code is a forward jump to a newly created label in case the
29413 comparison is true. The generated label rtx is returned. */
29414 static rtx
29415 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
29416 bool swap_operands)
29418 rtx label, tmp;
29420 if (swap_operands)
29422 tmp = op0;
29423 op0 = op1;
29424 op1 = tmp;
29427 label = gen_label_rtx ();
29428 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
29429 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29430 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
29431 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
29432 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
29433 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
29434 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
29435 JUMP_LABEL (tmp) = label;
29437 return label;
29440 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
29441 using comparison code CODE. Operands are swapped for the comparison if
29442 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
29443 static rtx
29444 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
29445 bool swap_operands)
29447 enum machine_mode mode = GET_MODE (op0);
29448 rtx mask = gen_reg_rtx (mode);
29450 if (swap_operands)
29452 rtx tmp = op0;
29453 op0 = op1;
29454 op1 = tmp;
29457 if (mode == DFmode)
29458 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
29459 gen_rtx_fmt_ee (code, mode, op0, op1)));
29460 else
29461 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
29462 gen_rtx_fmt_ee (code, mode, op0, op1)));
29464 return mask;
29467 /* Generate and return a rtx of mode MODE for 2**n where n is the number
29468 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
29469 static rtx
29470 ix86_gen_TWO52 (enum machine_mode mode)
29472 REAL_VALUE_TYPE TWO52r;
29473 rtx TWO52;
29475 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
29476 TWO52 = const_double_from_real_value (TWO52r, mode);
29477 TWO52 = force_reg (mode, TWO52);
29479 return TWO52;
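/* Why adding and then subtracting 2**52 rounds (worked example for
   DFmode): for 0 <= x < 2**52 the sum x + 2**52 has a unit in the last
   place of exactly 1.0, so storing it discards the fraction of x under
   the current (round-to-nearest) mode; subtracting 2**52 back then
   leaves round (x).  E.g. 3.7 + 2**52 rounds to 2**52 + 4.0, and the
   subtraction yields 4.0.  This is why every user of TWO52 below first
   guards with !isless (xa, TWO52).  */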
29482 /* Expand SSE sequence for computing lround from OP1 storing
29483 into OP0. */
29484 void
29485 ix86_expand_lround (rtx op0, rtx op1)
29487 /* C code for the stuff we're doing below:
29488 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
29489 return (long)tmp;
29491 enum machine_mode mode = GET_MODE (op1);
29492 const struct real_format *fmt;
29493 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29494 rtx adj;
29496 /* load nextafter (0.5, 0.0) */
29497 fmt = REAL_MODE_FORMAT (mode);
29498 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29499 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29501 /* adj = copysign (0.5, op1) */
29502 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29503 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29505 /* adj = op1 + adj */
29506 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29508 /* op0 = (imode)adj */
29509 expand_fix (op0, adj, 0);
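/* Why nextafter (0.5, 0.0) instead of 0.5 (worked example for DFmode):
   take x = 0.5 - 2**-54, the largest double below 0.5.  Then x + 0.5
   equals 1 - 2**-54, which is exactly halfway between representable
   values and rounds (ties to even) up to 1.0, so lround would wrongly
   return 1.  With the predecessor of 0.5 as the adjustment the sum is
   1 - 2**-53, still below 1.0, and the truncation gives the correct
   0.  */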
29512 /* Expand SSE2 sequence for computing lfloor or lceil (depending on
29513 DO_FLOOR) from OPERAND1 storing into OPERAND0. */
29514 void
29515 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29517 /* C code for the stuff we're doing below (for do_floor):
29518 xi = (long)op1;
29519 xi -= (double)xi > op1 ? 1 : 0;
29520 return xi;
29522 enum machine_mode fmode = GET_MODE (op1);
29523 enum machine_mode imode = GET_MODE (op0);
29524 rtx ireg, freg, label, tmp;
29526 /* reg = (long)op1 */
29527 ireg = gen_reg_rtx (imode);
29528 expand_fix (ireg, op1, 0);
29530 /* freg = (double)reg */
29531 freg = gen_reg_rtx (fmode);
29532 expand_float (freg, ireg, 0);
29534 /* ireg = (freg > op1) ? ireg - 1 : ireg */
29535 label = ix86_expand_sse_compare_and_jump (UNLE,
29536 freg, op1, !do_floor);
29537 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29538 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29539 emit_move_insn (ireg, tmp);
29541 emit_label (label);
29542 LABEL_NUSES (label) = 1;
29544 emit_move_insn (op0, ireg);
29547 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29548 result in OPERAND0. */
29549 void
29550 ix86_expand_rint (rtx operand0, rtx operand1)
29552 /* C code for the stuff we're doing below:
29553 xa = fabs (operand1);
29554 if (!isless (xa, 2**52))
29555 return operand1;
29556 xa = xa + 2**52 - 2**52;
29557 return copysign (xa, operand1);
29559 enum machine_mode mode = GET_MODE (operand0);
29560 rtx res, xa, label, TWO52, mask;
29562 res = gen_reg_rtx (mode);
29563 emit_move_insn (res, operand1);
29565 /* xa = abs (operand1) */
29566 xa = ix86_expand_sse_fabs (res, &mask);
29568 /* if (!isless (xa, TWO52)) goto label; */
29569 TWO52 = ix86_gen_TWO52 (mode);
29570 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29572 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29573 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29575 ix86_sse_copysign_to_positive (res, xa, res, mask);
29577 emit_label (label);
29578 LABEL_NUSES (label) = 1;
29580 emit_move_insn (operand0, res);
29583 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29584 into OPERAND0. */
29585 void
29586 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29588 /* C code for the stuff we expand below.
29589 double xa = fabs (x), x2;
29590 if (!isless (xa, TWO52))
29591 return x;
29592 xa = xa + TWO52 - TWO52;
29593 x2 = copysign (xa, x);
29594 Compensate. Floor:
29595 if (x2 > x)
29596 x2 -= 1;
29597 Compensate. Ceil:
29598 if (x2 < x)
29599 x2 -= -1;
29600 return x2;
29602 enum machine_mode mode = GET_MODE (operand0);
29603 rtx xa, TWO52, tmp, label, one, res, mask;
29605 TWO52 = ix86_gen_TWO52 (mode);
29607 /* Temporary for holding the result, initialized to the input
29608 operand to ease control flow. */
29609 res = gen_reg_rtx (mode);
29610 emit_move_insn (res, operand1);
29612 /* xa = abs (operand1) */
29613 xa = ix86_expand_sse_fabs (res, &mask);
29615 /* if (!isless (xa, TWO52)) goto label; */
29616 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29618 /* xa = xa + TWO52 - TWO52; */
29619 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29620 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29622 /* xa = copysign (xa, operand1) */
29623 ix86_sse_copysign_to_positive (xa, xa, res, mask);
29625 /* generate 1.0 or -1.0 */
29626 one = force_reg (mode,
29627 const_double_from_real_value (do_floor
29628 ? dconst1 : dconstm1, mode));
29630 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29631 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29632 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29633 gen_rtx_AND (mode, one, tmp)));
29634 /* We always need to subtract here to preserve signed zero. */
29635 tmp = expand_simple_binop (mode, MINUS,
29636 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29637 emit_move_insn (res, tmp);
29639 emit_label (label);
29640 LABEL_NUSES (label) = 1;
29642 emit_move_insn (operand0, res);
29645 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29646 into OPERAND0. */
29647 void
29648 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29650 /* C code for the stuff we expand below.
29651 double xa = fabs (x), x2;
29652 if (!isless (xa, TWO52))
29653 return x;
29654 x2 = (double)(long)x;
29655 Compensate. Floor:
29656 if (x2 > x)
29657 x2 -= 1;
29658 Compensate. Ceil:
29659 if (x2 < x)
29660 x2 += 1;
29661 if (HONOR_SIGNED_ZEROS (mode))
29662 return copysign (x2, x);
29663 return x2;
29665 enum machine_mode mode = GET_MODE (operand0);
29666 rtx xa, xi, TWO52, tmp, label, one, res, mask;
29668 TWO52 = ix86_gen_TWO52 (mode);
29670 /* Temporary for holding the result, initialized to the input
29671 operand to ease control flow. */
29672 res = gen_reg_rtx (mode);
29673 emit_move_insn (res, operand1);
29675 /* xa = abs (operand1) */
29676 xa = ix86_expand_sse_fabs (res, &mask);
29678 /* if (!isless (xa, TWO52)) goto label; */
29679 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29681 /* xa = (double)(long)x */
29682 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29683 expand_fix (xi, res, 0);
29684 expand_float (xa, xi, 0);
29686 /* generate 1.0 */
29687 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29689 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29690 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29691 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29692 gen_rtx_AND (mode, one, tmp)));
29693 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29694 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29695 emit_move_insn (res, tmp);
29697 if (HONOR_SIGNED_ZEROS (mode))
29698 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29700 emit_label (label);
29701 LABEL_NUSES (label) = 1;
29703 emit_move_insn (operand0, res);
29706 /* Expand SSE sequence for computing round from OPERAND1 storing
29707 into OPERAND0. Sequence that works without relying on DImode truncation
29708 via cvttsd2siq that is only available on 64bit targets. */
29709 void
29710 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29712 /* C code for the stuff we expand below.
29713 double xa = fabs (x), xa2, x2;
29714 if (!isless (xa, TWO52))
29715 return x;
29716 Using the absolute value and copying back sign makes
29717 -0.0 -> -0.0 correct.
29718 xa2 = xa + TWO52 - TWO52;
29719 Compensate.
29720 dxa = xa2 - xa;
29721 if (dxa <= -0.5)
29722 xa2 += 1;
29723 else if (dxa > 0.5)
29724 xa2 -= 1;
29725 x2 = copysign (xa2, x);
29726 return x2;
29728 enum machine_mode mode = GET_MODE (operand0);
29729 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29731 TWO52 = ix86_gen_TWO52 (mode);
29733 /* Temporary for holding the result, initialized to the input
29734 operand to ease control flow. */
29735 res = gen_reg_rtx (mode);
29736 emit_move_insn (res, operand1);
29738 /* xa = abs (operand1) */
29739 xa = ix86_expand_sse_fabs (res, &mask);
29741 /* if (!isless (xa, TWO52)) goto label; */
29742 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29744 /* xa2 = xa + TWO52 - TWO52; */
29745 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29746 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29748 /* dxa = xa2 - xa; */
29749 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29751 /* generate 0.5, 1.0 and -0.5 */
29752 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29753 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29754 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
29755 0, OPTAB_DIRECT);
29757 /* Compensate. */
29759 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29760 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29761 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29762 gen_rtx_AND (mode, one, tmp)));
29763 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29764 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29765 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29766 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29767 gen_rtx_AND (mode, one, tmp)));
29768 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29770 /* res = copysign (xa2, operand1) */
29771 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29773 emit_label (label);
29774 LABEL_NUSES (label) = 1;
29776 emit_move_insn (operand0, res);
29779 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29780 into OPERAND0. */
29781 void
29782 ix86_expand_trunc (rtx operand0, rtx operand1)
29784 /* C code for SSE variant we expand below.
29785 double xa = fabs (x), x2;
29786 if (!isless (xa, TWO52))
29787 return x;
29788 x2 = (double)(long)x;
29789 if (HONOR_SIGNED_ZEROS (mode))
29790 return copysign (x2, x);
29791 return x2;
29793 enum machine_mode mode = GET_MODE (operand0);
29794 rtx xa, xi, TWO52, label, res, mask;
29796 TWO52 = ix86_gen_TWO52 (mode);
29798 /* Temporary for holding the result, initialized to the input
29799 operand to ease control flow. */
29800 res = gen_reg_rtx (mode);
29801 emit_move_insn (res, operand1);
29803 /* xa = abs (operand1) */
29804 xa = ix86_expand_sse_fabs (res, &mask);
29806 /* if (!isless (xa, TWO52)) goto label; */
29807 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29809 /* x = (double)(long)x */
29810 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29811 expand_fix (xi, res, 0);
29812 expand_float (res, xi, 0);
29814 if (HONOR_SIGNED_ZEROS (mode))
29815 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29817 emit_label (label);
29818 LABEL_NUSES (label) = 1;
29820 emit_move_insn (operand0, res);
29823 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29824 into OPERAND0, without relying on the DImode truncation (cvttsd2siq) that is only available on 64bit targets. */
29825 void
29826 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29828 enum machine_mode mode = GET_MODE (operand0);
29829 rtx xa, mask, TWO52, label, one, res, smask, tmp;
29831 /* C code for SSE variant we expand below.
29832 double xa = fabs (x), xa2, x2;
29833 if (!isless (xa, TWO52))
29834 return x;
29835 xa2 = xa + TWO52 - TWO52;
29836 Compensate:
29837 if (xa2 > xa)
29838 xa2 -= 1.0;
29839 x2 = copysign (xa2, x);
29840 return x2;
29843 TWO52 = ix86_gen_TWO52 (mode);
29845 /* Temporary for holding the result, initialized to the input
29846 operand to ease control flow. */
29847 res = gen_reg_rtx (mode);
29848 emit_move_insn (res, operand1);
29850 /* xa = abs (operand1) */
29851 xa = ix86_expand_sse_fabs (res, &smask);
29853 /* if (!isless (xa, TWO52)) goto label; */
29854 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29856 /* res = xa + TWO52 - TWO52; */
29857 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29858 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
29859 emit_move_insn (res, tmp);
29861 /* generate 1.0 */
29862 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29864 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
29865 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
29866 emit_insn (gen_rtx_SET (VOIDmode, mask,
29867 gen_rtx_AND (mode, mask, one)));
29868 tmp = expand_simple_binop (mode, MINUS,
29869 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
29870 emit_move_insn (res, tmp);
29872 /* res = copysign (res, operand1) */
29873 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
29875 emit_label (label);
29876 LABEL_NUSES (label) = 1;
29878 emit_move_insn (operand0, res);
29881 /* Expand SSE sequence for computing round from OPERAND1 storing
29882 into OPERAND0. */
29883 void
29884 ix86_expand_round (rtx operand0, rtx operand1)
29886 /* C code for the stuff we're doing below:
29887 double xa = fabs (x);
29888 if (!isless (xa, TWO52))
29889 return x;
29890 xa = (double)(long)(xa + nextafter (0.5, 0.0));
29891 return copysign (xa, x);
29893 enum machine_mode mode = GET_MODE (operand0);
29894 rtx res, TWO52, xa, label, xi, half, mask;
29895 const struct real_format *fmt;
29896 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29898 /* Temporary for holding the result, initialized to the input
29899 operand to ease control flow. */
29900 res = gen_reg_rtx (mode);
29901 emit_move_insn (res, operand1);
29903 TWO52 = ix86_gen_TWO52 (mode);
29904 xa = ix86_expand_sse_fabs (res, &mask);
29905 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29907 /* load nextafter (0.5, 0.0) */
29908 fmt = REAL_MODE_FORMAT (mode);
29909 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29910 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29912 /* xa = xa + 0.5 */
29913 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29914 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
29916 /* xa = (double)(int64_t)xa */
29917 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29918 expand_fix (xi, xa, 0);
29919 expand_float (xa, xi, 0);
29921 /* res = copysign (xa, operand1) */
29922 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29924 emit_label (label);
29925 LABEL_NUSES (label) = 1;
29927 emit_move_insn (operand0, res);
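/* Illustration, not from the sources: why the code adds pred_half
   (the largest double below 0.5, computed above as 0.5 - 2^(-p-1)
   with p == 53 for DFmode) instead of 0.5 itself.  For the input
   just below 0.5, adding a full 0.5 rounds the sum up to 1.0 and
   misrounds:

     double x = 0.49999999999999994;     // nextafter (0.5, 0.0)
     (long) (x + 0.5)                    // == 1, wrong: round (x) == 0
     (long) (x + 0.49999999999999994)    // == 0, correct

   The sum x + pred_half stays strictly below 1.0 for every x below
   0.5, so truncation gives the correctly rounded result.  */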
29931 /* Validate whether an SSE5 instruction is valid or not.
29932 OPERANDS is the array of operands.
29933 NUM is the number of operands.
29934 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
29935 NUM_MEMORY is the maximum number of memory operands to accept.
29936 When COMMUTATIVE is set, operands 1 and 2 can be swapped. */
29938 bool
29939 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
29940 bool uses_oc0, int num_memory, bool commutative)
29942 int mem_mask;
29943 int mem_count;
29944 int i;
29946 /* Count the number of memory arguments */
29947 mem_mask = 0;
29948 mem_count = 0;
29949 for (i = 0; i < num; i++)
29951 enum machine_mode mode = GET_MODE (operands[i]);
29952 if (register_operand (operands[i], mode))
29955 else if (memory_operand (operands[i], mode))
29957 mem_mask |= (1 << i);
29958 mem_count++;
29961 else
29963 rtx pattern = PATTERN (insn);
29965 /* allow 0 for pcmov */
29966 if (GET_CODE (pattern) != SET
29967 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
29968 || i < 2
29969 || operands[i] != CONST0_RTX (mode))
29970 return false;
29974 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
29975 a memory operand. */
29976 if (num_memory < 0)
29978 num_memory = -num_memory;
29979 if ((mem_mask & (1 << (num-1))) != 0)
29981 mem_mask &= ~(1 << (num-1));
29982 mem_count--;
29986 /* If there were no memory operations, allow the insn */
29987 if (mem_mask == 0)
29988 return true;
29990 /* Do not allow the destination register to be a memory operand. */
29991 else if (mem_mask & (1 << 0))
29992 return false;
29994 /* If there are too many memory operands, disallow the instruction. While
29995 the hardware only allows one memory reference, before register allocation
29996 we sometimes allow two memory operands for certain insns so that code
29997 like the following can be optimized:
29999 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
30001 or similar cases that are vectorized into using the fmaddss
30002 instruction. */
30003 else if (mem_count > num_memory)
30004 return false;
30006 /* Don't allow more than one memory operation if not optimizing. */
30007 else if (mem_count > 1 && !optimize)
30008 return false;
30010 else if (num == 4 && mem_count == 1)
30012 /* formats (destination is the first argument), example fmaddss:
30013 xmm1, xmm1, xmm2, xmm3/mem
30014 xmm1, xmm1, xmm2/mem, xmm3
30015 xmm1, xmm2, xmm3/mem, xmm1
30016 xmm1, xmm2/mem, xmm3, xmm1 */
30017 if (uses_oc0)
30018 return ((mem_mask == (1 << 1))
30019 || (mem_mask == (1 << 2))
30020 || (mem_mask == (1 << 3)));
30022 /* format, example pmacsdd:
30023 xmm1, xmm2, xmm3/mem, xmm1 */
30024 if (commutative)
30025 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
30026 else
30027 return (mem_mask == (1 << 2));
30030 else if (num == 4 && num_memory == 2)
30032 /* If there are two memory operands, we can load one of the memory ops
30033 into the destination register. This is for optimizing the
30034 multiply/add ops, for which the combiner has given both the multiply
30035 and the add insns a memory operand. We have to be careful
30036 that the destination doesn't overlap with the inputs. */
30037 rtx op0 = operands[0];
30039 if (reg_mentioned_p (op0, operands[1])
30040 || reg_mentioned_p (op0, operands[2])
30041 || reg_mentioned_p (op0, operands[3]))
30042 return false;
30044 /* formats (destination is the first argument), example fmaddss:
30045 xmm1, xmm1, xmm2, xmm3/mem
30046 xmm1, xmm1, xmm2/mem, xmm3
30047 xmm1, xmm2, xmm3/mem, xmm1
30048 xmm1, xmm2/mem, xmm3, xmm1
30050 For the oc0 case, we will load either operands[1] or operands[3] into
30051 operands[0], so any combination of 2 memory operands is ok. */
30052 if (uses_oc0)
30053 return true;
30055 /* format, example pmacsdd:
30056 xmm1, xmm2, xmm3/mem, xmm1
30058 For the integer multiply/add instructions be more restrictive and
30059 require operands[2] and operands[3] to be the memory operands. */
30060 if (commutative)
30061 return (mem_mask == ((1 << 1) | (1 << 3)) || mem_mask == ((1 << 2) | (1 << 3)));
30062 else
30063 return (mem_mask == ((1 << 2) | (1 << 3)));
30066 else if (num == 3 && num_memory == 1)
30068 /* formats, example protb:
30069 xmm1, xmm2, xmm3/mem
30070 xmm1, xmm2/mem, xmm3 */
30071 if (uses_oc0)
30072 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
30074 /* format, example comeq:
30075 xmm1, xmm2, xmm3/mem */
30076 else
30077 return (mem_mask == (1 << 2));
30080 else
30081 gcc_unreachable ();
30083 return false;
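/* Illustration with hypothetical operands, not from the sources: how
   mem_mask encodes memory positions.  For an fmaddss with operands
   { xmm1, xmm1, xmm2, mem } the counting loop above yields
   mem_mask == (1 << 3) and mem_count == 1, which the uses_oc0 case
   accepts.  Operands { xmm1, mem, mem, xmm3 } yield
   mem_mask == ((1 << 1) | (1 << 2)) and are rejected whenever only
   one memory operand is permitted.  */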
30087 /* Fix up an SSE5 instruction that has 2 memory input references into a form
30088 the hardware will allow, by using the destination register to load one of
30089 the memory operands. Presently this is used by the multiply/add routines
30090 to allow 2 memory references. */
30092 void
30093 ix86_expand_sse5_multiple_memory (rtx operands[],
30094 int num,
30095 enum machine_mode mode)
30097 rtx op0 = operands[0];
30098 if (num != 4
30099 || memory_operand (op0, mode)
30100 || reg_mentioned_p (op0, operands[1])
30101 || reg_mentioned_p (op0, operands[2])
30102 || reg_mentioned_p (op0, operands[3]))
30103 gcc_unreachable ();
30105 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
30106 the destination register. */
30107 if (memory_operand (operands[1], mode))
30109 emit_move_insn (op0, operands[1]);
30110 operands[1] = op0;
30112 else if (memory_operand (operands[3], mode))
30114 emit_move_insn (op0, operands[3]);
30115 operands[3] = op0;
30117 else
30118 gcc_unreachable ();
30120 return;
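/* Illustration with hypothetical operands, not from the sources: for
   a multiply/add whose combined form is { xmm0, mem_a, xmm2, mem_b },
   the code above emits a move of mem_a into xmm0 and rewrites
   operands[1] to xmm0, so the remaining insn carries only the single
   memory reference mem_b that the hardware allows.  */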
30124 /* Table of valid machine attributes. */
30125 static const struct attribute_spec ix86_attribute_table[] =
30127 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
30128 /* Stdcall attribute says callee is responsible for popping arguments
30129 if they are not variable. */
30130 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30131 /* Fastcall attribute says callee is responsible for popping arguments
30132 if they are not variable. */
30133 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30134 /* Cdecl attribute says the callee is a normal C declaration */
30135 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30136 /* Regparm attribute specifies how many integer arguments are to be
30137 passed in registers. */
30138 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
30139 /* Sseregparm attribute says we are using x86_64 calling conventions
30140 for FP arguments. */
30141 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30142 /* force_align_arg_pointer says this function realigns the stack at entry. */
30143 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
30144 false, true, true, ix86_handle_cconv_attribute },
30145 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30146 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
30147 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
30148 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
30149 #endif
30150 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30151 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30152 #ifdef SUBTARGET_ATTRIBUTE_TABLE
30153 SUBTARGET_ATTRIBUTE_TABLE,
30154 #endif
30155 /* ms_abi and sysv_abi calling convention function attributes. */
30156 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30157 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30158 /* End element. */
30159 { NULL, 0, 0, false, false, false, NULL }
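/* Illustration, not from the sources: user-level spellings of some of
   the attributes registered above.

     int __attribute__ ((stdcall)) f (int a, int b);     // callee pops args
     int __attribute__ ((regparm (2))) g (int a, int b); // a, b in registers
     struct __attribute__ ((ms_struct)) s { char c; int i; };

   regparm takes exactly one argument, matching its min_len/max_len
   of 1 in the table.  */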
30162 /* Implement targetm.vectorize.builtin_vectorization_cost. */
30163 static int
30164 x86_builtin_vectorization_cost (bool runtime_test)
30166 /* If the branch of the runtime test is taken - i.e. the vectorized
30167 version is skipped - this incurs a misprediction cost (because the
30168 vectorized version is expected to be the fall-through). So we subtract
30169 the latency of a mispredicted branch from the costs that are incurred
30170 when the vectorized version is executed.
30172 TODO: The values in individual target tables have to be tuned or new
30173 fields may be needed. E.g., on K8, the default branch path is the
30174 not-taken path. If the taken path is predicted correctly, the minimum
30175 penalty of going down the taken-path is 1 cycle. If the taken-path is
30176 not predicted correctly, then the minimum penalty is 10 cycles. */
30178 if (runtime_test)
30180 return (-(ix86_cost->cond_taken_branch_cost));
30182 else
30183 return 0;
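/* Illustration with an assumed cost value, not from the sources: if
   ix86_cost->cond_taken_branch_cost is 3, the runtime test above
   contributes -3, crediting the vectorized version for the
   misprediction penalty that is paid only when the test branch is
   taken and the scalar fall-back runs instead.  */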
30186 /* This function returns the calling-ABI-specific va_list type node
30187 for FNDECL. */
30189 tree
30190 ix86_fn_abi_va_list (tree fndecl)
30192 if (!TARGET_64BIT)
30193 return va_list_type_node;
30194 gcc_assert (fndecl != NULL_TREE);
30196 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
30197 return ms_va_list_type_node;
30198 else
30199 return sysv_va_list_type_node;
30202 /* Returns the canonical va_list type specified by TYPE. If there
30203 is no valid TYPE provided, it returns NULL_TREE. */
30205 tree
30206 ix86_canonical_va_list_type (tree type)
30208 tree wtype, htype;
30210 /* Resolve references and pointers to va_list type. */
30211 if (INDIRECT_REF_P (type))
30212 type = TREE_TYPE (type);
30213 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
30214 type = TREE_TYPE (type);
30216 if (TARGET_64BIT)
30218 wtype = va_list_type_node;
30219 gcc_assert (wtype != NULL_TREE);
30220 htype = type;
30221 if (TREE_CODE (wtype) == ARRAY_TYPE)
30223 /* If va_list is an array type, the argument may have decayed
30224 to a pointer type, e.g. by being passed to another function.
30225 In that case, unwrap both types so that we can compare the
30226 underlying records. */
30227 if (TREE_CODE (htype) == ARRAY_TYPE
30228 || POINTER_TYPE_P (htype))
30230 wtype = TREE_TYPE (wtype);
30231 htype = TREE_TYPE (htype);
30234 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30235 return va_list_type_node;
30236 wtype = sysv_va_list_type_node;
30237 gcc_assert (wtype != NULL_TREE);
30238 htype = type;
30239 if (TREE_CODE (wtype) == ARRAY_TYPE)
30241 /* If va_list is an array type, the argument may have decayed
30242 to a pointer type, e.g. by being passed to another function.
30243 In that case, unwrap both types so that we can compare the
30244 underlying records. */
30245 if (TREE_CODE (htype) == ARRAY_TYPE
30246 || POINTER_TYPE_P (htype))
30248 wtype = TREE_TYPE (wtype);
30249 htype = TREE_TYPE (htype);
30252 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30253 return sysv_va_list_type_node;
30254 wtype = ms_va_list_type_node;
30255 gcc_assert (wtype != NULL_TREE);
30256 htype = type;
30257 if (TREE_CODE (wtype) == ARRAY_TYPE)
30259 /* If va_list is an array type, the argument may have decayed
30260 to a pointer type, e.g. by being passed to another function.
30261 In that case, unwrap both types so that we can compare the
30262 underlying records. */
30263 if (TREE_CODE (htype) == ARRAY_TYPE
30264 || POINTER_TYPE_P (htype))
30266 wtype = TREE_TYPE (wtype);
30267 htype = TREE_TYPE (htype);
30270 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30271 return ms_va_list_type_node;
30272 return NULL_TREE;
30274 return std_canonical_va_list_type (type);
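/* Illustration, not from the sources: why the ARRAY_TYPE unwrapping
   above is needed.  On x86_64 the SYSV va_list is an array of one
   record, so it decays to a pointer when passed along:

     void consume (__builtin_sysv_va_list ap);  // parameter type is
                                                // pointer to record

   Unwrapping both wtype and htype lets the TYPE_MAIN_VARIANT
   comparison still identify the underlying record.  */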
30277 /* Iterate through the target-specific builtin types for va_list.
30278 IDX denotes the iterator, *PTREE is set to the result type of
30279 the va_list builtin, and *PNAME to its internal type.
30280 Returns zero if there is no element for this index, otherwise
30281 IDX should be incremented for the next call.
30282 Note that a base builtin's name such as __builtin_va_list is not iterated.
30283 Used from c_common_nodes_and_builtins. */
30285 int
30286 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
30288 if (!TARGET_64BIT)
30289 return 0;
30290 switch (idx) {
30291 case 0:
30292 *ptree = ms_va_list_type_node;
30293 *pname = "__builtin_ms_va_list";
30294 break;
30295 case 1:
30296 *ptree = sysv_va_list_type_node;
30297 *pname = "__builtin_sysv_va_list";
30298 break;
30299 default:
30300 return 0;
30302 return 1;
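/* Sketch of the assumed caller shape, not from the sources (the real
   loop lives in c_common_nodes_and_builtins, assuming the hook is
   reachable as targetm.enum_va_list):

     const char *name;
     tree type;
     int i;
     for (i = 0; targetm.enum_va_list (i, &name, &type); i++)
       ... push a TYPE_DECL named NAME with type TYPE ...
*/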
30305 /* Initialize the GCC target structure. */
30306 #undef TARGET_RETURN_IN_MEMORY
30307 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
30309 #undef TARGET_LEGITIMIZE_ADDRESS
30310 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
30312 #undef TARGET_ATTRIBUTE_TABLE
30313 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
30314 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30315 # undef TARGET_MERGE_DECL_ATTRIBUTES
30316 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
30317 #endif
30319 #undef TARGET_COMP_TYPE_ATTRIBUTES
30320 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
30322 #undef TARGET_INIT_BUILTINS
30323 #define TARGET_INIT_BUILTINS ix86_init_builtins
30324 #undef TARGET_EXPAND_BUILTIN
30325 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
30327 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
30328 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
30329 ix86_builtin_vectorized_function
30331 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
30332 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
30334 #undef TARGET_BUILTIN_RECIPROCAL
30335 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
30337 #undef TARGET_ASM_FUNCTION_EPILOGUE
30338 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
30340 #undef TARGET_ENCODE_SECTION_INFO
30341 #ifndef SUBTARGET_ENCODE_SECTION_INFO
30342 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
30343 #else
30344 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
30345 #endif
30347 #undef TARGET_ASM_OPEN_PAREN
30348 #define TARGET_ASM_OPEN_PAREN ""
30349 #undef TARGET_ASM_CLOSE_PAREN
30350 #define TARGET_ASM_CLOSE_PAREN ""
30352 #undef TARGET_ASM_ALIGNED_HI_OP
30353 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
30354 #undef TARGET_ASM_ALIGNED_SI_OP
30355 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
30356 #ifdef ASM_QUAD
30357 #undef TARGET_ASM_ALIGNED_DI_OP
30358 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
30359 #endif
30361 #undef TARGET_ASM_UNALIGNED_HI_OP
30362 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
30363 #undef TARGET_ASM_UNALIGNED_SI_OP
30364 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
30365 #undef TARGET_ASM_UNALIGNED_DI_OP
30366 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
30368 #undef TARGET_SCHED_ADJUST_COST
30369 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
30370 #undef TARGET_SCHED_ISSUE_RATE
30371 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
30372 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
30373 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
30374 ia32_multipass_dfa_lookahead
30376 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
30377 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
30379 #ifdef HAVE_AS_TLS
30380 #undef TARGET_HAVE_TLS
30381 #define TARGET_HAVE_TLS true
30382 #endif
30383 #undef TARGET_CANNOT_FORCE_CONST_MEM
30384 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
30385 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
30386 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
30388 #undef TARGET_DELEGITIMIZE_ADDRESS
30389 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
30391 #undef TARGET_MS_BITFIELD_LAYOUT_P
30392 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
30394 #if TARGET_MACHO
30395 #undef TARGET_BINDS_LOCAL_P
30396 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
30397 #endif
30398 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30399 #undef TARGET_BINDS_LOCAL_P
30400 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
30401 #endif
30403 #undef TARGET_ASM_OUTPUT_MI_THUNK
30404 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
30405 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
30406 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
30408 #undef TARGET_ASM_FILE_START
30409 #define TARGET_ASM_FILE_START x86_file_start
30411 #undef TARGET_DEFAULT_TARGET_FLAGS
30412 #define TARGET_DEFAULT_TARGET_FLAGS \
30413 (TARGET_DEFAULT \
30414 | TARGET_SUBTARGET_DEFAULT \
30415 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
30417 #undef TARGET_HANDLE_OPTION
30418 #define TARGET_HANDLE_OPTION ix86_handle_option
30420 #undef TARGET_RTX_COSTS
30421 #define TARGET_RTX_COSTS ix86_rtx_costs
30422 #undef TARGET_ADDRESS_COST
30423 #define TARGET_ADDRESS_COST ix86_address_cost
30425 #undef TARGET_FIXED_CONDITION_CODE_REGS
30426 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
30427 #undef TARGET_CC_MODES_COMPATIBLE
30428 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
30430 #undef TARGET_MACHINE_DEPENDENT_REORG
30431 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
30433 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
30434 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
30436 #undef TARGET_BUILD_BUILTIN_VA_LIST
30437 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
30439 #undef TARGET_FN_ABI_VA_LIST
30440 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
30442 #undef TARGET_CANONICAL_VA_LIST_TYPE
30443 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
30445 #undef TARGET_EXPAND_BUILTIN_VA_START
30446 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
30448 #undef TARGET_MD_ASM_CLOBBERS
30449 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
30451 #undef TARGET_PROMOTE_PROTOTYPES
30452 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
30453 #undef TARGET_STRUCT_VALUE_RTX
30454 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
30455 #undef TARGET_SETUP_INCOMING_VARARGS
30456 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
30457 #undef TARGET_MUST_PASS_IN_STACK
30458 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
30459 #undef TARGET_PASS_BY_REFERENCE
30460 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
30461 #undef TARGET_INTERNAL_ARG_POINTER
30462 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
30463 #undef TARGET_UPDATE_STACK_BOUNDARY
30464 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
30465 #undef TARGET_GET_DRAP_RTX
30466 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
30467 #undef TARGET_STRICT_ARGUMENT_NAMING
30468 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
30470 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
30471 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
30473 #undef TARGET_SCALAR_MODE_SUPPORTED_P
30474 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
30476 #undef TARGET_VECTOR_MODE_SUPPORTED_P
30477 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
30479 #undef TARGET_C_MODE_FOR_SUFFIX
30480 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
30482 #ifdef HAVE_AS_TLS
30483 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
30484 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
30485 #endif
30487 #ifdef SUBTARGET_INSERT_ATTRIBUTES
30488 #undef TARGET_INSERT_ATTRIBUTES
30489 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
30490 #endif
30492 #undef TARGET_MANGLE_TYPE
30493 #define TARGET_MANGLE_TYPE ix86_mangle_type
30495 #undef TARGET_STACK_PROTECT_FAIL
30496 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
30498 #undef TARGET_FUNCTION_VALUE
30499 #define TARGET_FUNCTION_VALUE ix86_function_value
30501 #undef TARGET_SECONDARY_RELOAD
30502 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
30504 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
30505 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
30507 #undef TARGET_SET_CURRENT_FUNCTION
30508 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
30510 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
30511 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
30513 #undef TARGET_OPTION_SAVE
30514 #define TARGET_OPTION_SAVE ix86_function_specific_save
30516 #undef TARGET_OPTION_RESTORE
30517 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
30519 #undef TARGET_OPTION_PRINT
30520 #define TARGET_OPTION_PRINT ix86_function_specific_print
30522 #undef TARGET_OPTION_CAN_INLINE_P
30523 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
30525 #undef TARGET_EXPAND_TO_RTL_HOOK
30526 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
30528 #undef TARGET_LEGITIMATE_ADDRESS_P
30529 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
30531 struct gcc_target targetm = TARGET_INITIALIZER;
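/* Note, not from the sources: TARGET_INITIALIZER (from target-def.h)
   expands to a braced initializer assembled from the TARGET_* macros,
   so each #undef/#define pair above replaces a default hook value.
   Generic code then dispatches through the structure, e.g. a call
   through targetm.rtx_costs ends up in ix86_rtx_costs.  */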
30533 #include "gt-i386.h"