1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
3 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "output.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "diagnostic-core.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "gimple.h"
51 #include "dwarf2.h"
52 #include "df.h"
53 #include "tm-constrs.h"
54 #include "params.h"
55 #include "cselib.h"
56 #include "debug.h"
57 #include "dwarf2out.h"
58 #include "sched-int.h"
59 static rtx legitimize_dllimport_symbol (rtx, bool);
61 #ifndef CHECK_STACK_LIMIT
62 #define CHECK_STACK_LIMIT (-1)
63 #endif
65 /* Return index of given mode in mult and division cost tables. */
66 #define MODE_INDEX(mode) \
67 ((mode) == QImode ? 0 \
68 : (mode) == HImode ? 1 \
69 : (mode) == SImode ? 2 \
70 : (mode) == DImode ? 3 \
71 : 4)
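/* For example, MODE_INDEX (SImode) is 2, so an SImode multiply or divide is
   costed from the third entry of the five-element mult_init[]/divide[]
   arrays in the tables below (roughly cost->mult_init[MODE_INDEX (mode)]);
   the catch-all index 4 ("other") covers any remaining modes.  */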
73 /* Processor costs (relative to an add) */
74 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
75 #define COSTS_N_BYTES(N) ((N) * 2)
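/* A quick sanity check of the scale used when tuning for size: since
   COSTS_N_INSNS (N) expands to (N)*4 and an add is assumed to be 2 bytes,
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), i.e. a 2-byte instruction is
   weighted the same as a single add is when tuning for speed.  */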
77 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
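/* Rough reading guide for the stringop tables that follow (the precise
   layout is given by struct stringop_algs in i386.h): each cost table holds
   one pair of strategy descriptors for memcpy and one for memset, the first
   element of a pair being used for 32-bit and the second for 64-bit code.
   Within a descriptor, {max, alg} means "use alg for blocks of at most max
   bytes", a max of -1 marks the catch-all entry, and the leading algorithm
   is the one used when the block size is unknown at compile time.  So, for
   instance, {libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}}
   selects an inline loop up to 6 bytes, an unrolled loop up to 14 bytes and
   a 4-byte rep prefix beyond that, with a libcall for unknown sizes.
   DUMMY_STRINGOP_ALGS just fills the variant that is never consulted
   (e.g. the 64-bit slot of a 32-bit-only tuning).  */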
79 const
80 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
81 COSTS_N_BYTES (2), /* cost of an add instruction */
82 COSTS_N_BYTES (3), /* cost of a lea instruction */
83 COSTS_N_BYTES (2), /* variable shift costs */
84 COSTS_N_BYTES (3), /* constant shift costs */
85 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 0, /* cost of multiply per each bit set */
91 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
92 COSTS_N_BYTES (3), /* HI */
93 COSTS_N_BYTES (3), /* SI */
94 COSTS_N_BYTES (3), /* DI */
95 COSTS_N_BYTES (5)}, /* other */
96 COSTS_N_BYTES (3), /* cost of movsx */
97 COSTS_N_BYTES (3), /* cost of movzx */
98 0, /* "large" insn */
99 2, /* MOVE_RATIO */
100 2, /* cost for loading QImode using movzbl */
101 {2, 2, 2}, /* cost of loading integer registers
102 in QImode, HImode and SImode.
103 Relative to reg-reg move (2). */
104 {2, 2, 2}, /* cost of storing integer registers */
105 2, /* cost of reg,reg fld/fst */
106 {2, 2, 2}, /* cost of loading fp registers
107 in SFmode, DFmode and XFmode */
108 {2, 2, 2}, /* cost of storing fp registers
109 in SFmode, DFmode and XFmode */
110 3, /* cost of moving MMX register */
111 {3, 3}, /* cost of loading MMX registers
112 in SImode and DImode */
113 {3, 3}, /* cost of storing MMX registers
114 in SImode and DImode */
115 3, /* cost of moving SSE register */
116 {3, 3, 3}, /* cost of loading SSE registers
117 in SImode, DImode and TImode */
118 {3, 3, 3}, /* cost of storing SSE registers
119 in SImode, DImode and TImode */
120 3, /* MMX or SSE register to integer */
121 0, /* size of l1 cache */
122 0, /* size of l2 cache */
123 0, /* size of prefetch block */
124 0, /* number of parallel prefetches */
125 2, /* Branch cost */
126 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
127 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
128 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
129 COSTS_N_BYTES (2), /* cost of FABS instruction. */
130 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
131 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
132 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
133 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
134 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
135 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
136 1, /* scalar_stmt_cost. */
137 1, /* scalar load_cost. */
138 1, /* scalar_store_cost. */
139 1, /* vec_stmt_cost. */
140 1, /* vec_to_scalar_cost. */
141 1, /* scalar_to_vec_cost. */
142 1, /* vec_align_load_cost. */
143 1, /* vec_unalign_load_cost. */
144 1, /* vec_store_cost. */
145 1, /* cond_taken_branch_cost. */
146 1, /* cond_not_taken_branch_cost. */
147 };
149 /* Processor costs (relative to an add) */
150 static const
151 struct processor_costs i386_cost = { /* 386 specific costs */
152 COSTS_N_INSNS (1), /* cost of an add instruction */
153 COSTS_N_INSNS (1), /* cost of a lea instruction */
154 COSTS_N_INSNS (3), /* variable shift costs */
155 COSTS_N_INSNS (2), /* constant shift costs */
156 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
157 COSTS_N_INSNS (6), /* HI */
158 COSTS_N_INSNS (6), /* SI */
159 COSTS_N_INSNS (6), /* DI */
160 COSTS_N_INSNS (6)}, /* other */
161 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
162 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
163 COSTS_N_INSNS (23), /* HI */
164 COSTS_N_INSNS (23), /* SI */
165 COSTS_N_INSNS (23), /* DI */
166 COSTS_N_INSNS (23)}, /* other */
167 COSTS_N_INSNS (3), /* cost of movsx */
168 COSTS_N_INSNS (2), /* cost of movzx */
169 15, /* "large" insn */
170 3, /* MOVE_RATIO */
171 4, /* cost for loading QImode using movzbl */
172 {2, 4, 2}, /* cost of loading integer registers
173 in QImode, HImode and SImode.
174 Relative to reg-reg move (2). */
175 {2, 4, 2}, /* cost of storing integer registers */
176 2, /* cost of reg,reg fld/fst */
177 {8, 8, 8}, /* cost of loading fp registers
178 in SFmode, DFmode and XFmode */
179 {8, 8, 8}, /* cost of storing fp registers
180 in SFmode, DFmode and XFmode */
181 2, /* cost of moving MMX register */
182 {4, 8}, /* cost of loading MMX registers
183 in SImode and DImode */
184 {4, 8}, /* cost of storing MMX registers
185 in SImode and DImode */
186 2, /* cost of moving SSE register */
187 {4, 8, 16}, /* cost of loading SSE registers
188 in SImode, DImode and TImode */
189 {4, 8, 16}, /* cost of storing SSE registers
190 in SImode, DImode and TImode */
191 3, /* MMX or SSE register to integer */
192 0, /* size of l1 cache */
193 0, /* size of l2 cache */
194 0, /* size of prefetch block */
195 0, /* number of parallel prefetches */
196 1, /* Branch cost */
197 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
198 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
199 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
200 COSTS_N_INSNS (22), /* cost of FABS instruction. */
201 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
202 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
203 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
204 DUMMY_STRINGOP_ALGS},
205 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
206 DUMMY_STRINGOP_ALGS},
207 1, /* scalar_stmt_cost. */
208 1, /* scalar load_cost. */
209 1, /* scalar_store_cost. */
210 1, /* vec_stmt_cost. */
211 1, /* vec_to_scalar_cost. */
212 1, /* scalar_to_vec_cost. */
213 1, /* vec_align_load_cost. */
214 2, /* vec_unalign_load_cost. */
215 1, /* vec_store_cost. */
216 3, /* cond_taken_branch_cost. */
217 1, /* cond_not_taken_branch_cost. */
218 };
220 static const
221 struct processor_costs i486_cost = { /* 486 specific costs */
222 COSTS_N_INSNS (1), /* cost of an add instruction */
223 COSTS_N_INSNS (1), /* cost of a lea instruction */
224 COSTS_N_INSNS (3), /* variable shift costs */
225 COSTS_N_INSNS (2), /* constant shift costs */
226 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
227 COSTS_N_INSNS (12), /* HI */
228 COSTS_N_INSNS (12), /* SI */
229 COSTS_N_INSNS (12), /* DI */
230 COSTS_N_INSNS (12)}, /* other */
231 1, /* cost of multiply per each bit set */
232 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
233 COSTS_N_INSNS (40), /* HI */
234 COSTS_N_INSNS (40), /* SI */
235 COSTS_N_INSNS (40), /* DI */
236 COSTS_N_INSNS (40)}, /* other */
237 COSTS_N_INSNS (3), /* cost of movsx */
238 COSTS_N_INSNS (2), /* cost of movzx */
239 15, /* "large" insn */
240 3, /* MOVE_RATIO */
241 4, /* cost for loading QImode using movzbl */
242 {2, 4, 2}, /* cost of loading integer registers
243 in QImode, HImode and SImode.
244 Relative to reg-reg move (2). */
245 {2, 4, 2}, /* cost of storing integer registers */
246 2, /* cost of reg,reg fld/fst */
247 {8, 8, 8}, /* cost of loading fp registers
248 in SFmode, DFmode and XFmode */
249 {8, 8, 8}, /* cost of storing fp registers
250 in SFmode, DFmode and XFmode */
251 2, /* cost of moving MMX register */
252 {4, 8}, /* cost of loading MMX registers
253 in SImode and DImode */
254 {4, 8}, /* cost of storing MMX registers
255 in SImode and DImode */
256 2, /* cost of moving SSE register */
257 {4, 8, 16}, /* cost of loading SSE registers
258 in SImode, DImode and TImode */
259 {4, 8, 16}, /* cost of storing SSE registers
260 in SImode, DImode and TImode */
261 3, /* MMX or SSE register to integer */
262 4, /* size of l1 cache. 486 has 8kB cache
263 shared for code and data, so 4kB is
264 not really precise. */
265 4, /* size of l2 cache */
266 0, /* size of prefetch block */
267 0, /* number of parallel prefetches */
268 1, /* Branch cost */
269 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
270 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
271 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
272 COSTS_N_INSNS (3), /* cost of FABS instruction. */
273 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
274 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
275 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
276 DUMMY_STRINGOP_ALGS},
277 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
278 DUMMY_STRINGOP_ALGS},
279 1, /* scalar_stmt_cost. */
280 1, /* scalar load_cost. */
281 1, /* scalar_store_cost. */
282 1, /* vec_stmt_cost. */
283 1, /* vec_to_scalar_cost. */
284 1, /* scalar_to_vec_cost. */
285 1, /* vec_align_load_cost. */
286 2, /* vec_unalign_load_cost. */
287 1, /* vec_store_cost. */
288 3, /* cond_taken_branch_cost. */
289 1, /* cond_not_taken_branch_cost. */
290 };
292 static const
293 struct processor_costs pentium_cost = {
294 COSTS_N_INSNS (1), /* cost of an add instruction */
295 COSTS_N_INSNS (1), /* cost of a lea instruction */
296 COSTS_N_INSNS (4), /* variable shift costs */
297 COSTS_N_INSNS (1), /* constant shift costs */
298 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
299 COSTS_N_INSNS (11), /* HI */
300 COSTS_N_INSNS (11), /* SI */
301 COSTS_N_INSNS (11), /* DI */
302 COSTS_N_INSNS (11)}, /* other */
303 0, /* cost of multiply per each bit set */
304 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
305 COSTS_N_INSNS (25), /* HI */
306 COSTS_N_INSNS (25), /* SI */
307 COSTS_N_INSNS (25), /* DI */
308 COSTS_N_INSNS (25)}, /* other */
309 COSTS_N_INSNS (3), /* cost of movsx */
310 COSTS_N_INSNS (2), /* cost of movzx */
311 8, /* "large" insn */
312 6, /* MOVE_RATIO */
313 6, /* cost for loading QImode using movzbl */
314 {2, 4, 2}, /* cost of loading integer registers
315 in QImode, HImode and SImode.
316 Relative to reg-reg move (2). */
317 {2, 4, 2}, /* cost of storing integer registers */
318 2, /* cost of reg,reg fld/fst */
319 {2, 2, 6}, /* cost of loading fp registers
320 in SFmode, DFmode and XFmode */
321 {4, 4, 6}, /* cost of storing fp registers
322 in SFmode, DFmode and XFmode */
323 8, /* cost of moving MMX register */
324 {8, 8}, /* cost of loading MMX registers
325 in SImode and DImode */
326 {8, 8}, /* cost of storing MMX registers
327 in SImode and DImode */
328 2, /* cost of moving SSE register */
329 {4, 8, 16}, /* cost of loading SSE registers
330 in SImode, DImode and TImode */
331 {4, 8, 16}, /* cost of storing SSE registers
332 in SImode, DImode and TImode */
333 3, /* MMX or SSE register to integer */
334 8, /* size of l1 cache. */
335 8, /* size of l2 cache */
336 0, /* size of prefetch block */
337 0, /* number of parallel prefetches */
338 2, /* Branch cost */
339 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
340 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
341 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
342 COSTS_N_INSNS (1), /* cost of FABS instruction. */
343 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
344 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
345 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
346 DUMMY_STRINGOP_ALGS},
347 {{libcall, {{-1, rep_prefix_4_byte}}},
348 DUMMY_STRINGOP_ALGS},
349 1, /* scalar_stmt_cost. */
350 1, /* scalar load_cost. */
351 1, /* scalar_store_cost. */
352 1, /* vec_stmt_cost. */
353 1, /* vec_to_scalar_cost. */
354 1, /* scalar_to_vec_cost. */
355 1, /* vec_align_load_cost. */
356 2, /* vec_unalign_load_cost. */
357 1, /* vec_store_cost. */
358 3, /* cond_taken_branch_cost. */
359 1, /* cond_not_taken_branch_cost. */
360 };
362 static const
363 struct processor_costs pentiumpro_cost = {
364 COSTS_N_INSNS (1), /* cost of an add instruction */
365 COSTS_N_INSNS (1), /* cost of a lea instruction */
366 COSTS_N_INSNS (1), /* variable shift costs */
367 COSTS_N_INSNS (1), /* constant shift costs */
368 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
369 COSTS_N_INSNS (4), /* HI */
370 COSTS_N_INSNS (4), /* SI */
371 COSTS_N_INSNS (4), /* DI */
372 COSTS_N_INSNS (4)}, /* other */
373 0, /* cost of multiply per each bit set */
374 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
375 COSTS_N_INSNS (17), /* HI */
376 COSTS_N_INSNS (17), /* SI */
377 COSTS_N_INSNS (17), /* DI */
378 COSTS_N_INSNS (17)}, /* other */
379 COSTS_N_INSNS (1), /* cost of movsx */
380 COSTS_N_INSNS (1), /* cost of movzx */
381 8, /* "large" insn */
382 6, /* MOVE_RATIO */
383 2, /* cost for loading QImode using movzbl */
384 {4, 4, 4}, /* cost of loading integer registers
385 in QImode, HImode and SImode.
386 Relative to reg-reg move (2). */
387 {2, 2, 2}, /* cost of storing integer registers */
388 2, /* cost of reg,reg fld/fst */
389 {2, 2, 6}, /* cost of loading fp registers
390 in SFmode, DFmode and XFmode */
391 {4, 4, 6}, /* cost of storing fp registers
392 in SFmode, DFmode and XFmode */
393 2, /* cost of moving MMX register */
394 {2, 2}, /* cost of loading MMX registers
395 in SImode and DImode */
396 {2, 2}, /* cost of storing MMX registers
397 in SImode and DImode */
398 2, /* cost of moving SSE register */
399 {2, 2, 8}, /* cost of loading SSE registers
400 in SImode, DImode and TImode */
401 {2, 2, 8}, /* cost of storing SSE registers
402 in SImode, DImode and TImode */
403 3, /* MMX or SSE register to integer */
404 8, /* size of l1 cache. */
405 256, /* size of l2 cache */
406 32, /* size of prefetch block */
407 6, /* number of parallel prefetches */
408 2, /* Branch cost */
409 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
410 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
411 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
412 COSTS_N_INSNS (2), /* cost of FABS instruction. */
413 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
414 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
415 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
416 (we ensure the alignment). For small blocks an inline loop is still a
417 noticeable win; for bigger blocks either rep movsl or rep movsb is the
418 way to go. Rep movsb apparently has a more expensive startup time in the CPU,
419 but after 4K the difference is down in the noise. */
420 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
421 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
422 DUMMY_STRINGOP_ALGS},
423 {{rep_prefix_4_byte, {{1024, unrolled_loop},
424 {8192, rep_prefix_4_byte}, {-1, libcall}}},
425 DUMMY_STRINGOP_ALGS},
426 1, /* scalar_stmt_cost. */
427 1, /* scalar load_cost. */
428 1, /* scalar_store_cost. */
429 1, /* vec_stmt_cost. */
430 1, /* vec_to_scalar_cost. */
431 1, /* scalar_to_vec_cost. */
432 1, /* vec_align_load_cost. */
433 2, /* vec_unalign_load_cost. */
434 1, /* vec_store_cost. */
435 3, /* cond_taken_branch_cost. */
436 1, /* cond_not_taken_branch_cost. */
437 };
439 static const
440 struct processor_costs geode_cost = {
441 COSTS_N_INSNS (1), /* cost of an add instruction */
442 COSTS_N_INSNS (1), /* cost of a lea instruction */
443 COSTS_N_INSNS (2), /* variable shift costs */
444 COSTS_N_INSNS (1), /* constant shift costs */
445 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
446 COSTS_N_INSNS (4), /* HI */
447 COSTS_N_INSNS (7), /* SI */
448 COSTS_N_INSNS (7), /* DI */
449 COSTS_N_INSNS (7)}, /* other */
450 0, /* cost of multiply per each bit set */
451 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
452 COSTS_N_INSNS (23), /* HI */
453 COSTS_N_INSNS (39), /* SI */
454 COSTS_N_INSNS (39), /* DI */
455 COSTS_N_INSNS (39)}, /* other */
456 COSTS_N_INSNS (1), /* cost of movsx */
457 COSTS_N_INSNS (1), /* cost of movzx */
458 8, /* "large" insn */
459 4, /* MOVE_RATIO */
460 1, /* cost for loading QImode using movzbl */
461 {1, 1, 1}, /* cost of loading integer registers
462 in QImode, HImode and SImode.
463 Relative to reg-reg move (2). */
464 {1, 1, 1}, /* cost of storing integer registers */
465 1, /* cost of reg,reg fld/fst */
466 {1, 1, 1}, /* cost of loading fp registers
467 in SFmode, DFmode and XFmode */
468 {4, 6, 6}, /* cost of storing fp registers
469 in SFmode, DFmode and XFmode */
471 1, /* cost of moving MMX register */
472 {1, 1}, /* cost of loading MMX registers
473 in SImode and DImode */
474 {1, 1}, /* cost of storing MMX registers
475 in SImode and DImode */
476 1, /* cost of moving SSE register */
477 {1, 1, 1}, /* cost of loading SSE registers
478 in SImode, DImode and TImode */
479 {1, 1, 1}, /* cost of storing SSE registers
480 in SImode, DImode and TImode */
481 1, /* MMX or SSE register to integer */
482 64, /* size of l1 cache. */
483 128, /* size of l2 cache. */
484 32, /* size of prefetch block */
485 1, /* number of parallel prefetches */
486 1, /* Branch cost */
487 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
488 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
489 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
490 COSTS_N_INSNS (1), /* cost of FABS instruction. */
491 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
492 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
493 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
494 DUMMY_STRINGOP_ALGS},
495 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
496 DUMMY_STRINGOP_ALGS},
497 1, /* scalar_stmt_cost. */
498 1, /* scalar load_cost. */
499 1, /* scalar_store_cost. */
500 1, /* vec_stmt_cost. */
501 1, /* vec_to_scalar_cost. */
502 1, /* scalar_to_vec_cost. */
503 1, /* vec_align_load_cost. */
504 2, /* vec_unalign_load_cost. */
505 1, /* vec_store_cost. */
506 3, /* cond_taken_branch_cost. */
507 1, /* cond_not_taken_branch_cost. */
508 };
510 static const
511 struct processor_costs k6_cost = {
512 COSTS_N_INSNS (1), /* cost of an add instruction */
513 COSTS_N_INSNS (2), /* cost of a lea instruction */
514 COSTS_N_INSNS (1), /* variable shift costs */
515 COSTS_N_INSNS (1), /* constant shift costs */
516 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
517 COSTS_N_INSNS (3), /* HI */
518 COSTS_N_INSNS (3), /* SI */
519 COSTS_N_INSNS (3), /* DI */
520 COSTS_N_INSNS (3)}, /* other */
521 0, /* cost of multiply per each bit set */
522 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
523 COSTS_N_INSNS (18), /* HI */
524 COSTS_N_INSNS (18), /* SI */
525 COSTS_N_INSNS (18), /* DI */
526 COSTS_N_INSNS (18)}, /* other */
527 COSTS_N_INSNS (2), /* cost of movsx */
528 COSTS_N_INSNS (2), /* cost of movzx */
529 8, /* "large" insn */
530 4, /* MOVE_RATIO */
531 3, /* cost for loading QImode using movzbl */
532 {4, 5, 4}, /* cost of loading integer registers
533 in QImode, HImode and SImode.
534 Relative to reg-reg move (2). */
535 {2, 3, 2}, /* cost of storing integer registers */
536 4, /* cost of reg,reg fld/fst */
537 {6, 6, 6}, /* cost of loading fp registers
538 in SFmode, DFmode and XFmode */
539 {4, 4, 4}, /* cost of storing fp registers
540 in SFmode, DFmode and XFmode */
541 2, /* cost of moving MMX register */
542 {2, 2}, /* cost of loading MMX registers
543 in SImode and DImode */
544 {2, 2}, /* cost of storing MMX registers
545 in SImode and DImode */
546 2, /* cost of moving SSE register */
547 {2, 2, 8}, /* cost of loading SSE registers
548 in SImode, DImode and TImode */
549 {2, 2, 8}, /* cost of storing SSE registers
550 in SImode, DImode and TImode */
551 6, /* MMX or SSE register to integer */
552 32, /* size of l1 cache. */
553 32, /* size of l2 cache. Some models
554 have integrated l2 cache, but
555 optimizing for k6 is not important
556 enough to worry about that. */
557 32, /* size of prefetch block */
558 1, /* number of parallel prefetches */
559 1, /* Branch cost */
560 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
561 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
562 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
563 COSTS_N_INSNS (2), /* cost of FABS instruction. */
564 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
565 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
566 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
567 DUMMY_STRINGOP_ALGS},
568 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
569 DUMMY_STRINGOP_ALGS},
570 1, /* scalar_stmt_cost. */
571 1, /* scalar load_cost. */
572 1, /* scalar_store_cost. */
573 1, /* vec_stmt_cost. */
574 1, /* vec_to_scalar_cost. */
575 1, /* scalar_to_vec_cost. */
576 1, /* vec_align_load_cost. */
577 2, /* vec_unalign_load_cost. */
578 1, /* vec_store_cost. */
579 3, /* cond_taken_branch_cost. */
580 1, /* cond_not_taken_branch_cost. */
581 };
583 static const
584 struct processor_costs athlon_cost = {
585 COSTS_N_INSNS (1), /* cost of an add instruction */
586 COSTS_N_INSNS (2), /* cost of a lea instruction */
587 COSTS_N_INSNS (1), /* variable shift costs */
588 COSTS_N_INSNS (1), /* constant shift costs */
589 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
590 COSTS_N_INSNS (5), /* HI */
591 COSTS_N_INSNS (5), /* SI */
592 COSTS_N_INSNS (5), /* DI */
593 COSTS_N_INSNS (5)}, /* other */
594 0, /* cost of multiply per each bit set */
595 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
596 COSTS_N_INSNS (26), /* HI */
597 COSTS_N_INSNS (42), /* SI */
598 COSTS_N_INSNS (74), /* DI */
599 COSTS_N_INSNS (74)}, /* other */
600 COSTS_N_INSNS (1), /* cost of movsx */
601 COSTS_N_INSNS (1), /* cost of movzx */
602 8, /* "large" insn */
603 9, /* MOVE_RATIO */
604 4, /* cost for loading QImode using movzbl */
605 {3, 4, 3}, /* cost of loading integer registers
606 in QImode, HImode and SImode.
607 Relative to reg-reg move (2). */
608 {3, 4, 3}, /* cost of storing integer registers */
609 4, /* cost of reg,reg fld/fst */
610 {4, 4, 12}, /* cost of loading fp registers
611 in SFmode, DFmode and XFmode */
612 {6, 6, 8}, /* cost of storing fp registers
613 in SFmode, DFmode and XFmode */
614 2, /* cost of moving MMX register */
615 {4, 4}, /* cost of loading MMX registers
616 in SImode and DImode */
617 {4, 4}, /* cost of storing MMX registers
618 in SImode and DImode */
619 2, /* cost of moving SSE register */
620 {4, 4, 6}, /* cost of loading SSE registers
621 in SImode, DImode and TImode */
622 {4, 4, 5}, /* cost of storing SSE registers
623 in SImode, DImode and TImode */
624 5, /* MMX or SSE register to integer */
625 64, /* size of l1 cache. */
626 256, /* size of l2 cache. */
627 64, /* size of prefetch block */
628 6, /* number of parallel prefetches */
629 5, /* Branch cost */
630 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
631 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
632 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
633 COSTS_N_INSNS (2), /* cost of FABS instruction. */
634 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
635 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
636 /* For some reason, Athlon deals better with REP prefix (relative to loops)
637 compared to K8. Alignment becomes important after 8 bytes for memcpy and
638 128 bytes for memset. */
639 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
640 DUMMY_STRINGOP_ALGS},
641 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
642 DUMMY_STRINGOP_ALGS},
643 1, /* scalar_stmt_cost. */
644 1, /* scalar load_cost. */
645 1, /* scalar_store_cost. */
646 1, /* vec_stmt_cost. */
647 1, /* vec_to_scalar_cost. */
648 1, /* scalar_to_vec_cost. */
649 1, /* vec_align_load_cost. */
650 2, /* vec_unalign_load_cost. */
651 1, /* vec_store_cost. */
652 3, /* cond_taken_branch_cost. */
653 1, /* cond_not_taken_branch_cost. */
654 };
656 static const
657 struct processor_costs k8_cost = {
658 COSTS_N_INSNS (1), /* cost of an add instruction */
659 COSTS_N_INSNS (2), /* cost of a lea instruction */
660 COSTS_N_INSNS (1), /* variable shift costs */
661 COSTS_N_INSNS (1), /* constant shift costs */
662 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
663 COSTS_N_INSNS (4), /* HI */
664 COSTS_N_INSNS (3), /* SI */
665 COSTS_N_INSNS (4), /* DI */
666 COSTS_N_INSNS (5)}, /* other */
667 0, /* cost of multiply per each bit set */
668 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
669 COSTS_N_INSNS (26), /* HI */
670 COSTS_N_INSNS (42), /* SI */
671 COSTS_N_INSNS (74), /* DI */
672 COSTS_N_INSNS (74)}, /* other */
673 COSTS_N_INSNS (1), /* cost of movsx */
674 COSTS_N_INSNS (1), /* cost of movzx */
675 8, /* "large" insn */
676 9, /* MOVE_RATIO */
677 4, /* cost for loading QImode using movzbl */
678 {3, 4, 3}, /* cost of loading integer registers
679 in QImode, HImode and SImode.
680 Relative to reg-reg move (2). */
681 {3, 4, 3}, /* cost of storing integer registers */
682 4, /* cost of reg,reg fld/fst */
683 {4, 4, 12}, /* cost of loading fp registers
684 in SFmode, DFmode and XFmode */
685 {6, 6, 8}, /* cost of storing fp registers
686 in SFmode, DFmode and XFmode */
687 2, /* cost of moving MMX register */
688 {3, 3}, /* cost of loading MMX registers
689 in SImode and DImode */
690 {4, 4}, /* cost of storing MMX registers
691 in SImode and DImode */
692 2, /* cost of moving SSE register */
693 {4, 3, 6}, /* cost of loading SSE registers
694 in SImode, DImode and TImode */
695 {4, 4, 5}, /* cost of storing SSE registers
696 in SImode, DImode and TImode */
697 5, /* MMX or SSE register to integer */
698 64, /* size of l1 cache. */
699 512, /* size of l2 cache. */
700 64, /* size of prefetch block */
701 /* New AMD processors never drop prefetches; if they cannot be performed
702 immediately, they are queued. We set number of simultaneous prefetches
703 to a large constant to reflect this (it probably is not a good idea not
704 to limit number of prefetches at all, as their execution also takes some
705 time). */
706 100, /* number of parallel prefetches */
707 3, /* Branch cost */
708 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
709 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
710 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
711 COSTS_N_INSNS (2), /* cost of FABS instruction. */
712 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
713 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
714 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
715 small blocks it is better to use a loop. For large blocks, a libcall can
716 do nontemporal accesses and beat inline copying considerably. */
717 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
718 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
719 {{libcall, {{8, loop}, {24, unrolled_loop},
720 {2048, rep_prefix_4_byte}, {-1, libcall}}},
721 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
722 4, /* scalar_stmt_cost. */
723 2, /* scalar load_cost. */
724 2, /* scalar_store_cost. */
725 5, /* vec_stmt_cost. */
726 0, /* vec_to_scalar_cost. */
727 2, /* scalar_to_vec_cost. */
728 2, /* vec_align_load_cost. */
729 3, /* vec_unalign_load_cost. */
730 3, /* vec_store_cost. */
731 3, /* cond_taken_branch_cost. */
732 2, /* cond_not_taken_branch_cost. */
733 };
735 struct processor_costs amdfam10_cost = {
736 COSTS_N_INSNS (1), /* cost of an add instruction */
737 COSTS_N_INSNS (2), /* cost of a lea instruction */
738 COSTS_N_INSNS (1), /* variable shift costs */
739 COSTS_N_INSNS (1), /* constant shift costs */
740 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
741 COSTS_N_INSNS (4), /* HI */
742 COSTS_N_INSNS (3), /* SI */
743 COSTS_N_INSNS (4), /* DI */
744 COSTS_N_INSNS (5)}, /* other */
745 0, /* cost of multiply per each bit set */
746 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
747 COSTS_N_INSNS (35), /* HI */
748 COSTS_N_INSNS (51), /* SI */
749 COSTS_N_INSNS (83), /* DI */
750 COSTS_N_INSNS (83)}, /* other */
751 COSTS_N_INSNS (1), /* cost of movsx */
752 COSTS_N_INSNS (1), /* cost of movzx */
753 8, /* "large" insn */
754 9, /* MOVE_RATIO */
755 4, /* cost for loading QImode using movzbl */
756 {3, 4, 3}, /* cost of loading integer registers
757 in QImode, HImode and SImode.
758 Relative to reg-reg move (2). */
759 {3, 4, 3}, /* cost of storing integer registers */
760 4, /* cost of reg,reg fld/fst */
761 {4, 4, 12}, /* cost of loading fp registers
762 in SFmode, DFmode and XFmode */
763 {6, 6, 8}, /* cost of storing fp registers
764 in SFmode, DFmode and XFmode */
765 2, /* cost of moving MMX register */
766 {3, 3}, /* cost of loading MMX registers
767 in SImode and DImode */
768 {4, 4}, /* cost of storing MMX registers
769 in SImode and DImode */
770 2, /* cost of moving SSE register */
771 {4, 4, 3}, /* cost of loading SSE registers
772 in SImode, DImode and TImode */
773 {4, 4, 5}, /* cost of storing SSE registers
774 in SImode, DImode and TImode */
775 3, /* MMX or SSE register to integer */
776 /* On K8:
777 MOVD reg64, xmmreg Double FSTORE 4
778 MOVD reg32, xmmreg Double FSTORE 4
779 On AMDFAM10:
780 MOVD reg64, xmmreg Double FADD 3
781 1/1 1/1
782 MOVD reg32, xmmreg Double FADD 3
783 1/1 1/1 */
784 64, /* size of l1 cache. */
785 512, /* size of l2 cache. */
786 64, /* size of prefetch block */
787 /* New AMD processors never drop prefetches; if they cannot be performed
788 immediately, they are queued. We set number of simultaneous prefetches
789 to a large constant to reflect this (it probably is not a good idea not
790 to limit number of prefetches at all, as their execution also takes some
791 time). */
792 100, /* number of parallel prefetches */
793 2, /* Branch cost */
794 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
795 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
796 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
797 COSTS_N_INSNS (2), /* cost of FABS instruction. */
798 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
799 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
801 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
802 very small blocks it is better to use a loop. For large blocks, a libcall can
803 do nontemporal accesses and beat inline copying considerably. */
804 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
805 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
806 {{libcall, {{8, loop}, {24, unrolled_loop},
807 {2048, rep_prefix_4_byte}, {-1, libcall}}},
808 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
809 4, /* scalar_stmt_cost. */
810 2, /* scalar load_cost. */
811 2, /* scalar_store_cost. */
812 6, /* vec_stmt_cost. */
813 0, /* vec_to_scalar_cost. */
814 2, /* scalar_to_vec_cost. */
815 2, /* vec_align_load_cost. */
816 2, /* vec_unalign_load_cost. */
817 2, /* vec_store_cost. */
818 2, /* cond_taken_branch_cost. */
819 1, /* cond_not_taken_branch_cost. */
820 };
822 struct processor_costs bdver1_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (2), /* cost of a lea instruction */
825 COSTS_N_INSNS (1), /* variable shift costs */
826 COSTS_N_INSNS (1), /* constant shift costs */
827 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (4), /* HI */
829 COSTS_N_INSNS (3), /* SI */
830 COSTS_N_INSNS (4), /* DI */
831 COSTS_N_INSNS (5)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (35), /* HI */
835 COSTS_N_INSNS (51), /* SI */
836 COSTS_N_INSNS (83), /* DI */
837 COSTS_N_INSNS (83)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 8, /* "large" insn */
841 9, /* MOVE_RATIO */
842 4, /* cost for loading QImode using movzbl */
843 {3, 4, 3}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {3, 4, 3}, /* cost of storing integer registers */
847 4, /* cost of reg,reg fld/fst */
848 {4, 4, 12}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {6, 6, 8}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {3, 3}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {4, 4}, /* cost of storing MMX registers
856 in SImode and DImode */
857 2, /* cost of moving SSE register */
858 {4, 4, 3}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {4, 4, 5}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 3, /* MMX or SSE register to integer */
863 /* On K8:
864 MOVD reg64, xmmreg Double FSTORE 4
865 MOVD reg32, xmmreg Double FSTORE 4
866 On AMDFAM10:
867 MOVD reg64, xmmreg Double FADD 3
868 1/1 1/1
869 MOVD reg32, xmmreg Double FADD 3
870 1/1 1/1 */
871 64, /* size of l1 cache. */
872 1024, /* size of l2 cache. */
873 64, /* size of prefetch block */
874 /* New AMD processors never drop prefetches; if they cannot be performed
875 immediately, they are queued. We set number of simultaneous prefetches
876 to a large constant to reflect this (it probably is not a good idea not
877 to limit number of prefetches at all, as their execution also takes some
878 time). */
879 100, /* number of parallel prefetches */
880 2, /* Branch cost */
881 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
882 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
883 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
884 COSTS_N_INSNS (2), /* cost of FABS instruction. */
885 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
886 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
888 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
889 very small blocks it is better to use a loop. For large blocks, a libcall
890 can do nontemporal accesses and beat inline copying considerably. */
891 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
892 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
893 {{libcall, {{8, loop}, {24, unrolled_loop},
894 {2048, rep_prefix_4_byte}, {-1, libcall}}},
895 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
896 4, /* scalar_stmt_cost. */
897 2, /* scalar load_cost. */
898 2, /* scalar_store_cost. */
899 6, /* vec_stmt_cost. */
900 0, /* vec_to_scalar_cost. */
901 2, /* scalar_to_vec_cost. */
902 2, /* vec_align_load_cost. */
903 2, /* vec_unalign_load_cost. */
904 2, /* vec_store_cost. */
905 2, /* cond_taken_branch_cost. */
906 1, /* cond_not_taken_branch_cost. */
907 };
909 static const
910 struct processor_costs pentium4_cost = {
911 COSTS_N_INSNS (1), /* cost of an add instruction */
912 COSTS_N_INSNS (3), /* cost of a lea instruction */
913 COSTS_N_INSNS (4), /* variable shift costs */
914 COSTS_N_INSNS (4), /* constant shift costs */
915 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
916 COSTS_N_INSNS (15), /* HI */
917 COSTS_N_INSNS (15), /* SI */
918 COSTS_N_INSNS (15), /* DI */
919 COSTS_N_INSNS (15)}, /* other */
920 0, /* cost of multiply per each bit set */
921 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
922 COSTS_N_INSNS (56), /* HI */
923 COSTS_N_INSNS (56), /* SI */
924 COSTS_N_INSNS (56), /* DI */
925 COSTS_N_INSNS (56)}, /* other */
926 COSTS_N_INSNS (1), /* cost of movsx */
927 COSTS_N_INSNS (1), /* cost of movzx */
928 16, /* "large" insn */
929 6, /* MOVE_RATIO */
930 2, /* cost for loading QImode using movzbl */
931 {4, 5, 4}, /* cost of loading integer registers
932 in QImode, HImode and SImode.
933 Relative to reg-reg move (2). */
934 {2, 3, 2}, /* cost of storing integer registers */
935 2, /* cost of reg,reg fld/fst */
936 {2, 2, 6}, /* cost of loading fp registers
937 in SFmode, DFmode and XFmode */
938 {4, 4, 6}, /* cost of storing fp registers
939 in SFmode, DFmode and XFmode */
940 2, /* cost of moving MMX register */
941 {2, 2}, /* cost of loading MMX registers
942 in SImode and DImode */
943 {2, 2}, /* cost of storing MMX registers
944 in SImode and DImode */
945 12, /* cost of moving SSE register */
946 {12, 12, 12}, /* cost of loading SSE registers
947 in SImode, DImode and TImode */
948 {2, 2, 8}, /* cost of storing SSE registers
949 in SImode, DImode and TImode */
950 10, /* MMX or SSE register to integer */
951 8, /* size of l1 cache. */
952 256, /* size of l2 cache. */
953 64, /* size of prefetch block */
954 6, /* number of parallel prefetches */
955 2, /* Branch cost */
956 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
957 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
958 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
959 COSTS_N_INSNS (2), /* cost of FABS instruction. */
960 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
961 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
962 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
963 DUMMY_STRINGOP_ALGS},
964 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
965 {-1, libcall}}},
966 DUMMY_STRINGOP_ALGS},
967 1, /* scalar_stmt_cost. */
968 1, /* scalar load_cost. */
969 1, /* scalar_store_cost. */
970 1, /* vec_stmt_cost. */
971 1, /* vec_to_scalar_cost. */
972 1, /* scalar_to_vec_cost. */
973 1, /* vec_align_load_cost. */
974 2, /* vec_unalign_load_cost. */
975 1, /* vec_store_cost. */
976 3, /* cond_taken_branch_cost. */
977 1, /* cond_not_taken_branch_cost. */
978 };
980 static const
981 struct processor_costs nocona_cost = {
982 COSTS_N_INSNS (1), /* cost of an add instruction */
983 COSTS_N_INSNS (1), /* cost of a lea instruction */
984 COSTS_N_INSNS (1), /* variable shift costs */
985 COSTS_N_INSNS (1), /* constant shift costs */
986 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
987 COSTS_N_INSNS (10), /* HI */
988 COSTS_N_INSNS (10), /* SI */
989 COSTS_N_INSNS (10), /* DI */
990 COSTS_N_INSNS (10)}, /* other */
991 0, /* cost of multiply per each bit set */
992 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
993 COSTS_N_INSNS (66), /* HI */
994 COSTS_N_INSNS (66), /* SI */
995 COSTS_N_INSNS (66), /* DI */
996 COSTS_N_INSNS (66)}, /* other */
997 COSTS_N_INSNS (1), /* cost of movsx */
998 COSTS_N_INSNS (1), /* cost of movzx */
999 16, /* "large" insn */
1000 17, /* MOVE_RATIO */
1001 4, /* cost for loading QImode using movzbl */
1002 {4, 4, 4}, /* cost of loading integer registers
1003 in QImode, HImode and SImode.
1004 Relative to reg-reg move (2). */
1005 {4, 4, 4}, /* cost of storing integer registers */
1006 3, /* cost of reg,reg fld/fst */
1007 {12, 12, 12}, /* cost of loading fp registers
1008 in SFmode, DFmode and XFmode */
1009 {4, 4, 4}, /* cost of storing fp registers
1010 in SFmode, DFmode and XFmode */
1011 6, /* cost of moving MMX register */
1012 {12, 12}, /* cost of loading MMX registers
1013 in SImode and DImode */
1014 {12, 12}, /* cost of storing MMX registers
1015 in SImode and DImode */
1016 6, /* cost of moving SSE register */
1017 {12, 12, 12}, /* cost of loading SSE registers
1018 in SImode, DImode and TImode */
1019 {12, 12, 12}, /* cost of storing SSE registers
1020 in SImode, DImode and TImode */
1021 8, /* MMX or SSE register to integer */
1022 8, /* size of l1 cache. */
1023 1024, /* size of l2 cache. */
1024 128, /* size of prefetch block */
1025 8, /* number of parallel prefetches */
1026 1, /* Branch cost */
1027 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1028 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1029 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1030 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1031 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1032 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1033 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1034 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
1035 {100000, unrolled_loop}, {-1, libcall}}}},
1036 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
1037 {-1, libcall}}},
1038 {libcall, {{24, loop}, {64, unrolled_loop},
1039 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1040 1, /* scalar_stmt_cost. */
1041 1, /* scalar load_cost. */
1042 1, /* scalar_store_cost. */
1043 1, /* vec_stmt_cost. */
1044 1, /* vec_to_scalar_cost. */
1045 1, /* scalar_to_vec_cost. */
1046 1, /* vec_align_load_cost. */
1047 2, /* vec_unalign_load_cost. */
1048 1, /* vec_store_cost. */
1049 3, /* cond_taken_branch_cost. */
1050 1, /* cond_not_taken_branch_cost. */
1051 };
1053 static const
1054 struct processor_costs core2_cost = {
1055 COSTS_N_INSNS (1), /* cost of an add instruction */
1056 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1057 COSTS_N_INSNS (1), /* variable shift costs */
1058 COSTS_N_INSNS (1), /* constant shift costs */
1059 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1060 COSTS_N_INSNS (3), /* HI */
1061 COSTS_N_INSNS (3), /* SI */
1062 COSTS_N_INSNS (3), /* DI */
1063 COSTS_N_INSNS (3)}, /* other */
1064 0, /* cost of multiply per each bit set */
1065 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
1066 COSTS_N_INSNS (22), /* HI */
1067 COSTS_N_INSNS (22), /* SI */
1068 COSTS_N_INSNS (22), /* DI */
1069 COSTS_N_INSNS (22)}, /* other */
1070 COSTS_N_INSNS (1), /* cost of movsx */
1071 COSTS_N_INSNS (1), /* cost of movzx */
1072 8, /* "large" insn */
1073 16, /* MOVE_RATIO */
1074 2, /* cost for loading QImode using movzbl */
1075 {6, 6, 6}, /* cost of loading integer registers
1076 in QImode, HImode and SImode.
1077 Relative to reg-reg move (2). */
1078 {4, 4, 4}, /* cost of storing integer registers */
1079 2, /* cost of reg,reg fld/fst */
1080 {6, 6, 6}, /* cost of loading fp registers
1081 in SFmode, DFmode and XFmode */
1082 {4, 4, 4}, /* cost of storing fp registers
1083 in SFmode, DFmode and XFmode */
1084 2, /* cost of moving MMX register */
1085 {6, 6}, /* cost of loading MMX registers
1086 in SImode and DImode */
1087 {4, 4}, /* cost of storing MMX registers
1088 in SImode and DImode */
1089 2, /* cost of moving SSE register */
1090 {6, 6, 6}, /* cost of loading SSE registers
1091 in SImode, DImode and TImode */
1092 {4, 4, 4}, /* cost of storing SSE registers
1093 in SImode, DImode and TImode */
1094 2, /* MMX or SSE register to integer */
1095 32, /* size of l1 cache. */
1096 2048, /* size of l2 cache. */
1097 128, /* size of prefetch block */
1098 8, /* number of parallel prefetches */
1099 3, /* Branch cost */
1100 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1101 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1102 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1103 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1104 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1105 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1106 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1107 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1108 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1109 {{libcall, {{8, loop}, {15, unrolled_loop},
1110 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1111 {libcall, {{24, loop}, {32, unrolled_loop},
1112 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1113 1, /* scalar_stmt_cost. */
1114 1, /* scalar load_cost. */
1115 1, /* scalar_store_cost. */
1116 1, /* vec_stmt_cost. */
1117 1, /* vec_to_scalar_cost. */
1118 1, /* scalar_to_vec_cost. */
1119 1, /* vec_align_load_cost. */
1120 2, /* vec_unalign_load_cost. */
1121 1, /* vec_store_cost. */
1122 3, /* cond_taken_branch_cost. */
1123 1, /* cond_not_taken_branch_cost. */
1124 };
1126 static const
1127 struct processor_costs atom_cost = {
1128 COSTS_N_INSNS (1), /* cost of an add instruction */
1129 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1130 COSTS_N_INSNS (1), /* variable shift costs */
1131 COSTS_N_INSNS (1), /* constant shift costs */
1132 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1133 COSTS_N_INSNS (4), /* HI */
1134 COSTS_N_INSNS (3), /* SI */
1135 COSTS_N_INSNS (4), /* DI */
1136 COSTS_N_INSNS (2)}, /* other */
1137 0, /* cost of multiply per each bit set */
1138 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1139 COSTS_N_INSNS (26), /* HI */
1140 COSTS_N_INSNS (42), /* SI */
1141 COSTS_N_INSNS (74), /* DI */
1142 COSTS_N_INSNS (74)}, /* other */
1143 COSTS_N_INSNS (1), /* cost of movsx */
1144 COSTS_N_INSNS (1), /* cost of movzx */
1145 8, /* "large" insn */
1146 17, /* MOVE_RATIO */
1147 2, /* cost for loading QImode using movzbl */
1148 {4, 4, 4}, /* cost of loading integer registers
1149 in QImode, HImode and SImode.
1150 Relative to reg-reg move (2). */
1151 {4, 4, 4}, /* cost of storing integer registers */
1152 4, /* cost of reg,reg fld/fst */
1153 {12, 12, 12}, /* cost of loading fp registers
1154 in SFmode, DFmode and XFmode */
1155 {6, 6, 8}, /* cost of storing fp registers
1156 in SFmode, DFmode and XFmode */
1157 2, /* cost of moving MMX register */
1158 {8, 8}, /* cost of loading MMX registers
1159 in SImode and DImode */
1160 {8, 8}, /* cost of storing MMX registers
1161 in SImode and DImode */
1162 2, /* cost of moving SSE register */
1163 {8, 8, 8}, /* cost of loading SSE registers
1164 in SImode, DImode and TImode */
1165 {8, 8, 8}, /* cost of storing SSE registers
1166 in SImode, DImode and TImode */
1167 5, /* MMX or SSE register to integer */
1168 32, /* size of l1 cache. */
1169 256, /* size of l2 cache. */
1170 64, /* size of prefetch block */
1171 6, /* number of parallel prefetches */
1172 3, /* Branch cost */
1173 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1174 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1175 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1176 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1177 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1178 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1179 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1180 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1181 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1182 {{libcall, {{8, loop}, {15, unrolled_loop},
1183 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1184 {libcall, {{24, loop}, {32, unrolled_loop},
1185 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1186 1, /* scalar_stmt_cost. */
1187 1, /* scalar load_cost. */
1188 1, /* scalar_store_cost. */
1189 1, /* vec_stmt_cost. */
1190 1, /* vec_to_scalar_cost. */
1191 1, /* scalar_to_vec_cost. */
1192 1, /* vec_align_load_cost. */
1193 2, /* vec_unalign_load_cost. */
1194 1, /* vec_store_cost. */
1195 3, /* cond_taken_branch_cost. */
1196 1, /* cond_not_taken_branch_cost. */
1197 };
1199 /* Generic64 should produce code tuned for Nocona and K8. */
1200 static const
1201 struct processor_costs generic64_cost = {
1202 COSTS_N_INSNS (1), /* cost of an add instruction */
1203 /* On all chips taken into consideration, lea is 2 cycles or more. With
1204 this cost, however, our current implementation of synth_mult results in
1205 the use of unnecessary temporary registers, causing regressions on several
1206 SPECfp benchmarks. */
1207 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1208 COSTS_N_INSNS (1), /* variable shift costs */
1209 COSTS_N_INSNS (1), /* constant shift costs */
1210 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1211 COSTS_N_INSNS (4), /* HI */
1212 COSTS_N_INSNS (3), /* SI */
1213 COSTS_N_INSNS (4), /* DI */
1214 COSTS_N_INSNS (2)}, /* other */
1215 0, /* cost of multiply per each bit set */
1216 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1217 COSTS_N_INSNS (26), /* HI */
1218 COSTS_N_INSNS (42), /* SI */
1219 COSTS_N_INSNS (74), /* DI */
1220 COSTS_N_INSNS (74)}, /* other */
1221 COSTS_N_INSNS (1), /* cost of movsx */
1222 COSTS_N_INSNS (1), /* cost of movzx */
1223 8, /* "large" insn */
1224 17, /* MOVE_RATIO */
1225 4, /* cost for loading QImode using movzbl */
1226 {4, 4, 4}, /* cost of loading integer registers
1227 in QImode, HImode and SImode.
1228 Relative to reg-reg move (2). */
1229 {4, 4, 4}, /* cost of storing integer registers */
1230 4, /* cost of reg,reg fld/fst */
1231 {12, 12, 12}, /* cost of loading fp registers
1232 in SFmode, DFmode and XFmode */
1233 {6, 6, 8}, /* cost of storing fp registers
1234 in SFmode, DFmode and XFmode */
1235 2, /* cost of moving MMX register */
1236 {8, 8}, /* cost of loading MMX registers
1237 in SImode and DImode */
1238 {8, 8}, /* cost of storing MMX registers
1239 in SImode and DImode */
1240 2, /* cost of moving SSE register */
1241 {8, 8, 8}, /* cost of loading SSE registers
1242 in SImode, DImode and TImode */
1243 {8, 8, 8}, /* cost of storing SSE registers
1244 in SImode, DImode and TImode */
1245 5, /* MMX or SSE register to integer */
1246 32, /* size of l1 cache. */
1247 512, /* size of l2 cache. */
1248 64, /* size of prefetch block */
1249 6, /* number of parallel prefetches */
1250 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1251 value is increased to the perhaps more appropriate value of 5. */
1252 3, /* Branch cost */
1253 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1254 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1255 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1256 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1257 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1258 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1259 {DUMMY_STRINGOP_ALGS,
1260 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1261 {DUMMY_STRINGOP_ALGS,
1262 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1263 1, /* scalar_stmt_cost. */
1264 1, /* scalar load_cost. */
1265 1, /* scalar_store_cost. */
1266 1, /* vec_stmt_cost. */
1267 1, /* vec_to_scalar_cost. */
1268 1, /* scalar_to_vec_cost. */
1269 1, /* vec_align_load_cost. */
1270 2, /* vec_unalign_load_cost. */
1271 1, /* vec_store_cost. */
1272 3, /* cond_taken_branch_cost. */
1273 1, /* cond_not_taken_branch_cost. */
1274 };
1276 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1277 Athlon and K8. */
1278 static const
1279 struct processor_costs generic32_cost = {
1280 COSTS_N_INSNS (1), /* cost of an add instruction */
1281 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1282 COSTS_N_INSNS (1), /* variable shift costs */
1283 COSTS_N_INSNS (1), /* constant shift costs */
1284 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1285 COSTS_N_INSNS (4), /* HI */
1286 COSTS_N_INSNS (3), /* SI */
1287 COSTS_N_INSNS (4), /* DI */
1288 COSTS_N_INSNS (2)}, /* other */
1289 0, /* cost of multiply per each bit set */
1290 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1291 COSTS_N_INSNS (26), /* HI */
1292 COSTS_N_INSNS (42), /* SI */
1293 COSTS_N_INSNS (74), /* DI */
1294 COSTS_N_INSNS (74)}, /* other */
1295 COSTS_N_INSNS (1), /* cost of movsx */
1296 COSTS_N_INSNS (1), /* cost of movzx */
1297 8, /* "large" insn */
1298 17, /* MOVE_RATIO */
1299 4, /* cost for loading QImode using movzbl */
1300 {4, 4, 4}, /* cost of loading integer registers
1301 in QImode, HImode and SImode.
1302 Relative to reg-reg move (2). */
1303 {4, 4, 4}, /* cost of storing integer registers */
1304 4, /* cost of reg,reg fld/fst */
1305 {12, 12, 12}, /* cost of loading fp registers
1306 in SFmode, DFmode and XFmode */
1307 {6, 6, 8}, /* cost of storing fp registers
1308 in SFmode, DFmode and XFmode */
1309 2, /* cost of moving MMX register */
1310 {8, 8}, /* cost of loading MMX registers
1311 in SImode and DImode */
1312 {8, 8}, /* cost of storing MMX registers
1313 in SImode and DImode */
1314 2, /* cost of moving SSE register */
1315 {8, 8, 8}, /* cost of loading SSE registers
1316 in SImode, DImode and TImode */
1317 {8, 8, 8}, /* cost of storing SSE registers
1318 in SImode, DImode and TImode */
1319 5, /* MMX or SSE register to integer */
1320 32, /* size of l1 cache. */
1321 256, /* size of l2 cache. */
1322 64, /* size of prefetch block */
1323 6, /* number of parallel prefetches */
1324 3, /* Branch cost */
1325 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1326 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1327 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1328 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1329 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1330 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1331 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1332 DUMMY_STRINGOP_ALGS},
1333 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1334 DUMMY_STRINGOP_ALGS},
1335 1, /* scalar_stmt_cost. */
1336 1, /* scalar load_cost. */
1337 1, /* scalar_store_cost. */
1338 1, /* vec_stmt_cost. */
1339 1, /* vec_to_scalar_cost. */
1340 1, /* scalar_to_vec_cost. */
1341 1, /* vec_align_load_cost. */
1342 2, /* vec_unalign_load_cost. */
1343 1, /* vec_store_cost. */
1344 3, /* cond_taken_branch_cost. */
1345 1, /* cond_not_taken_branch_cost. */
1346 };
1348 const struct processor_costs *ix86_cost = &pentium_cost;
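/* Note that the assignment above is only the build-time default; the option
   handling later in this file (processor_target_table and the option
   override code) is expected to repoint ix86_cost at the table matching the
   selected -mtune processor, and the rtx-cost and expansion code then reads
   fields such as ix86_cost->add or ix86_cost->memcpy through this pointer.  */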
1350 /* Processor feature/optimization bitmasks. */
1351 #define m_386 (1<<PROCESSOR_I386)
1352 #define m_486 (1<<PROCESSOR_I486)
1353 #define m_PENT (1<<PROCESSOR_PENTIUM)
1354 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1355 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1356 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1357 #define m_CORE2 (1<<PROCESSOR_CORE2)
1358 #define m_ATOM (1<<PROCESSOR_ATOM)
1360 #define m_GEODE (1<<PROCESSOR_GEODE)
1361 #define m_K6 (1<<PROCESSOR_K6)
1362 #define m_K6_GEODE (m_K6 | m_GEODE)
1363 #define m_K8 (1<<PROCESSOR_K8)
1364 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1365 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1366 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1367 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1368 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1)
1370 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1371 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1373 /* Generic instruction choice should be common subset of supported CPUs
1374 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1375 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
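/* Sketch of how these masks are consumed: each entry of
   initial_ix86_tune_features below is a bitmask of the processors a tuning
   applies to, and the option override code is expected to collapse it into a
   per-feature boolean for the processor actually selected, along the lines of

     ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; i++)
       ix86_tune_features[i]
         = !!(initial_ix86_tune_features[i] & ix86_tune_mask);

   so that tests like TARGET_USE_LEAVE simply index ix86_tune_features[].  */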
1377 /* Feature tests against the various tunings. */
1378 unsigned char ix86_tune_features[X86_TUNE_LAST];
1380 /* Feature tests against the various tunings used to create ix86_tune_features
1381 based on the processor mask. */
1382 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1383 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1384 negatively, so enabling it for Generic64 seems like a good code size
1385 tradeoff. We can't enable it for 32bit generic because it does not
1386 work well with PPro based chips. */
1387 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1389 /* X86_TUNE_PUSH_MEMORY */
1390 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1391 | m_NOCONA | m_CORE2 | m_GENERIC,
1393 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1394 m_486 | m_PENT,
1396 /* X86_TUNE_UNROLL_STRLEN */
1397 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1398 | m_CORE2 | m_GENERIC,
1400 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1401 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1403 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1404 on simulation results. But after P4 was made, no performance benefit
1405 was observed with branch hints. It also increases the code size.
1406 As a result, icc never generates branch hints. */
1407 0,
1409 /* X86_TUNE_DOUBLE_WITH_ADD */
1410 ~m_386,
1412 /* X86_TUNE_USE_SAHF */
1413 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_PENT4
1414 | m_NOCONA | m_CORE2 | m_GENERIC,
1416 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1417 partial dependencies. */
1418 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1419 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1421 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1422 register stalls in the Generic32 compilation setting as well. However,
1423 in the current implementation the partial register stalls are not
1424 eliminated very well - they can be introduced via subregs synthesized
1425 by combine and can happen in caller/callee saving sequences. Because
1426 this option pays back little on PPro based chips and is in conflict with
1427 partial reg dependencies used by Athlon/P4 based chips, it is better to
1428 leave it off for generic32 for now. */
1429 m_PPRO,
1431 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1432 m_CORE2 | m_GENERIC,
1434 /* X86_TUNE_USE_HIMODE_FIOP */
1435 m_386 | m_486 | m_K6_GEODE,
1437 /* X86_TUNE_USE_SIMODE_FIOP */
1438 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1440 /* X86_TUNE_USE_MOV0 */
1441 m_K6,
1443 /* X86_TUNE_USE_CLTD */
1444 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1446 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1447 m_PENT4,
1449 /* X86_TUNE_SPLIT_LONG_MOVES */
1450 m_PPRO,
1452 /* X86_TUNE_READ_MODIFY_WRITE */
1453 ~m_PENT,
1455 /* X86_TUNE_READ_MODIFY */
1456 ~(m_PENT | m_PPRO),
1458 /* X86_TUNE_PROMOTE_QIMODE */
1459 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1460 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1462 /* X86_TUNE_FAST_PREFIX */
1463 ~(m_PENT | m_486 | m_386),
1465 /* X86_TUNE_SINGLE_STRINGOP */
1466 m_386 | m_PENT4 | m_NOCONA,
1468 /* X86_TUNE_QIMODE_MATH */
1471 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1472 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1473 might be considered for Generic32 if our scheme for avoiding partial
1474 stalls was more effective. */
1475 ~m_PPRO,
1477 /* X86_TUNE_PROMOTE_QI_REGS */
1480 /* X86_TUNE_PROMOTE_HI_REGS */
1481 m_PPRO,
1483 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1484 over esp addition. */
1485 m_386 | m_486 | m_PENT | m_PPRO,
1487 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1488 over esp addition. */
1489 m_PENT,
1491 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1492 over esp subtraction. */
1493 m_386 | m_486 | m_PENT | m_K6_GEODE,
1495 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
1496 over esp subtraction. */
1497 m_PENT | m_K6_GEODE,
1499 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1500 for DFmode copies */
1501 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1502 | m_GENERIC | m_GEODE),
1504 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1505 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1507 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1508 conflict here between PPro/Pentium4 based chips that treat 128bit
1509 SSE registers as single units and K8 based chips that split SSE
1510 registers into two 64bit halves. This knob promotes all store destinations
1511 to be 128bit to allow register renaming on 128bit SSE units, but usually
1512 results in one extra microop on 64bit SSE units. Experimental results
1513 show that disabling this option on P4 brings over a 20% SPECfp regression,
1514 while enabling it on K8 brings roughly a 2.4% regression that can be partly
1515 masked by careful scheduling of moves. */
1516 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1517 | m_AMDFAM10 | m_BDVER1,
1519 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1520 m_AMDFAM10 | m_BDVER1,
1522 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1523 m_BDVER1,
1525 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1526 m_BDVER1,
1528 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1529 are resolved on SSE register parts instead of whole registers, so we may
1530 maintain just the lower part of scalar values in proper format, leaving
1531 the upper part undefined. */
1532 m_ATHLON_K8,
1534 /* X86_TUNE_SSE_TYPELESS_STORES */
1535 m_AMD_MULTIPLE,
1537 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1538 m_PPRO | m_PENT4 | m_NOCONA,
1540 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1541 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1543 /* X86_TUNE_PROLOGUE_USING_MOVE */
1544 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1546 /* X86_TUNE_EPILOGUE_USING_MOVE */
1547 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1549 /* X86_TUNE_SHIFT1 */
1550 ~m_486,
1552 /* X86_TUNE_USE_FFREEP */
1553 m_AMD_MULTIPLE,
1555 /* X86_TUNE_INTER_UNIT_MOVES */
1556 ~(m_AMD_MULTIPLE | m_GENERIC),
1558 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1559 ~(m_AMDFAM10 | m_BDVER1),
1561 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1562 than 4 branch instructions in the 16 byte window. */
1563 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1564 | m_GENERIC,
1566 /* X86_TUNE_SCHEDULE */
1567 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1568 | m_GENERIC,
1570 /* X86_TUNE_USE_BT */
1571 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1573 /* X86_TUNE_USE_INCDEC */
1574 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1576 /* X86_TUNE_PAD_RETURNS */
1577 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1579 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
1580 m_ATOM,
1582 /* X86_TUNE_EXT_80387_CONSTANTS */
1583 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1584 | m_CORE2 | m_GENERIC,
1586 /* X86_TUNE_SHORTEN_X87_SSE */
1587 ~m_K8,
1589 /* X86_TUNE_AVOID_VECTOR_DECODE */
1590 m_K8 | m_GENERIC64,
1592 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
1593 HImode and SImode multiply, but 386 and 486 do HImode multiply faster. */
1594 ~(m_386 | m_486),
1596 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1597 vector path on AMD machines. */
1598 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1600 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1601 machines. */
1602 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1604 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1605 than a MOV. */
1606 m_PENT,
1608 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1609 but one byte longer. */
1610 m_PENT,
1612 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
1613 operand that cannot be represented using a modRM byte. The XOR
1614 replacement is long decoded, so this split helps here as well. */
1615 m_K6,
1617 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1618 from FP to FP. */
1619 m_AMDFAM10 | m_GENERIC,
1621 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1622 from integer to FP. */
1623 m_AMDFAM10,
1625 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1626 with a subsequent conditional jump instruction into a single
1627 compare-and-branch uop. */
1628 m_CORE2 | m_BDVER1,
1630 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1631 will impact LEA instruction selection. */
1632 m_ATOM,
1634 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
1635 instructions. */
1636 ~m_ATOM,
1639 /* Feature tests against the various architecture variations. */
1640 unsigned char ix86_arch_features[X86_ARCH_LAST];
1642 /* Feature tests against the various architecture variations, used to create
1643 ix86_arch_features based on the processor mask. */
1644 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1645 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1646 ~(m_386 | m_486 | m_PENT | m_K6),
1648 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1649 ~m_386,
1651 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1652 ~(m_386 | m_486),
1654 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1655 ~m_386,
1657 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1658 ~m_386,
1661 static const unsigned int x86_accumulate_outgoing_args
1662 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1663 | m_GENERIC;
1665 static const unsigned int x86_arch_always_fancy_math_387
1666 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1667 | m_NOCONA | m_CORE2 | m_GENERIC;
1669 static enum stringop_alg stringop_alg = no_stringop;
1671 /* If the average insn count for a single function invocation is
1672 lower than this constant, emit fast (but longer) prologue and
1673 epilogue code. */
1674 #define FAST_PROLOGUE_INSN_COUNT 20
1676 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1677 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1678 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1679 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1681 /* Array of the smallest class containing reg number REGNO, indexed by
1682 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1684 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1686 /* ax, dx, cx, bx */
1687 AREG, DREG, CREG, BREG,
1688 /* si, di, bp, sp */
1689 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1690 /* FP registers */
1691 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1692 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1693 /* arg pointer */
1694 NON_Q_REGS,
1695 /* flags, fpsr, fpcr, frame */
1696 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1697 /* SSE registers */
1698 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1699 SSE_REGS, SSE_REGS,
1700 /* MMX registers */
1701 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1702 MMX_REGS, MMX_REGS,
1703 /* REX registers */
1704 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1705 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1706 /* SSE REX registers */
1707 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1708 SSE_REGS, SSE_REGS,
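/* For example, regclass_map[1] is DREG, so REGNO_REG_CLASS (1) yields the
   smallest class containing %edx, while %ebp and %esp map to NON_Q_REGS
   because they have no QImode low part in 32-bit mode.  */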
1711 /* The "default" register map used in 32bit mode. */
1713 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1715 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1716 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1717 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1718 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1719 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1720 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1721 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1724 /* The "default" register map used in 64bit mode. */
1726 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1728 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1729 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1730 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1731 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1732 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1733 8,9,10,11,12,13,14,15, /* extended integer registers */
1734 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1737 /* Define the register numbers to be used in Dwarf debugging information.
1738 The SVR4 reference port C compiler uses the following register numbers
1739 in its Dwarf output code:
1740 0 for %eax (gcc regno = 0)
1741 1 for %ecx (gcc regno = 2)
1742 2 for %edx (gcc regno = 1)
1743 3 for %ebx (gcc regno = 3)
1744 4 for %esp (gcc regno = 7)
1745 5 for %ebp (gcc regno = 6)
1746 6 for %esi (gcc regno = 4)
1747 7 for %edi (gcc regno = 5)
1748 The following three DWARF register numbers are never generated by
1749 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1750 believes these numbers have these meanings.
1751 8 for %eip (no gcc equivalent)
1752 9 for %eflags (gcc regno = 17)
1753 10 for %trapno (no gcc equivalent)
1754 It is not at all clear how we should number the FP stack registers
1755 for the x86 architecture. If the version of SDB on x86/svr4 were
1756 a bit less brain dead with respect to floating-point then we would
1757 have a precedent to follow with respect to DWARF register numbers
1758 for x86 FP registers, but the SDB on x86/svr4 is so completely
1759 broken with respect to FP registers that it is hardly worth thinking
1760 of it as something to strive for compatibility with.
1761 The version of x86/svr4 SDB I have at the moment does (partially)
1762 seem to believe that DWARF register number 11 is associated with
1763 the x86 register %st(0), but that's about all. Higher DWARF
1764 register numbers don't seem to be associated with anything in
1765 particular, and even for DWARF regno 11, SDB only seems to under-
1766 stand that it should say that a variable lives in %st(0) (when
1767 asked via an `=' command) if we said it was in DWARF regno 11,
1768 but SDB still prints garbage when asked for the value of the
1769 variable in question (via a `/' command).
1770 (Also note that the labels SDB prints for various FP stack regs
1771 when doing an `x' command are all wrong.)
1772 Note that these problems generally don't affect the native SVR4
1773 C compiler because it doesn't allow the use of -O with -g and
1774 because when it is *not* optimizing, it allocates a memory
1775 location for each floating-point variable, and the memory
1776 location is what gets described in the DWARF AT_location
1777 attribute for the variable in question.
1778 Regardless of the severe mental illness of the x86/svr4 SDB, we
1779 do something sensible here and we use the following DWARF
1780 register numbers. Note that these are all stack-top-relative
1781 numbers.
1782 11 for %st(0) (gcc regno = 8)
1783 12 for %st(1) (gcc regno = 9)
1784 13 for %st(2) (gcc regno = 10)
1785 14 for %st(3) (gcc regno = 11)
1786 15 for %st(4) (gcc regno = 12)
1787 16 for %st(5) (gcc regno = 13)
1788 17 for %st(6) (gcc regno = 14)
1789 18 for %st(7) (gcc regno = 15)
1791 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1793 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1794 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1795 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1796 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1797 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1798 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1799 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
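/* Worked example of the mapping above: %ecx is gcc regno 2 and
   svr4_dbx_register_map[2] == 1, %st(0) is gcc regno 8 and maps to DWARF
   regno 11, and the flags register (gcc regno 17) maps to DWARF regno 9,
   exactly as listed in the comment.  */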
1802 /* Define parameter passing and return registers. */
1804 static int const x86_64_int_parameter_registers[6] =
1806 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1809 static int const x86_64_ms_abi_int_parameter_registers[4] =
1811 CX_REG, DX_REG, R8_REG, R9_REG
1814 static int const x86_64_int_return_registers[4] =
1816 AX_REG, DX_REG, DI_REG, SI_REG
1819 /* Define the structure for the machine field in struct function. */
1821 struct GTY(()) stack_local_entry {
1822 unsigned short mode;
1823 unsigned short n;
1824 rtx rtl;
1825 struct stack_local_entry *next;
1828 /* Structure describing stack frame layout.
1829 Stack grows downward:
1831 [arguments]
1832 <- ARG_POINTER
1833 saved pc
1835 saved static chain if ix86_static_chain_on_stack
1837 saved frame pointer if frame_pointer_needed
1838 <- HARD_FRAME_POINTER
1839 [saved regs]
1840 <- regs_save_offset
1841 [padding0]
1843 [saved SSE regs]
1844 <- sse_regs_save_offset
1845 [padding1] |
1846 | <- FRAME_POINTER
1847 [va_arg registers] |
1849 [frame] |
1851 [padding2] | = to_allocate
1852 <- STACK_POINTER
1854 struct ix86_frame
1856 int nsseregs;
1857 int nregs;
1858 int va_arg_size;
1859 int red_zone_size;
1860 int outgoing_arguments_size;
1861 HOST_WIDE_INT frame;
1863 /* The offsets relative to ARG_POINTER. */
1864 HOST_WIDE_INT frame_pointer_offset;
1865 HOST_WIDE_INT hard_frame_pointer_offset;
1866 HOST_WIDE_INT stack_pointer_offset;
1867 HOST_WIDE_INT reg_save_offset;
1868 HOST_WIDE_INT sse_reg_save_offset;
1870 /* When save_regs_using_mov is set, emit prologue using
1871 move instead of push instructions. */
1872 bool save_regs_using_mov;
1875 /* Code model option. */
1876 enum cmodel ix86_cmodel;
1877 /* Asm dialect. */
1878 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1879 /* TLS dialects. */
1880 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1882 /* Which unit we are generating floating point math for. */
1883 enum fpmath_unit ix86_fpmath;
1885 /* Which cpu are we scheduling for. */
1886 enum attr_cpu ix86_schedule;
1888 /* Which cpu are we optimizing for. */
1889 enum processor_type ix86_tune;
1891 /* Which instruction set architecture to use. */
1892 enum processor_type ix86_arch;
1894 /* True if the SSE prefetch instruction is not a NOP. */
1895 int x86_prefetch_sse;
1897 /* ix86_regparm_string as a number */
1898 static int ix86_regparm;
1900 /* -mstackrealign option */
1901 extern int ix86_force_align_arg_pointer;
1902 static const char ix86_force_align_arg_pointer_string[]
1903 = "force_align_arg_pointer";
1905 static rtx (*ix86_gen_leave) (void);
1906 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1907 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1908 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
1909 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1910 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1911 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1912 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
1913 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
1914 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
1916 /* Preferred alignment for stack boundary in bits. */
1917 unsigned int ix86_preferred_stack_boundary;
1919 /* Alignment for incoming stack boundary in bits specified at
1920 command line. */
1921 static unsigned int ix86_user_incoming_stack_boundary;
1923 /* Default alignment for incoming stack boundary in bits. */
1924 static unsigned int ix86_default_incoming_stack_boundary;
1926 /* Alignment for incoming stack boundary in bits. */
1927 unsigned int ix86_incoming_stack_boundary;
1929 /* The abi used by target. */
1930 enum calling_abi ix86_abi;
1932 /* Values 1-5: see jump.c */
1933 int ix86_branch_cost;
1935 /* Calling abi specific va_list type nodes. */
1936 static GTY(()) tree sysv_va_list_type_node;
1937 static GTY(()) tree ms_va_list_type_node;
1939 /* Variables which are this size or smaller are put in the data/bss
1940 or ldata/lbss sections. */
1942 int ix86_section_threshold = 65536;
1944 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1945 char internal_label_prefix[16];
1946 int internal_label_prefix_len;
1948 /* Fence to use after loop using movnt. */
1949 tree x86_mfence;
1951 /* Register class used for passing given 64bit part of the argument.
1952 These represent classes as documented by the PS ABI, with the exception
1953 of the SSESF and SSEDF classes, which are basically the SSE class, except
1954 that gcc will use SFmode or DFmode moves instead of DImode to avoid
1956 reformatting penalties. Similarly we play games with INTEGERSI_CLASS to
1957 use cheaper SImode moves whenever possible (the upper half is then padding). */
1958 enum x86_64_reg_class
1960 X86_64_NO_CLASS,
1961 X86_64_INTEGER_CLASS,
1962 X86_64_INTEGERSI_CLASS,
1963 X86_64_SSE_CLASS,
1964 X86_64_SSESF_CLASS,
1965 X86_64_SSEDF_CLASS,
1966 X86_64_SSEUP_CLASS,
1967 X86_64_X87_CLASS,
1968 X86_64_X87UP_CLASS,
1969 X86_64_COMPLEX_X87_CLASS,
1970 X86_64_MEMORY_CLASS
1973 #define MAX_CLASSES 4
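/* As an illustration of the classification (a sketch only; the actual
   logic lives in classify_argument further down): an aggregate such as

     struct s { double d; int i; };

   occupies two eightbytes; the first is classified X86_64_SSEDF_CLASS and
   the second X86_64_INTEGERSI_CLASS, so it is passed in one SSE register
   and one general register.  MAX_CLASSES is 4 because a 32-byte object
   like __m256 needs four classes (SSE followed by three SSEUP).  */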
1975 /* Table of constants used by fldpi, fldln2, etc.... */
1976 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1977 static bool ext_80387_constants_init = 0;
1980 static struct machine_function * ix86_init_machine_status (void);
1981 static rtx ix86_function_value (const_tree, const_tree, bool);
1982 static bool ix86_function_value_regno_p (const unsigned int);
1983 static rtx ix86_static_chain (const_tree, bool);
1984 static int ix86_function_regparm (const_tree, const_tree);
1985 static void ix86_compute_frame_layout (struct ix86_frame *);
1986 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1987 rtx, rtx, int);
1988 static void ix86_add_new_builtins (int);
1989 static rtx ix86_expand_vec_perm_builtin (tree);
1990 static tree ix86_canonical_va_list_type (tree);
1991 static void predict_jump (int);
1992 static unsigned int split_stack_prologue_scratch_regno (void);
1993 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
1995 enum ix86_function_specific_strings
1997 IX86_FUNCTION_SPECIFIC_ARCH,
1998 IX86_FUNCTION_SPECIFIC_TUNE,
1999 IX86_FUNCTION_SPECIFIC_FPMATH,
2000 IX86_FUNCTION_SPECIFIC_MAX
2003 static char *ix86_target_string (int, int, const char *, const char *,
2004 const char *, bool);
2005 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2006 static void ix86_function_specific_save (struct cl_target_option *);
2007 static void ix86_function_specific_restore (struct cl_target_option *);
2008 static void ix86_function_specific_print (FILE *, int,
2009 struct cl_target_option *);
2010 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2011 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2012 static bool ix86_can_inline_p (tree, tree);
2013 static void ix86_set_current_function (tree);
2014 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2016 static enum calling_abi ix86_function_abi (const_tree);
2019 #ifndef SUBTARGET32_DEFAULT_CPU
2020 #define SUBTARGET32_DEFAULT_CPU "i386"
2021 #endif
2023 /* The svr4 ABI for the i386 says that records and unions are returned
2024 in memory. */
2025 #ifndef DEFAULT_PCC_STRUCT_RETURN
2026 #define DEFAULT_PCC_STRUCT_RETURN 1
2027 #endif
2029 /* Whether -mtune= or -march= were specified */
2030 static int ix86_tune_defaulted;
2031 static int ix86_arch_specified;
2033 /* A mask of ix86_isa_flags that includes bit X if X
2034 was set or cleared on the command line. */
2035 static int ix86_isa_flags_explicit;
2037 /* Define a set of ISAs which are available when a given ISA is
2038 enabled. MMX and SSE ISAs are handled separately. */
2040 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2041 #define OPTION_MASK_ISA_3DNOW_SET \
2042 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2044 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2045 #define OPTION_MASK_ISA_SSE2_SET \
2046 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2047 #define OPTION_MASK_ISA_SSE3_SET \
2048 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2049 #define OPTION_MASK_ISA_SSSE3_SET \
2050 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2051 #define OPTION_MASK_ISA_SSE4_1_SET \
2052 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2053 #define OPTION_MASK_ISA_SSE4_2_SET \
2054 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2055 #define OPTION_MASK_ISA_AVX_SET \
2056 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2057 #define OPTION_MASK_ISA_FMA_SET \
2058 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
2060 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
2061 as -msse4.2. */
2062 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2064 #define OPTION_MASK_ISA_SSE4A_SET \
2065 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2066 #define OPTION_MASK_ISA_FMA4_SET \
2067 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2068 | OPTION_MASK_ISA_AVX_SET)
2069 #define OPTION_MASK_ISA_XOP_SET \
2070 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2071 #define OPTION_MASK_ISA_LWP_SET \
2072 OPTION_MASK_ISA_LWP
2074 /* AES and PCLMUL need SSE2 because they use xmm registers */
2075 #define OPTION_MASK_ISA_AES_SET \
2076 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2077 #define OPTION_MASK_ISA_PCLMUL_SET \
2078 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2080 #define OPTION_MASK_ISA_ABM_SET \
2081 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2083 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2084 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2085 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2086 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2087 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2089 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2090 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2091 #define OPTION_MASK_ISA_F16C_SET \
2092 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
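/* As a concrete example of how the *_SET chains compose, expanding
   OPTION_MASK_ISA_SSE4_2_SET gives

     OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1
       | OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3
       | OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE

   so a single "ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET" in
   ix86_handle_option turns on every prerequisite SSE ISA for -msse4.2.  */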
2094 /* Define a set of ISAs which aren't available when a given ISA is
2095 disabled. MMX and SSE ISAs are handled separately. */
2097 #define OPTION_MASK_ISA_MMX_UNSET \
2098 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2099 #define OPTION_MASK_ISA_3DNOW_UNSET \
2100 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2101 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2103 #define OPTION_MASK_ISA_SSE_UNSET \
2104 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2105 #define OPTION_MASK_ISA_SSE2_UNSET \
2106 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2107 #define OPTION_MASK_ISA_SSE3_UNSET \
2108 (OPTION_MASK_ISA_SSE3 \
2109 | OPTION_MASK_ISA_SSSE3_UNSET \
2110 | OPTION_MASK_ISA_SSE4A_UNSET )
2111 #define OPTION_MASK_ISA_SSSE3_UNSET \
2112 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2113 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2114 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2115 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2116 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2117 #define OPTION_MASK_ISA_AVX_UNSET \
2118 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2119 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2120 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2122 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
2123 as -mno-sse4.1. */
2124 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2126 #define OPTION_MASK_ISA_SSE4A_UNSET \
2127 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2129 #define OPTION_MASK_ISA_FMA4_UNSET \
2130 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2131 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2132 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2134 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2135 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2136 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2137 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2138 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2139 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2140 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2141 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2143 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2144 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2145 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
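/* Dually, the *_UNSET chains propagate downward through the dependency
   graph: OPTION_MASK_ISA_SSE2_UNSET expands through SSE3, SSSE3, SSE4.1,
   SSE4.2 and AVX (and, via SSE4A, through FMA4 and XOP), so

     ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;

   for -mno-sse2 also clears OPTION_MASK_ISA_AVX and OPTION_MASK_ISA_XOP,
   since those ISAs cannot work without SSE2.  */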
2147 /* Vectorization library interface and handlers. */
2148 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2150 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2151 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2153 /* Processor target table, indexed by processor number */
2154 struct ptt
2156 const struct processor_costs *cost; /* Processor costs */
2157 const int align_loop; /* Default alignments. */
2158 const int align_loop_max_skip;
2159 const int align_jump;
2160 const int align_jump_max_skip;
2161 const int align_func;
2164 static const struct ptt processor_target_table[PROCESSOR_max] =
2166 {&i386_cost, 4, 3, 4, 3, 4},
2167 {&i486_cost, 16, 15, 16, 15, 16},
2168 {&pentium_cost, 16, 7, 16, 7, 16},
2169 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2170 {&geode_cost, 0, 0, 0, 0, 0},
2171 {&k6_cost, 32, 7, 32, 7, 32},
2172 {&athlon_cost, 16, 7, 16, 7, 16},
2173 {&pentium4_cost, 0, 0, 0, 0, 0},
2174 {&k8_cost, 16, 7, 16, 7, 16},
2175 {&nocona_cost, 0, 0, 0, 0, 0},
2176 {&core2_cost, 16, 10, 16, 10, 16},
2177 {&generic32_cost, 16, 7, 16, 7, 16},
2178 {&generic64_cost, 16, 10, 16, 10, 16},
2179 {&amdfam10_cost, 32, 24, 32, 7, 32},
2180 {&bdver1_cost, 32, 24, 32, 7, 32},
2181 {&atom_cost, 16, 7, 16, 7, 16}
2184 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2186 "generic",
2187 "i386",
2188 "i486",
2189 "pentium",
2190 "pentium-mmx",
2191 "pentiumpro",
2192 "pentium2",
2193 "pentium3",
2194 "pentium4",
2195 "pentium-m",
2196 "prescott",
2197 "nocona",
2198 "core2",
2199 "atom",
2200 "geode",
2201 "k6",
2202 "k6-2",
2203 "k6-3",
2204 "athlon",
2205 "athlon-4",
2206 "k8",
2207 "amdfam10",
2208 "bdver1"
2211 /* Return true if a red-zone is in use. */
2213 static inline bool
2214 ix86_using_red_zone (void)
2216 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2219 /* Implement TARGET_HANDLE_OPTION. */
2221 static bool
2222 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2224 switch (code)
2226 case OPT_mmmx:
2227 if (value)
2229 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2230 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2232 else
2234 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2235 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2237 return true;
2239 case OPT_m3dnow:
2240 if (value)
2242 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2243 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2245 else
2247 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2248 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2250 return true;
2252 case OPT_m3dnowa:
2253 return false;
2255 case OPT_msse:
2256 if (value)
2258 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2259 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2261 else
2263 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2264 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2266 return true;
2268 case OPT_msse2:
2269 if (value)
2271 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2272 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2274 else
2276 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2277 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2279 return true;
2281 case OPT_msse3:
2282 if (value)
2284 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2285 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2287 else
2289 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2290 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2292 return true;
2294 case OPT_mssse3:
2295 if (value)
2297 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2298 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2300 else
2302 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2303 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2305 return true;
2307 case OPT_msse4_1:
2308 if (value)
2310 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2311 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2313 else
2315 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2316 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2318 return true;
2320 case OPT_msse4_2:
2321 if (value)
2323 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2324 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2326 else
2328 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2329 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2331 return true;
2333 case OPT_mavx:
2334 if (value)
2336 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2337 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2339 else
2341 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2342 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2344 return true;
2346 case OPT_mfma:
2347 if (value)
2349 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2350 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2352 else
2354 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2355 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2357 return true;
2359 case OPT_msse4:
2360 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2361 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2362 return true;
2364 case OPT_mno_sse4:
2365 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2366 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2367 return true;
2369 case OPT_msse4a:
2370 if (value)
2372 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2373 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2375 else
2377 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2378 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2380 return true;
2382 case OPT_mfma4:
2383 if (value)
2385 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2386 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2388 else
2390 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2391 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2393 return true;
2395 case OPT_mxop:
2396 if (value)
2398 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2399 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2401 else
2403 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2404 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2406 return true;
2408 case OPT_mlwp:
2409 if (value)
2411 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2412 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2414 else
2416 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2417 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2419 return true;
2421 case OPT_mabm:
2422 if (value)
2424 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2425 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2427 else
2429 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2430 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2432 return true;
2434 case OPT_mpopcnt:
2435 if (value)
2437 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2438 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2440 else
2442 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2443 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2445 return true;
2447 case OPT_msahf:
2448 if (value)
2450 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2451 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2453 else
2455 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2456 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2458 return true;
2460 case OPT_mcx16:
2461 if (value)
2463 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2464 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2466 else
2468 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2469 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2471 return true;
2473 case OPT_mmovbe:
2474 if (value)
2476 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2477 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2479 else
2481 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2482 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2484 return true;
2486 case OPT_mcrc32:
2487 if (value)
2489 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2490 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2492 else
2494 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2495 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2497 return true;
2499 case OPT_maes:
2500 if (value)
2502 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2503 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2505 else
2507 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2508 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2510 return true;
2512 case OPT_mpclmul:
2513 if (value)
2515 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2516 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2518 else
2520 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2521 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2523 return true;
2525 case OPT_mfsgsbase:
2526 if (value)
2528 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
2529 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
2531 else
2533 ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
2534 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
2536 return true;
2538 case OPT_mrdrnd:
2539 if (value)
2541 ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
2542 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
2544 else
2546 ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
2547 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
2549 return true;
2551 case OPT_mf16c:
2552 if (value)
2554 ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
2555 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
2557 else
2559 ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
2560 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
2562 return true;
2564 default:
2565 return true;
2569 /* Return a string that documents the current -m options. The caller is
2570 responsible for freeing the string. */
2572 static char *
2573 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2574 const char *fpmath, bool add_nl_p)
2576 struct ix86_target_opts
2578 const char *option; /* option string */
2579 int mask; /* isa mask options */
2582 /* This table is ordered so that options like -msse4.2, which imply
2583 preceding options, are matched first. */
2584 static struct ix86_target_opts isa_opts[] =
2586 { "-m64", OPTION_MASK_ISA_64BIT },
2587 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2588 { "-mfma", OPTION_MASK_ISA_FMA },
2589 { "-mxop", OPTION_MASK_ISA_XOP },
2590 { "-mlwp", OPTION_MASK_ISA_LWP },
2591 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2592 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2593 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2594 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2595 { "-msse3", OPTION_MASK_ISA_SSE3 },
2596 { "-msse2", OPTION_MASK_ISA_SSE2 },
2597 { "-msse", OPTION_MASK_ISA_SSE },
2598 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2599 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2600 { "-mmmx", OPTION_MASK_ISA_MMX },
2601 { "-mabm", OPTION_MASK_ISA_ABM },
2602 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2603 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2604 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2605 { "-maes", OPTION_MASK_ISA_AES },
2606 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2607 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2608 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2609 { "-mf16c", OPTION_MASK_ISA_F16C },
2612 /* Flag options. */
2613 static struct ix86_target_opts flag_opts[] =
2615 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2616 { "-m80387", MASK_80387 },
2617 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2618 { "-malign-double", MASK_ALIGN_DOUBLE },
2619 { "-mcld", MASK_CLD },
2620 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2621 { "-mieee-fp", MASK_IEEE_FP },
2622 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2623 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2624 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2625 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2626 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2627 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2628 { "-mno-red-zone", MASK_NO_RED_ZONE },
2629 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2630 { "-mrecip", MASK_RECIP },
2631 { "-mrtd", MASK_RTD },
2632 { "-msseregparm", MASK_SSEREGPARM },
2633 { "-mstack-arg-probe", MASK_STACK_PROBE },
2634 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2635 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2638 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2640 char isa_other[40];
2641 char target_other[40];
2642 unsigned num = 0;
2643 unsigned i, j;
2644 char *ret;
2645 char *ptr;
2646 size_t len;
2647 size_t line_len;
2648 size_t sep_len;
2650 memset (opts, '\0', sizeof (opts));
2652 /* Add -march= option. */
2653 if (arch)
2655 opts[num][0] = "-march=";
2656 opts[num++][1] = arch;
2659 /* Add -mtune= option. */
2660 if (tune)
2662 opts[num][0] = "-mtune=";
2663 opts[num++][1] = tune;
2666 /* Pick out the options in isa options. */
2667 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2669 if ((isa & isa_opts[i].mask) != 0)
2671 opts[num++][0] = isa_opts[i].option;
2672 isa &= ~ isa_opts[i].mask;
2676 if (isa && add_nl_p)
2678 opts[num++][0] = isa_other;
2679 sprintf (isa_other, "(other isa: %#x)", isa);
2682 /* Add flag options. */
2683 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2685 if ((flags & flag_opts[i].mask) != 0)
2687 opts[num++][0] = flag_opts[i].option;
2688 flags &= ~ flag_opts[i].mask;
2692 if (flags && add_nl_p)
2694 opts[num++][0] = target_other;
2695 sprintf (target_other, "(other flags: %#x)", flags);
2698 /* Add -fpmath= option. */
2699 if (fpmath)
2701 opts[num][0] = "-mfpmath=";
2702 opts[num++][1] = fpmath;
2705 /* Any options? */
2706 if (num == 0)
2707 return NULL;
2709 gcc_assert (num < ARRAY_SIZE (opts));
2711 /* Size the string. */
2712 len = 0;
2713 sep_len = (add_nl_p) ? 3 : 1;
2714 for (i = 0; i < num; i++)
2716 len += sep_len;
2717 for (j = 0; j < 2; j++)
2718 if (opts[i][j])
2719 len += strlen (opts[i][j]);
2722 /* Build the string. */
2723 ret = ptr = (char *) xmalloc (len);
2724 line_len = 0;
2726 for (i = 0; i < num; i++)
2728 size_t len2[2];
2730 for (j = 0; j < 2; j++)
2731 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2733 if (i != 0)
2735 *ptr++ = ' ';
2736 line_len++;
2738 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2740 *ptr++ = '\\';
2741 *ptr++ = '\n';
2742 line_len = 0;
2746 for (j = 0; j < 2; j++)
2747 if (opts[i][j])
2749 memcpy (ptr, opts[i][j], len2[j]);
2750 ptr += len2[j];
2751 line_len += len2[j];
2755 *ptr = '\0';
2756 gcc_assert (ret + len >= ptr);
2758 return ret;
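/* A typical use of ix86_target_string (cf. ix86_debug_options below); the
   arch/tune/fpmath strings here are only illustrative values, and the
   caller owns and must free the returned buffer:

     char *opts = ix86_target_string (ix86_isa_flags, target_flags,
                                      "k8", "generic", "sse", true);
     if (opts)
       {
         fprintf (stderr, "%s\n", opts);
         free (opts);
       }  */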
2761 /* Return TRUE if software prefetching is beneficial for the
2762 given CPU. */
2764 static bool
2765 software_prefetching_beneficial_p (void)
2767 switch (ix86_tune)
2769 case PROCESSOR_GEODE:
2770 case PROCESSOR_K6:
2771 case PROCESSOR_ATHLON:
2772 case PROCESSOR_K8:
2773 case PROCESSOR_AMDFAM10:
2774 return true;
2776 default:
2777 return false;
2781 /* Return true if profiling code should be emitted before the
2782 prologue; otherwise return false. On x86 this is the case only
2783 when -mfentry is in effect. */
2784 static bool
2785 ix86_profile_before_prologue (void)
2787 return flag_fentry != 0;
2790 /* Function that is callable from the debugger to print the current
2791 options. */
2792 void
2793 ix86_debug_options (void)
2795 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2796 ix86_arch_string, ix86_tune_string,
2797 ix86_fpmath_string, true);
2799 if (opts)
2801 fprintf (stderr, "%s\n\n", opts);
2802 free (opts);
2804 else
2805 fputs ("<no options>\n\n", stderr);
2807 return;
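/* For example, from within gdb:

     (gdb) call ix86_debug_options ()

   dumps the options currently in effect to stderr.  */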
2810 /* Override various settings based on options. If MAIN_ARGS_P, the
2811 options are from the command line, otherwise they are from
2812 attributes. */
2814 static void
2815 ix86_option_override_internal (bool main_args_p)
2817 int i;
2818 unsigned int ix86_arch_mask, ix86_tune_mask;
2819 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2820 const char *prefix;
2821 const char *suffix;
2822 const char *sw;
2824 /* Comes from final.c -- no real reason to change it. */
2825 #define MAX_CODE_ALIGN 16
2827 enum pta_flags
2829 PTA_SSE = 1 << 0,
2830 PTA_SSE2 = 1 << 1,
2831 PTA_SSE3 = 1 << 2,
2832 PTA_MMX = 1 << 3,
2833 PTA_PREFETCH_SSE = 1 << 4,
2834 PTA_3DNOW = 1 << 5,
2835 PTA_3DNOW_A = 1 << 6,
2836 PTA_64BIT = 1 << 7,
2837 PTA_SSSE3 = 1 << 8,
2838 PTA_CX16 = 1 << 9,
2839 PTA_POPCNT = 1 << 10,
2840 PTA_ABM = 1 << 11,
2841 PTA_SSE4A = 1 << 12,
2842 PTA_NO_SAHF = 1 << 13,
2843 PTA_SSE4_1 = 1 << 14,
2844 PTA_SSE4_2 = 1 << 15,
2845 PTA_AES = 1 << 16,
2846 PTA_PCLMUL = 1 << 17,
2847 PTA_AVX = 1 << 18,
2848 PTA_FMA = 1 << 19,
2849 PTA_MOVBE = 1 << 20,
2850 PTA_FMA4 = 1 << 21,
2851 PTA_XOP = 1 << 22,
2852 PTA_LWP = 1 << 23,
2853 PTA_FSGSBASE = 1 << 24,
2854 PTA_RDRND = 1 << 25,
2855 PTA_F16C = 1 << 26
2858 static struct pta
2860 const char *const name; /* processor name or nickname. */
2861 const enum processor_type processor;
2862 const enum attr_cpu schedule;
2863 const unsigned /*enum pta_flags*/ flags;
2865 const processor_alias_table[] =
2867 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2868 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2869 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2870 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2871 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2872 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2873 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2874 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2875 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2876 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2877 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2878 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2879 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2880 PTA_MMX | PTA_SSE},
2881 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2882 PTA_MMX | PTA_SSE},
2883 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2884 PTA_MMX | PTA_SSE | PTA_SSE2},
2885 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2886 PTA_MMX |PTA_SSE | PTA_SSE2},
2887 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2888 PTA_MMX | PTA_SSE | PTA_SSE2},
2889 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2890 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2891 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2892 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2893 | PTA_CX16 | PTA_NO_SAHF},
2894 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2895 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2896 | PTA_SSSE3 | PTA_CX16},
2897 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2898 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2899 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2900 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2901 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2902 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2903 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2904 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2905 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2906 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2907 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2908 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2909 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2910 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2911 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2912 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2913 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2914 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2915 {"x86-64", PROCESSOR_K8, CPU_K8,
2916 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2917 {"k8", PROCESSOR_K8, CPU_K8,
2918 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2919 | PTA_SSE2 | PTA_NO_SAHF},
2920 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2921 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2922 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2923 {"opteron", PROCESSOR_K8, CPU_K8,
2924 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2925 | PTA_SSE2 | PTA_NO_SAHF},
2926 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2927 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2928 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2929 {"athlon64", PROCESSOR_K8, CPU_K8,
2930 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2931 | PTA_SSE2 | PTA_NO_SAHF},
2932 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2933 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2934 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2935 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2936 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2937 | PTA_SSE2 | PTA_NO_SAHF},
2938 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2939 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2940 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2941 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2942 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2943 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2944 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
2945 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2946 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM
2947 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
2948 | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP},
2949 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2950 0 /* flags are only used for -march switch. */ },
2951 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2952 PTA_64BIT /* flags are only used for -march switch. */ },
2955 int const pta_size = ARRAY_SIZE (processor_alias_table);
2957 /* Set up prefix/suffix so the error messages refer to either the command
2958 line argument, or the attribute(target). */
2959 if (main_args_p)
2961 prefix = "-m";
2962 suffix = "";
2963 sw = "switch";
2965 else
2967 prefix = "option(\"";
2968 suffix = "\")";
2969 sw = "attribute";
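/* With these settings the same format string yields, for instance,

     bad value (foo) for -mtune= switch

   when processing the command line, and

     bad value (foo) for option("tune=") attribute

   when processing __attribute__((target("..."))), because prefix, suffix
   and sw are substituted into the error calls below.  */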
2972 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2973 SUBTARGET_OVERRIDE_OPTIONS;
2974 #endif
2976 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2977 SUBSUBTARGET_OVERRIDE_OPTIONS;
2978 #endif
2980 /* -fPIC is the default for x86_64. */
2981 if (TARGET_MACHO && TARGET_64BIT)
2982 flag_pic = 2;
2984 /* Need to check -mtune=generic first. */
2985 if (ix86_tune_string)
2987 if (!strcmp (ix86_tune_string, "generic")
2988 || !strcmp (ix86_tune_string, "i686")
2989 /* As special support for cross compilers we read -mtune=native
2990 as -mtune=generic. With native compilers we won't see the
2991 -mtune=native, as it was changed by the driver. */
2992 || !strcmp (ix86_tune_string, "native"))
2994 if (TARGET_64BIT)
2995 ix86_tune_string = "generic64";
2996 else
2997 ix86_tune_string = "generic32";
2999 /* If this call is for setting the option attribute, allow the
3000 generic32/generic64 that was previously set. */
3001 else if (!main_args_p
3002 && (!strcmp (ix86_tune_string, "generic32")
3003 || !strcmp (ix86_tune_string, "generic64")))
3005 else if (!strncmp (ix86_tune_string, "generic", 7))
3006 error ("bad value (%s) for %stune=%s %s",
3007 ix86_tune_string, prefix, suffix, sw);
3008 else if (!strcmp (ix86_tune_string, "x86-64"))
3009 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
3010 "%stune=k8%s or %stune=generic%s instead as appropriate.",
3011 prefix, suffix, prefix, suffix, prefix, suffix);
3013 else
3015 if (ix86_arch_string)
3016 ix86_tune_string = ix86_arch_string;
3017 if (!ix86_tune_string)
3019 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3020 ix86_tune_defaulted = 1;
3023 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3024 need to use a sensible tune option. */
3025 if (!strcmp (ix86_tune_string, "generic")
3026 || !strcmp (ix86_tune_string, "x86-64")
3027 || !strcmp (ix86_tune_string, "i686"))
3029 if (TARGET_64BIT)
3030 ix86_tune_string = "generic64";
3031 else
3032 ix86_tune_string = "generic32";
3036 if (ix86_stringop_string)
3038 if (!strcmp (ix86_stringop_string, "rep_byte"))
3039 stringop_alg = rep_prefix_1_byte;
3040 else if (!strcmp (ix86_stringop_string, "libcall"))
3041 stringop_alg = libcall;
3042 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
3043 stringop_alg = rep_prefix_4_byte;
3044 else if (!strcmp (ix86_stringop_string, "rep_8byte")
3045 && TARGET_64BIT)
3046 /* rep; movq isn't available in 32-bit code. */
3047 stringop_alg = rep_prefix_8_byte;
3048 else if (!strcmp (ix86_stringop_string, "byte_loop"))
3049 stringop_alg = loop_1_byte;
3050 else if (!strcmp (ix86_stringop_string, "loop"))
3051 stringop_alg = loop;
3052 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
3053 stringop_alg = unrolled_loop;
3054 else
3055 error ("bad value (%s) for %sstringop-strategy=%s %s",
3056 ix86_stringop_string, prefix, suffix, sw);
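/* So, for example, -mstringop-strategy=rep_8byte selects rep_prefix_8_byte
   (64-bit only, since rep; movq does not exist in 32-bit code), while
   -mstringop-strategy=unrolled_loop selects unrolled_loop; any other value
   is rejected with the error above.  */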
3059 if (!ix86_arch_string)
3060 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3061 else
3062 ix86_arch_specified = 1;
3064 /* Validate -mabi= value. */
3065 if (ix86_abi_string)
3067 if (strcmp (ix86_abi_string, "sysv") == 0)
3068 ix86_abi = SYSV_ABI;
3069 else if (strcmp (ix86_abi_string, "ms") == 0)
3070 ix86_abi = MS_ABI;
3071 else
3072 error ("unknown ABI (%s) for %sabi=%s %s",
3073 ix86_abi_string, prefix, suffix, sw);
3075 else
3076 ix86_abi = DEFAULT_ABI;
3078 if (ix86_cmodel_string != 0)
3080 if (!strcmp (ix86_cmodel_string, "small"))
3081 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3082 else if (!strcmp (ix86_cmodel_string, "medium"))
3083 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
3084 else if (!strcmp (ix86_cmodel_string, "large"))
3085 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
3086 else if (flag_pic)
3087 error ("code model %s does not support PIC mode", ix86_cmodel_string);
3088 else if (!strcmp (ix86_cmodel_string, "32"))
3089 ix86_cmodel = CM_32;
3090 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
3091 ix86_cmodel = CM_KERNEL;
3092 else
3093 error ("bad value (%s) for %scmodel=%s %s",
3094 ix86_cmodel_string, prefix, suffix, sw);
3096 else
3098 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3099 use of rip-relative addressing. This eliminates fixups that
3100 would otherwise be needed if this object is to be placed in a
3101 DLL, and is essentially just as efficient as direct addressing. */
3102 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3103 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3104 else if (TARGET_64BIT)
3105 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3106 else
3107 ix86_cmodel = CM_32;
3109 if (ix86_asm_string != 0)
3111 if (! TARGET_MACHO
3112 && !strcmp (ix86_asm_string, "intel"))
3113 ix86_asm_dialect = ASM_INTEL;
3114 else if (!strcmp (ix86_asm_string, "att"))
3115 ix86_asm_dialect = ASM_ATT;
3116 else
3117 error ("bad value (%s) for %sasm=%s %s",
3118 ix86_asm_string, prefix, suffix, sw);
3120 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
3121 error ("code model %qs not supported in the %s bit mode",
3122 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3123 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3124 sorry ("%i-bit mode not compiled in",
3125 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3127 for (i = 0; i < pta_size; i++)
3128 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3130 ix86_schedule = processor_alias_table[i].schedule;
3131 ix86_arch = processor_alias_table[i].processor;
3132 /* Default cpu tuning to the architecture. */
3133 ix86_tune = ix86_arch;
3135 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3136 error ("CPU you selected does not support x86-64 "
3137 "instruction set");
3139 if (processor_alias_table[i].flags & PTA_MMX
3140 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3141 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3142 if (processor_alias_table[i].flags & PTA_3DNOW
3143 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3144 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3145 if (processor_alias_table[i].flags & PTA_3DNOW_A
3146 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3147 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3148 if (processor_alias_table[i].flags & PTA_SSE
3149 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3150 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3151 if (processor_alias_table[i].flags & PTA_SSE2
3152 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3153 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3154 if (processor_alias_table[i].flags & PTA_SSE3
3155 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3156 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3157 if (processor_alias_table[i].flags & PTA_SSSE3
3158 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3159 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3160 if (processor_alias_table[i].flags & PTA_SSE4_1
3161 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3162 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3163 if (processor_alias_table[i].flags & PTA_SSE4_2
3164 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3165 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3166 if (processor_alias_table[i].flags & PTA_AVX
3167 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3168 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3169 if (processor_alias_table[i].flags & PTA_FMA
3170 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3171 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3172 if (processor_alias_table[i].flags & PTA_SSE4A
3173 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3174 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3175 if (processor_alias_table[i].flags & PTA_FMA4
3176 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3177 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3178 if (processor_alias_table[i].flags & PTA_XOP
3179 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3180 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3181 if (processor_alias_table[i].flags & PTA_LWP
3182 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3183 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3184 if (processor_alias_table[i].flags & PTA_ABM
3185 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3186 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3187 if (processor_alias_table[i].flags & PTA_CX16
3188 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3189 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3190 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3191 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3192 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3193 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3194 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3195 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3196 if (processor_alias_table[i].flags & PTA_MOVBE
3197 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3198 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3199 if (processor_alias_table[i].flags & PTA_AES
3200 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3201 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3202 if (processor_alias_table[i].flags & PTA_PCLMUL
3203 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3204 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3205 if (processor_alias_table[i].flags & PTA_FSGSBASE
3206 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3207 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3208 if (processor_alias_table[i].flags & PTA_RDRND
3209 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3210 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3211 if (processor_alias_table[i].flags & PTA_F16C
3212 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3213 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3214 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3215 x86_prefetch_sse = true;
3217 break;
3220 if (!strcmp (ix86_arch_string, "generic"))
3221 error ("generic CPU can be used only for %stune=%s %s",
3222 prefix, suffix, sw);
3223 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3224 error ("bad value (%s) for %sarch=%s %s",
3225 ix86_arch_string, prefix, suffix, sw);
3227 ix86_arch_mask = 1u << ix86_arch;
3228 for (i = 0; i < X86_ARCH_LAST; ++i)
3229 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
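/* For example, a command line such as

     gcc -m32 -march=core2 -mno-ssse3 foo.c

   reaches this point with PTA_SSSE3 set in the core2 alias-table entry,
   but the explicit -mno-ssse3 has already recorded that bit in
   ix86_isa_flags_explicit, so the loop above leaves SSSE3 disabled while
   still turning on the MMX, SSE, SSE2 and SSE3 implied by the arch.  */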
3231 for (i = 0; i < pta_size; i++)
3232 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3234 ix86_schedule = processor_alias_table[i].schedule;
3235 ix86_tune = processor_alias_table[i].processor;
3236 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3238 if (ix86_tune_defaulted)
3240 ix86_tune_string = "x86-64";
3241 for (i = 0; i < pta_size; i++)
3242 if (! strcmp (ix86_tune_string,
3243 processor_alias_table[i].name))
3244 break;
3245 ix86_schedule = processor_alias_table[i].schedule;
3246 ix86_tune = processor_alias_table[i].processor;
3248 else
3249 error ("CPU you selected does not support x86-64 "
3250 "instruction set");
3252 /* Intel CPUs have always interpreted SSE prefetch instructions as
3253 NOPs; so, we can enable SSE prefetch instructions even when
3254 -mtune (rather than -march) points us to a processor that has them.
3255 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3256 higher processors. */
3257 if (TARGET_CMOVE
3258 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3259 x86_prefetch_sse = true;
3260 break;
3263 if (ix86_tune_specified && i == pta_size)
3264 error ("bad value (%s) for %stune=%s %s",
3265 ix86_tune_string, prefix, suffix, sw);
3267 ix86_tune_mask = 1u << ix86_tune;
3268 for (i = 0; i < X86_TUNE_LAST; ++i)
3269 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3271 #ifndef USE_IX86_FRAME_POINTER
3272 #define USE_IX86_FRAME_POINTER 0
3273 #endif
3275 /* Set the default values for switches whose default depends on TARGET_64BIT
3276 in case they weren't overwritten by command line options. */
3277 if (TARGET_64BIT)
3279 if (flag_zee == 2)
3280 flag_zee = 1;
3281 if (flag_omit_frame_pointer == 2)
3282 flag_omit_frame_pointer = 1;
3283 if (flag_asynchronous_unwind_tables == 2)
3284 flag_asynchronous_unwind_tables = 1;
3285 if (flag_pcc_struct_return == 2)
3286 flag_pcc_struct_return = 0;
3288 else
3290 if (flag_zee == 2)
3291 flag_zee = 0;
3292 if (flag_omit_frame_pointer == 2)
3293 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3294 if (flag_asynchronous_unwind_tables == 2)
3295 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3296 if (flag_pcc_struct_return == 2)
3297 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3300 if (optimize_size)
3301 ix86_cost = &ix86_size_cost;
3302 else
3303 ix86_cost = processor_target_table[ix86_tune].cost;
3305 /* Arrange to set up i386_stack_locals for all functions. */
3306 init_machine_status = ix86_init_machine_status;
3308 /* Validate -mregparm= value. */
3309 if (ix86_regparm_string)
3311 if (TARGET_64BIT)
3312 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3313 i = atoi (ix86_regparm_string);
3314 if (i < 0 || i > REGPARM_MAX)
3315 error ("%sregparm=%d%s is not between 0 and %d",
3316 prefix, i, suffix, REGPARM_MAX);
3317 else
3318 ix86_regparm = i;
3320 if (TARGET_64BIT)
3321 ix86_regparm = REGPARM_MAX;
3323 /* If the user has provided any of the -malign-* options,
3324 warn and use that value only if -falign-* is not set.
3325 Remove this code in GCC 3.2 or later. */
3326 if (ix86_align_loops_string)
3328 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3329 prefix, suffix, suffix);
3330 if (align_loops == 0)
3332 i = atoi (ix86_align_loops_string);
3333 if (i < 0 || i > MAX_CODE_ALIGN)
3334 error ("%salign-loops=%d%s is not between 0 and %d",
3335 prefix, i, suffix, MAX_CODE_ALIGN);
3336 else
3337 align_loops = 1 << i;
3341 if (ix86_align_jumps_string)
3343 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3344 prefix, suffix, suffix);
3345 if (align_jumps == 0)
3347 i = atoi (ix86_align_jumps_string);
3348 if (i < 0 || i > MAX_CODE_ALIGN)
3349 error ("%salign-loops=%d%s is not between 0 and %d",
3350 prefix, i, suffix, MAX_CODE_ALIGN);
3351 else
3352 align_jumps = 1 << i;
3356 if (ix86_align_funcs_string)
3358 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3359 prefix, suffix, suffix);
3360 if (align_functions == 0)
3362 i = atoi (ix86_align_funcs_string);
3363 if (i < 0 || i > MAX_CODE_ALIGN)
3364 error ("%salign-loops=%d%s is not between 0 and %d",
3365 prefix, i, suffix, MAX_CODE_ALIGN);
3366 else
3367 align_functions = 1 << i;
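/* Each of these obsolete -malign-* options takes a power-of-two exponent
   rather than a byte count; e.g. -malign-loops=4 ends up as
   align_loops = 1 << 4 = 16 bytes, which is why the value is checked
   against MAX_CODE_ALIGN instead of a byte limit.  */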
3371 /* Default align_* from the processor table. */
3372 if (align_loops == 0)
3374 align_loops = processor_target_table[ix86_tune].align_loop;
3375 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3377 if (align_jumps == 0)
3379 align_jumps = processor_target_table[ix86_tune].align_jump;
3380 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3382 if (align_functions == 0)
3384 align_functions = processor_target_table[ix86_tune].align_func;
3387 /* Validate -mbranch-cost= value, or provide default. */
3388 ix86_branch_cost = ix86_cost->branch_cost;
3389 if (ix86_branch_cost_string)
3391 i = atoi (ix86_branch_cost_string);
3392 if (i < 0 || i > 5)
3393 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3394 else
3395 ix86_branch_cost = i;
3397 if (ix86_section_threshold_string)
3399 i = atoi (ix86_section_threshold_string);
3400 if (i < 0)
3401 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3402 else
3403 ix86_section_threshold = i;
3406 if (ix86_tls_dialect_string)
3408 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3409 ix86_tls_dialect = TLS_DIALECT_GNU;
3410 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3411 ix86_tls_dialect = TLS_DIALECT_GNU2;
3412 else
3413 error ("bad value (%s) for %stls-dialect=%s %s",
3414 ix86_tls_dialect_string, prefix, suffix, sw);
3417 if (ix87_precision_string)
3419 i = atoi (ix87_precision_string);
3420 if (i != 32 && i != 64 && i != 80)
3421 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3424 if (TARGET_64BIT)
3426 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3428 /* Enable by default the SSE and MMX builtins. Do allow the user to
3429 explicitly disable any of these. In particular, disabling SSE and
3430 MMX for kernel code is extremely useful. */
3431 if (!ix86_arch_specified)
3432 ix86_isa_flags
3433 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3434 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3436 if (TARGET_RTD)
3437 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3439 else
3441 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3443 if (!ix86_arch_specified)
3444 ix86_isa_flags
3445 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3447 /* The i386 ABI does not specify a red zone. It still makes sense to use one
3448 when the programmer takes care to keep the stack from being destroyed. */
3449 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3450 target_flags |= MASK_NO_RED_ZONE;
3453 /* Keep nonleaf frame pointers. */
3454 if (flag_omit_frame_pointer)
3455 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3456 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3457 flag_omit_frame_pointer = 1;
3459 /* If we're doing fast math, we don't care about comparison order
3460 wrt NaNs. This lets us use a shorter comparison sequence. */
3461 if (flag_finite_math_only)
3462 target_flags &= ~MASK_IEEE_FP;
3464 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3465 since the insns won't need emulation. */
3466 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3467 target_flags &= ~MASK_NO_FANCY_MATH_387;
3469 /* Likewise, if the target doesn't have a 387, or we've specified
3470 software floating point, don't use 387 inline intrinsics. */
3471 if (!TARGET_80387)
3472 target_flags |= MASK_NO_FANCY_MATH_387;
3474 /* Turn on MMX builtins for -msse. */
3475 if (TARGET_SSE)
3477 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3478 x86_prefetch_sse = true;
3481 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3482 if (TARGET_SSE4_2 || TARGET_ABM)
3483 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3485 /* Validate -mpreferred-stack-boundary= value or default it to
3486 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3487 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3488 if (ix86_preferred_stack_boundary_string)
3490 i = atoi (ix86_preferred_stack_boundary_string);
3491 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3492 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3493 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3494 else
3495 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
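/* The option value is an exponent: -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. the 16-byte alignment needed
   for aligned SSE spill slots and arguments.  */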
3498 /* Set the default value for -mstackrealign. */
3499 if (ix86_force_align_arg_pointer == -1)
3500 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3502 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3504 /* Validate -mincoming-stack-boundary= value or default it to
3505 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3506 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3507 if (ix86_incoming_stack_boundary_string)
3509 i = atoi (ix86_incoming_stack_boundary_string);
3510 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3511 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3512 i, TARGET_64BIT ? 4 : 2);
3513 else
3515 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3516 ix86_incoming_stack_boundary
3517 = ix86_user_incoming_stack_boundary;
3521 /* Accept -msseregparm only if at least SSE support is enabled. */
3522 if (TARGET_SSEREGPARM
3523 && ! TARGET_SSE)
3524 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3526 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3527 if (ix86_fpmath_string != 0)
3529 if (! strcmp (ix86_fpmath_string, "387"))
3530 ix86_fpmath = FPMATH_387;
3531 else if (! strcmp (ix86_fpmath_string, "sse"))
3533 if (!TARGET_SSE)
3535 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3536 ix86_fpmath = FPMATH_387;
3538 else
3539 ix86_fpmath = FPMATH_SSE;
3541 else if (! strcmp (ix86_fpmath_string, "387,sse")
3542 || ! strcmp (ix86_fpmath_string, "387+sse")
3543 || ! strcmp (ix86_fpmath_string, "sse,387")
3544 || ! strcmp (ix86_fpmath_string, "sse+387")
3545 || ! strcmp (ix86_fpmath_string, "both"))
3547 if (!TARGET_SSE)
3549 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3550 ix86_fpmath = FPMATH_387;
3552 else if (!TARGET_80387)
3554 warning (0, "387 instruction set disabled, using SSE arithmetics");
3555 ix86_fpmath = FPMATH_SSE;
3557 else
3558 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3560 else
3561 error ("bad value (%s) for %sfpmath=%s %s",
3562 ix86_fpmath_string, prefix, suffix, sw);
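/* So, for instance, -mfpmath=sse,387 on a 32-bit target with SSE enabled
   selects FPMATH_SSE | FPMATH_387 and lets both the XMM and x87 register
   files be used for scalar float math, while -mfpmath=sse without SSE
   support falls back to 387 with the warning emitted above.  */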
3565 /* If the i387 is disabled, then do not return values in it. */
3566 if (!TARGET_80387)
3567 target_flags &= ~MASK_FLOAT_RETURNS;
3569 /* Use external vectorized library in vectorizing intrinsics. */
3570 if (ix86_veclibabi_string)
3572 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3573 ix86_veclib_handler = ix86_veclibabi_svml;
3574 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3575 ix86_veclib_handler = ix86_veclibabi_acml;
3576 else
3577 error ("unknown vectorization library ABI type (%s) for "
3578 "%sveclibabi=%s %s", ix86_veclibabi_string,
3579 prefix, suffix, sw);
3582 if ((!USE_IX86_FRAME_POINTER
3583 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3584 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3585 && !optimize_size)
3586 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3588 /* ??? Unwind info is not correct around the CFG unless either a frame
3589 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3590 unwind info generation to be aware of the CFG and propagating states
3591 around edges. */
3592 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3593 || flag_exceptions || flag_non_call_exceptions)
3594 && flag_omit_frame_pointer
3595 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3597 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3598 warning (0, "unwind tables currently require either a frame pointer "
3599 "or %saccumulate-outgoing-args%s for correctness",
3600 prefix, suffix);
3601 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3604 /* If stack probes are required, the space used for large function
3605 arguments on the stack must also be probed, so enable
3606 -maccumulate-outgoing-args so this happens in the prologue. */
3607 if (TARGET_STACK_PROBE
3608 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3610 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3611 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3612 "for correctness", prefix, suffix);
3613 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3616 /* For sane SSE instruction set generation we need the fcomi instruction.
3617 It is safe to enable all CMOVE instructions. */
3618 if (TARGET_SSE)
3619 TARGET_CMOVE = 1;
3621 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3623 char *p;
3624 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3625 p = strchr (internal_label_prefix, 'X');
3626 internal_label_prefix_len = p - internal_label_prefix;
3627 *p = '\0';
3630 /* When the scheduling description is not available, disable the scheduler pass
3631 so it won't slow down compilation and make x87 code slower. */
3632 if (!TARGET_SCHEDULE)
3633 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3635 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3636 set_param_value ("simultaneous-prefetches",
3637 ix86_cost->simultaneous_prefetches);
3638 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3639 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3640 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3641 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3642 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3643 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3645 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
3646 if (flag_prefetch_loop_arrays < 0
3647 && HAVE_prefetch
3648 && optimize >= 3
3649 && software_prefetching_beneficial_p ())
3650 flag_prefetch_loop_arrays = 1;
3652 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3653 can be optimized to ap = __builtin_next_arg (0). */
3654 if (!TARGET_64BIT)
3655 targetm.expand_builtin_va_start = NULL;
3657 if (TARGET_64BIT)
3659 ix86_gen_leave = gen_leave_rex64;
3660 ix86_gen_add3 = gen_adddi3;
3661 ix86_gen_sub3 = gen_subdi3;
3662 ix86_gen_sub3_carry = gen_subdi3_carry;
3663 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3664 ix86_gen_monitor = gen_sse3_monitor64;
3665 ix86_gen_andsp = gen_anddi3;
3666 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_64;
3667 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3668 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3670 else
3672 ix86_gen_leave = gen_leave;
3673 ix86_gen_add3 = gen_addsi3;
3674 ix86_gen_sub3 = gen_subsi3;
3675 ix86_gen_sub3_carry = gen_subsi3_carry;
3676 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3677 ix86_gen_monitor = gen_sse3_monitor;
3678 ix86_gen_andsp = gen_andsi3;
3679 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_32;
3680 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3681 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
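/* These ix86_gen_* hooks let later, mode-independent code emit
   Pmode-sized operations without testing TARGET_64BIT at each call
   site; as a sketch of typical use,

     emit_insn (ix86_gen_add3 (dest, src, offset));

   expands through an adddi3 pattern in 64-bit mode and addsi3
   otherwise.  */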
3684 #ifdef USE_IX86_CLD
3685 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3686 if (!TARGET_64BIT)
3687 target_flags |= MASK_CLD & ~target_flags_explicit;
3688 #endif
3690 if (!TARGET_64BIT && flag_pic)
3692 if (flag_fentry > 0)
3693 sorry ("-mfentry is not supported for 32-bit in combination with -fpic");
3694 flag_fentry = 0;
3696 if (flag_fentry < 0)
3698 #if defined(PROFILE_BEFORE_PROLOGUE)
3699 flag_fentry = 1;
3700 #else
3701 flag_fentry = 0;
3702 #endif
3705 /* Save the initial options in case the user does function-specific options. */
3706 if (main_args_p)
3707 target_option_default_node = target_option_current_node
3708 = build_target_option_node ();
3711 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3713 static void
3714 ix86_option_override (void)
3716 ix86_option_override_internal (true);
3719 /* Update register usage after having seen the compiler flags. */
3721 void
3722 ix86_conditional_register_usage (void)
3724 int i;
3725 unsigned int j;
3727 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3729 if (fixed_regs[i] > 1)
3730 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3731 if (call_used_regs[i] > 1)
3732 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3735 /* The PIC register, if it exists, is fixed. */
3736 j = PIC_OFFSET_TABLE_REGNUM;
3737 if (j != INVALID_REGNUM)
3738 fixed_regs[j] = call_used_regs[j] = 1;
3740 /* The MS_ABI changes the set of call-used registers. */
3741 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3743 call_used_regs[SI_REG] = 0;
3744 call_used_regs[DI_REG] = 0;
3745 call_used_regs[XMM6_REG] = 0;
3746 call_used_regs[XMM7_REG] = 0;
3747 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3748 call_used_regs[i] = 0;
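/* Under the 64-bit MS ABI, RSI, RDI and XMM6-XMM15 are callee-saved,
   unlike in the SYSV ABI; the assignments above encode that by
   clearing their call_used_regs entries.  */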
3751 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3752 other call-clobbered regs for 64-bit. */
3753 if (TARGET_64BIT)
3755 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3757 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3758 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3759 && call_used_regs[i])
3760 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3763 /* If MMX is disabled, squash the registers. */
3764 if (! TARGET_MMX)
3765 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3766 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3767 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3769 /* If SSE is disabled, squash the registers. */
3770 if (! TARGET_SSE)
3771 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3772 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3773 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3775 /* If the FPU is disabled, squash the registers. */
3776 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3777 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3778 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3779 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3781 /* If 32-bit, squash the 64-bit registers. */
3782 if (! TARGET_64BIT)
3784 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3785 reg_names[i] = "";
3786 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3787 reg_names[i] = "";
3792 /* Save the current options */
3794 static void
3795 ix86_function_specific_save (struct cl_target_option *ptr)
3797 ptr->arch = ix86_arch;
3798 ptr->schedule = ix86_schedule;
3799 ptr->tune = ix86_tune;
3800 ptr->fpmath = ix86_fpmath;
3801 ptr->branch_cost = ix86_branch_cost;
3802 ptr->tune_defaulted = ix86_tune_defaulted;
3803 ptr->arch_specified = ix86_arch_specified;
3804 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3805 ptr->target_flags_explicit = target_flags_explicit;
3807 /* The fields are char but the variables are not; make sure the
3808 values fit in the fields. */
3809 gcc_assert (ptr->arch == ix86_arch);
3810 gcc_assert (ptr->schedule == ix86_schedule);
3811 gcc_assert (ptr->tune == ix86_tune);
3812 gcc_assert (ptr->fpmath == ix86_fpmath);
3813 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3816 /* Restore the current options */
3818 static void
3819 ix86_function_specific_restore (struct cl_target_option *ptr)
3821 enum processor_type old_tune = ix86_tune;
3822 enum processor_type old_arch = ix86_arch;
3823 unsigned int ix86_arch_mask, ix86_tune_mask;
3824 int i;
3826 ix86_arch = (enum processor_type) ptr->arch;
3827 ix86_schedule = (enum attr_cpu) ptr->schedule;
3828 ix86_tune = (enum processor_type) ptr->tune;
3829 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3830 ix86_branch_cost = ptr->branch_cost;
3831 ix86_tune_defaulted = ptr->tune_defaulted;
3832 ix86_arch_specified = ptr->arch_specified;
3833 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3834 target_flags_explicit = ptr->target_flags_explicit;
3836 /* Recreate the arch feature tests if the arch changed */
3837 if (old_arch != ix86_arch)
3839 ix86_arch_mask = 1u << ix86_arch;
3840 for (i = 0; i < X86_ARCH_LAST; ++i)
3841 ix86_arch_features[i]
3842 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3845 /* Recreate the tune optimization tests */
3846 if (old_tune != ix86_tune)
3848 ix86_tune_mask = 1u << ix86_tune;
3849 for (i = 0; i < X86_TUNE_LAST; ++i)
3850 ix86_tune_features[i]
3851 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3855 /* Print the current options */
3857 static void
3858 ix86_function_specific_print (FILE *file, int indent,
3859 struct cl_target_option *ptr)
3861 char *target_string
3862 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3863 NULL, NULL, NULL, false);
3865 fprintf (file, "%*sarch = %d (%s)\n",
3866 indent, "",
3867 ptr->arch,
3868 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3869 ? cpu_names[ptr->arch]
3870 : "<unknown>"));
3872 fprintf (file, "%*stune = %d (%s)\n",
3873 indent, "",
3874 ptr->tune,
3875 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3876 ? cpu_names[ptr->tune]
3877 : "<unknown>"));
3879 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3880 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3881 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3882 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3884 if (target_string)
3886 fprintf (file, "%*s%s\n", indent, "", target_string);
3887 free (target_string);
3892 /* Inner function to process the attribute((target(...))), take an argument and
3893 set the current options from the argument. If we have a list, recursively go
3894 over the list. */
3896 static bool
3897 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3899 char *next_optstr;
3900 bool ret = true;
3902 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3903 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3904 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3905 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3907 enum ix86_opt_type
3909 ix86_opt_unknown,
3910 ix86_opt_yes,
3911 ix86_opt_no,
3912 ix86_opt_str,
3913 ix86_opt_isa
3916 static const struct
3918 const char *string;
3919 size_t len;
3920 enum ix86_opt_type type;
3921 int opt;
3922 int mask;
3923 } attrs[] = {
3924 /* isa options */
3925 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3926 IX86_ATTR_ISA ("abm", OPT_mabm),
3927 IX86_ATTR_ISA ("aes", OPT_maes),
3928 IX86_ATTR_ISA ("avx", OPT_mavx),
3929 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3930 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3931 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3932 IX86_ATTR_ISA ("sse", OPT_msse),
3933 IX86_ATTR_ISA ("sse2", OPT_msse2),
3934 IX86_ATTR_ISA ("sse3", OPT_msse3),
3935 IX86_ATTR_ISA ("sse4", OPT_msse4),
3936 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3937 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3938 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3939 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3940 IX86_ATTR_ISA ("fma4", OPT_mfma4),
3941 IX86_ATTR_ISA ("xop", OPT_mxop),
3942 IX86_ATTR_ISA ("lwp", OPT_mlwp),
3943 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
3944 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
3945 IX86_ATTR_ISA ("f16c", OPT_mf16c),
3947 /* string options */
3948 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3949 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3950 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3952 /* flag options */
3953 IX86_ATTR_YES ("cld",
3954 OPT_mcld,
3955 MASK_CLD),
3957 IX86_ATTR_NO ("fancy-math-387",
3958 OPT_mfancy_math_387,
3959 MASK_NO_FANCY_MATH_387),
3961 IX86_ATTR_YES ("ieee-fp",
3962 OPT_mieee_fp,
3963 MASK_IEEE_FP),
3965 IX86_ATTR_YES ("inline-all-stringops",
3966 OPT_minline_all_stringops,
3967 MASK_INLINE_ALL_STRINGOPS),
3969 IX86_ATTR_YES ("inline-stringops-dynamically",
3970 OPT_minline_stringops_dynamically,
3971 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3973 IX86_ATTR_NO ("align-stringops",
3974 OPT_mno_align_stringops,
3975 MASK_NO_ALIGN_STRINGOPS),
3977 IX86_ATTR_YES ("recip",
3978 OPT_mrecip,
3979 MASK_RECIP),
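/* A sketch of an attribute that exercises all three kinds of entries
   in the table above:

     __attribute__ ((target ("arch=core2,sse4.2,no-fancy-math-387")))
     int foo (void);

   "arch=" is a string option, "sse4.2" an ISA option, and
   "no-fancy-math-387" a flag option toggled through its MASK_ bit.  */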
3983 /* If this is a list, recurse to get the options. */
3984 if (TREE_CODE (args) == TREE_LIST)
3986 bool ret = true;
3988 for (; args; args = TREE_CHAIN (args))
3989 if (TREE_VALUE (args)
3990 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3991 ret = false;
3993 return ret;
3996 else if (TREE_CODE (args) != STRING_CST)
3997 gcc_unreachable ();
3999 /* Handle multiple arguments separated by commas. */
4000 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4002 while (next_optstr && *next_optstr != '\0')
4004 char *p = next_optstr;
4005 char *orig_p = p;
4006 char *comma = strchr (next_optstr, ',');
4007 const char *opt_string;
4008 size_t len, opt_len;
4009 int opt;
4010 bool opt_set_p;
4011 char ch;
4012 unsigned i;
4013 enum ix86_opt_type type = ix86_opt_unknown;
4014 int mask = 0;
4016 if (comma)
4018 *comma = '\0';
4019 len = comma - next_optstr;
4020 next_optstr = comma + 1;
4022 else
4024 len = strlen (p);
4025 next_optstr = NULL;
4028 /* Recognize no-xxx. */
4029 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4031 opt_set_p = false;
4032 p += 3;
4033 len -= 3;
4035 else
4036 opt_set_p = true;
4038 /* Find the option. */
4039 ch = *p;
4040 opt = N_OPTS;
4041 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4043 type = attrs[i].type;
4044 opt_len = attrs[i].len;
4045 if (ch == attrs[i].string[0]
4046 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
4047 && memcmp (p, attrs[i].string, opt_len) == 0)
4049 opt = attrs[i].opt;
4050 mask = attrs[i].mask;
4051 opt_string = attrs[i].string;
4052 break;
4056 /* Process the option. */
4057 if (opt == N_OPTS)
4059 error ("attribute(target(\"%s\")) is unknown", orig_p);
4060 ret = false;
4063 else if (type == ix86_opt_isa)
4064 ix86_handle_option (opt, p, opt_set_p);
4066 else if (type == ix86_opt_yes || type == ix86_opt_no)
4068 if (type == ix86_opt_no)
4069 opt_set_p = !opt_set_p;
4071 if (opt_set_p)
4072 target_flags |= mask;
4073 else
4074 target_flags &= ~mask;
4077 else if (type == ix86_opt_str)
4079 if (p_strings[opt])
4081 error ("option(\"%s\") was already specified", opt_string);
4082 ret = false;
4084 else
4085 p_strings[opt] = xstrdup (p + opt_len);
4088 else
4089 gcc_unreachable ();
4092 return ret;
4095 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4097 tree
4098 ix86_valid_target_attribute_tree (tree args)
4100 const char *orig_arch_string = ix86_arch_string;
4101 const char *orig_tune_string = ix86_tune_string;
4102 const char *orig_fpmath_string = ix86_fpmath_string;
4103 int orig_tune_defaulted = ix86_tune_defaulted;
4104 int orig_arch_specified = ix86_arch_specified;
4105 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
4106 tree t = NULL_TREE;
4107 int i;
4108 struct cl_target_option *def
4109 = TREE_TARGET_OPTION (target_option_default_node);
4111 /* Process each of the options on the chain. */
4112 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
4113 return NULL_TREE;
4115 /* If the changed options are different from the default, rerun
4116 ix86_option_override_internal, and then save the options away.
4117 The string options are attribute options, and will be undone
4118 when we copy the save structure. */
4119 if (ix86_isa_flags != def->ix86_isa_flags
4120 || target_flags != def->target_flags
4121 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4122 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4123 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4125 /* If we are using the default tune= or arch=, undo the string assigned,
4126 and use the default. */
4127 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4128 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4129 else if (!orig_arch_specified)
4130 ix86_arch_string = NULL;
4132 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4133 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4134 else if (orig_tune_defaulted)
4135 ix86_tune_string = NULL;
4137 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4138 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4139 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
4140 else if (!TARGET_64BIT && TARGET_SSE)
4141 ix86_fpmath_string = "sse,387";
4143 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4144 ix86_option_override_internal (false);
4146 /* Add any builtin functions with the new isa if any. */
4147 ix86_add_new_builtins (ix86_isa_flags);
4149 /* Save the current options unless we are validating options for
4150 #pragma. */
4151 t = build_target_option_node ();
4153 ix86_arch_string = orig_arch_string;
4154 ix86_tune_string = orig_tune_string;
4155 ix86_fpmath_string = orig_fpmath_string;
4157 /* Free up memory allocated to hold the strings */
4158 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4159 if (option_strings[i])
4160 free (option_strings[i]);
4163 return t;
4166 /* Hook to validate attribute((target("string"))). */
4168 static bool
4169 ix86_valid_target_attribute_p (tree fndecl,
4170 tree ARG_UNUSED (name),
4171 tree args,
4172 int ARG_UNUSED (flags))
4174 struct cl_target_option cur_target;
4175 bool ret = true;
4176 tree old_optimize = build_optimization_node ();
4177 tree new_target, new_optimize;
4178 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4180 /* If the function changed the optimization levels as well as setting target
4181 options, start with the optimizations specified. */
4182 if (func_optimize && func_optimize != old_optimize)
4183 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
4185 /* The target attributes may also change some optimization flags, so update
4186 the optimization options if necessary. */
4187 cl_target_option_save (&cur_target);
4188 new_target = ix86_valid_target_attribute_tree (args);
4189 new_optimize = build_optimization_node ();
4191 if (!new_target)
4192 ret = false;
4194 else if (fndecl)
4196 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4198 if (old_optimize != new_optimize)
4199 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4202 cl_target_option_restore (&cur_target);
4204 if (old_optimize != new_optimize)
4205 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
4207 return ret;
4211 /* Hook to determine if one function can safely inline another. */
4213 static bool
4214 ix86_can_inline_p (tree caller, tree callee)
4216 bool ret = false;
4217 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4218 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4220 /* If callee has no option attributes, then it is ok to inline. */
4221 if (!callee_tree)
4222 ret = true;
4224 /* If caller has no option attributes, but callee does then it is not ok to
4225 inline. */
4226 else if (!caller_tree)
4227 ret = false;
4229 else
4231 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4232 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4234 /* Callee's ISA options should be a subset of the caller's, i.e. an SSE4
4235 function can inline an SSE2 function, but an SSE2 function can't inline
4236 an SSE4 function. */
4237 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
4238 != callee_opts->ix86_isa_flags)
4239 ret = false;
4241 /* See if we have the same non-isa options. */
4242 else if (caller_opts->target_flags != callee_opts->target_flags)
4243 ret = false;
4245 /* See if arch, tune, etc. are the same. */
4246 else if (caller_opts->arch != callee_opts->arch)
4247 ret = false;
4249 else if (caller_opts->tune != callee_opts->tune)
4250 ret = false;
4252 else if (caller_opts->fpmath != callee_opts->fpmath)
4253 ret = false;
4255 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4256 ret = false;
4258 else
4259 ret = true;
4262 return ret;
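/* In practice this means a caller built with -msse4.2 (or an equivalent
   target attribute) may inline an SSE2-only callee, but a plain SSE2
   caller may not inline an SSE4.2 callee, since the callee's ISA flags
   would not be a subset of the caller's.  */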
4266 /* Remember the last target of ix86_set_current_function. */
4267 static GTY(()) tree ix86_previous_fndecl;
4269 /* Establish appropriate back-end context for processing the function
4270 FNDECL. The argument might be NULL to indicate processing at top
4271 level, outside of any function scope. */
4272 static void
4273 ix86_set_current_function (tree fndecl)
4275 /* Only change the context if the function changes. This hook is called
4276 several times in the course of compiling a function, and we don't want to
4277 slow things down too much or call target_reinit when it isn't safe. */
4278 if (fndecl && fndecl != ix86_previous_fndecl)
4280 tree old_tree = (ix86_previous_fndecl
4281 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4282 : NULL_TREE);
4284 tree new_tree = (fndecl
4285 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4286 : NULL_TREE);
4288 ix86_previous_fndecl = fndecl;
4289 if (old_tree == new_tree)
4292 else if (new_tree)
4294 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
4295 target_reinit ();
4298 else if (old_tree)
4300 struct cl_target_option *def
4301 = TREE_TARGET_OPTION (target_option_current_node);
4303 cl_target_option_restore (def);
4304 target_reinit ();
4310 /* Return true if this goes in large data/bss. */
4312 static bool
4313 ix86_in_large_data_p (tree exp)
4315 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4316 return false;
4318 /* Functions are never large data. */
4319 if (TREE_CODE (exp) == FUNCTION_DECL)
4320 return false;
4322 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4324 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4325 if (strcmp (section, ".ldata") == 0
4326 || strcmp (section, ".lbss") == 0)
4327 return true;
4328 return false;
4330 else
4332 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4334 /* If this is an incomplete type with size 0, then we can't put it
4335 in data because it might be too big when completed. */
4336 if (!size || size > ix86_section_threshold)
4337 return true;
4340 return false;
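/* So under -mcmodel=medium a definition like

     static char big_buffer[1 << 20];

   counts as large data once its size exceeds ix86_section_threshold
   (set with -mlarge-data-threshold), while smaller objects stay in the
   ordinary .data/.bss sections.  */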
4343 /* Switch to the appropriate section for output of DECL.
4344 DECL is either a `VAR_DECL' node or a constant of some sort.
4345 RELOC indicates whether forming the initial value of DECL requires
4346 link-time relocations. */
4348 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4349 ATTRIBUTE_UNUSED;
4351 static section *
4352 x86_64_elf_select_section (tree decl, int reloc,
4353 unsigned HOST_WIDE_INT align)
4355 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4356 && ix86_in_large_data_p (decl))
4358 const char *sname = NULL;
4359 unsigned int flags = SECTION_WRITE;
4360 switch (categorize_decl_for_section (decl, reloc))
4362 case SECCAT_DATA:
4363 sname = ".ldata";
4364 break;
4365 case SECCAT_DATA_REL:
4366 sname = ".ldata.rel";
4367 break;
4368 case SECCAT_DATA_REL_LOCAL:
4369 sname = ".ldata.rel.local";
4370 break;
4371 case SECCAT_DATA_REL_RO:
4372 sname = ".ldata.rel.ro";
4373 break;
4374 case SECCAT_DATA_REL_RO_LOCAL:
4375 sname = ".ldata.rel.ro.local";
4376 break;
4377 case SECCAT_BSS:
4378 sname = ".lbss";
4379 flags |= SECTION_BSS;
4380 break;
4381 case SECCAT_RODATA:
4382 case SECCAT_RODATA_MERGE_STR:
4383 case SECCAT_RODATA_MERGE_STR_INIT:
4384 case SECCAT_RODATA_MERGE_CONST:
4385 sname = ".lrodata";
4386 flags = 0;
4387 break;
4388 case SECCAT_SRODATA:
4389 case SECCAT_SDATA:
4390 case SECCAT_SBSS:
4391 gcc_unreachable ();
4392 case SECCAT_TEXT:
4393 case SECCAT_TDATA:
4394 case SECCAT_TBSS:
4395 /* We don't split these for the medium model. Place them into
4396 default sections and hope for the best. */
4397 break;
4399 if (sname)
4401 /* We might get called with string constants, but get_named_section
4402 doesn't like them as they are not DECLs. Also, we need to set
4403 flags in that case. */
4404 if (!DECL_P (decl))
4405 return get_section (sname, flags, NULL);
4406 return get_named_section (decl, sname, reloc);
4409 return default_elf_select_section (decl, reloc, align);
4412 /* Build up a unique section name, expressed as a
4413 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4414 RELOC indicates whether the initial value of EXP requires
4415 link-time relocations. */
4417 static void ATTRIBUTE_UNUSED
4418 x86_64_elf_unique_section (tree decl, int reloc)
4420 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4421 && ix86_in_large_data_p (decl))
4423 const char *prefix = NULL;
4424 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4425 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4427 switch (categorize_decl_for_section (decl, reloc))
4429 case SECCAT_DATA:
4430 case SECCAT_DATA_REL:
4431 case SECCAT_DATA_REL_LOCAL:
4432 case SECCAT_DATA_REL_RO:
4433 case SECCAT_DATA_REL_RO_LOCAL:
4434 prefix = one_only ? ".ld" : ".ldata";
4435 break;
4436 case SECCAT_BSS:
4437 prefix = one_only ? ".lb" : ".lbss";
4438 break;
4439 case SECCAT_RODATA:
4440 case SECCAT_RODATA_MERGE_STR:
4441 case SECCAT_RODATA_MERGE_STR_INIT:
4442 case SECCAT_RODATA_MERGE_CONST:
4443 prefix = one_only ? ".lr" : ".lrodata";
4444 break;
4445 case SECCAT_SRODATA:
4446 case SECCAT_SDATA:
4447 case SECCAT_SBSS:
4448 gcc_unreachable ();
4449 case SECCAT_TEXT:
4450 case SECCAT_TDATA:
4451 case SECCAT_TBSS:
4452 /* We don't split these for the medium model. Place them into
4453 default sections and hope for the best. */
4454 break;
4456 if (prefix)
4458 const char *name, *linkonce;
4459 char *string;
4461 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4462 name = targetm.strip_name_encoding (name);
4464 /* If we're using one_only, then there needs to be a .gnu.linkonce
4465 prefix to the section name. */
4466 linkonce = one_only ? ".gnu.linkonce" : "";
4468 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4470 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4471 return;
4474 default_unique_section (decl, reloc);
4477 #ifdef COMMON_ASM_OP
4478 /* This says how to output assembler code to declare an
4479 uninitialized external linkage data object.
4481 For medium model x86-64 we need to use .largecomm opcode for
4482 large objects. */
4483 void
4484 x86_elf_aligned_common (FILE *file,
4485 const char *name, unsigned HOST_WIDE_INT size,
4486 int align)
4488 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4489 && size > (unsigned int)ix86_section_threshold)
4490 fputs (".largecomm\t", file);
4491 else
4492 fputs (COMMON_ASM_OP, file);
4493 assemble_name (file, name);
4494 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4495 size, align / BITS_PER_UNIT);
4497 #endif
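/* For a medium-model object above the threshold this emits, e.g.,

     .largecomm	big_buffer,1048576,32

   instead of the usual .comm directive, so the object can be placed
   outside the 2 GB range assumed for small data.  */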
4499 /* Utility function for targets to use in implementing
4500 ASM_OUTPUT_ALIGNED_BSS. */
4502 void
4503 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4504 const char *name, unsigned HOST_WIDE_INT size,
4505 int align)
4507 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4508 && size > (unsigned int)ix86_section_threshold)
4509 switch_to_section (get_named_section (decl, ".lbss", 0));
4510 else
4511 switch_to_section (bss_section);
4512 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4513 #ifdef ASM_DECLARE_OBJECT_NAME
4514 last_assemble_variable_decl = decl;
4515 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4516 #else
4517 /* Standard thing is just output label for the object. */
4518 ASM_OUTPUT_LABEL (file, name);
4519 #endif /* ASM_DECLARE_OBJECT_NAME */
4520 ASM_OUTPUT_SKIP (file, size ? size : 1);
4523 static void
4524 ix86_option_optimization (int level, int size ATTRIBUTE_UNUSED)
4526 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4527 make the problem with not enough registers even worse. */
4528 #ifdef INSN_SCHEDULING
4529 if (level > 1)
4530 flag_schedule_insns = 0;
4531 #endif
4533 if (TARGET_MACHO)
4534 /* The Darwin libraries never set errno, so we might as well
4535 avoid calling them when that's the only reason we would. */
4536 flag_errno_math = 0;
4538 /* The default values of these switches depend on TARGET_64BIT,
4539 which is not known at this moment. Mark these values with 2 and
4540 let the user override them. In case there is no command line
4541 option specifying them, we will set the defaults in
4542 ix86_option_override_internal. */
4543 if (optimize >= 1)
4544 flag_omit_frame_pointer = 2;
4546 /* For -O2 and beyond, turn on -fzee for x86_64 target. */
4547 if (level > 1)
4548 flag_zee = 2;
4550 flag_pcc_struct_return = 2;
4551 flag_asynchronous_unwind_tables = 2;
4552 flag_vect_cost_model = 1;
4553 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4554 SUBTARGET_OPTIMIZATION_OPTIONS;
4555 #endif
4558 /* Decide whether we must probe the stack before any space allocation
4559 on this target. It's essentially TARGET_STACK_PROBE except when
4560 -fstack-check causes the stack to be already probed differently. */
4562 bool
4563 ix86_target_stack_probe (void)
4565 /* Do not probe the stack twice if static stack checking is enabled. */
4566 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4567 return false;
4569 return TARGET_STACK_PROBE;
4572 /* Decide whether we can make a sibling call to a function. DECL is the
4573 declaration of the function being targeted by the call and EXP is the
4574 CALL_EXPR representing the call. */
4576 static bool
4577 ix86_function_ok_for_sibcall (tree decl, tree exp)
4579 tree type, decl_or_type;
4580 rtx a, b;
4582 /* If we are generating position-independent code, we cannot sibcall
4583 optimize any indirect call, or a direct call to a global function,
4584 as the PLT requires %ebx be live. */
4585 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4586 return false;
4588 /* If we need to align the outgoing stack, then sibcalling would
4589 unalign the stack, which may break the called function. */
4590 if (ix86_minimum_incoming_stack_boundary (true)
4591 < PREFERRED_STACK_BOUNDARY)
4592 return false;
4594 if (decl)
4596 decl_or_type = decl;
4597 type = TREE_TYPE (decl);
4599 else
4601 /* We're looking at the CALL_EXPR, we need the type of the function. */
4602 type = CALL_EXPR_FN (exp); /* pointer expression */
4603 type = TREE_TYPE (type); /* pointer type */
4604 type = TREE_TYPE (type); /* function type */
4605 decl_or_type = type;
4608 /* Check that the return value locations are the same. Like
4609 if we are returning floats on the 80387 register stack, we cannot
4610 make a sibcall from a function that doesn't return a float to a
4611 function that does or, conversely, from a function that does return
4612 a float to a function that doesn't; the necessary stack adjustment
4613 would not be executed. This is also the place we notice
4614 differences in the return value ABI. Note that it is ok for one
4615 of the functions to have void return type as long as the return
4616 value of the other is passed in a register. */
4617 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4618 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4619 cfun->decl, false);
4620 if (STACK_REG_P (a) || STACK_REG_P (b))
4622 if (!rtx_equal_p (a, b))
4623 return false;
4625 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4627 else if (!rtx_equal_p (a, b))
4628 return false;
4630 if (TARGET_64BIT)
4632 /* The SYSV ABI has more call-clobbered registers;
4633 disallow sibcalls from MS to SYSV. */
4634 if (cfun->machine->call_abi == MS_ABI
4635 && ix86_function_type_abi (type) == SYSV_ABI)
4636 return false;
4638 else
4640 /* If this call is indirect, we'll need to be able to use a
4641 call-clobbered register for the address of the target function.
4642 Make sure that all such registers are not used for passing
4643 parameters. Note that DLLIMPORT functions are indirect. */
4644 if (!decl
4645 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4647 if (ix86_function_regparm (type, NULL) >= 3)
4649 /* ??? Need to count the actual number of registers to be used,
4650 not the possible number of registers. Fix later. */
4651 return false;
4656 /* Otherwise okay. That also includes certain types of indirect calls. */
4657 return true;
4660 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4661 and "sseregparm" calling convention attributes;
4662 arguments as in struct attribute_spec.handler. */
4664 static tree
4665 ix86_handle_cconv_attribute (tree *node, tree name,
4666 tree args,
4667 int flags ATTRIBUTE_UNUSED,
4668 bool *no_add_attrs)
4670 if (TREE_CODE (*node) != FUNCTION_TYPE
4671 && TREE_CODE (*node) != METHOD_TYPE
4672 && TREE_CODE (*node) != FIELD_DECL
4673 && TREE_CODE (*node) != TYPE_DECL)
4675 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4676 name);
4677 *no_add_attrs = true;
4678 return NULL_TREE;
4681 /* Can combine regparm with all attributes but fastcall. */
4682 if (is_attribute_p ("regparm", name))
4684 tree cst;
4686 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4688 error ("fastcall and regparm attributes are not compatible");
4691 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4693 error ("regparam and thiscall attributes are not compatible");
4696 cst = TREE_VALUE (args);
4697 if (TREE_CODE (cst) != INTEGER_CST)
4699 warning (OPT_Wattributes,
4700 "%qE attribute requires an integer constant argument",
4701 name);
4702 *no_add_attrs = true;
4704 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4706 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4707 name, REGPARM_MAX);
4708 *no_add_attrs = true;
4711 return NULL_TREE;
4714 if (TARGET_64BIT)
4716 /* Do not warn when emulating the MS ABI. */
4717 if ((TREE_CODE (*node) != FUNCTION_TYPE
4718 && TREE_CODE (*node) != METHOD_TYPE)
4719 || ix86_function_type_abi (*node) != MS_ABI)
4720 warning (OPT_Wattributes, "%qE attribute ignored",
4721 name);
4722 *no_add_attrs = true;
4723 return NULL_TREE;
4726 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4727 if (is_attribute_p ("fastcall", name))
4729 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4731 error ("fastcall and cdecl attributes are not compatible");
4733 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4735 error ("fastcall and stdcall attributes are not compatible");
4737 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4739 error ("fastcall and regparm attributes are not compatible");
4741 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4743 error ("fastcall and thiscall attributes are not compatible");
4747 /* Can combine stdcall with fastcall (redundant), regparm and
4748 sseregparm. */
4749 else if (is_attribute_p ("stdcall", name))
4751 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4753 error ("stdcall and cdecl attributes are not compatible");
4755 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4757 error ("stdcall and fastcall attributes are not compatible");
4759 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4761 error ("stdcall and thiscall attributes are not compatible");
4765 /* Can combine cdecl with regparm and sseregparm. */
4766 else if (is_attribute_p ("cdecl", name))
4768 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4770 error ("stdcall and cdecl attributes are not compatible");
4772 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4774 error ("fastcall and cdecl attributes are not compatible");
4776 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4778 error ("cdecl and thiscall attributes are not compatible");
4781 else if (is_attribute_p ("thiscall", name))
4783 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
4784 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
4785 name);
4786 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4788 error ("stdcall and thiscall attributes are not compatible");
4790 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4792 error ("fastcall and thiscall attributes are not compatible");
4794 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4796 error ("cdecl and thiscall attributes are not compatible");
4800 /* Can combine sseregparm with all attributes. */
4802 return NULL_TREE;
4805 /* Return 0 if the attributes for two types are incompatible, 1 if they
4806 are compatible, and 2 if they are nearly compatible (which causes a
4807 warning to be generated). */
4809 static int
4810 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4812 /* Check for mismatch of non-default calling convention. */
4813 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4815 if (TREE_CODE (type1) != FUNCTION_TYPE
4816 && TREE_CODE (type1) != METHOD_TYPE)
4817 return 1;
4819 /* Check for mismatched fastcall/regparm types. */
4820 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4821 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4822 || (ix86_function_regparm (type1, NULL)
4823 != ix86_function_regparm (type2, NULL)))
4824 return 0;
4826 /* Check for mismatched sseregparm types. */
4827 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4828 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4829 return 0;
4831 /* Check for mismatched thiscall types. */
4832 if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
4833 != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
4834 return 0;
4836 /* Check for mismatched return types (cdecl vs stdcall). */
4837 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4838 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4839 return 0;
4841 return 1;
4844 /* Return the regparm value for a function with the indicated TYPE and DECL.
4845 DECL may be NULL when calling function indirectly
4846 or considering a libcall. */
4848 static int
4849 ix86_function_regparm (const_tree type, const_tree decl)
4851 tree attr;
4852 int regparm;
4854 if (TARGET_64BIT)
4855 return (ix86_function_type_abi (type) == SYSV_ABI
4856 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4858 regparm = ix86_regparm;
4859 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4860 if (attr)
4862 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4863 return regparm;
4866 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4867 return 2;
4869 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
4870 return 1;
4872 /* Use register calling convention for local functions when possible. */
4873 if (decl
4874 && TREE_CODE (decl) == FUNCTION_DECL
4875 && optimize
4876 && !(profile_flag && !flag_fentry))
4878 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4879 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
4880 if (i && i->local)
4882 int local_regparm, globals = 0, regno;
4884 /* Make sure no regparm register is taken by a
4885 fixed register variable. */
4886 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4887 if (fixed_regs[local_regparm])
4888 break;
4890 /* We don't want to use regparm(3) for nested functions as
4891 these use a static chain pointer in the third argument. */
4892 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
4893 local_regparm = 2;
4895 /* In 32-bit mode save a register for the split stack. */
4896 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
4897 local_regparm = 2;
4899 /* Each fixed register usage increases register pressure,
4900 so fewer registers should be used for argument passing.
4901 This functionality can be overridden by an explicit
4902 regparm value. */
4903 for (regno = 0; regno <= DI_REG; regno++)
4904 if (fixed_regs[regno])
4905 globals++;
4907 local_regparm
4908 = globals < local_regparm ? local_regparm - globals : 0;
4910 if (local_regparm > regparm)
4911 regparm = local_regparm;
4915 return regparm;
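/* Summarizing the 32-bit cases above: regparm(3) passes the first three
   integer arguments in EAX, EDX and ECX; fastcall yields 2 (ECX, EDX);
   thiscall yields 1 (ECX, used for `this'); and for a local static
   function the compiler may raise regparm on its own unless fixed
   registers, a static chain, or -fsplit-stack get in the way.  */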
4918 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4919 DFmode (2) arguments in SSE registers for a function with the
4920 indicated TYPE and DECL. DECL may be NULL when calling function
4921 indirectly or considering a libcall. Otherwise return 0. */
4923 static int
4924 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4926 gcc_assert (!TARGET_64BIT);
4928 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4929 by the sseregparm attribute. */
4930 if (TARGET_SSEREGPARM
4931 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4933 if (!TARGET_SSE)
4935 if (warn)
4937 if (decl)
4938 error ("Calling %qD with attribute sseregparm without "
4939 "SSE/SSE2 enabled", decl);
4940 else
4941 error ("Calling %qT with attribute sseregparm without "
4942 "SSE/SSE2 enabled", type);
4944 return 0;
4947 return 2;
4950 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4951 (and DFmode for SSE2) arguments in SSE registers. */
4952 if (decl && TARGET_SSE_MATH && optimize
4953 && !(profile_flag && !flag_fentry))
4955 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4956 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4957 if (i && i->local)
4958 return TARGET_SSE2 ? 2 : 1;
4961 return 0;
4964 /* Return true if EAX is live at the start of the function. Used by
4965 ix86_expand_prologue to determine if we need special help before
4966 calling allocate_stack_worker. */
4968 static bool
4969 ix86_eax_live_at_start_p (void)
4971 /* Cheat. Don't bother working forward from ix86_function_regparm
4972 to the function type to whether an actual argument is located in
4973 eax. Instead just look at cfg info, which is still close enough
4974 to correct at this point. This gives false positives for broken
4975 functions that might use uninitialized data that happens to be
4976 allocated in eax, but who cares? */
4977 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4980 /* Value is the number of bytes of arguments automatically
4981 popped when returning from a subroutine call.
4982 FUNDECL is the declaration node of the function (as a tree),
4983 FUNTYPE is the data type of the function (as a tree),
4984 or for a library call it is an identifier node for the subroutine name.
4985 SIZE is the number of bytes of arguments passed on the stack.
4987 On the 80386, the RTD insn may be used to pop them if the number
4988 of args is fixed, but if the number is variable then the caller
4989 must pop them all. RTD can't be used for library calls now
4990 because the library is compiled with the Unix compiler.
4991 Use of RTD is a selectable option, since it is incompatible with
4992 standard Unix calling sequences. If the option is not selected,
4993 the caller must always pop the args.
4995 The attribute stdcall is equivalent to RTD on a per module basis. */
4997 static int
4998 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5000 int rtd;
5002 /* None of the 64-bit ABIs pop arguments. */
5003 if (TARGET_64BIT)
5004 return 0;
5006 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
5008 /* Cdecl functions override -mrtd, and never pop the stack. */
5009 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
5011 /* Stdcall and fastcall functions will pop the stack if not
5012 variable args. */
5013 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
5014 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
5015 || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
5016 rtd = 1;
5018 if (rtd && ! stdarg_p (funtype))
5019 return size;
5022 /* Lose any fake structure return argument if it is passed on the stack. */
5023 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5024 && !KEEP_AGGREGATE_RETURN_POINTER)
5026 int nregs = ix86_function_regparm (funtype, fundecl);
5027 if (nregs == 0)
5028 return GET_MODE_SIZE (Pmode);
5031 return 0;
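/* Illustrative sketch (hypothetical functions): with -m32, a stdcall
   callee pops its own stack arguments (e.g. "ret $8"), while a cdecl
   callee leaves them for the caller, matching the logic above.  */
int __attribute__ ((stdcall)) add_stdcall_sketch (int a, int b) { return a + b; }
int __attribute__ ((cdecl))   add_cdecl_sketch   (int a, int b) { return a + b; }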
5034 /* Argument support functions. */
5036 /* Return true when register may be used to pass function parameters. */
5037 bool
5038 ix86_function_arg_regno_p (int regno)
5040 int i;
5041 const int *parm_regs;
5043 if (!TARGET_64BIT)
5045 if (TARGET_MACHO)
5046 return (regno < REGPARM_MAX
5047 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5048 else
5049 return (regno < REGPARM_MAX
5050 || (TARGET_MMX && MMX_REGNO_P (regno)
5051 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5052 || (TARGET_SSE && SSE_REGNO_P (regno)
5053 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5056 if (TARGET_MACHO)
5058 if (SSE_REGNO_P (regno) && TARGET_SSE)
5059 return true;
5061 else
5063 if (TARGET_SSE && SSE_REGNO_P (regno)
5064 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5065 return true;
5068 /* TODO: The function should depend on current function ABI but
5069 builtins.c would need updating then. Therefore we use the
5070 default ABI. */
5072 /* RAX is used as hidden argument to va_arg functions. */
5073 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5074 return true;
5076 if (ix86_abi == MS_ABI)
5077 parm_regs = x86_64_ms_abi_int_parameter_registers;
5078 else
5079 parm_regs = x86_64_int_parameter_registers;
5080 for (i = 0; i < (ix86_abi == MS_ABI
5081 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5082 if (regno == parm_regs[i])
5083 return true;
5084 return false;
5087 /* Return true if we do not know how to pass TYPE solely in registers. */
5089 static bool
5090 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5092 if (must_pass_in_stack_var_size_or_pad (mode, type))
5093 return true;
5095 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5096 The layout_type routine is crafty and tries to trick us into passing
5097 currently unsupported vector types on the stack by using TImode. */
5098 return (!TARGET_64BIT && mode == TImode
5099 && type && TREE_CODE (type) != VECTOR_TYPE);
5102 /* Return the size, in bytes, of the area reserved for arguments passed
5103 in registers for the function represented by FNDECL, depending on the
5104 ABI format used.
5106 ix86_reg_parm_stack_space (const_tree fndecl)
5108 enum calling_abi call_abi = SYSV_ABI;
5109 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5110 call_abi = ix86_function_abi (fndecl);
5111 else
5112 call_abi = ix86_function_type_abi (fndecl);
5113 if (call_abi == MS_ABI)
5114 return 32;
5115 return 0;
5118 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5119 call abi used. */
5120 enum calling_abi
5121 ix86_function_type_abi (const_tree fntype)
5123 if (TARGET_64BIT && fntype != NULL)
5125 enum calling_abi abi = ix86_abi;
5126 if (abi == SYSV_ABI)
5128 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5129 abi = MS_ABI;
5131 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5132 abi = SYSV_ABI;
5133 return abi;
5135 return ix86_abi;
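/* Illustrative sketch (hypothetical declarations): on x86-64 the ms_abi
   and sysv_abi attributes select the calling convention returned above.  */
void __attribute__ ((ms_abi))   win64_style_sketch (int a, int b); /* a in %rcx, b in %rdx */
void __attribute__ ((sysv_abi)) sysv_style_sketch  (int a, int b); /* a in %rdi, b in %rsi */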
5138 static bool
5139 ix86_function_ms_hook_prologue (const_tree fn)
5141 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5143 if (decl_function_context (fn) != NULL_TREE)
5144 error_at (DECL_SOURCE_LOCATION (fn),
5145 "ms_hook_prologue is not compatible with nested function");
5146 else
5147 return true;
5149 return false;
5152 static enum calling_abi
5153 ix86_function_abi (const_tree fndecl)
5155 if (! fndecl)
5156 return ix86_abi;
5157 return ix86_function_type_abi (TREE_TYPE (fndecl));
5160 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
5161 call abi used. */
5162 enum calling_abi
5163 ix86_cfun_abi (void)
5165 if (! cfun || ! TARGET_64BIT)
5166 return ix86_abi;
5167 return cfun->machine->call_abi;
5170 /* Write the extra assembler code needed to declare a function properly. */
5172 void
5173 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5174 tree decl)
5176 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5178 if (is_ms_hook)
5180 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5181 unsigned int filler_cc = 0xcccccccc;
5183 for (i = 0; i < filler_count; i += 4)
5184 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5187 ASM_OUTPUT_LABEL (asm_out_file, fname);
5189 /* Output magic byte marker, if hot-patch attribute is set. */
5190 if (is_ms_hook)
5192 if (TARGET_64BIT)
5194 /* leaq [%rsp + 0], %rsp */
5195 asm_fprintf (asm_out_file, ASM_BYTE
5196 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5198 else
5200 /* movl.s %edi, %edi
5201 push %ebp
5202 movl.s %esp, %ebp */
5203 asm_fprintf (asm_out_file, ASM_BYTE
5204 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
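/* Illustrative sketch (hypothetical declaration): a function carrying
   the hot-patch attribute receives the 0xcc filler bytes and the
   two-byte "mov %edi, %edi" (or the 64-bit lea) emitted above.  */
void __attribute__ ((ms_hook_prologue)) patchable_entry_sketch (void);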
5209 /* regclass.c */
5210 extern void init_regs (void);
5212 /* Implementation of the call ABI switching target hook. The call
5213 register sets specific to FNDECL are set up. See also CONDITIONAL_REGISTER_USAGE
5214 for more details. */
5215 void
5216 ix86_call_abi_override (const_tree fndecl)
5218 if (fndecl == NULL_TREE)
5219 cfun->machine->call_abi = ix86_abi;
5220 else
5221 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5224 /* The MS and SYSV ABIs have different sets of call-used registers. Avoid expensive
5225 re-initialization via init_regs each time we switch function context, since
5226 this is needed only during RTL expansion.
5227 static void
5228 ix86_maybe_switch_abi (void)
5230 if (TARGET_64BIT &&
5231 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5232 reinit_regs ();
5235 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5236 for a call to a function whose data type is FNTYPE.
5237 For a library call, FNTYPE is 0. */
5239 void
5240 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5241 tree fntype, /* tree ptr for function decl */
5242 rtx libname, /* SYMBOL_REF of library name or 0 */
5243 tree fndecl)
5245 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
5246 memset (cum, 0, sizeof (*cum));
5248 if (fndecl)
5249 cum->call_abi = ix86_function_abi (fndecl);
5250 else
5251 cum->call_abi = ix86_function_type_abi (fntype);
5252 /* Set up the number of registers to use for passing arguments. */
5254 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5255 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5256 "or subtarget optimization implying it");
5257 cum->nregs = ix86_regparm;
5258 if (TARGET_64BIT)
5260 cum->nregs = (cum->call_abi == SYSV_ABI
5261 ? X86_64_REGPARM_MAX
5262 : X86_64_MS_REGPARM_MAX);
5264 if (TARGET_SSE)
5266 cum->sse_nregs = SSE_REGPARM_MAX;
5267 if (TARGET_64BIT)
5269 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5270 ? X86_64_SSE_REGPARM_MAX
5271 : X86_64_MS_SSE_REGPARM_MAX);
5274 if (TARGET_MMX)
5275 cum->mmx_nregs = MMX_REGPARM_MAX;
5276 cum->warn_avx = true;
5277 cum->warn_sse = true;
5278 cum->warn_mmx = true;
5280 /* Because the type might mismatch between caller and callee, we need to
5281 use the actual type of the function for local calls.
5282 FIXME: cgraph_analyze can be told to actually record whether a function
5283 uses va_start, so for local functions maybe_vaarg can be made more
5284 aggressive, helping K&R code.
5285 FIXME: once the typesystem is fixed, we won't need this code anymore. */
5286 if (i && i->local)
5287 fntype = TREE_TYPE (fndecl);
5288 cum->maybe_vaarg = (fntype
5289 ? (!prototype_p (fntype) || stdarg_p (fntype))
5290 : !libname);
5292 if (!TARGET_64BIT)
5294 /* If there are variable arguments, then we won't pass anything
5295 in registers in 32-bit mode. */
5296 if (stdarg_p (fntype))
5298 cum->nregs = 0;
5299 cum->sse_nregs = 0;
5300 cum->mmx_nregs = 0;
5301 cum->warn_avx = 0;
5302 cum->warn_sse = 0;
5303 cum->warn_mmx = 0;
5304 return;
5307 /* Use ecx and edx registers if function has fastcall attribute,
5308 else look for regparm information. */
5309 if (fntype)
5311 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
5313 cum->nregs = 1;
5314 cum->fastcall = 1; /* Same first register as in fastcall. */
5316 else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
5318 cum->nregs = 2;
5319 cum->fastcall = 1;
5321 else
5322 cum->nregs = ix86_function_regparm (fntype, fndecl);
5325 /* Set up the number of SSE registers used for passing SFmode
5326 and DFmode arguments. Warn for mismatching ABI. */
5327 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
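/* Illustrative sketch (hypothetical declarations) of the attributes that
   drive the register counts set up above: with -m32, fastcall passes the
   first two arguments in %ecx/%edx and regparm(3) uses %eax/%edx/%ecx.  */
int __attribute__ ((fastcall))    sum2_sketch (int a, int b);
int __attribute__ ((regparm (3))) sum3_sketch (int a, int b, int c);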
5331 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5332 But in the case of vector types, it is some vector mode.
5334 When we have only some of our vector isa extensions enabled, then there
5335 are some modes for which vector_mode_supported_p is false. For these
5336 modes, the generic vector support in gcc will choose some non-vector mode
5337 in order to implement the type. By computing the natural mode, we'll
5338 select the proper ABI location for the operand and not depend on whatever
5339 the middle-end decides to do with these vector types.
5341 The middle-end can't deal with vector types larger than 16 bytes. In this
5342 case, we return the original mode and warn ABI change if CUM isn't
5343 NULL. */
5345 static enum machine_mode
5346 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5348 enum machine_mode mode = TYPE_MODE (type);
5350 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5352 HOST_WIDE_INT size = int_size_in_bytes (type);
5353 if ((size == 8 || size == 16 || size == 32)
5354 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5355 && TYPE_VECTOR_SUBPARTS (type) > 1)
5357 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5359 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5360 mode = MIN_MODE_VECTOR_FLOAT;
5361 else
5362 mode = MIN_MODE_VECTOR_INT;
5364 /* Get the mode which has this inner mode and number of units. */
5365 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5366 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5367 && GET_MODE_INNER (mode) == innermode)
5369 if (size == 32 && !TARGET_AVX)
5371 static bool warnedavx;
5373 if (cum
5374 && !warnedavx
5375 && cum->warn_avx)
5377 warnedavx = true;
5378 warning (0, "AVX vector argument without AVX "
5379 "enabled changes the ABI");
5381 return TYPE_MODE (type);
5383 else
5384 return mode;
5387 gcc_unreachable ();
5391 return mode;
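/* Illustrative sketch (hypothetical typedefs): generic vector types whose
   natural mode is computed above.  With SSE enabled the 16-byte vector
   maps to V4SImode; the 32-byte one triggers the AVX ABI warning when
   AVX is disabled.  */
typedef int v4si_sketch __attribute__ ((vector_size (16)));
typedef int v8si_sketch __attribute__ ((vector_size (32)));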
5394 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5395 this may not agree with the mode that the type system has chosen for the
5396 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5397 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5399 static rtx
5400 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5401 unsigned int regno)
5403 rtx tmp;
5405 if (orig_mode != BLKmode)
5406 tmp = gen_rtx_REG (orig_mode, regno);
5407 else
5409 tmp = gen_rtx_REG (mode, regno);
5410 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5411 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5414 return tmp;
5417 /* x86-64 register passing implementation. See the x86-64 ABI for details.
5418 The goal of this code is to classify each eightbyte of the incoming argument
5419 by register class and assign registers accordingly.
5421 /* Return the union class of CLASS1 and CLASS2.
5422 See the x86-64 PS ABI for details. */
5424 static enum x86_64_reg_class
5425 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5427 /* Rule #1: If both classes are equal, this is the resulting class. */
5428 if (class1 == class2)
5429 return class1;
5431 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5432 the other class. */
5433 if (class1 == X86_64_NO_CLASS)
5434 return class2;
5435 if (class2 == X86_64_NO_CLASS)
5436 return class1;
5438 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5439 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5440 return X86_64_MEMORY_CLASS;
5442 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5443 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5444 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5445 return X86_64_INTEGERSI_CLASS;
5446 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5447 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5448 return X86_64_INTEGER_CLASS;
5450 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5451 MEMORY is used. */
5452 if (class1 == X86_64_X87_CLASS
5453 || class1 == X86_64_X87UP_CLASS
5454 || class1 == X86_64_COMPLEX_X87_CLASS
5455 || class2 == X86_64_X87_CLASS
5456 || class2 == X86_64_X87UP_CLASS
5457 || class2 == X86_64_COMPLEX_X87_CLASS)
5458 return X86_64_MEMORY_CLASS;
5460 /* Rule #6: Otherwise class SSE is used. */
5461 return X86_64_SSE_CLASS;
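/* Worked example (illustrative, hypothetical type): for
     union u_sketch { double d; int i; };
   the double classifies the first eightbyte as SSEDF and the int as
   INTEGERSI; rule #4 above merges them to INTEGER, so the union is
   passed in a general-purpose register.  */
union u_sketch { double d; int i; };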
5464 /* Classify the argument of type TYPE and mode MODE.
5465 CLASSES will be filled by the register class used to pass each word
5466 of the operand. The number of words is returned. In case the parameter
5467 should be passed in memory, 0 is returned. As a special case for zero
5468 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5470 BIT_OFFSET is used internally for handling records and specifies the
5471 offset in bits modulo 256 to avoid overflow cases.
5473 See the x86-64 PS ABI for details.
5476 static int
5477 classify_argument (enum machine_mode mode, const_tree type,
5478 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5480 HOST_WIDE_INT bytes =
5481 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5482 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5484 /* Variable sized entities are always passed/returned in memory. */
5485 if (bytes < 0)
5486 return 0;
5488 if (mode != VOIDmode
5489 && targetm.calls.must_pass_in_stack (mode, type))
5490 return 0;
5492 if (type && AGGREGATE_TYPE_P (type))
5494 int i;
5495 tree field;
5496 enum x86_64_reg_class subclasses[MAX_CLASSES];
5498 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5499 if (bytes > 32)
5500 return 0;
5502 for (i = 0; i < words; i++)
5503 classes[i] = X86_64_NO_CLASS;
5505 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
5506 signal the memory class, so handle it as a special case.
5507 if (!words)
5509 classes[0] = X86_64_NO_CLASS;
5510 return 1;
5513 /* Classify each field of record and merge classes. */
5514 switch (TREE_CODE (type))
5516 case RECORD_TYPE:
5517 /* And now merge the fields of structure. */
5518 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5520 if (TREE_CODE (field) == FIELD_DECL)
5522 int num;
5524 if (TREE_TYPE (field) == error_mark_node)
5525 continue;
5527 /* Bitfields are always classified as integer. Handle them
5528 early, since later code would consider them to be
5529 misaligned integers. */
5530 if (DECL_BIT_FIELD (field))
5532 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5533 i < ((int_bit_position (field) + (bit_offset % 64))
5534 + tree_low_cst (DECL_SIZE (field), 0)
5535 + 63) / 8 / 8; i++)
5536 classes[i] =
5537 merge_classes (X86_64_INTEGER_CLASS,
5538 classes[i]);
5540 else
5542 int pos;
5544 type = TREE_TYPE (field);
5546 /* Flexible array member is ignored. */
5547 if (TYPE_MODE (type) == BLKmode
5548 && TREE_CODE (type) == ARRAY_TYPE
5549 && TYPE_SIZE (type) == NULL_TREE
5550 && TYPE_DOMAIN (type) != NULL_TREE
5551 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5552 == NULL_TREE))
5554 static bool warned;
5556 if (!warned && warn_psabi)
5558 warned = true;
5559 inform (input_location,
5560 "The ABI of passing struct with"
5561 " a flexible array member has"
5562 " changed in GCC 4.4");
5564 continue;
5566 num = classify_argument (TYPE_MODE (type), type,
5567 subclasses,
5568 (int_bit_position (field)
5569 + bit_offset) % 256);
5570 if (!num)
5571 return 0;
5572 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5573 for (i = 0; i < num && (i + pos) < words; i++)
5574 classes[i + pos] =
5575 merge_classes (subclasses[i], classes[i + pos]);
5579 break;
5581 case ARRAY_TYPE:
5582 /* Arrays are handled as small records. */
5584 int num;
5585 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5586 TREE_TYPE (type), subclasses, bit_offset);
5587 if (!num)
5588 return 0;
5590 /* The partial classes are now full classes. */
5591 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5592 subclasses[0] = X86_64_SSE_CLASS;
5593 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5594 && !((bit_offset % 64) == 0 && bytes == 4))
5595 subclasses[0] = X86_64_INTEGER_CLASS;
5597 for (i = 0; i < words; i++)
5598 classes[i] = subclasses[i % num];
5600 break;
5602 case UNION_TYPE:
5603 case QUAL_UNION_TYPE:
5604 /* Unions are similar to RECORD_TYPE but offset is always 0.
5606 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5608 if (TREE_CODE (field) == FIELD_DECL)
5610 int num;
5612 if (TREE_TYPE (field) == error_mark_node)
5613 continue;
5615 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5616 TREE_TYPE (field), subclasses,
5617 bit_offset);
5618 if (!num)
5619 return 0;
5620 for (i = 0; i < num; i++)
5621 classes[i] = merge_classes (subclasses[i], classes[i]);
5624 break;
5626 default:
5627 gcc_unreachable ();
5630 if (words > 2)
5632 /* When size > 16 bytes, if the first eightbyte isn't
5633 X86_64_SSE_CLASS or any of the remaining ones isn't
5634 X86_64_SSEUP_CLASS, everything should be passed in
5635 memory.
5636 if (classes[0] != X86_64_SSE_CLASS)
5637 return 0;
5639 for (i = 1; i < words; i++)
5640 if (classes[i] != X86_64_SSEUP_CLASS)
5641 return 0;
5644 /* Final merger cleanup. */
5645 for (i = 0; i < words; i++)
5647 /* If one class is MEMORY, everything should be passed in
5648 memory. */
5649 if (classes[i] == X86_64_MEMORY_CLASS)
5650 return 0;
5652 /* The X86_64_SSEUP_CLASS should be always preceded by
5653 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5654 if (classes[i] == X86_64_SSEUP_CLASS
5655 && classes[i - 1] != X86_64_SSE_CLASS
5656 && classes[i - 1] != X86_64_SSEUP_CLASS)
5658 /* The first one should never be X86_64_SSEUP_CLASS. */
5659 gcc_assert (i != 0);
5660 classes[i] = X86_64_SSE_CLASS;
5663 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5664 everything should be passed in memory. */
5665 if (classes[i] == X86_64_X87UP_CLASS
5666 && (classes[i - 1] != X86_64_X87_CLASS))
5668 static bool warned;
5670 /* The first one should never be X86_64_X87UP_CLASS. */
5671 gcc_assert (i != 0);
5672 if (!warned && warn_psabi)
5674 warned = true;
5675 inform (input_location,
5676 "The ABI of passing union with long double"
5677 " has changed in GCC 4.4");
5679 return 0;
5682 return words;
5685 /* Compute the alignment needed. We align all types to their natural boundaries,
5686 with the exception of XFmode, which is aligned to 64 bits.
5687 if (mode != VOIDmode && mode != BLKmode)
5689 int mode_alignment = GET_MODE_BITSIZE (mode);
5691 if (mode == XFmode)
5692 mode_alignment = 128;
5693 else if (mode == XCmode)
5694 mode_alignment = 256;
5695 if (COMPLEX_MODE_P (mode))
5696 mode_alignment /= 2;
5697 /* Misaligned fields are always returned in memory. */
5698 if (bit_offset % mode_alignment)
5699 return 0;
5702 /* For V1xx modes, just use the base mode. */
5703 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5704 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5705 mode = GET_MODE_INNER (mode);
5707 /* Classification of atomic types. */
5708 switch (mode)
5710 case SDmode:
5711 case DDmode:
5712 classes[0] = X86_64_SSE_CLASS;
5713 return 1;
5714 case TDmode:
5715 classes[0] = X86_64_SSE_CLASS;
5716 classes[1] = X86_64_SSEUP_CLASS;
5717 return 2;
5718 case DImode:
5719 case SImode:
5720 case HImode:
5721 case QImode:
5722 case CSImode:
5723 case CHImode:
5724 case CQImode:
5726 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5728 if (size <= 32)
5730 classes[0] = X86_64_INTEGERSI_CLASS;
5731 return 1;
5733 else if (size <= 64)
5735 classes[0] = X86_64_INTEGER_CLASS;
5736 return 1;
5738 else if (size <= 64+32)
5740 classes[0] = X86_64_INTEGER_CLASS;
5741 classes[1] = X86_64_INTEGERSI_CLASS;
5742 return 2;
5744 else if (size <= 64+64)
5746 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5747 return 2;
5749 else
5750 gcc_unreachable ();
5752 case CDImode:
5753 case TImode:
5754 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5755 return 2;
5756 case COImode:
5757 case OImode:
5758 /* OImode shouldn't be used directly. */
5759 gcc_unreachable ();
5760 case CTImode:
5761 return 0;
5762 case SFmode:
5763 if (!(bit_offset % 64))
5764 classes[0] = X86_64_SSESF_CLASS;
5765 else
5766 classes[0] = X86_64_SSE_CLASS;
5767 return 1;
5768 case DFmode:
5769 classes[0] = X86_64_SSEDF_CLASS;
5770 return 1;
5771 case XFmode:
5772 classes[0] = X86_64_X87_CLASS;
5773 classes[1] = X86_64_X87UP_CLASS;
5774 return 2;
5775 case TFmode:
5776 classes[0] = X86_64_SSE_CLASS;
5777 classes[1] = X86_64_SSEUP_CLASS;
5778 return 2;
5779 case SCmode:
5780 classes[0] = X86_64_SSE_CLASS;
5781 if (!(bit_offset % 64))
5782 return 1;
5783 else
5785 static bool warned;
5787 if (!warned && warn_psabi)
5789 warned = true;
5790 inform (input_location,
5791 "The ABI of passing structure with complex float"
5792 " member has changed in GCC 4.4");
5794 classes[1] = X86_64_SSESF_CLASS;
5795 return 2;
5797 case DCmode:
5798 classes[0] = X86_64_SSEDF_CLASS;
5799 classes[1] = X86_64_SSEDF_CLASS;
5800 return 2;
5801 case XCmode:
5802 classes[0] = X86_64_COMPLEX_X87_CLASS;
5803 return 1;
5804 case TCmode:
5805 /* This mode is larger than 16 bytes. */
5806 return 0;
5807 case V8SFmode:
5808 case V8SImode:
5809 case V32QImode:
5810 case V16HImode:
5811 case V4DFmode:
5812 case V4DImode:
5813 classes[0] = X86_64_SSE_CLASS;
5814 classes[1] = X86_64_SSEUP_CLASS;
5815 classes[2] = X86_64_SSEUP_CLASS;
5816 classes[3] = X86_64_SSEUP_CLASS;
5817 return 4;
5818 case V4SFmode:
5819 case V4SImode:
5820 case V16QImode:
5821 case V8HImode:
5822 case V2DFmode:
5823 case V2DImode:
5824 classes[0] = X86_64_SSE_CLASS;
5825 classes[1] = X86_64_SSEUP_CLASS;
5826 return 2;
5827 case V1TImode:
5828 case V1DImode:
5829 case V2SFmode:
5830 case V2SImode:
5831 case V4HImode:
5832 case V8QImode:
5833 classes[0] = X86_64_SSE_CLASS;
5834 return 1;
5835 case BLKmode:
5836 case VOIDmode:
5837 return 0;
5838 default:
5839 gcc_assert (VECTOR_MODE_P (mode));
5841 if (bytes > 16)
5842 return 0;
5844 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5846 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5847 classes[0] = X86_64_INTEGERSI_CLASS;
5848 else
5849 classes[0] = X86_64_INTEGER_CLASS;
5850 classes[1] = X86_64_INTEGER_CLASS;
5851 return 1 + (bytes > 8);
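/* Worked example (illustrative, hypothetical type): for
     struct s_sketch { long l; double d; };
   on x86-64 the routine above produces two eightbytes, classes[0] =
   INTEGER and classes[1] = SSEDF, so the struct is passed in one
   general-purpose register and one SSE register (e.g. %rdi and %xmm0
   when it is the first argument).  */
struct s_sketch { long l; double d; };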
5855 /* Examine the argument and set the number of registers required in each
5856 class. Return 0 iff the parameter should be passed in memory.
5857 static int
5858 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5859 int *int_nregs, int *sse_nregs)
5861 enum x86_64_reg_class regclass[MAX_CLASSES];
5862 int n = classify_argument (mode, type, regclass, 0);
5864 *int_nregs = 0;
5865 *sse_nregs = 0;
5866 if (!n)
5867 return 0;
5868 for (n--; n >= 0; n--)
5869 switch (regclass[n])
5871 case X86_64_INTEGER_CLASS:
5872 case X86_64_INTEGERSI_CLASS:
5873 (*int_nregs)++;
5874 break;
5875 case X86_64_SSE_CLASS:
5876 case X86_64_SSESF_CLASS:
5877 case X86_64_SSEDF_CLASS:
5878 (*sse_nregs)++;
5879 break;
5880 case X86_64_NO_CLASS:
5881 case X86_64_SSEUP_CLASS:
5882 break;
5883 case X86_64_X87_CLASS:
5884 case X86_64_X87UP_CLASS:
5885 if (!in_return)
5886 return 0;
5887 break;
5888 case X86_64_COMPLEX_X87_CLASS:
5889 return in_return ? 2 : 0;
5890 case X86_64_MEMORY_CLASS:
5891 gcc_unreachable ();
5893 return 1;
5896 /* Construct container for the argument used by GCC interface. See
5897 FUNCTION_ARG for the detailed description. */
5899 static rtx
5900 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5901 const_tree type, int in_return, int nintregs, int nsseregs,
5902 const int *intreg, int sse_regno)
5904 /* The following variables hold the static issued_error state. */
5905 static bool issued_sse_arg_error;
5906 static bool issued_sse_ret_error;
5907 static bool issued_x87_ret_error;
5909 enum machine_mode tmpmode;
5910 int bytes =
5911 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5912 enum x86_64_reg_class regclass[MAX_CLASSES];
5913 int n;
5914 int i;
5915 int nexps = 0;
5916 int needed_sseregs, needed_intregs;
5917 rtx exp[MAX_CLASSES];
5918 rtx ret;
5920 n = classify_argument (mode, type, regclass, 0);
5921 if (!n)
5922 return NULL;
5923 if (!examine_argument (mode, type, in_return, &needed_intregs,
5924 &needed_sseregs))
5925 return NULL;
5926 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5927 return NULL;
5929 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5930 some less clueful developer tries to use floating-point anyway. */
5931 if (needed_sseregs && !TARGET_SSE)
5933 if (in_return)
5935 if (!issued_sse_ret_error)
5937 error ("SSE register return with SSE disabled");
5938 issued_sse_ret_error = true;
5941 else if (!issued_sse_arg_error)
5943 error ("SSE register argument with SSE disabled");
5944 issued_sse_arg_error = true;
5946 return NULL;
5949 /* Likewise, error if the ABI requires us to return values in the
5950 x87 registers and the user specified -mno-80387. */
5951 if (!TARGET_80387 && in_return)
5952 for (i = 0; i < n; i++)
5953 if (regclass[i] == X86_64_X87_CLASS
5954 || regclass[i] == X86_64_X87UP_CLASS
5955 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5957 if (!issued_x87_ret_error)
5959 error ("x87 register return with x87 disabled");
5960 issued_x87_ret_error = true;
5962 return NULL;
5965 /* First construct simple cases. Avoid SCmode, since we want to use
5966 single register to pass this type. */
5967 if (n == 1 && mode != SCmode)
5968 switch (regclass[0])
5970 case X86_64_INTEGER_CLASS:
5971 case X86_64_INTEGERSI_CLASS:
5972 return gen_rtx_REG (mode, intreg[0]);
5973 case X86_64_SSE_CLASS:
5974 case X86_64_SSESF_CLASS:
5975 case X86_64_SSEDF_CLASS:
5976 if (mode != BLKmode)
5977 return gen_reg_or_parallel (mode, orig_mode,
5978 SSE_REGNO (sse_regno));
5979 break;
5980 case X86_64_X87_CLASS:
5981 case X86_64_COMPLEX_X87_CLASS:
5982 return gen_rtx_REG (mode, FIRST_STACK_REG);
5983 case X86_64_NO_CLASS:
5984 /* Zero sized array, struct or class. */
5985 return NULL;
5986 default:
5987 gcc_unreachable ();
5989 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5990 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5991 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5992 if (n == 4
5993 && regclass[0] == X86_64_SSE_CLASS
5994 && regclass[1] == X86_64_SSEUP_CLASS
5995 && regclass[2] == X86_64_SSEUP_CLASS
5996 && regclass[3] == X86_64_SSEUP_CLASS
5997 && mode != BLKmode)
5998 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6000 if (n == 2
6001 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6002 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6003 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6004 && regclass[1] == X86_64_INTEGER_CLASS
6005 && (mode == CDImode || mode == TImode || mode == TFmode)
6006 && intreg[0] + 1 == intreg[1])
6007 return gen_rtx_REG (mode, intreg[0]);
6009 /* Otherwise figure out the entries of the PARALLEL. */
6010 for (i = 0; i < n; i++)
6012 int pos;
6014 switch (regclass[i])
6016 case X86_64_NO_CLASS:
6017 break;
6018 case X86_64_INTEGER_CLASS:
6019 case X86_64_INTEGERSI_CLASS:
6020 /* Merge TImodes on aligned occasions here too. */
6021 if (i * 8 + 8 > bytes)
6022 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6023 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6024 tmpmode = SImode;
6025 else
6026 tmpmode = DImode;
6027 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
6028 if (tmpmode == BLKmode)
6029 tmpmode = DImode;
6030 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6031 gen_rtx_REG (tmpmode, *intreg),
6032 GEN_INT (i*8));
6033 intreg++;
6034 break;
6035 case X86_64_SSESF_CLASS:
6036 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6037 gen_rtx_REG (SFmode,
6038 SSE_REGNO (sse_regno)),
6039 GEN_INT (i*8));
6040 sse_regno++;
6041 break;
6042 case X86_64_SSEDF_CLASS:
6043 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6044 gen_rtx_REG (DFmode,
6045 SSE_REGNO (sse_regno)),
6046 GEN_INT (i*8));
6047 sse_regno++;
6048 break;
6049 case X86_64_SSE_CLASS:
6050 pos = i;
6051 switch (n)
6053 case 1:
6054 tmpmode = DImode;
6055 break;
6056 case 2:
6057 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6059 tmpmode = TImode;
6060 i++;
6062 else
6063 tmpmode = DImode;
6064 break;
6065 case 4:
6066 gcc_assert (i == 0
6067 && regclass[1] == X86_64_SSEUP_CLASS
6068 && regclass[2] == X86_64_SSEUP_CLASS
6069 && regclass[3] == X86_64_SSEUP_CLASS);
6070 tmpmode = OImode;
6071 i += 3;
6072 break;
6073 default:
6074 gcc_unreachable ();
6076 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6077 gen_rtx_REG (tmpmode,
6078 SSE_REGNO (sse_regno)),
6079 GEN_INT (pos*8));
6080 sse_regno++;
6081 break;
6082 default:
6083 gcc_unreachable ();
6087 /* Empty aligned struct, union or class. */
6088 if (nexps == 0)
6089 return NULL;
6091 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6092 for (i = 0; i < nexps; i++)
6093 XVECEXP (ret, 0, i) = exp [i];
6094 return ret;
6097 /* Update the data in CUM to advance over an argument of mode MODE
6098 and data type TYPE. (TYPE is null for libcalls where that information
6099 may not be available.) */
6101 static void
6102 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6103 const_tree type, HOST_WIDE_INT bytes,
6104 HOST_WIDE_INT words)
6106 switch (mode)
6108 default:
6109 break;
6111 case BLKmode:
6112 if (bytes < 0)
6113 break;
6114 /* FALLTHRU */
6116 case DImode:
6117 case SImode:
6118 case HImode:
6119 case QImode:
6120 cum->words += words;
6121 cum->nregs -= words;
6122 cum->regno += words;
6124 if (cum->nregs <= 0)
6126 cum->nregs = 0;
6127 cum->regno = 0;
6129 break;
6131 case OImode:
6132 /* OImode shouldn't be used directly. */
6133 gcc_unreachable ();
6135 case DFmode:
6136 if (cum->float_in_sse < 2)
6137 break;
6138 case SFmode:
6139 if (cum->float_in_sse < 1)
6140 break;
6141 /* FALLTHRU */
6143 case V8SFmode:
6144 case V8SImode:
6145 case V32QImode:
6146 case V16HImode:
6147 case V4DFmode:
6148 case V4DImode:
6149 case TImode:
6150 case V16QImode:
6151 case V8HImode:
6152 case V4SImode:
6153 case V2DImode:
6154 case V4SFmode:
6155 case V2DFmode:
6156 if (!type || !AGGREGATE_TYPE_P (type))
6158 cum->sse_words += words;
6159 cum->sse_nregs -= 1;
6160 cum->sse_regno += 1;
6161 if (cum->sse_nregs <= 0)
6163 cum->sse_nregs = 0;
6164 cum->sse_regno = 0;
6167 break;
6169 case V8QImode:
6170 case V4HImode:
6171 case V2SImode:
6172 case V2SFmode:
6173 case V1TImode:
6174 case V1DImode:
6175 if (!type || !AGGREGATE_TYPE_P (type))
6177 cum->mmx_words += words;
6178 cum->mmx_nregs -= 1;
6179 cum->mmx_regno += 1;
6180 if (cum->mmx_nregs <= 0)
6182 cum->mmx_nregs = 0;
6183 cum->mmx_regno = 0;
6186 break;
6190 static void
6191 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6192 const_tree type, HOST_WIDE_INT words, bool named)
6194 int int_nregs, sse_nregs;
6196 /* Unnamed 256bit vector mode parameters are passed on stack. */
6197 if (!named && VALID_AVX256_REG_MODE (mode))
6198 return;
6200 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6201 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6203 cum->nregs -= int_nregs;
6204 cum->sse_nregs -= sse_nregs;
6205 cum->regno += int_nregs;
6206 cum->sse_regno += sse_nregs;
6208 else
6210 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6211 cum->words = (cum->words + align - 1) & ~(align - 1);
6212 cum->words += words;
6216 static void
6217 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6218 HOST_WIDE_INT words)
6220 /* Otherwise, this should be passed indirect. */
6221 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6223 cum->words += words;
6224 if (cum->nregs > 0)
6226 cum->nregs -= 1;
6227 cum->regno += 1;
6231 /* Update the data in CUM to advance over an argument of mode MODE and
6232 data type TYPE. (TYPE is null for libcalls where that information
6233 may not be available.) */
6235 static void
6236 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6237 const_tree type, bool named)
6239 HOST_WIDE_INT bytes, words;
6241 if (mode == BLKmode)
6242 bytes = int_size_in_bytes (type);
6243 else
6244 bytes = GET_MODE_SIZE (mode);
6245 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6247 if (type)
6248 mode = type_natural_mode (type, NULL);
6250 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6251 function_arg_advance_ms_64 (cum, bytes, words);
6252 else if (TARGET_64BIT)
6253 function_arg_advance_64 (cum, mode, type, words, named);
6254 else
6255 function_arg_advance_32 (cum, mode, type, bytes, words);
6258 /* Define where to put the arguments to a function.
6259 Value is zero to push the argument on the stack,
6260 or a hard register in which to store the argument.
6262 MODE is the argument's machine mode.
6263 TYPE is the data type of the argument (as a tree).
6264 This is null for libcalls where that information may
6265 not be available.
6266 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6267 the preceding args and about the function being called.
6268 NAMED is nonzero if this argument is a named parameter
6269 (otherwise it is an extra parameter matching an ellipsis). */
6271 static rtx
6272 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6273 enum machine_mode orig_mode, const_tree type,
6274 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6276 static bool warnedsse, warnedmmx;
6278 /* Avoid the AL settings for the Unix64 ABI. */
6279 if (mode == VOIDmode)
6280 return constm1_rtx;
6282 switch (mode)
6284 default:
6285 break;
6287 case BLKmode:
6288 if (bytes < 0)
6289 break;
6290 /* FALLTHRU */
6291 case DImode:
6292 case SImode:
6293 case HImode:
6294 case QImode:
6295 if (words <= cum->nregs)
6297 int regno = cum->regno;
6299 /* Fastcall allocates the first two DWORD (SImode) or
6300 smaller arguments to ECX and EDX if it isn't an
6301 aggregate type. */
6302 if (cum->fastcall)
6304 if (mode == BLKmode
6305 || mode == DImode
6306 || (type && AGGREGATE_TYPE_P (type)))
6307 break;
6309 /* ECX, not EAX, is the first allocated register. */
6310 if (regno == AX_REG)
6311 regno = CX_REG;
6313 return gen_rtx_REG (mode, regno);
6315 break;
6317 case DFmode:
6318 if (cum->float_in_sse < 2)
6319 break;
6320 case SFmode:
6321 if (cum->float_in_sse < 1)
6322 break;
6323 /* FALLTHRU */
6324 case TImode:
6325 /* In 32bit, we pass TImode in xmm registers. */
6326 case V16QImode:
6327 case V8HImode:
6328 case V4SImode:
6329 case V2DImode:
6330 case V4SFmode:
6331 case V2DFmode:
6332 if (!type || !AGGREGATE_TYPE_P (type))
6334 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6336 warnedsse = true;
6337 warning (0, "SSE vector argument without SSE enabled "
6338 "changes the ABI");
6340 if (cum->sse_nregs)
6341 return gen_reg_or_parallel (mode, orig_mode,
6342 cum->sse_regno + FIRST_SSE_REG);
6344 break;
6346 case OImode:
6347 /* OImode shouldn't be used directly. */
6348 gcc_unreachable ();
6350 case V8SFmode:
6351 case V8SImode:
6352 case V32QImode:
6353 case V16HImode:
6354 case V4DFmode:
6355 case V4DImode:
6356 if (!type || !AGGREGATE_TYPE_P (type))
6358 if (cum->sse_nregs)
6359 return gen_reg_or_parallel (mode, orig_mode,
6360 cum->sse_regno + FIRST_SSE_REG);
6362 break;
6364 case V8QImode:
6365 case V4HImode:
6366 case V2SImode:
6367 case V2SFmode:
6368 case V1TImode:
6369 case V1DImode:
6370 if (!type || !AGGREGATE_TYPE_P (type))
6372 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6374 warnedmmx = true;
6375 warning (0, "MMX vector argument without MMX enabled "
6376 "changes the ABI");
6378 if (cum->mmx_nregs)
6379 return gen_reg_or_parallel (mode, orig_mode,
6380 cum->mmx_regno + FIRST_MMX_REG);
6382 break;
6385 return NULL_RTX;
6388 static rtx
6389 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6390 enum machine_mode orig_mode, const_tree type, bool named)
6392 /* Handle a hidden AL argument containing number of registers
6393 for varargs x86-64 functions. */
6394 if (mode == VOIDmode)
6395 return GEN_INT (cum->maybe_vaarg
6396 ? (cum->sse_nregs < 0
6397 ? X86_64_SSE_REGPARM_MAX
6398 : cum->sse_regno)
6399 : -1);
6401 switch (mode)
6403 default:
6404 break;
6406 case V8SFmode:
6407 case V8SImode:
6408 case V32QImode:
6409 case V16HImode:
6410 case V4DFmode:
6411 case V4DImode:
6412 /* Unnamed 256bit vector mode parameters are passed on stack. */
6413 if (!named)
6414 return NULL;
6415 break;
6418 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6419 cum->sse_nregs,
6420 &x86_64_int_parameter_registers [cum->regno],
6421 cum->sse_regno);
6424 static rtx
6425 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6426 enum machine_mode orig_mode, bool named,
6427 HOST_WIDE_INT bytes)
6429 unsigned int regno;
6431 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6432 We use the value -2 to specify that the current function call is MSABI. */
6433 if (mode == VOIDmode)
6434 return GEN_INT (-2);
6436 /* If we've run out of registers, it goes on the stack. */
6437 if (cum->nregs == 0)
6438 return NULL_RTX;
6440 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6442 /* Only floating point modes are passed in anything but integer regs. */
6443 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6445 if (named)
6446 regno = cum->regno + FIRST_SSE_REG;
6447 else
6449 rtx t1, t2;
6451 /* Unnamed floating parameters are passed in both the
6452 SSE and integer registers. */
6453 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6454 t2 = gen_rtx_REG (mode, regno);
6455 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6456 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6457 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6460 /* Handle aggregated types passed in register. */
6461 if (orig_mode == BLKmode)
6463 if (bytes > 0 && bytes <= 8)
6464 mode = (bytes > 4 ? DImode : SImode);
6465 if (mode == BLKmode)
6466 mode = DImode;
6469 return gen_reg_or_parallel (mode, orig_mode, regno);
6472 /* Return where to put the arguments to a function.
6473 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6475 MODE is the argument's machine mode. TYPE is the data type of the
6476 argument. It is null for libcalls where that information may not be
6477 available. CUM gives information about the preceding args and about
6478 the function being called. NAMED is nonzero if this argument is a
6479 named parameter (otherwise it is an extra parameter matching an
6480 ellipsis). */
6482 static rtx
6483 ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
6484 const_tree type, bool named)
6486 enum machine_mode mode = omode;
6487 HOST_WIDE_INT bytes, words;
6489 if (mode == BLKmode)
6490 bytes = int_size_in_bytes (type);
6491 else
6492 bytes = GET_MODE_SIZE (mode);
6493 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6495 /* To simplify the code below, represent vector types with a vector mode
6496 even if MMX/SSE are not active. */
6497 if (type && TREE_CODE (type) == VECTOR_TYPE)
6498 mode = type_natural_mode (type, cum);
6500 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6501 return function_arg_ms_64 (cum, mode, omode, named, bytes);
6502 else if (TARGET_64BIT)
6503 return function_arg_64 (cum, mode, omode, type, named);
6504 else
6505 return function_arg_32 (cum, mode, omode, type, bytes, words);
6508 /* A C expression that indicates when an argument must be passed by
6509 reference. If nonzero for an argument, a copy of that argument is
6510 made in memory and a pointer to the argument is passed instead of
6511 the argument itself. The pointer is passed in whatever way is
6512 appropriate for passing a pointer to that type. */
6514 static bool
6515 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6516 enum machine_mode mode ATTRIBUTE_UNUSED,
6517 const_tree type, bool named ATTRIBUTE_UNUSED)
6519 /* See Windows x64 Software Convention. */
6520 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6522 int msize = (int) GET_MODE_SIZE (mode);
6523 if (type)
6525 /* Arrays are passed by reference. */
6526 if (TREE_CODE (type) == ARRAY_TYPE)
6527 return true;
6529 if (AGGREGATE_TYPE_P (type))
6531 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6532 are passed by reference. */
6533 msize = int_size_in_bytes (type);
6537 /* __m128 is passed by reference. */
6538 switch (msize) {
6539 case 1: case 2: case 4: case 8:
6540 break;
6541 default:
6542 return true;
6545 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6546 return 1;
6548 return 0;
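/* Illustrative sketch (hypothetical types) of the Windows x64 rule
   applied above: aggregates of 1, 2, 4 or 8 bytes travel directly in a
   register, any other size is passed by reference.  */
struct by_value_sketch { long long x; };	/* 8 bytes: passed directly.  */
struct by_ref_sketch   { char c[12]; };		/* 12 bytes: pointer passed.  */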
6551 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
6552 ABI. */
6553 static bool
6554 contains_aligned_value_p (const_tree type)
6556 enum machine_mode mode = TYPE_MODE (type);
6557 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6558 || mode == TDmode
6559 || mode == TFmode
6560 || mode == TCmode)
6561 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6562 return true;
6563 if (TYPE_ALIGN (type) < 128)
6564 return false;
6566 if (AGGREGATE_TYPE_P (type))
6568 /* Walk the aggregates recursively. */
6569 switch (TREE_CODE (type))
6571 case RECORD_TYPE:
6572 case UNION_TYPE:
6573 case QUAL_UNION_TYPE:
6575 tree field;
6577 /* Walk all the structure fields. */
6578 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6580 if (TREE_CODE (field) == FIELD_DECL
6581 && contains_aligned_value_p (TREE_TYPE (field)))
6582 return true;
6584 break;
6587 case ARRAY_TYPE:
6588 /* Just for use if some languages pass arrays by value. */
6589 if (contains_aligned_value_p (TREE_TYPE (type)))
6590 return true;
6591 break;
6593 default:
6594 gcc_unreachable ();
6597 return false;
6600 /* Gives the alignment boundary, in bits, of an argument with the
6601 specified mode and type. */
6604 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
6606 int align;
6607 if (type)
6609 /* Since the main variant type is used for call, we convert it to
6610 the main variant type. */
6611 type = TYPE_MAIN_VARIANT (type);
6612 align = TYPE_ALIGN (type);
6614 else
6615 align = GET_MODE_ALIGNMENT (mode);
6616 if (align < PARM_BOUNDARY)
6617 align = PARM_BOUNDARY;
6618 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6619 natural boundaries. */
6620 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6622 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6623 make an exception for SSE modes since these require 128bit
6624 alignment.
6626 The handling here differs from field_alignment. ICC aligns MMX
6627 arguments to 4 byte boundaries, while structure fields are aligned
6628 to 8 byte boundaries. */
6629 if (!type)
6631 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6632 align = PARM_BOUNDARY;
6634 else
6636 if (!contains_aligned_value_p (type))
6637 align = PARM_BOUNDARY;
6640 if (align > BIGGEST_ALIGNMENT)
6641 align = BIGGEST_ALIGNMENT;
6642 return align;
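/* Illustrative sketch (hypothetical names): with -m32, plain scalar
   arguments keep the 4-byte PARM_BOUNDARY, while an SSE vector argument
   such as V below is given 128-bit alignment by the code above.  */
typedef float v4sf_sketch __attribute__ ((vector_size (16)));
void takes_vec_sketch (int a, v4sf_sketch v);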
6645 /* Return true if REGNO is a possible register number for a function value. */
6647 static bool
6648 ix86_function_value_regno_p (const unsigned int regno)
6650 switch (regno)
6652 case 0:
6653 return true;
6655 case FIRST_FLOAT_REG:
6656 /* TODO: The function should depend on current function ABI but
6657 builtins.c would need updating then. Therefore we use the
6658 default ABI. */
6659 if (TARGET_64BIT && ix86_abi == MS_ABI)
6660 return false;
6661 return TARGET_FLOAT_RETURNS_IN_80387;
6663 case FIRST_SSE_REG:
6664 return TARGET_SSE;
6666 case FIRST_MMX_REG:
6667 if (TARGET_MACHO || TARGET_64BIT)
6668 return false;
6669 return TARGET_MMX;
6672 return false;
6675 /* Define how to find the value returned by a function.
6676 VALTYPE is the data type of the value (as a tree).
6677 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6678 otherwise, FUNC is 0. */
6680 static rtx
6681 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6682 const_tree fntype, const_tree fn)
6684 unsigned int regno;
6686 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6687 we normally prevent this case when mmx is not available. However
6688 some ABIs may require the result to be returned like DImode. */
6689 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6690 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6692 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6693 we prevent this case when sse is not available. However some ABIs
6694 may require the result to be returned like integer TImode. */
6695 else if (mode == TImode
6696 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6697 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6699 /* 32-byte vector modes in %ymm0. */
6700 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6701 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6703 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6704 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6705 regno = FIRST_FLOAT_REG;
6706 else
6707 /* Most things go in %eax. */
6708 regno = AX_REG;
6710 /* Override FP return register with %xmm0 for local functions when
6711 SSE math is enabled or for functions with sseregparm attribute. */
6712 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6714 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6715 if ((sse_level >= 1 && mode == SFmode)
6716 || (sse_level == 2 && mode == DFmode))
6717 regno = FIRST_SSE_REG;
6720 /* OImode shouldn't be used directly. */
6721 gcc_assert (mode != OImode);
6723 return gen_rtx_REG (orig_mode, regno);
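/* Illustrative sketch (hypothetical function): with -m32 a float return
   value normally comes back in %st(0); for a local function built with
   -mfpmath=sse, or one marked sseregparm, the override above places it
   in %xmm0 instead.  */
static float ret_float_sketch (void)
{
  return 1.5f;
}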
6726 static rtx
6727 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6728 const_tree valtype)
6730 rtx ret;
6732 /* Handle libcalls, which don't provide a type node. */
6733 if (valtype == NULL)
6735 switch (mode)
6737 case SFmode:
6738 case SCmode:
6739 case DFmode:
6740 case DCmode:
6741 case TFmode:
6742 case SDmode:
6743 case DDmode:
6744 case TDmode:
6745 return gen_rtx_REG (mode, FIRST_SSE_REG);
6746 case XFmode:
6747 case XCmode:
6748 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6749 case TCmode:
6750 return NULL;
6751 default:
6752 return gen_rtx_REG (mode, AX_REG);
6756 ret = construct_container (mode, orig_mode, valtype, 1,
6757 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6758 x86_64_int_return_registers, 0);
6760 /* For zero-sized structures, construct_container returns NULL, but we
6761 need to keep the rest of the compiler happy by returning a meaningful value. */
6762 if (!ret)
6763 ret = gen_rtx_REG (orig_mode, AX_REG);
6765 return ret;
6768 static rtx
6769 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6771 unsigned int regno = AX_REG;
6773 if (TARGET_SSE)
6775 switch (GET_MODE_SIZE (mode))
6777 case 16:
6778 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6779 && !COMPLEX_MODE_P (mode))
6780 regno = FIRST_SSE_REG;
6781 break;
6782 case 8:
6783 case 4:
6784 if (mode == SFmode || mode == DFmode)
6785 regno = FIRST_SSE_REG;
6786 break;
6787 default:
6788 break;
6791 return gen_rtx_REG (orig_mode, regno);
6794 static rtx
6795 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6796 enum machine_mode orig_mode, enum machine_mode mode)
6798 const_tree fn, fntype;
6800 fn = NULL_TREE;
6801 if (fntype_or_decl && DECL_P (fntype_or_decl))
6802 fn = fntype_or_decl;
6803 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6805 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6806 return function_value_ms_64 (orig_mode, mode);
6807 else if (TARGET_64BIT)
6808 return function_value_64 (orig_mode, mode, valtype);
6809 else
6810 return function_value_32 (orig_mode, mode, fntype, fn);
6813 static rtx
6814 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6815 bool outgoing ATTRIBUTE_UNUSED)
6817 enum machine_mode mode, orig_mode;
6819 orig_mode = TYPE_MODE (valtype);
6820 mode = type_natural_mode (valtype, NULL);
6821 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6825 ix86_libcall_value (enum machine_mode mode)
6827 return ix86_function_value_1 (NULL, NULL, mode, mode);
6830 /* Return true iff type is returned in memory. */
6832 static bool ATTRIBUTE_UNUSED
6833 return_in_memory_32 (const_tree type, enum machine_mode mode)
6835 HOST_WIDE_INT size;
6837 if (mode == BLKmode)
6838 return true;
6840 size = int_size_in_bytes (type);
6842 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6843 return false;
6845 if (VECTOR_MODE_P (mode) || mode == TImode)
6847 /* User-created vectors small enough to fit in EAX. */
6848 if (size < 8)
6849 return false;
6851 /* MMX/3dNow values are returned in MM0,
6852 except when it doesn't exist. */
6853 if (size == 8)
6854 return !TARGET_MMX;
6856 /* SSE values are returned in XMM0, except when it doesn't exist. */
6857 if (size == 16)
6858 return !TARGET_SSE;
6860 /* AVX values are returned in YMM0, except when it doesn't exist. */
6861 if (size == 32)
6862 return !TARGET_AVX;
6865 if (mode == XFmode)
6866 return false;
6868 if (size > 12)
6869 return true;
6871 /* OImode shouldn't be used directly. */
6872 gcc_assert (mode != OImode);
6874 return false;
6877 static bool ATTRIBUTE_UNUSED
6878 return_in_memory_64 (const_tree type, enum machine_mode mode)
6880 int needed_intregs, needed_sseregs;
6881 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6884 static bool ATTRIBUTE_UNUSED
6885 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6887 HOST_WIDE_INT size = int_size_in_bytes (type);
6889 /* __m128 is returned in xmm0. */
6890 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6891 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6892 return false;
6894 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
6895 return size != 1 && size != 2 && size != 4 && size != 8;
6898 static bool
6899 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6901 #ifdef SUBTARGET_RETURN_IN_MEMORY
6902 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6903 #else
6904 const enum machine_mode mode = type_natural_mode (type, NULL);
6906 if (TARGET_64BIT)
6908 if (ix86_function_type_abi (fntype) == MS_ABI)
6909 return return_in_memory_ms_64 (type, mode);
6910 else
6911 return return_in_memory_64 (type, mode);
6913 else
6914 return return_in_memory_32 (type, mode);
6915 #endif
6918 /* Return false iff TYPE is returned in memory. This version is used
6919 on Solaris 2. It is similar to the generic ix86_return_in_memory,
6920 but differs notably in that when MMX is available, 8-byte vectors
6921 are returned in memory, rather than in MMX registers. */
6923 bool
6924 ix86_solaris_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6926 int size;
6927 enum machine_mode mode = type_natural_mode (type, NULL);
6929 if (TARGET_64BIT)
6930 return return_in_memory_64 (type, mode);
6932 if (mode == BLKmode)
6933 return 1;
6935 size = int_size_in_bytes (type);
6937 if (VECTOR_MODE_P (mode))
6939 /* Return in memory only if MMX registers *are* available. This
6940 seems backwards, but it is consistent with the existing
6941 Solaris x86 ABI. */
6942 if (size == 8)
6943 return TARGET_MMX;
6944 if (size == 16)
6945 return !TARGET_SSE;
6947 else if (mode == TImode)
6948 return !TARGET_SSE;
6949 else if (mode == XFmode)
6950 return 0;
6952 return size > 12;
6955 /* When returning SSE vector types, we have a choice of either
6956 (1) being abi incompatible with a -march switch, or
6957 (2) generating an error.
6958 Given no good solution, I think the safest thing is one warning.
6959 The user won't be able to use -Werror, but....
6961 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6962 called in response to actually generating a caller or callee that
6963 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6964 via aggregate_value_p for general type probing from tree-ssa. */
6966 static rtx
6967 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6969 static bool warnedsse, warnedmmx;
6971 if (!TARGET_64BIT && type)
6973 /* Look at the return type of the function, not the function type. */
6974 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6976 if (!TARGET_SSE && !warnedsse)
6978 if (mode == TImode
6979 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6981 warnedsse = true;
6982 warning (0, "SSE vector return without SSE enabled "
6983 "changes the ABI");
6987 if (!TARGET_MMX && !warnedmmx)
6989 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6991 warnedmmx = true;
6992 warning (0, "MMX vector return without MMX enabled "
6993 "changes the ABI");
6998 return NULL;
7002 /* Create the va_list data type. */
7004 /* Returns the calling-convention-specific va_list data type.
7005 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7007 static tree
7008 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7010 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7012 /* For i386 we use plain pointer to argument area. */
7013 if (!TARGET_64BIT || abi == MS_ABI)
7014 return build_pointer_type (char_type_node);
7016 record = lang_hooks.types.make_type (RECORD_TYPE);
7017 type_decl = build_decl (BUILTINS_LOCATION,
7018 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7020 f_gpr = build_decl (BUILTINS_LOCATION,
7021 FIELD_DECL, get_identifier ("gp_offset"),
7022 unsigned_type_node);
7023 f_fpr = build_decl (BUILTINS_LOCATION,
7024 FIELD_DECL, get_identifier ("fp_offset"),
7025 unsigned_type_node);
7026 f_ovf = build_decl (BUILTINS_LOCATION,
7027 FIELD_DECL, get_identifier ("overflow_arg_area"),
7028 ptr_type_node);
7029 f_sav = build_decl (BUILTINS_LOCATION,
7030 FIELD_DECL, get_identifier ("reg_save_area"),
7031 ptr_type_node);
7033 va_list_gpr_counter_field = f_gpr;
7034 va_list_fpr_counter_field = f_fpr;
7036 DECL_FIELD_CONTEXT (f_gpr) = record;
7037 DECL_FIELD_CONTEXT (f_fpr) = record;
7038 DECL_FIELD_CONTEXT (f_ovf) = record;
7039 DECL_FIELD_CONTEXT (f_sav) = record;
7041 TREE_CHAIN (record) = type_decl;
7042 TYPE_NAME (record) = type_decl;
7043 TYPE_FIELDS (record) = f_gpr;
7044 DECL_CHAIN (f_gpr) = f_fpr;
7045 DECL_CHAIN (f_fpr) = f_ovf;
7046 DECL_CHAIN (f_ovf) = f_sav;
7048 layout_type (record);
7050 /* The correct type is an array type of one element. */
7051 return build_array_type (record, build_index_type (size_zero_node));
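/* Illustrative C-level picture (hypothetical names) of the record built
   above, matching the x86-64 psABI va_list layout.  */
typedef struct va_list_tag_sketch
{
  unsigned int gp_offset;	/* Offset into reg_save_area for GPR args.  */
  unsigned int fp_offset;	/* Offset into reg_save_area for SSE args.  */
  void *overflow_arg_area;	/* Arguments passed on the stack.  */
  void *reg_save_area;		/* Save area: GPR slots, then SSE slots.  */
} va_list_sketch[1];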
7054 /* Set up the builtin va_list data type and, for 64-bit, the additional
7055 calling-convention-specific va_list data types.
7057 static tree
7058 ix86_build_builtin_va_list (void)
7060 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7062 /* Initialize abi specific va_list builtin types. */
7063 if (TARGET_64BIT)
7065 tree t;
7066 if (ix86_abi == MS_ABI)
7068 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7069 if (TREE_CODE (t) != RECORD_TYPE)
7070 t = build_variant_type_copy (t);
7071 sysv_va_list_type_node = t;
7073 else
7075 t = ret;
7076 if (TREE_CODE (t) != RECORD_TYPE)
7077 t = build_variant_type_copy (t);
7078 sysv_va_list_type_node = t;
7080 if (ix86_abi != MS_ABI)
7082 t = ix86_build_builtin_va_list_abi (MS_ABI);
7083 if (TREE_CODE (t) != RECORD_TYPE)
7084 t = build_variant_type_copy (t);
7085 ms_va_list_type_node = t;
7087 else
7089 t = ret;
7090 if (TREE_CODE (t) != RECORD_TYPE)
7091 t = build_variant_type_copy (t);
7092 ms_va_list_type_node = t;
7096 return ret;
7099 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7101 static void
7102 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7104 rtx save_area, mem;
7105 alias_set_type set;
7106 int i, max;
7108 /* GPR size of varargs save area. */
7109 if (cfun->va_list_gpr_size)
7110 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7111 else
7112 ix86_varargs_gpr_size = 0;
7114 /* FPR size of varargs save area. We don't need it if we don't pass
7115 anything in SSE registers. */
7116 if (TARGET_SSE && cfun->va_list_fpr_size)
7117 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7118 else
7119 ix86_varargs_fpr_size = 0;
7121 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7122 return;
7124 save_area = frame_pointer_rtx;
7125 set = get_varargs_alias_set ();
7127 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7128 if (max > X86_64_REGPARM_MAX)
7129 max = X86_64_REGPARM_MAX;
7131 for (i = cum->regno; i < max; i++)
7133 mem = gen_rtx_MEM (Pmode,
7134 plus_constant (save_area, i * UNITS_PER_WORD));
7135 MEM_NOTRAP_P (mem) = 1;
7136 set_mem_alias_set (mem, set);
7137 emit_move_insn (mem, gen_rtx_REG (Pmode,
7138 x86_64_int_parameter_registers[i]));
7141 if (ix86_varargs_fpr_size)
7143 enum machine_mode smode;
7144 rtx label, test;
7146 /* Now emit code to save SSE registers. The AX parameter contains the
7147 number of SSE parameter registers used to call this function, though all
7148 we actually check here is the zero/non-zero status. */
7150 label = gen_label_rtx ();
7151 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7152 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7153 label));
7155 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7156 we used movdqa (i.e. TImode) instead? Perhaps even better would
7157 be if we could determine the real mode of the data, via a hook
7158 into pass_stdarg. Ignore all that for now. */
7159 smode = V4SFmode;
7160 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7161 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7163 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7164 if (max > X86_64_SSE_REGPARM_MAX)
7165 max = X86_64_SSE_REGPARM_MAX;
7167 for (i = cum->sse_regno; i < max; ++i)
7169 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7170 mem = gen_rtx_MEM (smode, mem);
7171 MEM_NOTRAP_P (mem) = 1;
7172 set_mem_alias_set (mem, set);
7173 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7175 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
7178 emit_label (label);
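/* Illustrative layout of the register save area written above, relative to
   SAVE_AREA (frame_pointer_rtx here), assuming every parameter register is
   dumped (a sketch; X86_64_REGPARM_MAX is 6 and X86_64_SSE_REGPARM_MAX is 8):

       +0   .. +47    rdi, rsi, rdx, rcx, r8, r9   (UNITS_PER_WORD each)
       +48  .. +175   xmm0 .. xmm7                 (16 bytes each)

   The SSE block is skipped at run time when the caller passed AL == 0.  */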
7182 static void
7183 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7185 alias_set_type set = get_varargs_alias_set ();
7186 int i;
7188 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7190 rtx reg, mem;
7192 mem = gen_rtx_MEM (Pmode,
7193 plus_constant (virtual_incoming_args_rtx,
7194 i * UNITS_PER_WORD));
7195 MEM_NOTRAP_P (mem) = 1;
7196 set_mem_alias_set (mem, set);
7198 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7199 emit_move_insn (mem, reg);
7203 static void
7204 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7205 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7206 int no_rtl)
7208 CUMULATIVE_ARGS next_cum;
7209 tree fntype;
7211 /* This argument doesn't appear to be used anymore, which is good,
7212 because the old code here didn't suppress rtl generation. */
7213 gcc_assert (!no_rtl);
7215 if (!TARGET_64BIT)
7216 return;
7218 fntype = TREE_TYPE (current_function_decl);
7220 /* For varargs, we do not want to skip the dummy va_dcl argument.
7221 For stdargs, we do want to skip the last named argument. */
7222 next_cum = *cum;
7223 if (stdarg_p (fntype))
7224 ix86_function_arg_advance (&next_cum, mode, type, true);
7226 if (cum->call_abi == MS_ABI)
7227 setup_incoming_varargs_ms_64 (&next_cum);
7228 else
7229 setup_incoming_varargs_64 (&next_cum);
7232 /* Check whether TYPE is the char * kind of va_list. */
7234 static bool
7235 is_va_list_char_pointer (tree type)
7237 tree canonic;
7239 /* For 32-bit it is always true. */
7240 if (!TARGET_64BIT)
7241 return true;
7242 canonic = ix86_canonical_va_list_type (type);
7243 return (canonic == ms_va_list_type_node
7244 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7247 /* Implement va_start. */
7249 static void
7250 ix86_va_start (tree valist, rtx nextarg)
7252 HOST_WIDE_INT words, n_gpr, n_fpr;
7253 tree f_gpr, f_fpr, f_ovf, f_sav;
7254 tree gpr, fpr, ovf, sav, t;
7255 tree type;
7257 rtx ovf_rtx;
7259 if (flag_split_stack
7260 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7262 rtx reg, seq;
7263 unsigned int scratch_regno;
7265 /* When we are splitting the stack, we can't refer to the stack
7266 arguments using internal_arg_pointer, because they may be on
7267 the old stack. The split stack prologue will arrange to
7268 leave a pointer to the old stack arguments in a scratch
7269 register, which we here copy to a pseudo-register. The split
7270 stack prologue can't set the pseudo-register directly because
7271 it (the prologue) runs before any registers have been saved. */
7273 reg = gen_reg_rtx (Pmode);
7274 cfun->machine->split_stack_varargs_pointer = reg;
7275 scratch_regno = split_stack_prologue_scratch_regno ();
7276 start_sequence ();
7277 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7278 seq = get_insns ();
7279 end_sequence ();
7281 push_topmost_sequence ();
7282 emit_insn_after (seq, entry_of_function ());
7283 pop_topmost_sequence ();
7286 /* Only 64bit target needs something special. */
7287 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7289 if (!flag_split_stack)
7290 std_expand_builtin_va_start (valist, nextarg);
7291 else
7293 rtx va_r, next;
7295 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7296 next = expand_binop (ptr_mode, add_optab,
7297 cfun->machine->split_stack_varargs_pointer,
7298 crtl->args.arg_offset_rtx,
7299 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7300 convert_move (va_r, next, 0);
7302 return;
7305 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7306 f_fpr = DECL_CHAIN (f_gpr);
7307 f_ovf = DECL_CHAIN (f_fpr);
7308 f_sav = DECL_CHAIN (f_ovf);
7310 valist = build_simple_mem_ref (valist);
7311 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7312 /* The following should be folded into the MEM_REF offset. */
7313 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7314 f_gpr, NULL_TREE);
7315 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7316 f_fpr, NULL_TREE);
7317 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7318 f_ovf, NULL_TREE);
7319 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7320 f_sav, NULL_TREE);
7322 /* Count number of gp and fp argument registers used. */
7323 words = crtl->args.info.words;
7324 n_gpr = crtl->args.info.regno;
7325 n_fpr = crtl->args.info.sse_regno;
7327 if (cfun->va_list_gpr_size)
7329 type = TREE_TYPE (gpr);
7330 t = build2 (MODIFY_EXPR, type,
7331 gpr, build_int_cst (type, n_gpr * 8));
7332 TREE_SIDE_EFFECTS (t) = 1;
7333 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7336 if (TARGET_SSE && cfun->va_list_fpr_size)
7338 type = TREE_TYPE (fpr);
7339 t = build2 (MODIFY_EXPR, type, fpr,
7340 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7341 TREE_SIDE_EFFECTS (t) = 1;
7342 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7345 /* Find the overflow area. */
7346 type = TREE_TYPE (ovf);
7347 if (!flag_split_stack)
7348 ovf_rtx = crtl->args.internal_arg_pointer;
7349 else
7350 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7351 t = make_tree (type, ovf_rtx);
7352 if (words != 0)
7353 t = build2 (POINTER_PLUS_EXPR, type, t,
7354 size_int (words * UNITS_PER_WORD));
7355 t = build2 (MODIFY_EXPR, type, ovf, t);
7356 TREE_SIDE_EFFECTS (t) = 1;
7357 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7359 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7361 /* Find the register save area.
7362 The function prologue saves it right above the stack frame. */
7363 type = TREE_TYPE (sav);
7364 t = make_tree (type, frame_pointer_rtx);
7365 if (!ix86_varargs_gpr_size)
7366 t = build2 (POINTER_PLUS_EXPR, type, t,
7367 size_int (-8 * X86_64_REGPARM_MAX));
7368 t = build2 (MODIFY_EXPR, type, sav, t);
7369 TREE_SIDE_EFFECTS (t) = 1;
7370 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
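/* Net effect of the expansion above for the SysV case, written as the
   equivalent C that va_start would perform (a sketch only; WORDS is the
   number of named argument words already passed on the stack):

       ap->gp_offset = n_gpr * 8;
       ap->fp_offset = 8 * X86_64_REGPARM_MAX + n_fpr * 16;
       ap->overflow_arg_area = arg_pointer + WORDS * UNITS_PER_WORD;
       ap->reg_save_area = <register save area set up by the prologue>;
*/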
7374 /* Implement va_arg. */
7376 static tree
7377 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7378 gimple_seq *post_p)
7380 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7381 tree f_gpr, f_fpr, f_ovf, f_sav;
7382 tree gpr, fpr, ovf, sav, t;
7383 int size, rsize;
7384 tree lab_false, lab_over = NULL_TREE;
7385 tree addr, t2;
7386 rtx container;
7387 int indirect_p = 0;
7388 tree ptrtype;
7389 enum machine_mode nat_mode;
7390 unsigned int arg_boundary;
7392 /* Only 64bit target needs something special. */
7393 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7394 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7396 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7397 f_fpr = DECL_CHAIN (f_gpr);
7398 f_ovf = DECL_CHAIN (f_fpr);
7399 f_sav = DECL_CHAIN (f_ovf);
7401 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7402 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7403 valist = build_va_arg_indirect_ref (valist);
7404 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7405 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7406 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7408 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7409 if (indirect_p)
7410 type = build_pointer_type (type);
7411 size = int_size_in_bytes (type);
7412 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7414 nat_mode = type_natural_mode (type, NULL);
7415 switch (nat_mode)
7417 case V8SFmode:
7418 case V8SImode:
7419 case V32QImode:
7420 case V16HImode:
7421 case V4DFmode:
7422 case V4DImode:
7423 /* Unnamed 256-bit vector mode parameters are passed on the stack. */
7424 if (ix86_cfun_abi () == SYSV_ABI)
7426 container = NULL;
7427 break;
7430 default:
7431 container = construct_container (nat_mode, TYPE_MODE (type),
7432 type, 0, X86_64_REGPARM_MAX,
7433 X86_64_SSE_REGPARM_MAX, intreg,
7435 break;
7438 /* Pull the value out of the saved registers. */
7440 addr = create_tmp_var (ptr_type_node, "addr");
7442 if (container)
7444 int needed_intregs, needed_sseregs;
7445 bool need_temp;
7446 tree int_addr, sse_addr;
7448 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7449 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7451 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7453 need_temp = (!REG_P (container)
7454 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7455 || TYPE_ALIGN (type) > 128));
7457 /* In case we are passing a structure, verify that it is a consecutive block
7458 in the register save area. If not, we need to do moves. */
7459 if (!need_temp && !REG_P (container))
7461 /* Verify that all registers are strictly consecutive */
7462 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7464 int i;
7466 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7468 rtx slot = XVECEXP (container, 0, i);
7469 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7470 || INTVAL (XEXP (slot, 1)) != i * 16)
7471 need_temp = 1;
7474 else
7476 int i;
7478 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7480 rtx slot = XVECEXP (container, 0, i);
7481 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7482 || INTVAL (XEXP (slot, 1)) != i * 8)
7483 need_temp = 1;
7487 if (!need_temp)
7489 int_addr = addr;
7490 sse_addr = addr;
7492 else
7494 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7495 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7498 /* First ensure that we fit completely in registers. */
7499 if (needed_intregs)
7501 t = build_int_cst (TREE_TYPE (gpr),
7502 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7503 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7504 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7505 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7506 gimplify_and_add (t, pre_p);
7508 if (needed_sseregs)
7510 t = build_int_cst (TREE_TYPE (fpr),
7511 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7512 + X86_64_REGPARM_MAX * 8);
7513 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7514 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7515 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7516 gimplify_and_add (t, pre_p);
7519 /* Compute index to start of area used for integer regs. */
7520 if (needed_intregs)
7522 /* int_addr = gpr + sav; */
7523 t = fold_convert (sizetype, gpr);
7524 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7525 gimplify_assign (int_addr, t, pre_p);
7527 if (needed_sseregs)
7529 /* sse_addr = fpr + sav; */
7530 t = fold_convert (sizetype, fpr);
7531 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7532 gimplify_assign (sse_addr, t, pre_p);
7534 if (need_temp)
7536 int i, prev_size = 0;
7537 tree temp = create_tmp_var (type, "va_arg_tmp");
7539 /* addr = &temp; */
7540 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7541 gimplify_assign (addr, t, pre_p);
7543 for (i = 0; i < XVECLEN (container, 0); i++)
7545 rtx slot = XVECEXP (container, 0, i);
7546 rtx reg = XEXP (slot, 0);
7547 enum machine_mode mode = GET_MODE (reg);
7548 tree piece_type;
7549 tree addr_type;
7550 tree daddr_type;
7551 tree src_addr, src;
7552 int src_offset;
7553 tree dest_addr, dest;
7554 int cur_size = GET_MODE_SIZE (mode);
7556 if (prev_size + cur_size > size)
7558 cur_size = size - prev_size;
7559 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7560 if (mode == BLKmode)
7561 mode = QImode;
7563 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7564 if (mode == GET_MODE (reg))
7565 addr_type = build_pointer_type (piece_type);
7566 else
7567 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7568 true);
7569 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7570 true);
7572 if (SSE_REGNO_P (REGNO (reg)))
7574 src_addr = sse_addr;
7575 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7577 else
7579 src_addr = int_addr;
7580 src_offset = REGNO (reg) * 8;
7582 src_addr = fold_convert (addr_type, src_addr);
7583 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7584 size_int (src_offset));
7586 dest_addr = fold_convert (daddr_type, addr);
7587 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7588 size_int (INTVAL (XEXP (slot, 1))));
7589 if (cur_size == GET_MODE_SIZE (mode))
7591 src = build_va_arg_indirect_ref (src_addr);
7592 dest = build_va_arg_indirect_ref (dest_addr);
7594 gimplify_assign (dest, src, pre_p);
7596 else
7598 tree copy
7599 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7600 3, dest_addr, src_addr,
7601 size_int (cur_size));
7602 gimplify_and_add (copy, pre_p);
7604 prev_size += cur_size;
7608 if (needed_intregs)
7610 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7611 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7612 gimplify_assign (gpr, t, pre_p);
7615 if (needed_sseregs)
7617 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7618 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7619 gimplify_assign (fpr, t, pre_p);
7622 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7624 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7627 /* ... otherwise out of the overflow area. */
7629 /* When the caller aligns a parameter on the stack, a parameter
7630 alignment beyond MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
7631 MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee with the
7632 caller here. */
7633 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7634 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7635 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7637 /* Care for on-stack alignment if needed. */
7638 if (arg_boundary <= 64 || size == 0)
7639 t = ovf;
7640 else
7642 HOST_WIDE_INT align = arg_boundary / 8;
7643 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7644 size_int (align - 1));
7645 t = fold_convert (sizetype, t);
7646 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7647 size_int (-align));
7648 t = fold_convert (TREE_TYPE (ovf), t);
7651 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7652 gimplify_assign (addr, t, pre_p);
7654 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7655 size_int (rsize * UNITS_PER_WORD));
7656 gimplify_assign (unshare_expr (ovf), t, pre_p);
7658 if (container)
7659 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7661 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7662 addr = fold_convert (ptrtype, addr);
7664 if (indirect_p)
7665 addr = build_va_arg_indirect_ref (addr);
7666 return build_va_arg_indirect_ref (addr);
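/* The gimplification above open-codes the usual SysV x86-64 va_arg
   algorithm.  For a single integer-class argument it reduces to roughly
   this (sketch only):

       if (ap->gp_offset + 8 <= 8 * X86_64_REGPARM_MAX)
         {
           addr = ap->reg_save_area + ap->gp_offset;
           ap->gp_offset += 8;
         }
       else
         {
           addr = ap->overflow_arg_area;
           ap->overflow_arg_area += 8;
         }
       result = *(TYPE *) addr;
*/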
7669 /* Return true if OPNUM's MEM should be matched
7670 in movabs* patterns. */
7672 bool
7673 ix86_check_movabs (rtx insn, int opnum)
7675 rtx set, mem;
7677 set = PATTERN (insn);
7678 if (GET_CODE (set) == PARALLEL)
7679 set = XVECEXP (set, 0, 0);
7680 gcc_assert (GET_CODE (set) == SET);
7681 mem = XEXP (set, opnum);
7682 while (GET_CODE (mem) == SUBREG)
7683 mem = SUBREG_REG (mem);
7684 gcc_assert (MEM_P (mem));
7685 return volatile_ok || !MEM_VOLATILE_P (mem);
7688 /* Initialize the table of extra 80387 mathematical constants. */
7690 static void
7691 init_ext_80387_constants (void)
7693 static const char * cst[5] =
7695 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7696 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7697 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7698 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7699 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7701 int i;
7703 for (i = 0; i < 5; i++)
7705 real_from_string (&ext_80387_constants_table[i], cst[i]);
7706 /* Ensure each constant is rounded to XFmode precision. */
7707 real_convert (&ext_80387_constants_table[i],
7708 XFmode, &ext_80387_constants_table[i]);
7711 ext_80387_constants_init = 1;
7714 /* Return non-zero if the constant is something that
7715 can be loaded with a special instruction. */
7718 standard_80387_constant_p (rtx x)
7720 enum machine_mode mode = GET_MODE (x);
7722 REAL_VALUE_TYPE r;
7724 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7725 return -1;
7727 if (x == CONST0_RTX (mode))
7728 return 1;
7729 if (x == CONST1_RTX (mode))
7730 return 2;
7732 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7734 /* For XFmode constants, try to find a special 80387 instruction when
7735 optimizing for size or on those CPUs that benefit from them. */
7736 if (mode == XFmode
7737 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7739 int i;
7741 if (! ext_80387_constants_init)
7742 init_ext_80387_constants ();
7744 for (i = 0; i < 5; i++)
7745 if (real_identical (&r, &ext_80387_constants_table[i]))
7746 return i + 3;
7749 /* Load of the constant -0.0 or -1.0 will be split as
7750 fldz;fchs or fld1;fchs sequence. */
7751 if (real_isnegzero (&r))
7752 return 8;
7753 if (real_identical (&r, &dconstm1))
7754 return 9;
7756 return 0;
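/* Summary of the return values above and the instructions they map to in
   standard_80387_constant_opcode (for reference only):

       1  fldz     0.0          5  fldl2e   log2(e)
       2  fld1     1.0          6  fldl2t   log2(10)
       3  fldlg2   log10(2)     7  fldpi    pi
       4  fldln2   ln(2)        8  -0.0  (split into fldz; fchs)
                                9  -1.0  (split into fld1; fchs)
*/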
7759 /* Return the opcode of the special instruction to be used to load
7760 the constant X. */
7762 const char *
7763 standard_80387_constant_opcode (rtx x)
7765 switch (standard_80387_constant_p (x))
7767 case 1:
7768 return "fldz";
7769 case 2:
7770 return "fld1";
7771 case 3:
7772 return "fldlg2";
7773 case 4:
7774 return "fldln2";
7775 case 5:
7776 return "fldl2e";
7777 case 6:
7778 return "fldl2t";
7779 case 7:
7780 return "fldpi";
7781 case 8:
7782 case 9:
7783 return "#";
7784 default:
7785 gcc_unreachable ();
7789 /* Return the CONST_DOUBLE representing the 80387 constant that is
7790 loaded by the specified special instruction. The argument IDX
7791 matches the return value from standard_80387_constant_p. */
7794 standard_80387_constant_rtx (int idx)
7796 int i;
7798 if (! ext_80387_constants_init)
7799 init_ext_80387_constants ();
7801 switch (idx)
7803 case 3:
7804 case 4:
7805 case 5:
7806 case 6:
7807 case 7:
7808 i = idx - 3;
7809 break;
7811 default:
7812 gcc_unreachable ();
7815 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7816 XFmode);
7819 /* Return 1 if X is all 0s and 2 if X is all 1s
7820 in a supported SSE vector mode. */
7823 standard_sse_constant_p (rtx x)
7825 enum machine_mode mode = GET_MODE (x);
7827 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7828 return 1;
7829 if (vector_all_ones_operand (x, mode))
7830 switch (mode)
7832 case V16QImode:
7833 case V8HImode:
7834 case V4SImode:
7835 case V2DImode:
7836 if (TARGET_SSE2)
7837 return 2;
7838 default:
7839 break;
7842 return 0;
7845 /* Return the opcode of the special instruction to be used to load
7846 the constant X. */
7848 const char *
7849 standard_sse_constant_opcode (rtx insn, rtx x)
7851 switch (standard_sse_constant_p (x))
7853 case 1:
7854 switch (get_attr_mode (insn))
7856 case MODE_V4SF:
7857 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7858 case MODE_V2DF:
7859 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7860 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7861 else
7862 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7863 case MODE_TI:
7864 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7865 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7866 else
7867 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7868 case MODE_V8SF:
7869 return "vxorps\t%x0, %x0, %x0";
7870 case MODE_V4DF:
7871 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7872 return "vxorps\t%x0, %x0, %x0";
7873 else
7874 return "vxorpd\t%x0, %x0, %x0";
7875 case MODE_OI:
7876 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7877 return "vxorps\t%x0, %x0, %x0";
7878 else
7879 return "vpxor\t%x0, %x0, %x0";
7880 default:
7881 break;
7883 case 2:
7884 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
7885 default:
7886 break;
7888 gcc_unreachable ();
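/* For example, on a plain SSE2 target the two interesting cases above come
   out as (sketch):

       all zeros  ->  xorps   %xmm0, %xmm0   (or pxor/xorpd, mode dependent)
       all ones   ->  pcmpeqd %xmm0, %xmm0
*/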
7891 /* Returns true if OP contains a symbol reference */
7893 bool
7894 symbolic_reference_mentioned_p (rtx op)
7896 const char *fmt;
7897 int i;
7899 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7900 return true;
7902 fmt = GET_RTX_FORMAT (GET_CODE (op));
7903 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7905 if (fmt[i] == 'E')
7907 int j;
7909 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7910 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7911 return true;
7914 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7915 return true;
7918 return false;
7921 /* Return true if it is appropriate to emit `ret' instructions in the
7922 body of a function. Do this only if the epilogue is simple, needing a
7923 couple of insns. Prior to reloading, we can't tell how many registers
7924 must be saved, so return false then. Return false if there is no frame
7925 marker to de-allocate. */
7927 bool
7928 ix86_can_use_return_insn_p (void)
7930 struct ix86_frame frame;
7932 if (! reload_completed || frame_pointer_needed)
7933 return 0;
7935 /* Don't allow more than 32k pop, since that's all we can do
7936 with one instruction. */
7937 if (crtl->args.pops_args && crtl->args.size >= 32768)
7938 return 0;
7940 ix86_compute_frame_layout (&frame);
7941 return (frame.stack_pointer_offset == UNITS_PER_WORD
7942 && (frame.nregs + frame.nsseregs) == 0);
7945 /* Value should be nonzero if functions must have frame pointers.
7946 Zero means the frame pointer need not be set up (and parms may
7947 be accessed via the stack pointer) in functions that seem suitable. */
7949 static bool
7950 ix86_frame_pointer_required (void)
7952 /* If we accessed previous frames, then the generated code expects
7953 to be able to access the saved ebp value in our frame. */
7954 if (cfun->machine->accesses_prev_frame)
7955 return true;
7957 /* Several x86 OSes need a frame pointer for other reasons,
7958 usually pertaining to setjmp. */
7959 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7960 return true;
7962 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
7963 turns off the frame pointer by default. Turn it back on now if
7964 we've not got a leaf function. */
7965 if (TARGET_OMIT_LEAF_FRAME_POINTER
7966 && (!current_function_is_leaf
7967 || ix86_current_function_calls_tls_descriptor))
7968 return true;
7970 if (crtl->profile && !flag_fentry)
7971 return true;
7973 return false;
7976 /* Record that the current function accesses previous call frames. */
7978 void
7979 ix86_setup_frame_addresses (void)
7981 cfun->machine->accesses_prev_frame = 1;
7984 #ifndef USE_HIDDEN_LINKONCE
7985 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7986 # define USE_HIDDEN_LINKONCE 1
7987 # else
7988 # define USE_HIDDEN_LINKONCE 0
7989 # endif
7990 #endif
7992 static int pic_labels_used;
7994 /* Fills in the label name that should be used for a pc thunk for
7995 the given register. */
7997 static void
7998 get_pc_thunk_name (char name[32], unsigned int regno)
8000 gcc_assert (!TARGET_64BIT);
8002 if (USE_HIDDEN_LINKONCE)
8003 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
8004 else
8005 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
8009 /* This function generates code for -fpic that loads %ebx with
8010 the return address of the caller and then returns. */
8012 static void
8013 ix86_code_end (void)
8015 rtx xops[2];
8016 int regno;
8018 for (regno = 0; regno < 8; ++regno)
8020 char name[32];
8021 tree decl;
8023 if (! ((pic_labels_used >> regno) & 1))
8024 continue;
8026 get_pc_thunk_name (name, regno);
8028 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8029 get_identifier (name),
8030 build_function_type (void_type_node, void_list_node));
8031 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8032 NULL_TREE, void_type_node);
8033 TREE_PUBLIC (decl) = 1;
8034 TREE_STATIC (decl) = 1;
8036 #if TARGET_MACHO
8037 if (TARGET_MACHO)
8039 switch_to_section (darwin_sections[text_coal_section]);
8040 fputs ("\t.weak_definition\t", asm_out_file);
8041 assemble_name (asm_out_file, name);
8042 fputs ("\n\t.private_extern\t", asm_out_file);
8043 assemble_name (asm_out_file, name);
8044 putc ('\n', asm_out_file);
8045 ASM_OUTPUT_LABEL (asm_out_file, name);
8046 DECL_WEAK (decl) = 1;
8048 else
8049 #endif
8050 if (USE_HIDDEN_LINKONCE)
8052 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8054 targetm.asm_out.unique_section (decl, 0);
8055 switch_to_section (get_named_section (decl, NULL, 0));
8057 targetm.asm_out.globalize_label (asm_out_file, name);
8058 fputs ("\t.hidden\t", asm_out_file);
8059 assemble_name (asm_out_file, name);
8060 putc ('\n', asm_out_file);
8061 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8063 else
8065 switch_to_section (text_section);
8066 ASM_OUTPUT_LABEL (asm_out_file, name);
8069 DECL_INITIAL (decl) = make_node (BLOCK);
8070 current_function_decl = decl;
8071 init_function_start (decl);
8072 first_function_block_is_cold = false;
8073 /* Make sure unwind info is emitted for the thunk if needed. */
8074 final_start_function (emit_barrier (), asm_out_file, 1);
8076 xops[0] = gen_rtx_REG (Pmode, regno);
8077 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8078 /* Pad the stack IP move with 4 instructions (two NOPs count as
8079 one instruction). */
8080 if (TARGET_PAD_SHORT_FUNCTION)
8081 output_asm_insn ("nop; nop; nop; nop; nop; nop; nop; nop",
8082 xops);
8083 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8084 output_asm_insn ("ret", xops);
8085 final_end_function ();
8086 init_insn_lengths ();
8087 free_after_compilation (cfun);
8088 set_cfun (NULL);
8089 current_function_decl = NULL;
8092 if (flag_split_stack)
8093 file_end_indicate_split_stack ();
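/* Each PC thunk emitted above amounts to the following two instructions
   (sketch, for regno == BX on an ELF target):

       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret
*/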
8096 /* Emit code for the SET_GOT patterns. */
8098 const char *
8099 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8101 rtx xops[3];
8103 xops[0] = dest;
8105 if (TARGET_VXWORKS_RTP && flag_pic)
8107 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8108 xops[2] = gen_rtx_MEM (Pmode,
8109 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8110 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8112 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8113 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8114 an unadorned address. */
8115 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8116 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8117 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8118 return "";
8121 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8123 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8125 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8127 if (!flag_pic)
8128 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8129 else
8131 output_asm_insn ("call\t%a2", xops);
8132 #ifdef DWARF2_UNWIND_INFO
8133 /* The call to the next label acts as a push. */
8134 if (dwarf2out_do_frame ())
8136 rtx insn;
8137 start_sequence ();
8138 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8139 gen_rtx_PLUS (Pmode,
8140 stack_pointer_rtx,
8141 GEN_INT (-4))));
8142 RTX_FRAME_RELATED_P (insn) = 1;
8143 dwarf2out_frame_debug (insn, true);
8144 end_sequence ();
8146 #endif
8149 #if TARGET_MACHO
8150 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8151 is what will be referenced by the Mach-O PIC subsystem. */
8152 if (!label)
8153 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8154 #endif
8156 targetm.asm_out.internal_label (asm_out_file, "L",
8157 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8159 if (flag_pic)
8161 output_asm_insn ("pop%z0\t%0", xops);
8162 #ifdef DWARF2_UNWIND_INFO
8163 /* The pop is a pop and clobbers dest, but doesn't restore it
8164 for unwind info purposes. */
8165 if (dwarf2out_do_frame ())
8167 rtx insn;
8168 start_sequence ();
8169 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8170 dwarf2out_frame_debug (insn, true);
8171 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8172 gen_rtx_PLUS (Pmode,
8173 stack_pointer_rtx,
8174 GEN_INT (4))));
8175 RTX_FRAME_RELATED_P (insn) = 1;
8176 dwarf2out_frame_debug (insn, true);
8177 end_sequence ();
8179 #endif
8182 else
8184 char name[32];
8185 get_pc_thunk_name (name, REGNO (dest));
8186 pic_labels_used |= 1 << REGNO (dest);
8188 #ifdef DWARF2_UNWIND_INFO
8189 /* Ensure all queued register saves are flushed before the
8190 call. */
8191 if (dwarf2out_do_frame ())
8192 dwarf2out_flush_queued_reg_saves ();
8193 #endif
8194 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8195 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8196 output_asm_insn ("call\t%X2", xops);
8197 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8198 is what will be referenced by the Mach-O PIC subsystem. */
8199 #if TARGET_MACHO
8200 if (!label)
8201 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8202 else
8203 targetm.asm_out.internal_label (asm_out_file, "L",
8204 CODE_LABEL_NUMBER (label));
8205 #endif
8208 if (TARGET_MACHO)
8209 return "";
8211 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8212 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8213 else
8214 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
8216 return "";
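/* In the common ELF PIC case with deep branch prediction the sequence
   printed above is the classic GOT setup (sketch):

       call    __i686.get_pc_thunk.bx
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx
*/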
8219 /* Generate a "push" pattern for input ARG. */
8221 static rtx
8222 gen_push (rtx arg)
8224 struct machine_function *m = cfun->machine;
8226 if (m->fs.cfa_reg == stack_pointer_rtx)
8227 m->fs.cfa_offset += UNITS_PER_WORD;
8228 m->fs.sp_offset += UNITS_PER_WORD;
8230 return gen_rtx_SET (VOIDmode,
8231 gen_rtx_MEM (Pmode,
8232 gen_rtx_PRE_DEC (Pmode,
8233 stack_pointer_rtx)),
8234 arg);
8237 /* Generate a "pop" pattern for input ARG. */
8239 static rtx
8240 gen_pop (rtx arg)
8242 return gen_rtx_SET (VOIDmode,
8243 arg,
8244 gen_rtx_MEM (Pmode,
8245 gen_rtx_POST_INC (Pmode,
8246 stack_pointer_rtx)));
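/* gen_push and gen_pop only build the RTL; combined with the frame-state
   bookkeeping above they correspond to a plain pushl/popl (pushq/popq on
   64-bit) of ARG against the stack pointer (illustration, not emitted
   here):

       (set (mem (pre_dec sp)) arg)    ;; push
       (set arg (mem (post_inc sp)))   ;; pop
*/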
8249 /* Return >= 0 if there is an unused call-clobbered register available
8250 for the entire function. */
8252 static unsigned int
8253 ix86_select_alt_pic_regnum (void)
8255 if (current_function_is_leaf
8256 && !crtl->profile
8257 && !ix86_current_function_calls_tls_descriptor)
8259 int i, drap;
8260 /* Can't use the same register for both PIC and DRAP. */
8261 if (crtl->drap_reg)
8262 drap = REGNO (crtl->drap_reg);
8263 else
8264 drap = -1;
8265 for (i = 2; i >= 0; --i)
8266 if (i != drap && !df_regs_ever_live_p (i))
8267 return i;
8270 return INVALID_REGNUM;
8273 /* Return 1 if we need to save REGNO. */
8274 static int
8275 ix86_save_reg (unsigned int regno, int maybe_eh_return)
8277 if (pic_offset_table_rtx
8278 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8279 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8280 || crtl->profile
8281 || crtl->calls_eh_return
8282 || crtl->uses_const_pool))
8284 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
8285 return 0;
8286 return 1;
8289 if (crtl->calls_eh_return && maybe_eh_return)
8291 unsigned i;
8292 for (i = 0; ; i++)
8294 unsigned test = EH_RETURN_DATA_REGNO (i);
8295 if (test == INVALID_REGNUM)
8296 break;
8297 if (test == regno)
8298 return 1;
8302 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8303 return 1;
8305 return (df_regs_ever_live_p (regno)
8306 && !call_used_regs[regno]
8307 && !fixed_regs[regno]
8308 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8311 /* Return the number of saved general purpose registers. */
8313 static int
8314 ix86_nsaved_regs (void)
8316 int nregs = 0;
8317 int regno;
8319 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8320 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8321 nregs ++;
8322 return nregs;
8325 /* Return the number of saved SSE registers. */
8327 static int
8328 ix86_nsaved_sseregs (void)
8330 int nregs = 0;
8331 int regno;
8333 if (ix86_cfun_abi () != MS_ABI)
8334 return 0;
8335 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8336 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8337 nregs ++;
8338 return nregs;
8341 /* Given FROM and TO register numbers, say whether this elimination is
8342 allowed. If stack alignment is needed, we can only replace argument
8343 pointer with hard frame pointer, or replace frame pointer with stack
8344 pointer. Otherwise, frame pointer elimination is automatically
8345 handled and all other eliminations are valid. */
8347 static bool
8348 ix86_can_eliminate (const int from, const int to)
8350 if (stack_realign_fp)
8351 return ((from == ARG_POINTER_REGNUM
8352 && to == HARD_FRAME_POINTER_REGNUM)
8353 || (from == FRAME_POINTER_REGNUM
8354 && to == STACK_POINTER_REGNUM));
8355 else
8356 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8359 /* Return the offset between two registers, one to be eliminated, and the other
8360 its replacement, at the start of a routine. */
8362 HOST_WIDE_INT
8363 ix86_initial_elimination_offset (int from, int to)
8365 struct ix86_frame frame;
8366 ix86_compute_frame_layout (&frame);
8368 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8369 return frame.hard_frame_pointer_offset;
8370 else if (from == FRAME_POINTER_REGNUM
8371 && to == HARD_FRAME_POINTER_REGNUM)
8372 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8373 else
8375 gcc_assert (to == STACK_POINTER_REGNUM);
8377 if (from == ARG_POINTER_REGNUM)
8378 return frame.stack_pointer_offset;
8380 gcc_assert (from == FRAME_POINTER_REGNUM);
8381 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8385 /* In a dynamically-aligned function, we can't know the offset from
8386 stack pointer to frame pointer, so we must ensure that setjmp
8387 eliminates fp against the hard fp (%ebp) rather than trying to
8388 index from %esp up to the top of the frame across a gap that is
8389 of unknown (at compile-time) size. */
8390 static rtx
8391 ix86_builtin_setjmp_frame_value (void)
8393 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8396 /* On the x86, -fsplit-stack and -fstack-protector both use the same
8397 field in the TCB, so they cannot be used together. */
8399 static bool
8400 ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED)
8402 bool ret = true;
8404 #ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
8405 if (report)
8406 error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
8407 ret = false;
8408 #endif
8410 return ret;
8413 /* When using -fsplit-stack, the allocation routines set a field in
8414 the TCB to the bottom of the stack plus this much space, measured
8415 in bytes. */
8417 #define SPLIT_STACK_AVAILABLE 256
8419 /* Fill structure ix86_frame about frame of currently computed function. */
8421 static void
8422 ix86_compute_frame_layout (struct ix86_frame *frame)
8424 unsigned int stack_alignment_needed;
8425 HOST_WIDE_INT offset;
8426 unsigned int preferred_alignment;
8427 HOST_WIDE_INT size = get_frame_size ();
8428 HOST_WIDE_INT to_allocate;
8430 frame->nregs = ix86_nsaved_regs ();
8431 frame->nsseregs = ix86_nsaved_sseregs ();
8433 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8434 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8436 /* The MS ABI seems to require stack alignment to be always 16 except for
8437 function prologues and leaf functions. */
8438 if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
8439 && (!current_function_is_leaf || cfun->calls_alloca != 0
8440 || ix86_current_function_calls_tls_descriptor))
8442 preferred_alignment = 16;
8443 stack_alignment_needed = 16;
8444 crtl->preferred_stack_boundary = 128;
8445 crtl->stack_alignment_needed = 128;
8448 gcc_assert (!size || stack_alignment_needed);
8449 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8450 gcc_assert (preferred_alignment <= stack_alignment_needed);
8452 /* During reload iteration the number of registers saved can change.
8453 Recompute the value as needed. Do not recompute when the number of
8454 registers didn't change, as reload calls this function multiple times and
8455 does not expect the decision to change within a single iteration. */
8456 if (!optimize_function_for_size_p (cfun)
8457 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8459 int count = frame->nregs;
8460 struct cgraph_node *node = cgraph_node (current_function_decl);
8462 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8463 /* The fast prologue uses move instead of push to save registers. This
8464 is significantly longer, but also executes faster as modern hardware
8465 can execute the moves in parallel, but can't do that for push/pop.
8467 Be careful about choosing which prologue to emit: when the function takes
8468 many instructions to execute we may use the slow version, as well as when
8469 the function is known to be outside a hot spot (known only with profile
8470 feedback). Weight the size of the function by the number of registers
8471 to save, as it is cheap to use one or two push instructions but very
8472 slow to use many of them. */
8473 if (count)
8474 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8475 if (node->frequency < NODE_FREQUENCY_NORMAL
8476 || (flag_branch_probabilities
8477 && node->frequency < NODE_FREQUENCY_HOT))
8478 cfun->machine->use_fast_prologue_epilogue = false;
8479 else
8480 cfun->machine->use_fast_prologue_epilogue
8481 = !expensive_function_p (count);
8483 if (TARGET_PROLOGUE_USING_MOVE
8484 && cfun->machine->use_fast_prologue_epilogue)
8485 frame->save_regs_using_mov = true;
8486 else
8487 frame->save_regs_using_mov = false;
8489 /* If static stack checking is enabled and done with probes, the registers
8490 need to be saved before allocating the frame. */
8491 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8492 frame->save_regs_using_mov = false;
8494 /* Skip return address. */
8495 offset = UNITS_PER_WORD;
8497 /* Skip pushed static chain. */
8498 if (ix86_static_chain_on_stack)
8499 offset += UNITS_PER_WORD;
8501 /* Skip saved base pointer. */
8502 if (frame_pointer_needed)
8503 offset += UNITS_PER_WORD;
8505 frame->hard_frame_pointer_offset = offset;
8507 /* Register save area */
8508 offset += frame->nregs * UNITS_PER_WORD;
8509 frame->reg_save_offset = offset;
8511 /* Align and set SSE register save area. */
8512 if (frame->nsseregs)
8514 /* The only ABI that has saved SSE registers (Win64) also has a
8515 16-byte aligned default stack, and thus we don't need to be
8516 within the re-aligned local stack frame to save them. */
8517 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8518 offset = (offset + 16 - 1) & -16;
8519 offset += frame->nsseregs * 16;
8521 frame->sse_reg_save_offset = offset;
8523 /* The re-aligned stack starts here. Values before this point are not
8524 directly comparable with values below this point. In order to make
8525 sure that no value happens to be the same before and after, force
8526 the alignment computation below to add a non-zero value. */
8527 if (stack_realign_fp)
8528 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
8530 /* Va-arg area */
8531 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8532 offset += frame->va_arg_size;
8534 /* Align start of frame for local function. */
8535 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8537 /* Frame pointer points here. */
8538 frame->frame_pointer_offset = offset;
8540 offset += size;
8542 /* Add outgoing arguments area. Can be skipped if we eliminated
8543 all the function calls as dead code.
8544 Skipping is however impossible when the function calls alloca. The alloca
8545 expander assumes that the last crtl->outgoing_args_size bytes
8546 of the stack frame are unused. */
8547 if (ACCUMULATE_OUTGOING_ARGS
8548 && (!current_function_is_leaf || cfun->calls_alloca
8549 || ix86_current_function_calls_tls_descriptor))
8551 offset += crtl->outgoing_args_size;
8552 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8554 else
8555 frame->outgoing_arguments_size = 0;
8557 /* Align stack boundary. Only needed if we're calling another function
8558 or using alloca. */
8559 if (!current_function_is_leaf || cfun->calls_alloca
8560 || ix86_current_function_calls_tls_descriptor)
8561 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8563 /* We've reached end of stack frame. */
8564 frame->stack_pointer_offset = offset;
8566 /* Size prologue needs to allocate. */
8567 to_allocate = offset - frame->sse_reg_save_offset;
8569 if ((!to_allocate && frame->nregs <= 1)
8570 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8571 frame->save_regs_using_mov = false;
8573 if (ix86_using_red_zone ()
8574 && current_function_sp_is_unchanging
8575 && current_function_is_leaf
8576 && !ix86_current_function_calls_tls_descriptor)
8578 frame->red_zone_size = to_allocate;
8579 if (frame->save_regs_using_mov)
8580 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8581 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8582 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8584 else
8585 frame->red_zone_size = 0;
8586 frame->stack_pointer_offset -= frame->red_zone_size;
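/* Sketch of the frame layout computed above, with offsets measured
   downward from the incoming return address (each marker names the frame
   field set above; it is the offset reached after the region listed
   before it):

       return address / pushed static chain / saved frame pointer
                                        <- hard_frame_pointer_offset
       GPR save area                    <- reg_save_offset
       SSE save area (16-byte aligned)  <- sse_reg_save_offset
       va_arg register save area, realign padding
                                        <- frame_pointer_offset
       local variables, outgoing argument area
                                        <- stack_pointer_offset
*/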
8589 /* This is semi-inlined memory_address_length, but simplified
8590 since we know that we're always dealing with reg+offset, and
8591 to avoid having to create and discard all that rtl. */
8593 static inline int
8594 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
8596 int len = 4;
8598 if (offset == 0)
8600 /* EBP and R13 cannot be encoded without an offset. */
8601 len = (regno == BP_REG || regno == R13_REG);
8603 else if (IN_RANGE (offset, -128, 127))
8604 len = 1;
8606 /* ESP and R12 must be encoded with a SIB byte. */
8607 if (regno == SP_REG || regno == R12_REG)
8608 len++;
8610 return len;
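/* A few illustrative encoding lengths returned above (base register plus
   displacement only, SIB byte counted when required):

       0(%eax)      -> 0        0(%ebp), 0(%r13) -> 1
       8(%eax)      -> 1        8(%esp), 8(%r12) -> 2
       0x1000(%eax) -> 4        0x1000(%esp)     -> 5
*/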
8613 /* Return an RTX that points to CFA_OFFSET within the stack frame.
8614 The valid base registers are taken from CFUN->MACHINE->FS. */
8616 static rtx
8617 choose_baseaddr (HOST_WIDE_INT cfa_offset)
8619 const struct machine_function *m = cfun->machine;
8620 rtx base_reg = NULL;
8621 HOST_WIDE_INT base_offset = 0;
8623 if (m->use_fast_prologue_epilogue)
8625 /* Choose the base register most likely to allow the most scheduling
8626 opportunities. Generally FP is valid throughout the function,
8627 while DRAP must be reloaded within the epilogue. But choose either
8628 over the SP due to increased encoding size. */
8630 if (m->fs.fp_valid)
8632 base_reg = hard_frame_pointer_rtx;
8633 base_offset = m->fs.fp_offset - cfa_offset;
8635 else if (m->fs.drap_valid)
8637 base_reg = crtl->drap_reg;
8638 base_offset = 0 - cfa_offset;
8640 else if (m->fs.sp_valid)
8642 base_reg = stack_pointer_rtx;
8643 base_offset = m->fs.sp_offset - cfa_offset;
8646 else
8648 HOST_WIDE_INT toffset;
8649 int len = 16, tlen;
8651 /* Choose the base register with the smallest address encoding.
8652 With a tie, choose FP > DRAP > SP. */
8653 if (m->fs.sp_valid)
8655 base_reg = stack_pointer_rtx;
8656 base_offset = m->fs.sp_offset - cfa_offset;
8657 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
8659 if (m->fs.drap_valid)
8661 toffset = 0 - cfa_offset;
8662 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
8663 if (tlen <= len)
8665 base_reg = crtl->drap_reg;
8666 base_offset = toffset;
8667 len = tlen;
8670 if (m->fs.fp_valid)
8672 toffset = m->fs.fp_offset - cfa_offset;
8673 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
8674 if (tlen <= len)
8676 base_reg = hard_frame_pointer_rtx;
8677 base_offset = toffset;
8678 len = tlen;
8682 gcc_assert (base_reg != NULL);
8684 return plus_constant (base_reg, base_offset);
8687 /* Emit code to save registers in the prologue. */
8689 static void
8690 ix86_emit_save_regs (void)
8692 unsigned int regno;
8693 rtx insn;
8695 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8696 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8698 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8699 RTX_FRAME_RELATED_P (insn) = 1;
8703 /* Emit a single register save at CFA - CFA_OFFSET. */
8705 static void
8706 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
8707 HOST_WIDE_INT cfa_offset)
8709 struct machine_function *m = cfun->machine;
8710 rtx reg = gen_rtx_REG (mode, regno);
8711 rtx mem, addr, base, insn;
8713 addr = choose_baseaddr (cfa_offset);
8714 mem = gen_frame_mem (mode, addr);
8716 /* For SSE saves, we need to indicate the 128-bit alignment. */
8717 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
8719 insn = emit_move_insn (mem, reg);
8720 RTX_FRAME_RELATED_P (insn) = 1;
8722 base = addr;
8723 if (GET_CODE (base) == PLUS)
8724 base = XEXP (base, 0);
8725 gcc_checking_assert (REG_P (base));
8727 /* When saving registers into a re-aligned local stack frame, avoid
8728 any tricky guessing by dwarf2out. */
8729 if (m->fs.realigned)
8731 gcc_checking_assert (stack_realign_drap);
8733 if (regno == REGNO (crtl->drap_reg))
8735 /* A bit of a hack. We force the DRAP register to be saved in
8736 the re-aligned stack frame, which provides us with a copy
8737 of the CFA that will last past the prologue. Install it. */
8738 gcc_checking_assert (cfun->machine->fs.fp_valid);
8739 addr = plus_constant (hard_frame_pointer_rtx,
8740 cfun->machine->fs.fp_offset - cfa_offset);
8741 mem = gen_rtx_MEM (mode, addr);
8742 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
8744 else
8746 /* The frame pointer is a stable reference within the
8747 aligned frame. Use it. */
8748 gcc_checking_assert (cfun->machine->fs.fp_valid);
8749 addr = plus_constant (hard_frame_pointer_rtx,
8750 cfun->machine->fs.fp_offset - cfa_offset);
8751 mem = gen_rtx_MEM (mode, addr);
8752 add_reg_note (insn, REG_CFA_EXPRESSION,
8753 gen_rtx_SET (VOIDmode, mem, reg));
8757 /* The memory may not be relative to the current CFA register,
8758 which means that we may need to generate a new pattern for
8759 use by the unwind info. */
8760 else if (base != m->fs.cfa_reg)
8762 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
8763 mem = gen_rtx_MEM (mode, addr);
8764 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
8768 /* Emit code to save registers using MOV insns.
8769 First register is stored at CFA - CFA_OFFSET. */
8770 static void
8771 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
8773 unsigned int regno;
8775 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8776 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8778 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
8779 cfa_offset -= UNITS_PER_WORD;
8783 /* Emit code to save SSE registers using MOV insns.
8784 First register is stored at CFA - CFA_OFFSET. */
8785 static void
8786 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
8788 unsigned int regno;
8790 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8791 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8793 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
8794 cfa_offset -= 16;
8798 static GTY(()) rtx queued_cfa_restores;
8800 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
8801 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
8802 Don't add the note if the previously saved value will be left untouched
8803 within the stack red zone until return, as unwinders can find the same
8804 value in the register and on the stack. */
8806 static void
8807 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
8809 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
8810 return;
8812 if (insn)
8814 add_reg_note (insn, REG_CFA_RESTORE, reg);
8815 RTX_FRAME_RELATED_P (insn) = 1;
8817 else
8818 queued_cfa_restores
8819 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
8822 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
8824 static void
8825 ix86_add_queued_cfa_restore_notes (rtx insn)
8827 rtx last;
8828 if (!queued_cfa_restores)
8829 return;
8830 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
8832 XEXP (last, 1) = REG_NOTES (insn);
8833 REG_NOTES (insn) = queued_cfa_restores;
8834 queued_cfa_restores = NULL_RTX;
8835 RTX_FRAME_RELATED_P (insn) = 1;
8838 /* Expand prologue or epilogue stack adjustment.
8839 The pattern exists to put a dependency on all ebp-based memory accesses.
8840 STYLE should be negative if instructions should be marked as frame related,
8841 zero if %r11 register is live and cannot be freely used and positive
8842 otherwise. */
8844 static void
8845 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
8846 int style, bool set_cfa)
8848 struct machine_function *m = cfun->machine;
8849 rtx insn;
8851 if (! TARGET_64BIT)
8852 insn = emit_insn (gen_pro_epilogue_adjust_stack_si_1 (dest, src, offset));
8853 else if (x86_64_immediate_operand (offset, DImode))
8854 insn = emit_insn (gen_pro_epilogue_adjust_stack_di_1 (dest, src, offset));
8855 else
8857 rtx tmp;
8858 /* r11 is used by indirect sibcall return as well, set before the
8859 epilogue and used after the epilogue. */
8860 if (style)
8861 tmp = gen_rtx_REG (DImode, R11_REG);
8862 else
8864 gcc_assert (src != hard_frame_pointer_rtx
8865 && dest != hard_frame_pointer_rtx);
8866 tmp = hard_frame_pointer_rtx;
8868 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
8869 if (style < 0)
8870 RTX_FRAME_RELATED_P (insn) = 1;
8871 insn = emit_insn (gen_pro_epilogue_adjust_stack_di_2 (dest, src, tmp,
8872 offset));
8875 if (style >= 0)
8876 ix86_add_queued_cfa_restore_notes (insn);
8878 if (set_cfa)
8880 rtx r;
8882 gcc_assert (m->fs.cfa_reg == src);
8883 m->fs.cfa_offset += INTVAL (offset);
8884 m->fs.cfa_reg = dest;
8886 r = gen_rtx_PLUS (Pmode, src, offset);
8887 r = gen_rtx_SET (VOIDmode, dest, r);
8888 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
8889 RTX_FRAME_RELATED_P (insn) = 1;
8891 else if (style < 0)
8892 RTX_FRAME_RELATED_P (insn) = 1;
8894 if (dest == stack_pointer_rtx)
8896 HOST_WIDE_INT ooffset = m->fs.sp_offset;
8897 bool valid = m->fs.sp_valid;
8899 if (src == hard_frame_pointer_rtx)
8901 valid = m->fs.fp_valid;
8902 ooffset = m->fs.fp_offset;
8904 else if (src == crtl->drap_reg)
8906 valid = m->fs.drap_valid;
8907 ooffset = 0;
8909 else
8911 /* Otherwise there are two possibilities: SP itself, which we set
8912 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
8913 taken care of by hand along the eh_return path. */
8914 gcc_checking_assert (src == stack_pointer_rtx
8915 || offset == const0_rtx);
8918 m->fs.sp_offset = ooffset - INTVAL (offset);
8919 m->fs.sp_valid = valid;
8923 /* Find an available register to be used as the dynamic realign argument
8924 pointer register. Such a register will be written in the prologue and
8925 used at the beginning of the body, so it must not be
8926 1. a parameter-passing register.
8927 2. the GOT pointer.
8928 We reuse the static-chain register if it is available. Otherwise, we
8929 use DI for i386 and R13 for x86-64. We chose R13 since it has a
8930 shorter encoding.
8932 Return: the regno of the chosen register. */
8934 static unsigned int
8935 find_drap_reg (void)
8937 tree decl = cfun->decl;
8939 if (TARGET_64BIT)
8941 /* Use R13 for a nested function or a function that needs a static chain.
8942 Since a function with a tail call may use any caller-saved
8943 register in the epilogue, DRAP must not use a caller-saved
8944 register in that case. */
8945 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8946 return R13_REG;
8948 return R10_REG;
8950 else
8952 /* Use DI for a nested function or a function that needs a static chain.
8953 Since a function with a tail call may use any caller-saved
8954 register in the epilogue, DRAP must not use a caller-saved
8955 register in that case. */
8956 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8957 return DI_REG;
8959 /* Reuse static chain register if it isn't used for parameter
8960 passing. */
8961 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8962 && !lookup_attribute ("fastcall",
8963 TYPE_ATTRIBUTES (TREE_TYPE (decl)))
8964 && !lookup_attribute ("thiscall",
8965 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8966 return CX_REG;
8967 else
8968 return DI_REG;
8972 /* Return minimum incoming stack alignment. */
8974 static unsigned int
8975 ix86_minimum_incoming_stack_boundary (bool sibcall)
8977 unsigned int incoming_stack_boundary;
8979 /* Prefer the one specified at command line. */
8980 if (ix86_user_incoming_stack_boundary)
8981 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
8982 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
8983 if -mstackrealign is used, this isn't a sibcall check, and the
8984 estimated stack alignment is 128 bits. */
8985 else if (!sibcall
8986 && !TARGET_64BIT
8987 && ix86_force_align_arg_pointer
8988 && crtl->stack_alignment_estimated == 128)
8989 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8990 else
8991 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
8993 /* Incoming stack alignment can be changed on individual functions
8994 via force_align_arg_pointer attribute. We use the smallest
8995 incoming stack boundary. */
8996 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
8997 && lookup_attribute (ix86_force_align_arg_pointer_string,
8998 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8999 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9001 /* The incoming stack frame has to be aligned at least at
9002 parm_stack_boundary. */
9003 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9004 incoming_stack_boundary = crtl->parm_stack_boundary;
9006 /* Stack at entrance of main is aligned by runtime. We use the
9007 smallest incoming stack boundary. */
9008 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9009 && DECL_NAME (current_function_decl)
9010 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9011 && DECL_FILE_SCOPE_P (current_function_decl))
9012 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9014 return incoming_stack_boundary;
9017 /* Update incoming stack boundary and estimated stack alignment. */
9019 static void
9020 ix86_update_stack_boundary (void)
9022 ix86_incoming_stack_boundary
9023 = ix86_minimum_incoming_stack_boundary (false);
9025 /* x86_64 varargs need 16-byte stack alignment for the register save
9026 area. */
9027 if (TARGET_64BIT
9028 && cfun->stdarg
9029 && crtl->stack_alignment_estimated < 128)
9030 crtl->stack_alignment_estimated = 128;
9033 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9034 needed or an rtx for DRAP otherwise. */
9036 static rtx
9037 ix86_get_drap_rtx (void)
9039 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9040 crtl->need_drap = true;
9042 if (stack_realign_drap)
9044 /* Assign DRAP to vDRAP and return vDRAP. */
9045 unsigned int regno = find_drap_reg ();
9046 rtx drap_vreg;
9047 rtx arg_ptr;
9048 rtx seq, insn;
9050 arg_ptr = gen_rtx_REG (Pmode, regno);
9051 crtl->drap_reg = arg_ptr;
9053 start_sequence ();
9054 drap_vreg = copy_to_reg (arg_ptr);
9055 seq = get_insns ();
9056 end_sequence ();
9058 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9059 if (!optimize)
9061 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9062 RTX_FRAME_RELATED_P (insn) = 1;
9064 return drap_vreg;
9066 else
9067 return NULL;
9070 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9072 static rtx
9073 ix86_internal_arg_pointer (void)
9075 return virtual_incoming_args_rtx;
9078 struct scratch_reg {
9079 rtx reg;
9080 bool saved;
9083 /* Return a short-lived scratch register for use on function entry.
9084 In 32-bit mode, it is valid only after the registers are saved
9085 in the prologue. This register must be released by means of
9086 release_scratch_register_on_entry once it is dead. */
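/* The typical usage pattern, as in the stack probing routines below:

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr);
     ... emit insns that use sr.reg ...
     release_scratch_register_on_entry (&sr);  */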
9088 static void
9089 get_scratch_register_on_entry (struct scratch_reg *sr)
9091 int regno;
9093 sr->saved = false;
9095 if (TARGET_64BIT)
9097 /* We always use R11 in 64-bit mode. */
9098 regno = R11_REG;
9100 else
9102 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9103 bool fastcall_p
9104 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9105 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9106 int regparm = ix86_function_regparm (fntype, decl);
9107 int drap_regno
9108 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9110 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9111 for the static chain register. */
9112 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9113 && drap_regno != AX_REG)
9114 regno = AX_REG;
9115 else if (regparm < 2 && drap_regno != DX_REG)
9116 regno = DX_REG;
9117 /* ecx is the static chain register. */
9118 else if (regparm < 3 && !fastcall_p && !static_chain_p
9119 && drap_regno != CX_REG)
9120 regno = CX_REG;
9121 else if (ix86_save_reg (BX_REG, true))
9122 regno = BX_REG;
9123 /* esi is the static chain register. */
9124 else if (!(regparm == 3 && static_chain_p)
9125 && ix86_save_reg (SI_REG, true))
9126 regno = SI_REG;
9127 else if (ix86_save_reg (DI_REG, true))
9128 regno = DI_REG;
9129 else
9131 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9132 sr->saved = true;
9136 sr->reg = gen_rtx_REG (Pmode, regno);
9137 if (sr->saved)
9139 rtx insn = emit_insn (gen_push (sr->reg));
9140 RTX_FRAME_RELATED_P (insn) = 1;
9144 /* Release a scratch register obtained from the preceding function. */
9146 static void
9147 release_scratch_register_on_entry (struct scratch_reg *sr)
9149 if (sr->saved)
9151 rtx x, insn = emit_insn (gen_pop (sr->reg));
9153 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9154 RTX_FRAME_RELATED_P (insn) = 1;
9155 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9156 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9157 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9161 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
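/* With the usual STACK_CHECK_PROBE_INTERVAL_EXP of 12 this is 4096 bytes,
   i.e. one probe per page.  */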
9163 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9165 static void
9166 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9168 /* We skip the probe for the first interval + a small dope of 4 words and
9169 probe that many bytes past the specified size to maintain a protection
9170 area at the bottom of the stack. */
9171 const int dope = 4 * UNITS_PER_WORD;
9172 rtx size_rtx = GEN_INT (size);
9174 /* See if we have a constant small number of probes to generate. If so,
9175 that's the easy case. The run-time loop is made up of 11 insns in the
9176 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9177 for n # of intervals. */
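/* For example, assuming a 4096-byte PROBE_INTERVAL as above, a 12288-byte
   allocation stays below the 5 * PROBE_INTERVAL threshold and is unrolled
   here: three adjust+probe pairs, i.e. 3 + 2*(3-1) = 7 insns instead of
   the 11-insn run-time loop.  */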
9178 if (size <= 5 * PROBE_INTERVAL)
9180 HOST_WIDE_INT i, adjust;
9181 bool first_probe = true;
9183 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9184 values of N from 1 until it exceeds SIZE. If only one probe is
9185 needed, this will not generate any code. Then adjust and probe
9186 to PROBE_INTERVAL + SIZE. */
9187 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9189 if (first_probe)
9191 adjust = 2 * PROBE_INTERVAL + dope;
9192 first_probe = false;
9194 else
9195 adjust = PROBE_INTERVAL;
9197 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9198 plus_constant (stack_pointer_rtx, -adjust)));
9199 emit_stack_probe (stack_pointer_rtx);
9202 if (first_probe)
9203 adjust = size + PROBE_INTERVAL + dope;
9204 else
9205 adjust = size + PROBE_INTERVAL - i;
9207 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9208 plus_constant (stack_pointer_rtx, -adjust)));
9209 emit_stack_probe (stack_pointer_rtx);
9211 /* Adjust back to account for the additional first interval. */
9212 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9213 plus_constant (stack_pointer_rtx,
9214 PROBE_INTERVAL + dope)));
9217 /* Otherwise, do the same as above, but in a loop. Note that we must be
9218 extra careful with variables wrapping around because we might be at
9219 the very top (or the very bottom) of the address space and we have
9220 to be able to handle this case properly; in particular, we use an
9221 equality test for the loop condition. */
9222 else
9224 HOST_WIDE_INT rounded_size;
9225 struct scratch_reg sr;
9227 get_scratch_register_on_entry (&sr);
9230 /* Step 1: round SIZE to the previous multiple of the interval. */
9232 rounded_size = size & -PROBE_INTERVAL;
9235 /* Step 2: compute initial and final value of the loop counter. */
9237 /* SP = SP_0 + PROBE_INTERVAL. */
9238 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9239 plus_constant (stack_pointer_rtx,
9240 - (PROBE_INTERVAL + dope))));
9242 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9243 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9244 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9245 gen_rtx_PLUS (Pmode, sr.reg,
9246 stack_pointer_rtx)));
9249 /* Step 3: the loop
9251 while (SP != LAST_ADDR)
9253 SP = SP + PROBE_INTERVAL
9254 probe at SP
9257 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9258 values of N from 1 until it is equal to ROUNDED_SIZE. */
9260 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9263 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9264 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9266 if (size != rounded_size)
9268 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9269 plus_constant (stack_pointer_rtx,
9270 rounded_size - size)));
9271 emit_stack_probe (stack_pointer_rtx);
9274 /* Adjust back to account for the additional first interval. */
9275 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9276 plus_constant (stack_pointer_rtx,
9277 PROBE_INTERVAL + dope)));
9279 release_scratch_register_on_entry (&sr);
9282 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9283 cfun->machine->fs.sp_offset += size;
9285 /* Make sure nothing is scheduled before we are done. */
9286 emit_insn (gen_blockage ());
9289 /* Adjust the stack pointer up to REG while probing it. */
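/* The loop emitted below looks roughly like this in AT&T syntax, assuming
   a 4096-byte PROBE_INTERVAL and an illustrative scratch register:

	.LPSRL0: cmpl	%eax, %esp
		 je	.LPSRE0
		 subl	$4096, %esp
		 orl	$0, (%esp)
		 jmp	.LPSRL0
	.LPSRE0:  */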
9291 const char *
9292 output_adjust_stack_and_probe (rtx reg)
9294 static int labelno = 0;
9295 char loop_lab[32], end_lab[32];
9296 rtx xops[2];
9298 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9299 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9301 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9303 /* Jump to END_LAB if SP == LAST_ADDR. */
9304 xops[0] = stack_pointer_rtx;
9305 xops[1] = reg;
9306 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9307 fputs ("\tje\t", asm_out_file);
9308 assemble_name_raw (asm_out_file, end_lab);
9309 fputc ('\n', asm_out_file);
9311 /* SP = SP + PROBE_INTERVAL. */
9312 xops[1] = GEN_INT (PROBE_INTERVAL);
9313 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9315 /* Probe at SP. */
9316 xops[1] = const0_rtx;
9317 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9319 fprintf (asm_out_file, "\tjmp\t");
9320 assemble_name_raw (asm_out_file, loop_lab);
9321 fputc ('\n', asm_out_file);
9323 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9325 return "";
9328 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9329 inclusive. These are offsets from the current stack pointer. */
9331 static void
9332 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9334 /* See if we have a constant small number of probes to generate. If so,
9335 that's the easy case. The run-time loop is made up of 7 insns in the
9336 generic case while the compile-time loop is made up of n insns for n #
9337 of intervals. */
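/* Note that unlike ix86_adjust_stack_and_probe above, this routine leaves
   the stack pointer unchanged and only touches the words at
   SP - (FIRST + N * PROBE_INTERVAL).  */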
9338 if (size <= 7 * PROBE_INTERVAL)
9340 HOST_WIDE_INT i;
9342 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9343 it exceeds SIZE. If only one probe is needed, this will not
9344 generate any code. Then probe at FIRST + SIZE. */
9345 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9346 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9348 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9351 /* Otherwise, do the same as above, but in a loop. Note that we must be
9352 extra careful with variables wrapping around because we might be at
9353 the very top (or the very bottom) of the address space and we have
9354 to be able to handle this case properly; in particular, we use an
9355 equality test for the loop condition. */
9356 else
9358 HOST_WIDE_INT rounded_size, last;
9359 struct scratch_reg sr;
9361 get_scratch_register_on_entry (&sr);
9364 /* Step 1: round SIZE to the previous multiple of the interval. */
9366 rounded_size = size & -PROBE_INTERVAL;
9369 /* Step 2: compute initial and final value of the loop counter. */
9371 /* TEST_OFFSET = FIRST. */
9372 emit_move_insn (sr.reg, GEN_INT (-first));
9374 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9375 last = first + rounded_size;
9378 /* Step 3: the loop
9380 while (TEST_ADDR != LAST_ADDR)
9382 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9383 probe at TEST_ADDR
9386 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9387 until it is equal to ROUNDED_SIZE. */
9389 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9392 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9393 that SIZE is equal to ROUNDED_SIZE. */
9395 if (size != rounded_size)
9396 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9397 stack_pointer_rtx,
9398 sr.reg),
9399 rounded_size - size));
9401 release_scratch_register_on_entry (&sr);
9404 /* Make sure nothing is scheduled before we are done. */
9405 emit_insn (gen_blockage ());
9408 /* Probe a range of stack addresses from REG to END, inclusive. These are
9409 offsets from the current stack pointer. */
9411 const char *
9412 output_probe_stack_range (rtx reg, rtx end)
9414 static int labelno = 0;
9415 char loop_lab[32], end_lab[32];
9416 rtx xops[3];
9418 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9419 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9421 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9423 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9424 xops[0] = reg;
9425 xops[1] = end;
9426 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9427 fputs ("\tje\t", asm_out_file);
9428 assemble_name_raw (asm_out_file, end_lab);
9429 fputc ('\n', asm_out_file);
9431 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9432 xops[1] = GEN_INT (PROBE_INTERVAL);
9433 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9435 /* Probe at TEST_ADDR. */
9436 xops[0] = stack_pointer_rtx;
9437 xops[1] = reg;
9438 xops[2] = const0_rtx;
9439 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9441 fprintf (asm_out_file, "\tjmp\t");
9442 assemble_name_raw (asm_out_file, loop_lab);
9443 fputc ('\n', asm_out_file);
9445 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9447 return "";
9450 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
9451 to be generated in correct form. */
9452 static void
9453 ix86_finalize_stack_realign_flags (void)
9455 /* Check whether stack realignment is really needed after reload, and
9456 store the result in cfun. */
9457 unsigned int incoming_stack_boundary
9458 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9459 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9460 unsigned int stack_realign = (incoming_stack_boundary
9461 < (current_function_is_leaf
9462 ? crtl->max_used_stack_slot_alignment
9463 : crtl->stack_alignment_needed));
9465 if (crtl->stack_realign_finalized)
9467 /* After stack_realign_needed is finalized, we can no longer
9468 change it. */
9469 gcc_assert (crtl->stack_realign_needed == stack_realign);
9471 else
9473 crtl->stack_realign_needed = stack_realign;
9474 crtl->stack_realign_finalized = true;
9478 /* Expand the prologue into a bunch of separate insns. */
9480 void
9481 ix86_expand_prologue (void)
9483 struct machine_function *m = cfun->machine;
9484 rtx insn, t;
9485 bool pic_reg_used;
9486 struct ix86_frame frame;
9487 HOST_WIDE_INT allocate;
9488 bool int_registers_saved;
9490 ix86_finalize_stack_realign_flags ();
9492 /* DRAP should not coexist with stack_realign_fp */
9493 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9495 memset (&m->fs, 0, sizeof (m->fs));
9497 /* Initialize CFA state for before the prologue. */
9498 m->fs.cfa_reg = stack_pointer_rtx;
9499 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9501 /* Track SP offset to the CFA. We continue tracking this after we've
9502 swapped the CFA register away from SP. In the case of re-alignment
9503 this is fudged; we're interested in offsets within the local frame. */
9504 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9505 m->fs.sp_valid = true;
9507 ix86_compute_frame_layout (&frame);
9509 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9511 /* We should have already generated an error for any use of
9512 ms_hook on a nested function. */
9513 gcc_checking_assert (!ix86_static_chain_on_stack);
9515 /* Check whether profiling is active and we shall use the
9516 profiling-before-prologue variant. If so, issue a sorry. */
9517 if (crtl->profile && flag_fentry != 0)
9518 sorry ("ms_hook_prologue attribute isn't compatible with -mfentry for 32-bit");
9520 /* In ix86_asm_output_function_label we emitted:
9521 8b ff movl.s %edi,%edi
9522 55 push %ebp
9523 8b ec movl.s %esp,%ebp
9525 This matches the hookable function prologue in Win32 API
9526 functions in Microsoft Windows XP Service Pack 2 and newer.
9527 Wine uses this to enable Windows apps to hook the Win32 API
9528 functions provided by Wine.
9530 What that means is that we've already set up the frame pointer. */
9532 if (frame_pointer_needed
9533 && !(crtl->drap_reg && crtl->stack_realign_needed))
9535 rtx push, mov;
9537 /* We've decided to use the frame pointer already set up.
9538 Describe this to the unwinder by pretending that both
9539 push and mov insns happen right here.
9541 Putting the unwind info here at the end of the ms_hook
9542 is done so that we can make absolutely certain we get
9543 the required byte sequence at the start of the function,
9544 rather than relying on an assembler that can produce
9545 the exact encoding required.
9547 However it does mean (in the unpatched case) that we have
9548 a 1 insn window where the asynchronous unwind info is
9549 incorrect. However, if we placed the unwind info at
9550 its correct location we would have incorrect unwind info
9551 in the patched case. Which is probably all moot since
9552 I don't expect Wine generates dwarf2 unwind info for the
9553 system libraries that use this feature. */
9555 insn = emit_insn (gen_blockage ());
9557 push = gen_push (hard_frame_pointer_rtx);
9558 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
9559 stack_pointer_rtx);
9560 RTX_FRAME_RELATED_P (push) = 1;
9561 RTX_FRAME_RELATED_P (mov) = 1;
9563 RTX_FRAME_RELATED_P (insn) = 1;
9564 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9565 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9567 /* Note that gen_push incremented m->fs.cfa_offset, even
9568 though we didn't emit the push insn here. */
9569 m->fs.cfa_reg = hard_frame_pointer_rtx;
9570 m->fs.fp_offset = m->fs.cfa_offset;
9571 m->fs.fp_valid = true;
9573 else
9575 /* The frame pointer is not needed so pop %ebp again.
9576 This leaves us with a pristine state. */
9577 emit_insn (gen_pop (hard_frame_pointer_rtx));
9581 /* The first insn of a function that accepts its static chain on the
9582 stack is to push the register that would be filled in by a direct
9583 call. This insn will be skipped by the trampoline. */
9584 else if (ix86_static_chain_on_stack)
9586 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
9587 emit_insn (gen_blockage ());
9589 /* We don't want to interpret this push insn as a register save,
9590 only as a stack adjustment. The real copy of the register as
9591 a save will be done later, if needed. */
9592 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
9593 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9594 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9595 RTX_FRAME_RELATED_P (insn) = 1;
9598 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
9599 DRAP is needed and stack realignment is really needed after reload. */
9600 if (stack_realign_drap)
9602 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9604 /* Only need to push the parameter pointer reg if it is callee-saved. */
9605 if (!call_used_regs[REGNO (crtl->drap_reg)])
9607 /* Push arg pointer reg */
9608 insn = emit_insn (gen_push (crtl->drap_reg));
9609 RTX_FRAME_RELATED_P (insn) = 1;
9612 /* Grab the argument pointer. */
9613 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
9614 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9615 RTX_FRAME_RELATED_P (insn) = 1;
9616 m->fs.cfa_reg = crtl->drap_reg;
9617 m->fs.cfa_offset = 0;
9619 /* Align the stack. */
9620 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9621 stack_pointer_rtx,
9622 GEN_INT (-align_bytes)));
9623 RTX_FRAME_RELATED_P (insn) = 1;
9625 /* Replicate the return address on the stack so that return
9626 address can be reached via (argp - 1) slot. This is needed
9627 to implement macro RETURN_ADDR_RTX and intrinsic function
9628 expand_builtin_return_addr etc. */
9629 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
9630 t = gen_frame_mem (Pmode, t);
9631 insn = emit_insn (gen_push (t));
9632 RTX_FRAME_RELATED_P (insn) = 1;
9634 /* For the purposes of frame and register save area addressing,
9635 we've started over with a new frame. */
9636 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9637 m->fs.realigned = true;
9640 if (frame_pointer_needed && !m->fs.fp_valid)
9642 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9643 slower on all targets. Also sdb doesn't like it. */
9644 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9645 RTX_FRAME_RELATED_P (insn) = 1;
9647 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9648 RTX_FRAME_RELATED_P (insn) = 1;
9650 if (m->fs.cfa_reg == stack_pointer_rtx)
9651 m->fs.cfa_reg = hard_frame_pointer_rtx;
9652 gcc_assert (m->fs.sp_offset == frame.hard_frame_pointer_offset);
9653 m->fs.fp_offset = m->fs.sp_offset;
9654 m->fs.fp_valid = true;
9657 int_registers_saved = (frame.nregs == 0);
9659 if (!int_registers_saved)
9661 /* If saving registers via PUSH, do so now. */
9662 if (!frame.save_regs_using_mov)
9664 ix86_emit_save_regs ();
9665 int_registers_saved = true;
9666 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
9669 /* When using the red zone we may start register saving before allocating
9670 the stack frame, saving one cycle of the prologue. However, avoid
9671 doing this if we have to probe the stack; at least on x86_64 the
9672 stack probe can turn into a call that clobbers a red zone location. */
9673 else if (ix86_using_red_zone ()
9674 && (! TARGET_STACK_PROBE
9675 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
9677 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9678 int_registers_saved = true;
9682 if (stack_realign_fp)
9684 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9685 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
9687 /* The computation of the size of the re-aligned stack frame means
9688 that we must allocate the size of the register save area before
9689 performing the actual alignment. Otherwise we cannot guarantee
9690 that there's enough storage above the realignment point. */
9691 if (m->fs.sp_offset != frame.sse_reg_save_offset)
9692 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9693 GEN_INT (m->fs.sp_offset
9694 - frame.sse_reg_save_offset),
9695 -1, false);
9697 /* Align the stack. */
9698 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9699 stack_pointer_rtx,
9700 GEN_INT (-align_bytes)));
9702 /* For the purposes of register save area addressing, the stack
9703 pointer is no longer valid. As for the value of sp_offset,
9704 see ix86_compute_frame_layout, which we need to match in order
9705 to pass verification of stack_pointer_offset at the end. */
9706 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
9707 m->fs.sp_valid = false;
9710 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
9712 if (flag_stack_usage)
9714 /* We start to count from ARG_POINTER. */
9715 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
9717 /* If it was realigned, take into account the fake frame. */
9718 if (stack_realign_drap)
9720 if (ix86_static_chain_on_stack)
9721 stack_size += UNITS_PER_WORD;
9723 if (!call_used_regs[REGNO (crtl->drap_reg)])
9724 stack_size += UNITS_PER_WORD;
9726 /* This over-estimates by 1 minimal-stack-alignment-unit but
9727 mitigates that by counting in the new return address slot. */
9728 current_function_dynamic_stack_size
9729 += crtl->stack_alignment_needed / BITS_PER_UNIT;
9732 current_function_static_stack_size = stack_size;
9735 /* The stack has already been decremented by the instruction calling us
9736 so we need to probe unconditionally to preserve the protection area. */
9737 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9739 /* We expect the registers to be saved when probes are used. */
9740 gcc_assert (int_registers_saved);
9742 if (STACK_CHECK_MOVING_SP)
9744 ix86_adjust_stack_and_probe (allocate);
9745 allocate = 0;
9747 else
9749 HOST_WIDE_INT size = allocate;
9751 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
9752 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
9754 if (TARGET_STACK_PROBE)
9755 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
9756 else
9757 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
9761 if (allocate == 0)
9763 else if (!ix86_target_stack_probe ()
9764 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
9766 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9767 GEN_INT (-allocate), -1,
9768 m->fs.cfa_reg == stack_pointer_rtx);
9770 else
9772 rtx eax = gen_rtx_REG (Pmode, AX_REG);
9773 bool eax_live;
9775 if (cfun->machine->call_abi == MS_ABI)
9776 eax_live = false;
9777 else
9778 eax_live = ix86_eax_live_at_start_p ();
9780 if (eax_live)
9782 emit_insn (gen_push (eax));
9783 allocate -= UNITS_PER_WORD;
9786 emit_move_insn (eax, GEN_INT (allocate));
9788 insn = emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
9790 if (m->fs.cfa_reg == stack_pointer_rtx)
9792 m->fs.cfa_offset += allocate;
9793 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
9794 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9795 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9796 RTX_FRAME_RELATED_P (insn) = 1;
9798 m->fs.sp_offset += allocate;
9800 if (eax_live)
9802 t = choose_baseaddr (m->fs.sp_offset - allocate);
9803 emit_move_insn (eax, gen_frame_mem (Pmode, t));
9806 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
9808 if (!int_registers_saved)
9809 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9810 if (frame.nsseregs)
9811 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9813 pic_reg_used = false;
9814 if (pic_offset_table_rtx
9815 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9816 || crtl->profile))
9818 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
9820 if (alt_pic_reg_used != INVALID_REGNUM)
9821 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
9823 pic_reg_used = true;
9826 if (pic_reg_used)
9828 if (TARGET_64BIT)
9830 if (ix86_cmodel == CM_LARGE_PIC)
9832 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
9833 rtx label = gen_label_rtx ();
9834 emit_label (label);
9835 LABEL_PRESERVE_P (label) = 1;
9836 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
9837 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
9838 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
9839 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
9840 pic_offset_table_rtx, tmp_reg));
9842 else
9843 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
9845 else
9846 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
9849 /* In the pic_reg_used case, make sure that the got load isn't deleted
9850 when mcount needs it. Blockage to avoid call movement across mcount
9851 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
9852 note. */
9853 if (crtl->profile && !flag_fentry && pic_reg_used)
9854 emit_insn (gen_prologue_use (pic_offset_table_rtx));
9856 if (crtl->drap_reg && !crtl->stack_realign_needed)
9858 /* vDRAP is set up, but after reload it turns out stack realignment
9859 isn't necessary; here we emit the prologue to set up DRAP
9860 without the stack realignment adjustment. */
9861 t = choose_baseaddr (0);
9862 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9865 /* Prevent instructions from being scheduled into register save push
9866 sequence when access to the redzone area is done through frame pointer.
9867 The offset between the frame pointer and the stack pointer is calculated
9868 relative to the value of the stack pointer at the end of the function
9869 prologue, and moving instructions that access redzone area via frame
9870 pointer inside push sequence violates this assumption. */
9871 if (frame_pointer_needed && frame.red_zone_size)
9872 emit_insn (gen_memory_blockage ());
9874 /* Emit cld instruction if stringops are used in the function. */
9875 if (TARGET_CLD && ix86_current_function_needs_cld)
9876 emit_insn (gen_cld ());
9879 /* Emit code to restore REG using a POP insn. */
9881 static void
9882 ix86_emit_restore_reg_using_pop (rtx reg)
9884 struct machine_function *m = cfun->machine;
9885 rtx insn = emit_insn (gen_pop (reg));
9887 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
9888 m->fs.sp_offset -= UNITS_PER_WORD;
9890 if (m->fs.cfa_reg == crtl->drap_reg
9891 && REGNO (reg) == REGNO (crtl->drap_reg))
9893 /* Previously we'd represented the CFA as an expression
9894 like *(%ebp - 8). We've just popped that value from
9895 the stack, which means we need to reset the CFA to
9896 the drap register. This will remain until we restore
9897 the stack pointer. */
9898 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9899 RTX_FRAME_RELATED_P (insn) = 1;
9901 /* This means that the DRAP register is valid for addressing too. */
9902 m->fs.drap_valid = true;
9903 return;
9906 if (m->fs.cfa_reg == stack_pointer_rtx)
9908 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
9909 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9910 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9911 RTX_FRAME_RELATED_P (insn) = 1;
9913 m->fs.cfa_offset -= UNITS_PER_WORD;
9916 /* When the frame pointer is the CFA, and we pop it, we are
9917 swapping back to the stack pointer as the CFA. This happens
9918 for stack frames that don't allocate other data, so we assume
9919 the stack pointer is now pointing at the return address, i.e.
9920 the function entry state, which makes the offset be 1 word. */
9921 if (reg == hard_frame_pointer_rtx)
9923 m->fs.fp_valid = false;
9924 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9926 m->fs.cfa_reg = stack_pointer_rtx;
9927 m->fs.cfa_offset -= UNITS_PER_WORD;
9929 add_reg_note (insn, REG_CFA_DEF_CFA,
9930 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9931 GEN_INT (m->fs.cfa_offset)));
9932 RTX_FRAME_RELATED_P (insn) = 1;
9937 /* Emit code to restore saved registers using POP insns. */
9939 static void
9940 ix86_emit_restore_regs_using_pop (void)
9942 unsigned int regno;
9944 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9945 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
9946 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
9949 /* Emit code and notes for the LEAVE instruction. */
9951 static void
9952 ix86_emit_leave (void)
9954 struct machine_function *m = cfun->machine;
9955 rtx insn = emit_insn (ix86_gen_leave ());
9957 ix86_add_queued_cfa_restore_notes (insn);
9959 gcc_assert (m->fs.fp_valid);
9960 m->fs.sp_valid = true;
9961 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
9962 m->fs.fp_valid = false;
9964 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9966 m->fs.cfa_reg = stack_pointer_rtx;
9967 m->fs.cfa_offset = m->fs.sp_offset;
9969 add_reg_note (insn, REG_CFA_DEF_CFA,
9970 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
9971 RTX_FRAME_RELATED_P (insn) = 1;
9972 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
9973 m->fs.fp_offset);
9977 /* Emit code to restore saved registers using MOV insns.
9978 First register is restored from CFA - CFA_OFFSET. */
9979 static void
9980 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
9981 int maybe_eh_return)
9983 struct machine_function *m = cfun->machine;
9984 unsigned int regno;
9986 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9987 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
9989 rtx reg = gen_rtx_REG (Pmode, regno);
9990 rtx insn, mem;
9992 mem = choose_baseaddr (cfa_offset);
9993 mem = gen_frame_mem (Pmode, mem);
9994 insn = emit_move_insn (reg, mem);
9996 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
9998 /* Previously we'd represented the CFA as an expression
9999 like *(%ebp - 8). We've just popped that value from
10000 the stack, which means we need to reset the CFA to
10001 the drap register. This will remain until we restore
10002 the stack pointer. */
10003 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10004 RTX_FRAME_RELATED_P (insn) = 1;
10006 /* This means that the DRAP register is valid for addressing. */
10007 m->fs.drap_valid = true;
10009 else
10010 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10012 cfa_offset -= UNITS_PER_WORD;
10016 /* Emit code to restore saved SSE registers using MOV insns.
10017 First register is restored from CFA - CFA_OFFSET. */
10018 static void
10019 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10020 int maybe_eh_return)
10022 unsigned int regno;
10024 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10025 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10027 rtx reg = gen_rtx_REG (V4SFmode, regno);
10028 rtx mem;
10030 mem = choose_baseaddr (cfa_offset);
10031 mem = gen_rtx_MEM (V4SFmode, mem);
10032 set_mem_align (mem, 128);
10033 emit_move_insn (reg, mem);
10035 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10037 cfa_offset -= 16;
10041 /* Restore function stack, frame, and registers. */
10043 void
10044 ix86_expand_epilogue (int style)
10046 struct machine_function *m = cfun->machine;
10047 struct machine_frame_state frame_state_save = m->fs;
10048 struct ix86_frame frame;
10049 bool restore_regs_via_mov;
10050 bool using_drap;
10052 ix86_finalize_stack_realign_flags ();
10053 ix86_compute_frame_layout (&frame);
10055 m->fs.sp_valid = (!frame_pointer_needed
10056 || (current_function_sp_is_unchanging
10057 && !stack_realign_fp));
10058 gcc_assert (!m->fs.sp_valid
10059 || m->fs.sp_offset == frame.stack_pointer_offset);
10061 /* The FP must be valid if the frame pointer is present. */
10062 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10063 gcc_assert (!m->fs.fp_valid
10064 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10066 /* We must have *some* valid pointer to the stack frame. */
10067 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10069 /* The DRAP is never valid at this point. */
10070 gcc_assert (!m->fs.drap_valid);
10072 /* See the comment about red zone and frame
10073 pointer usage in ix86_expand_prologue. */
10074 if (frame_pointer_needed && frame.red_zone_size)
10075 emit_insn (gen_memory_blockage ());
10077 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10078 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10080 /* Determine the CFA offset of the end of the red-zone. */
10081 m->fs.red_zone_offset = 0;
10082 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10084 /* The red-zone begins below the return address. */
10085 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10087 /* When the register save area is in the aligned portion of
10088 the stack, determine the maximum runtime displacement that
10089 matches up with the aligned frame. */
10090 if (stack_realign_drap)
10091 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10092 + UNITS_PER_WORD);
10095 /* Special care must be taken for the normal return case of a function
10096 using eh_return: the eax and edx registers are marked as saved, but
10097 not restored along this path. Adjust the save location to match. */
10098 if (crtl->calls_eh_return && style != 2)
10099 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10101 /* If we're only restoring one register and sp is not valid then
10102 use a move instruction to restore the register, since it's
10103 less work than reloading sp and popping the register. */
10104 if (!m->fs.sp_valid && frame.nregs <= 1)
10105 restore_regs_via_mov = true;
10106 /* EH_RETURN requires the use of moves to function properly. */
10107 else if (crtl->calls_eh_return)
10108 restore_regs_via_mov = true;
10109 else if (TARGET_EPILOGUE_USING_MOVE
10110 && cfun->machine->use_fast_prologue_epilogue
10111 && (frame.nregs > 1
10112 || m->fs.sp_offset != frame.reg_save_offset))
10113 restore_regs_via_mov = true;
10114 else if (frame_pointer_needed
10115 && !frame.nregs
10116 && m->fs.sp_offset != frame.reg_save_offset)
10117 restore_regs_via_mov = true;
10118 else if (frame_pointer_needed
10119 && TARGET_USE_LEAVE
10120 && cfun->machine->use_fast_prologue_epilogue
10121 && frame.nregs == 1)
10122 restore_regs_via_mov = true;
10123 else
10124 restore_regs_via_mov = false;
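/* In short: moves are used when the stack pointer is not usable, when
   eh_return is involved, or when the tuning flags above indicate a
   mov/leave sequence is cheaper; otherwise the registers are popped.  */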
10126 if (restore_regs_via_mov || frame.nsseregs)
10128 /* Ensure that the entire register save area is addressable via
10129 the stack pointer, if we will restore via sp. */
10130 if (TARGET_64BIT
10131 && m->fs.sp_offset > 0x7fffffff
10132 && !(m->fs.fp_valid || m->fs.drap_valid)
10133 && (frame.nsseregs + frame.nregs) != 0)
10135 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10136 GEN_INT (m->fs.sp_offset
10137 - frame.sse_reg_save_offset),
10138 style,
10139 m->fs.cfa_reg == stack_pointer_rtx);
10143 /* If there are any SSE registers to restore, then we have to do it
10144 via moves, since there's obviously no pop for SSE regs. */
10145 if (frame.nsseregs)
10146 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10147 style == 2);
10149 if (restore_regs_via_mov)
10151 rtx t;
10153 if (frame.nregs)
10154 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10156 /* eh_return epilogues need %ecx added to the stack pointer. */
10157 if (style == 2)
10159 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10161 /* Stack align doesn't work with eh_return. */
10162 gcc_assert (!stack_realign_drap);
10163 /* Neither do regparm nested functions. */
10164 gcc_assert (!ix86_static_chain_on_stack);
10166 if (frame_pointer_needed)
10168 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10169 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10170 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10172 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10173 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10175 /* Note that we use SA as a temporary CFA, as the return
10176 address is at the proper place relative to it. We
10177 pretend this happens at the FP restore insn because
10178 prior to this insn the FP would be stored at the wrong
10179 offset relative to SA, and after this insn we have no
10180 other reasonable register to use for the CFA. We don't
10181 bother resetting the CFA to the SP for the duration of
10182 the return insn. */
10183 add_reg_note (insn, REG_CFA_DEF_CFA,
10184 plus_constant (sa, UNITS_PER_WORD));
10185 ix86_add_queued_cfa_restore_notes (insn);
10186 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10187 RTX_FRAME_RELATED_P (insn) = 1;
10189 m->fs.cfa_reg = sa;
10190 m->fs.cfa_offset = UNITS_PER_WORD;
10191 m->fs.fp_valid = false;
10193 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10194 const0_rtx, style, false);
10196 else
10198 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10199 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10200 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10201 ix86_add_queued_cfa_restore_notes (insn);
10203 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10204 if (m->fs.cfa_offset != UNITS_PER_WORD)
10206 m->fs.cfa_offset = UNITS_PER_WORD;
10207 add_reg_note (insn, REG_CFA_DEF_CFA,
10208 plus_constant (stack_pointer_rtx,
10209 UNITS_PER_WORD));
10210 RTX_FRAME_RELATED_P (insn) = 1;
10213 m->fs.sp_offset = UNITS_PER_WORD;
10214 m->fs.sp_valid = true;
10217 else
10219 /* First step is to deallocate the stack frame so that we can
10220 pop the registers. */
10221 if (!m->fs.sp_valid)
10223 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10224 GEN_INT (m->fs.fp_offset
10225 - frame.reg_save_offset),
10226 style, false);
10228 else if (m->fs.sp_offset != frame.reg_save_offset)
10230 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10231 GEN_INT (m->fs.sp_offset
10232 - frame.reg_save_offset),
10233 style,
10234 m->fs.cfa_reg == stack_pointer_rtx);
10237 ix86_emit_restore_regs_using_pop ();
10240 /* If we used a frame pointer and haven't already got rid of it,
10241 then do so now. */
10242 if (m->fs.fp_valid)
10244 /* If the stack pointer is valid and pointing at the frame
10245 pointer store address, then we only need a pop. */
10246 if (m->fs.sp_valid && m->fs.sp_offset == frame.hard_frame_pointer_offset)
10247 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10248 /* Leave results in shorter dependency chains on CPUs that are
10249 able to grok it fast. */
10250 else if (TARGET_USE_LEAVE
10251 || optimize_function_for_size_p (cfun)
10252 || !cfun->machine->use_fast_prologue_epilogue)
10253 ix86_emit_leave ();
10254 else
10256 pro_epilogue_adjust_stack (stack_pointer_rtx,
10257 hard_frame_pointer_rtx,
10258 const0_rtx, style, !using_drap);
10259 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10263 if (using_drap)
10265 int param_ptr_offset = UNITS_PER_WORD;
10266 rtx insn;
10268 gcc_assert (stack_realign_drap);
10270 if (ix86_static_chain_on_stack)
10271 param_ptr_offset += UNITS_PER_WORD;
10272 if (!call_used_regs[REGNO (crtl->drap_reg)])
10273 param_ptr_offset += UNITS_PER_WORD;
10275 insn = emit_insn (gen_rtx_SET
10276 (VOIDmode, stack_pointer_rtx,
10277 gen_rtx_PLUS (Pmode,
10278 crtl->drap_reg,
10279 GEN_INT (-param_ptr_offset))));
10280 m->fs.cfa_reg = stack_pointer_rtx;
10281 m->fs.cfa_offset = param_ptr_offset;
10282 m->fs.sp_offset = param_ptr_offset;
10283 m->fs.realigned = false;
10285 add_reg_note (insn, REG_CFA_DEF_CFA,
10286 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10287 GEN_INT (param_ptr_offset)));
10288 RTX_FRAME_RELATED_P (insn) = 1;
10290 if (!call_used_regs[REGNO (crtl->drap_reg)])
10291 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10294 /* At this point the stack pointer must be valid, and we must have
10295 restored all of the registers. We may not have deallocated the
10296 entire stack frame. We've delayed this until now because it may
10297 be possible to merge the local stack deallocation with the
10298 deallocation forced by ix86_static_chain_on_stack. */
10299 gcc_assert (m->fs.sp_valid);
10300 gcc_assert (!m->fs.fp_valid);
10301 gcc_assert (!m->fs.realigned);
10302 if (m->fs.sp_offset != UNITS_PER_WORD)
10304 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10305 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10306 style, true);
10309 /* Sibcall epilogues don't want a return instruction. */
10310 if (style == 0)
10312 m->fs = frame_state_save;
10313 return;
10316 if (crtl->args.pops_args && crtl->args.size)
10318 rtx popc = GEN_INT (crtl->args.pops_args);
10320 /* i386 can only pop 64K bytes. If asked to pop more, pop return
10321 address, do explicit add, and jump indirectly to the caller. */
10323 if (crtl->args.pops_args >= 65536)
10325 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10326 rtx insn;
10328 /* There is no "pascal" calling convention in any 64bit ABI. */
10329 gcc_assert (!TARGET_64BIT);
10331 insn = emit_insn (gen_pop (ecx));
10332 m->fs.cfa_offset -= UNITS_PER_WORD;
10333 m->fs.sp_offset -= UNITS_PER_WORD;
10335 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10336 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10337 add_reg_note (insn, REG_CFA_REGISTER,
10338 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10339 RTX_FRAME_RELATED_P (insn) = 1;
10341 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10342 popc, -1, true);
10343 emit_jump_insn (gen_return_indirect_internal (ecx));
10345 else
10346 emit_jump_insn (gen_return_pop_internal (popc));
10348 else
10349 emit_jump_insn (gen_return_internal ());
10351 /* Restore the state back to the state from the prologue,
10352 so that it's correct for the next epilogue. */
10353 m->fs = frame_state_save;
10356 /* Reset from the function's potential modifications. */
10358 static void
10359 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10360 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10362 if (pic_offset_table_rtx)
10363 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10364 #if TARGET_MACHO
10365 /* Mach-O doesn't support labels at the end of objects, so if
10366 it looks like we might want one, insert a NOP. */
10368 rtx insn = get_last_insn ();
10369 while (insn
10370 && NOTE_P (insn)
10371 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10372 insn = PREV_INSN (insn);
10373 if (insn
10374 && (LABEL_P (insn)
10375 || (NOTE_P (insn)
10376 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10377 fputs ("\tnop\n", file);
10379 #endif
10383 /* Return a scratch register to use in the split stack prologue. The
10384 split stack prologue is used for -fsplit-stack. It is the first
10385 instructions in the function, even before the regular prologue.
10386 The scratch register can be any caller-saved register which is not
10387 used for parameters or for the static chain. */
10389 static unsigned int
10390 split_stack_prologue_scratch_regno (void)
10392 if (TARGET_64BIT)
10393 return R11_REG;
10394 else
10396 bool is_fastcall;
10397 int regparm;
10399 is_fastcall = (lookup_attribute ("fastcall",
10400 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10401 != NULL);
10402 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10404 if (is_fastcall)
10406 if (DECL_STATIC_CHAIN (cfun->decl))
10407 sorry ("-fsplit-stack does not support fastcall with "
10408 "nested function");
10409 return AX_REG;
10411 else if (regparm < 3)
10413 if (!DECL_STATIC_CHAIN (cfun->decl))
10414 return CX_REG;
10415 else
10417 if (regparm >= 2)
10418 sorry ("-fsplit-stack does not support 2 register "
10419 " parameters for a nested function");
10420 return DX_REG;
10423 else
10425 /* FIXME: We could make this work by pushing a register
10426 around the addition and comparison. */
10427 sorry ("-fsplit-stack does not support 3 register parameters");
10428 return CX_REG;
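/* To summarize the cases above: 64-bit code always gets %r11; fastcall
   functions get %eax; otherwise %ecx is used unless it holds the static
   chain, in which case %edx is used (provided at most one register
   parameter is in use).  */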
10433 /* A SYMBOL_REF for the function which allocates new stack space for
10434 -fsplit-stack. */
10436 static GTY(()) rtx split_stack_fn;
10438 /* Handle -fsplit-stack. These are the first instructions in the
10439 function, even before the regular prologue. */
10441 void
10442 ix86_expand_split_stack_prologue (void)
10444 struct ix86_frame frame;
10445 HOST_WIDE_INT allocate;
10446 int args_size;
10447 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
10448 rtx scratch_reg = NULL_RTX;
10449 rtx varargs_label = NULL_RTX;
10451 gcc_assert (flag_split_stack && reload_completed);
10453 ix86_finalize_stack_realign_flags ();
10454 ix86_compute_frame_layout (&frame);
10455 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10457 /* This is the label we will branch to if we have enough stack
10458 space. We expect the basic block reordering pass to reverse this
10459 branch if optimizing, so that we branch in the unlikely case. */
10460 label = gen_label_rtx ();
10462 /* We need to compare the stack pointer minus the frame size with
10463 the stack boundary in the TCB. The stack boundary always gives
10464 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10465 can compare directly. Otherwise we need to do an addition. */
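/* Schematically, the check emitted below is (illustrative pseudo-code
   only, with "guard" standing for the stack boundary field in the TCB):

     temp = sp - allocate;	   <- skipped when allocate is small
     if (temp >= guard)		   <- unsigned comparison (GEU)
       goto enough_stack;
     __morestack (allocate, args_size);
     ...
   enough_stack:  */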
10467 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
10468 UNSPEC_STACK_CHECK);
10469 limit = gen_rtx_CONST (Pmode, limit);
10470 limit = gen_rtx_MEM (Pmode, limit);
10471 if (allocate < SPLIT_STACK_AVAILABLE)
10472 current = stack_pointer_rtx;
10473 else
10475 unsigned int scratch_regno;
10476 rtx offset;
10478 /* We need a scratch register to hold the stack pointer minus
10479 the required frame size. Since this is the very start of the
10480 function, the scratch register can be any caller-saved
10481 register which is not used for parameters. */
10482 offset = GEN_INT (- allocate);
10483 scratch_regno = split_stack_prologue_scratch_regno ();
10484 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10485 if (!TARGET_64BIT)
10486 emit_insn (gen_addsi3 (scratch_reg, stack_pointer_rtx, offset));
10487 else
10489 if (x86_64_immediate_operand (offset, Pmode))
10491 /* We don't use gen_adddi3 in this case because it will
10492 want to split to lea, but when not optimizing the
10493 insn will not be split after this point. */
10494 emit_move_insn (scratch_reg,
10495 gen_rtx_PLUS (Pmode, stack_pointer_rtx, offset));
10497 else
10499 emit_move_insn (scratch_reg, offset);
10500 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
10501 stack_pointer_rtx));
10504 current = scratch_reg;
10507 ix86_expand_branch (GEU, current, limit, label);
10508 jump_insn = get_last_insn ();
10509 JUMP_LABEL (jump_insn) = label;
10511 /* Mark the jump as very likely to be taken. */
10512 add_reg_note (jump_insn, REG_BR_PROB,
10513 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
10515 /* Get more stack space. We pass in the desired stack space and the
10516 size of the arguments to copy to the new stack. In 32-bit mode
10517 we push the parameters; __morestack will return on a new stack
10518 anyhow. In 64-bit mode we pass the parameters in r10 and
10519 r11. */
10520 allocate_rtx = GEN_INT (allocate);
10521 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
10522 call_fusage = NULL_RTX;
10523 if (!TARGET_64BIT)
10525 emit_insn (gen_push (GEN_INT (args_size)));
10526 emit_insn (gen_push (allocate_rtx));
10528 else
10530 rtx reg;
10532 reg = gen_rtx_REG (Pmode, R10_REG);
10534 /* If this function uses a static chain, it will be in %r10.
10535 Preserve it across the call to __morestack. */
10536 if (DECL_STATIC_CHAIN (cfun->decl))
10538 rtx rax;
10540 rax = gen_rtx_REG (Pmode, AX_REG);
10541 emit_move_insn (rax, reg);
10542 use_reg (&call_fusage, rax);
10545 emit_move_insn (reg, allocate_rtx);
10546 use_reg (&call_fusage, reg);
10547 reg = gen_rtx_REG (Pmode, R11_REG);
10548 emit_move_insn (reg, GEN_INT (args_size));
10549 use_reg (&call_fusage, reg);
10551 if (split_stack_fn == NULL_RTX)
10552 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
10553 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, split_stack_fn),
10554 GEN_INT (UNITS_PER_WORD), constm1_rtx,
10555 NULL_RTX, 0);
10556 add_function_usage_to (call_insn, call_fusage);
10558 /* In order to make call/return prediction work right, we now need
10559 to execute a return instruction. See
10560 libgcc/config/i386/morestack.S for the details on how this works.
10562 In order to support backtracing, we need to set the CFA around
10563 the call, so that the unwinder knows how to correctly pick up the
10564 return address. We set the CFA around the call because the
10565 unwinder looks up to the point of the call but not after the
10566 call. */
10567 add_reg_note (call_insn, REG_CFA_TEMPORARY,
10568 gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, SP_REG),
10569 GEN_INT (UNITS_PER_WORD)));
10570 RTX_FRAME_RELATED_P (call_insn) = 1;
10572 /* For flow purposes gcc must not see this as a return
10573 instruction--we need control flow to continue at the subsequent
10574 label. Therefore, we use an unspec. */
10575 if (crtl->args.pops_args == 0)
10576 emit_insn (gen_split_stack_return ());
10577 else
10579 gcc_assert (!TARGET_64BIT);
10580 gcc_assert (crtl->args.pops_args < 65536);
10581 emit_insn (gen_split_stack_pop_return (GEN_INT (crtl->args.pops_args)));
10584 /* If we are in 64-bit mode and this function uses a static chain,
10585 we saved %r10 in %rax before calling __morestack. */
10586 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
10587 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
10588 gen_rtx_REG (Pmode, AX_REG));
10590 /* If this function calls va_start, we need to store a pointer to
10591 the arguments on the old stack, because they may not have been
10592 all copied to the new stack. At this point the old stack can be
10593 found at the frame pointer value used by __morestack, because
10594 __morestack has set that up before calling back to us. Here we
10595 store that pointer in a scratch register, and in
10596 ix86_expand_prologue we store the scratch register in a stack
10597 slot. */
10598 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10600 unsigned int scratch_regno;
10601 rtx frame_reg;
10603 scratch_regno = split_stack_prologue_scratch_regno ();
10604 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10605 frame_reg = gen_rtx_REG (Pmode, BP_REG);
10607 /* fp -> old fp value
10608 return address within this function
10609 return address of caller of this function
10610 stack arguments
10611 So we add three words to get to the stack arguments.
10613 emit_move_insn (scratch_reg,
10614 gen_rtx_PLUS (Pmode, frame_reg,
10615 GEN_INT (3 * UNITS_PER_WORD)));
10617 varargs_label = gen_label_rtx ();
10618 emit_jump_insn (gen_jump (varargs_label));
10619 JUMP_LABEL (get_last_insn ()) = varargs_label;
10621 emit_barrier ();
10624 emit_label (label);
10625 LABEL_NUSES (label) = 1;
10627 /* If this function calls va_start, we now have to set the scratch
10628 register for the case where we do not call __morestack. In this
10629 case we need to set it based on the stack pointer. */
10630 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10632 emit_move_insn (scratch_reg,
10633 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10634 GEN_INT (UNITS_PER_WORD)));
10636 emit_label (varargs_label);
10637 LABEL_NUSES (varargs_label) = 1;
10641 /* We may have to tell the dataflow pass that the split stack prologue
10642 is initializing a scratch register. */
10644 static void
10645 ix86_live_on_entry (bitmap regs)
10647 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10649 gcc_assert (flag_split_stack);
10650 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
10654 /* Extract the parts of an RTL expression that is a valid memory address
10655 for an instruction. Return 0 if the structure of the address is
10656 grossly off. Return -1 if the address contains ASHIFT, so it is not
10657 strictly valid, but still usable for computing the length of an lea instruction. */
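/* For example (illustrative only), the address

     (plus:SI (plus:SI (mult:SI (reg:SI %esi) (const_int 4))
		       (reg:SI %ebx))
	      (const_int 12))

   decomposes into base = %ebx, index = %esi, scale = 4, disp = 12,
   i.e. the memory operand of an insn such as  movl 12(%ebx,%esi,4), %eax.  */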
10660 ix86_decompose_address (rtx addr, struct ix86_address *out)
10662 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
10663 rtx base_reg, index_reg;
10664 HOST_WIDE_INT scale = 1;
10665 rtx scale_rtx = NULL_RTX;
10666 rtx tmp;
10667 int retval = 1;
10668 enum ix86_address_seg seg = SEG_DEFAULT;
10670 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
10671 base = addr;
10672 else if (GET_CODE (addr) == PLUS)
10674 rtx addends[4], op;
10675 int n = 0, i;
10677 op = addr;
10680 if (n >= 4)
10681 return 0;
10682 addends[n++] = XEXP (op, 1);
10683 op = XEXP (op, 0);
10685 while (GET_CODE (op) == PLUS);
10686 if (n >= 4)
10687 return 0;
10688 addends[n] = op;
10690 for (i = n; i >= 0; --i)
10692 op = addends[i];
10693 switch (GET_CODE (op))
10695 case MULT:
10696 if (index)
10697 return 0;
10698 index = XEXP (op, 0);
10699 scale_rtx = XEXP (op, 1);
10700 break;
10702 case ASHIFT:
10703 if (index)
10704 return 0;
10705 index = XEXP (op, 0);
10706 tmp = XEXP (op, 1);
10707 if (!CONST_INT_P (tmp))
10708 return 0;
10709 scale = INTVAL (tmp);
10710 if ((unsigned HOST_WIDE_INT) scale > 3)
10711 return 0;
10712 scale = 1 << scale;
10713 break;
10715 case UNSPEC:
10716 if (XINT (op, 1) == UNSPEC_TP
10717 && TARGET_TLS_DIRECT_SEG_REFS
10718 && seg == SEG_DEFAULT)
10719 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
10720 else
10721 return 0;
10722 break;
10724 case REG:
10725 case SUBREG:
10726 if (!base)
10727 base = op;
10728 else if (!index)
10729 index = op;
10730 else
10731 return 0;
10732 break;
10734 case CONST:
10735 case CONST_INT:
10736 case SYMBOL_REF:
10737 case LABEL_REF:
10738 if (disp)
10739 return 0;
10740 disp = op;
10741 break;
10743 default:
10744 return 0;
10748 else if (GET_CODE (addr) == MULT)
10750 index = XEXP (addr, 0); /* index*scale */
10751 scale_rtx = XEXP (addr, 1);
10753 else if (GET_CODE (addr) == ASHIFT)
10755 /* We're called for lea too, which implements ashift on occasion. */
10756 index = XEXP (addr, 0);
10757 tmp = XEXP (addr, 1);
10758 if (!CONST_INT_P (tmp))
10759 return 0;
10760 scale = INTVAL (tmp);
10761 if ((unsigned HOST_WIDE_INT) scale > 3)
10762 return 0;
10763 scale = 1 << scale;
10764 retval = -1;
10766 else
10767 disp = addr; /* displacement */
10769 /* Extract the integral value of scale. */
10770 if (scale_rtx)
10772 if (!CONST_INT_P (scale_rtx))
10773 return 0;
10774 scale = INTVAL (scale_rtx);
10777 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
10778 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
10780 /* Avoid useless 0 displacement. */
10781 if (disp == const0_rtx && (base || index))
10782 disp = NULL_RTX;
10784 /* Allow arg pointer and stack pointer as index if there is no scaling. */
10785 if (base_reg && index_reg && scale == 1
10786 && (index_reg == arg_pointer_rtx
10787 || index_reg == frame_pointer_rtx
10788 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
10790 rtx tmp;
10791 tmp = base, base = index, index = tmp;
10792 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
10795 /* Special case: %ebp cannot be encoded as a base without a displacement.
10796 Similarly %r13. */
10797 if (!disp
10798 && base_reg
10799 && (base_reg == hard_frame_pointer_rtx
10800 || base_reg == frame_pointer_rtx
10801 || base_reg == arg_pointer_rtx
10802 || (REG_P (base_reg)
10803 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
10804 || REGNO (base_reg) == R13_REG))))
10805 disp = const0_rtx;
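/* (In the ModR/M byte, mod=00 with a base encoding of %ebp/%r13 selects a
   different addressing form rather than "base with no displacement", so a
   zero displacement must be emitted explicitly.)  */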
10807 /* Special case: on K6, [%esi] makes the instruction vector decoded.
10808 Avoid this by transforming to [%esi+0].
10809 Reload calls address legitimization without cfun defined, so we need
10810 to test cfun for being non-NULL. */
10811 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
10812 && base_reg && !index_reg && !disp
10813 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
10814 disp = const0_rtx;
10816 /* Special case: encode reg+reg instead of reg*2. */
10817 if (!base && index && scale == 2)
10818 base = index, base_reg = index_reg, scale = 1;
10820 /* Special case: scaling cannot be encoded without base or displacement. */
10821 if (!base && !disp && index && scale != 1)
10822 disp = const0_rtx;
10824 out->base = base;
10825 out->index = index;
10826 out->disp = disp;
10827 out->scale = scale;
10828 out->seg = seg;
10830 return retval;
10833 /* Return cost of the memory address x.
10834 For i386, it is better to use a complex address than let gcc copy
10835 the address into a reg and make a new pseudo. But not if the address
10836 requires two regs - that would mean more pseudos with longer
10837 lifetimes. */
10838 static int
10839 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
10841 struct ix86_address parts;
10842 int cost = 1;
10843 int ok = ix86_decompose_address (x, &parts);
10845 gcc_assert (ok);
10847 if (parts.base && GET_CODE (parts.base) == SUBREG)
10848 parts.base = SUBREG_REG (parts.base);
10849 if (parts.index && GET_CODE (parts.index) == SUBREG)
10850 parts.index = SUBREG_REG (parts.index);
10852 /* Attempt to minimize number of registers in the address. */
10853 if ((parts.base
10854 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
10855 || (parts.index
10856 && (!REG_P (parts.index)
10857 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
10858 cost++;
10860 if (parts.base
10861 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
10862 && parts.index
10863 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
10864 && parts.base != parts.index)
10865 cost++;
10867 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
10868 since its predecode logic can't detect the length of instructions
10869 and decoding degenerates to vector decoded. Increase the cost of such
10870 addresses here. The penalty is at least 2 cycles. It may be worthwhile
10871 to split such addresses or even refuse them altogether.
10873 The following addressing modes are affected:
10874 [base+scale*index]
10875 [scale*index+disp]
10876 [base+index]
10878 The first and last cases may be avoidable by explicitly coding the zero into
10879 the memory address, but I don't have an AMD-K6 machine handy to check this
10880 theory. */
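/* Illustrative AT&T-syntax examples of the affected forms:
	(%eax,%ecx,4)	base + scale*index, no displacement
	4(,%ecx,4)	scale*index + displacement, no base
	(%eax,%ecx)	base + index, scale 1, no displacement  */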
10882 if (TARGET_K6
10883 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
10884 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
10885 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
10886 cost += 10;
10888 return cost;
10891 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
10892 this is used to form addresses to local data when -fPIC is in
10893 use. */
10895 static bool
10896 darwin_local_data_pic (rtx disp)
10898 return (GET_CODE (disp) == UNSPEC
10899 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
10902 /* Determine if a given RTX is a valid constant. We already know this
10903 satisfies CONSTANT_P. */
10905 bool
10906 legitimate_constant_p (rtx x)
10908 switch (GET_CODE (x))
10910 case CONST:
10911 x = XEXP (x, 0);
10913 if (GET_CODE (x) == PLUS)
10915 if (!CONST_INT_P (XEXP (x, 1)))
10916 return false;
10917 x = XEXP (x, 0);
10920 if (TARGET_MACHO && darwin_local_data_pic (x))
10921 return true;
10923 /* Only some unspecs are valid as "constants". */
10924 if (GET_CODE (x) == UNSPEC)
10925 switch (XINT (x, 1))
10927 case UNSPEC_GOT:
10928 case UNSPEC_GOTOFF:
10929 case UNSPEC_PLTOFF:
10930 return TARGET_64BIT;
10931 case UNSPEC_TPOFF:
10932 case UNSPEC_NTPOFF:
10933 x = XVECEXP (x, 0, 0);
10934 return (GET_CODE (x) == SYMBOL_REF
10935 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10936 case UNSPEC_DTPOFF:
10937 x = XVECEXP (x, 0, 0);
10938 return (GET_CODE (x) == SYMBOL_REF
10939 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
10940 default:
10941 return false;
10944 /* We must have drilled down to a symbol. */
10945 if (GET_CODE (x) == LABEL_REF)
10946 return true;
10947 if (GET_CODE (x) != SYMBOL_REF)
10948 return false;
10949 /* FALLTHRU */
10951 case SYMBOL_REF:
10952 /* TLS symbols are never valid. */
10953 if (SYMBOL_REF_TLS_MODEL (x))
10954 return false;
10956 /* DLLIMPORT symbols are never valid. */
10957 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10958 && SYMBOL_REF_DLLIMPORT_P (x))
10959 return false;
10960 break;
10962 case CONST_DOUBLE:
10963 if (GET_MODE (x) == TImode
10964 && x != CONST0_RTX (TImode)
10965 && !TARGET_64BIT)
10966 return false;
10967 break;
10969 case CONST_VECTOR:
10970 if (!standard_sse_constant_p (x))
10971 return false;
10973 default:
10974 break;
10977 /* Otherwise we handle everything else in the move patterns. */
10978 return true;
10981 /* Determine if it's legal to put X into the constant pool. This
10982 is not possible for the address of thread-local symbols, which
10983 is checked above. */
10985 static bool
10986 ix86_cannot_force_const_mem (rtx x)
10988 /* We can always put integral constants and vectors in memory. */
10989 switch (GET_CODE (x))
10991 case CONST_INT:
10992 case CONST_DOUBLE:
10993 case CONST_VECTOR:
10994 return false;
10996 default:
10997 break;
10999 return !legitimate_constant_p (x);
11003 /* Nonzero if the constant value X is a legitimate general operand
11004 when generating PIC code. It is given that flag_pic is on and
11005 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11007 bool
11008 legitimate_pic_operand_p (rtx x)
11010 rtx inner;
11012 switch (GET_CODE (x))
11014 case CONST:
11015 inner = XEXP (x, 0);
11016 if (GET_CODE (inner) == PLUS
11017 && CONST_INT_P (XEXP (inner, 1)))
11018 inner = XEXP (inner, 0);
11020 /* Only some unspecs are valid as "constants". */
11021 if (GET_CODE (inner) == UNSPEC)
11022 switch (XINT (inner, 1))
11024 case UNSPEC_GOT:
11025 case UNSPEC_GOTOFF:
11026 case UNSPEC_PLTOFF:
11027 return TARGET_64BIT;
11028 case UNSPEC_TPOFF:
11029 x = XVECEXP (inner, 0, 0);
11030 return (GET_CODE (x) == SYMBOL_REF
11031 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11032 case UNSPEC_MACHOPIC_OFFSET:
11033 return legitimate_pic_address_disp_p (x);
11034 default:
11035 return false;
11037 /* FALLTHRU */
11039 case SYMBOL_REF:
11040 case LABEL_REF:
11041 return legitimate_pic_address_disp_p (x);
11043 default:
11044 return true;
11048 /* Determine if a given CONST RTX is a valid memory displacement
11049 in PIC mode. */
11051 bool
11052 legitimate_pic_address_disp_p (rtx disp)
11054 bool saw_plus;
11056 /* In 64bit mode we can allow direct addresses of symbols and labels
11057 when they are not dynamic symbols. */
11058 if (TARGET_64BIT)
11060 rtx op0 = disp, op1;
11062 switch (GET_CODE (disp))
11064 case LABEL_REF:
11065 return true;
11067 case CONST:
11068 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11069 break;
11070 op0 = XEXP (XEXP (disp, 0), 0);
11071 op1 = XEXP (XEXP (disp, 0), 1);
11072 if (!CONST_INT_P (op1)
11073 || INTVAL (op1) >= 16*1024*1024
11074 || INTVAL (op1) < -16*1024*1024)
11075 break;
11076 if (GET_CODE (op0) == LABEL_REF)
11077 return true;
11078 if (GET_CODE (op0) != SYMBOL_REF)
11079 break;
11080 /* FALLTHRU */
11082 case SYMBOL_REF:
11083 /* TLS references should always be enclosed in UNSPEC. */
11084 if (SYMBOL_REF_TLS_MODEL (op0))
11085 return false;
11086 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11087 && ix86_cmodel != CM_LARGE_PIC)
11088 return true;
11089 break;
11091 default:
11092 break;
11095 if (GET_CODE (disp) != CONST)
11096 return false;
11097 disp = XEXP (disp, 0);
11099 if (TARGET_64BIT)
11101 /* Allowing PLUS expressions here would not be safe, given the limited
11102 allowed distance of GOT references. We should not need these anyway. */
11103 if (GET_CODE (disp) != UNSPEC
11104 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11105 && XINT (disp, 1) != UNSPEC_GOTOFF
11106 && XINT (disp, 1) != UNSPEC_PLTOFF))
11107 return false;
11109 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11110 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11111 return false;
11112 return true;
11115 saw_plus = false;
11116 if (GET_CODE (disp) == PLUS)
11118 if (!CONST_INT_P (XEXP (disp, 1)))
11119 return false;
11120 disp = XEXP (disp, 0);
11121 saw_plus = true;
11124 if (TARGET_MACHO && darwin_local_data_pic (disp))
11125 return true;
11127 if (GET_CODE (disp) != UNSPEC)
11128 return false;
11130 switch (XINT (disp, 1))
11132 case UNSPEC_GOT:
11133 if (saw_plus)
11134 return false;
11135 /* We need to check for both symbols and labels because VxWorks loads
11136 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11137 details. */
11138 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11139 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11140 case UNSPEC_GOTOFF:
11141 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11142 The ABI also specifies a 32bit relocation, but we don't produce it in
11143 the small PIC model at all. */
11144 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11145 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11146 && !TARGET_64BIT)
11147 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11148 return false;
11149 case UNSPEC_GOTTPOFF:
11150 case UNSPEC_GOTNTPOFF:
11151 case UNSPEC_INDNTPOFF:
11152 if (saw_plus)
11153 return false;
11154 disp = XVECEXP (disp, 0, 0);
11155 return (GET_CODE (disp) == SYMBOL_REF
11156 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11157 case UNSPEC_NTPOFF:
11158 disp = XVECEXP (disp, 0, 0);
11159 return (GET_CODE (disp) == SYMBOL_REF
11160 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11161 case UNSPEC_DTPOFF:
11162 disp = XVECEXP (disp, 0, 0);
11163 return (GET_CODE (disp) == SYMBOL_REF
11164 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11167 return false;
11170 /* Recognizes RTL expressions that are valid memory addresses for an
11171 instruction. The MODE argument is the machine mode for the MEM
11172 expression that wants to use this address.
11174 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
11175 convert common non-canonical forms to canonical form so that they will
11176 be recognized. */
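/* For example, the canonical RTL
     (plus:SI (plus:SI (mult:SI (reg:SI si) (const_int 4)) (reg:SI bx))
	      (const_int 8))
   decomposes into index %esi, scale 4, base %ebx and displacement 8,
   i.e. the AT&T memory operand 8(%ebx,%esi,4), and is accepted here.  */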
11178 static bool
11179 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
11180 rtx addr, bool strict)
11182 struct ix86_address parts;
11183 rtx base, index, disp;
11184 HOST_WIDE_INT scale;
11186 if (ix86_decompose_address (addr, &parts) <= 0)
11187 /* Decomposition failed. */
11188 return false;
11190 base = parts.base;
11191 index = parts.index;
11192 disp = parts.disp;
11193 scale = parts.scale;
11195 /* Validate base register.
11197 Don't allow SUBREGs that span more than a word here. It can lead to spill
11198 failures when the base is one word out of a two-word structure, which is
11199 represented internally as a DImode int. */
11201 if (base)
11203 rtx reg;
11205 if (REG_P (base))
11206 reg = base;
11207 else if (GET_CODE (base) == SUBREG
11208 && REG_P (SUBREG_REG (base))
11209 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
11210 <= UNITS_PER_WORD)
11211 reg = SUBREG_REG (base);
11212 else
11213 /* Base is not a register. */
11214 return false;
11216 if (GET_MODE (base) != Pmode)
11217 /* Base is not in Pmode. */
11218 return false;
11220 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
11221 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
11222 /* Base is not valid. */
11223 return false;
11226 /* Validate index register.
11228 Don't allow SUBREGs that span more than a word here -- same as above. */
11230 if (index)
11232 rtx reg;
11234 if (REG_P (index))
11235 reg = index;
11236 else if (GET_CODE (index) == SUBREG
11237 && REG_P (SUBREG_REG (index))
11238 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
11239 <= UNITS_PER_WORD)
11240 reg = SUBREG_REG (index);
11241 else
11242 /* Index is not a register. */
11243 return false;
11245 if (GET_MODE (index) != Pmode)
11246 /* Index is not in Pmode. */
11247 return false;
11249 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
11250 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
11251 /* Index is not valid. */
11252 return false;
11255 /* Validate scale factor. */
11256 if (scale != 1)
11258 if (!index)
11259 /* Scale without index. */
11260 return false;
11262 if (scale != 2 && scale != 4 && scale != 8)
11263 /* Scale is not a valid multiplier. */
11264 return false;
11267 /* Validate displacement. */
11268 if (disp)
11270 if (GET_CODE (disp) == CONST
11271 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11272 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11273 switch (XINT (XEXP (disp, 0), 1))
11275 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
11276 used. The ABI also specifies 32bit relocations, but we don't produce
11277 them at all and use IP-relative addressing instead. */
11278 case UNSPEC_GOT:
11279 case UNSPEC_GOTOFF:
11280 gcc_assert (flag_pic);
11281 if (!TARGET_64BIT)
11282 goto is_legitimate_pic;
11284 /* 64bit address unspec. */
11285 return false;
11287 case UNSPEC_GOTPCREL:
11288 gcc_assert (flag_pic);
11289 goto is_legitimate_pic;
11291 case UNSPEC_GOTTPOFF:
11292 case UNSPEC_GOTNTPOFF:
11293 case UNSPEC_INDNTPOFF:
11294 case UNSPEC_NTPOFF:
11295 case UNSPEC_DTPOFF:
11296 break;
11298 case UNSPEC_STACK_CHECK:
11299 gcc_assert (flag_split_stack);
11300 break;
11302 default:
11303 /* Invalid address unspec. */
11304 return false;
11307 else if (SYMBOLIC_CONST (disp)
11308 && (flag_pic
11309 || (TARGET_MACHO
11310 #if TARGET_MACHO
11311 && MACHOPIC_INDIRECT
11312 && !machopic_operand_p (disp)
11313 #endif
11317 is_legitimate_pic:
11318 if (TARGET_64BIT && (index || base))
11320 /* foo@dtpoff(%rX) is ok. */
11321 if (GET_CODE (disp) != CONST
11322 || GET_CODE (XEXP (disp, 0)) != PLUS
11323 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11324 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11325 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11326 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11327 /* Non-constant pic memory reference. */
11328 return false;
11330 else if (! legitimate_pic_address_disp_p (disp))
11331 /* Displacement is an invalid pic construct. */
11332 return false;
11334 /* This code used to verify that a symbolic pic displacement
11335 includes the pic_offset_table_rtx register.
11337 While this is a good idea, unfortunately these constructs may
11338 be created by the "adds using lea" optimization for incorrect
11339 code like:
11341 int a;
11342 int foo(int i)
11344 return *(&a+i);
11347 This code is nonsensical, but it results in addressing the
11348 GOT table with a pic_offset_table_rtx base. We can't
11349 easily refuse it, since it gets matched by the
11350 "addsi3" pattern, which later gets split to lea when the
11351 output register differs from the input. While this
11352 could be handled by a separate addsi pattern for this case
11353 that never results in lea, disabling this test seems to be
11354 the easier and correct fix for the crash. */
11356 else if (GET_CODE (disp) != LABEL_REF
11357 && !CONST_INT_P (disp)
11358 && (GET_CODE (disp) != CONST
11359 || !legitimate_constant_p (disp))
11360 && (GET_CODE (disp) != SYMBOL_REF
11361 || !legitimate_constant_p (disp)))
11362 /* Displacement is not constant. */
11363 return false;
11364 else if (TARGET_64BIT
11365 && !x86_64_immediate_operand (disp, VOIDmode))
11366 /* Displacement is out of range. */
11367 return false;
11370 /* Everything looks valid. */
11371 return true;
11374 /* Determine if a given RTX is a valid constant address. */
11376 bool
11377 constant_address_p (rtx x)
11379 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11382 /* Return a unique alias set for the GOT. */
11384 static alias_set_type
11385 ix86_GOT_alias_set (void)
11387 static alias_set_type set = -1;
11388 if (set == -1)
11389 set = new_alias_set ();
11390 return set;
11393 /* Return a legitimate reference for ORIG (an address) using the
11394 register REG. If REG is 0, a new pseudo is generated.
11396 There are two types of references that must be handled:
11398 1. Global data references must load the address from the GOT, via
11399 the PIC reg. An insn is emitted to do this load, and the reg is
11400 returned.
11402 2. Static data references, constant pool addresses, and code labels
11403 compute the address as an offset from the GOT, whose base is in
11404 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11405 differentiate them from global data objects. The returned
11406 address is the PIC reg + an unspec constant.
11408 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11409 reg also appears in the address. */
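/* For illustration, the typical ia32 assembly resulting from these two
   cases looks like:
     global data:	movl	foo@GOT(%ebx), %eax	(address loaded from GOT)
     local data:	leal	bar@GOTOFF(%ebx), %eax	(PIC reg + constant offset)  */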
11411 static rtx
11412 legitimize_pic_address (rtx orig, rtx reg)
11414 rtx addr = orig;
11415 rtx new_rtx = orig;
11416 rtx base;
11418 #if TARGET_MACHO
11419 if (TARGET_MACHO && !TARGET_64BIT)
11421 if (reg == 0)
11422 reg = gen_reg_rtx (Pmode);
11423 /* Use the generic Mach-O PIC machinery. */
11424 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11426 #endif
11428 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11429 new_rtx = addr;
11430 else if (TARGET_64BIT
11431 && ix86_cmodel != CM_SMALL_PIC
11432 && gotoff_operand (addr, Pmode))
11434 rtx tmpreg;
11435 /* This symbol may be referenced via a displacement from the PIC
11436 base address (@GOTOFF). */
11438 if (reload_in_progress)
11439 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11440 if (GET_CODE (addr) == CONST)
11441 addr = XEXP (addr, 0);
11442 if (GET_CODE (addr) == PLUS)
11444 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11445 UNSPEC_GOTOFF);
11446 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11448 else
11449 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11450 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11451 if (!reg)
11452 tmpreg = gen_reg_rtx (Pmode);
11453 else
11454 tmpreg = reg;
11455 emit_move_insn (tmpreg, new_rtx);
11457 if (reg != 0)
11459 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
11460 tmpreg, 1, OPTAB_DIRECT);
11461 new_rtx = reg;
11463 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
11465 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
11467 /* This symbol may be referenced via a displacement from the PIC
11468 base address (@GOTOFF). */
11470 if (reload_in_progress)
11471 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11472 if (GET_CODE (addr) == CONST)
11473 addr = XEXP (addr, 0);
11474 if (GET_CODE (addr) == PLUS)
11476 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11477 UNSPEC_GOTOFF);
11478 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11480 else
11481 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11482 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11483 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11485 if (reg != 0)
11487 emit_move_insn (reg, new_rtx);
11488 new_rtx = reg;
11491 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
11492 /* We can't use @GOTOFF for text labels on VxWorks;
11493 see gotoff_operand. */
11494 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
11496 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11498 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
11499 return legitimize_dllimport_symbol (addr, true);
11500 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
11501 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11502 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
11504 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
11505 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11509 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
11511 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
11512 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11513 new_rtx = gen_const_mem (Pmode, new_rtx);
11514 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11516 if (reg == 0)
11517 reg = gen_reg_rtx (Pmode);
11518 /* Use gen_movsi directly, otherwise the address is loaded
11519 into a register for CSE. We don't want to CSE these addresses;
11520 instead we CSE addresses from the GOT table, so skip this. */
11521 emit_insn (gen_movsi (reg, new_rtx));
11522 new_rtx = reg;
11524 else
11526 /* This symbol must be referenced via a load from the
11527 Global Offset Table (@GOT). */
11529 if (reload_in_progress)
11530 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11531 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
11532 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11533 if (TARGET_64BIT)
11534 new_rtx = force_reg (Pmode, new_rtx);
11535 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11536 new_rtx = gen_const_mem (Pmode, new_rtx);
11537 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11539 if (reg == 0)
11540 reg = gen_reg_rtx (Pmode);
11541 emit_move_insn (reg, new_rtx);
11542 new_rtx = reg;
11545 else
11547 if (CONST_INT_P (addr)
11548 && !x86_64_immediate_operand (addr, VOIDmode))
11550 if (reg)
11552 emit_move_insn (reg, addr);
11553 new_rtx = reg;
11555 else
11556 new_rtx = force_reg (Pmode, addr);
11558 else if (GET_CODE (addr) == CONST)
11560 addr = XEXP (addr, 0);
11562 /* We must match stuff we generate before. Assume the only
11563 unspecs that can get here are ours. Not that we could do
11564 anything with them anyway.... */
11565 if (GET_CODE (addr) == UNSPEC
11566 || (GET_CODE (addr) == PLUS
11567 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
11568 return orig;
11569 gcc_assert (GET_CODE (addr) == PLUS);
11571 if (GET_CODE (addr) == PLUS)
11573 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
11575 /* Check first to see if this is a constant offset from a @GOTOFF
11576 symbol reference. */
11577 if (gotoff_operand (op0, Pmode)
11578 && CONST_INT_P (op1))
11580 if (!TARGET_64BIT)
11582 if (reload_in_progress)
11583 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11584 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
11585 UNSPEC_GOTOFF);
11586 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
11587 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11588 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11590 if (reg != 0)
11592 emit_move_insn (reg, new_rtx);
11593 new_rtx = reg;
11596 else
11598 if (INTVAL (op1) < -16*1024*1024
11599 || INTVAL (op1) >= 16*1024*1024)
11601 if (!x86_64_immediate_operand (op1, Pmode))
11602 op1 = force_reg (Pmode, op1);
11603 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
11607 else
11609 base = legitimize_pic_address (XEXP (addr, 0), reg);
11610 new_rtx = legitimize_pic_address (XEXP (addr, 1),
11611 base == reg ? NULL_RTX : reg);
11613 if (CONST_INT_P (new_rtx))
11614 new_rtx = plus_constant (base, INTVAL (new_rtx));
11615 else
11617 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
11619 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
11620 new_rtx = XEXP (new_rtx, 1);
11622 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
11627 return new_rtx;
11630 /* Load the thread pointer. If TO_REG is true, force it into a register. */
11632 static rtx
11633 get_thread_pointer (int to_reg)
11635 rtx tp, reg, insn;
11637 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
11638 if (!to_reg)
11639 return tp;
11641 reg = gen_reg_rtx (Pmode);
11642 insn = gen_rtx_SET (VOIDmode, reg, tp);
11643 insn = emit_insn (insn);
11645 return reg;
11648 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
11649 false if we expect this to be used for a memory address and true if
11650 we expect to load the address into a register. */
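/* Rough summary of the access sequences produced below for GNU TLS:
     global-dynamic:	call __tls_get_addr with the symbol's tls_index
     local-dynamic:	one __tls_get_addr call for the module base, then add
			the symbol's @dtpoff offset
     initial-exec:	load the offset from the GOT (@gottpoff/@indntpoff)
			and add the thread pointer
     local-exec:	add the link-time constant @tpoff/@ntpoff offset to
			the thread pointer  */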
11652 static rtx
11653 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
11655 rtx dest, base, off, pic, tp;
11656 int type;
11658 switch (model)
11660 case TLS_MODEL_GLOBAL_DYNAMIC:
11661 dest = gen_reg_rtx (Pmode);
11662 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11664 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11666 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
11668 start_sequence ();
11669 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
11670 insns = get_insns ();
11671 end_sequence ();
11673 RTL_CONST_CALL_P (insns) = 1;
11674 emit_libcall_block (insns, dest, rax, x);
11676 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11677 emit_insn (gen_tls_global_dynamic_64 (dest, x));
11678 else
11679 emit_insn (gen_tls_global_dynamic_32 (dest, x));
11681 if (TARGET_GNU2_TLS)
11683 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
11685 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11687 break;
11689 case TLS_MODEL_LOCAL_DYNAMIC:
11690 base = gen_reg_rtx (Pmode);
11691 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11693 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11695 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
11697 start_sequence ();
11698 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
11699 insns = get_insns ();
11700 end_sequence ();
11702 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
11703 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
11704 RTL_CONST_CALL_P (insns) = 1;
11705 emit_libcall_block (insns, base, rax, note);
11707 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11708 emit_insn (gen_tls_local_dynamic_base_64 (base));
11709 else
11710 emit_insn (gen_tls_local_dynamic_base_32 (base));
11712 if (TARGET_GNU2_TLS)
11714 rtx x = ix86_tls_module_base ();
11716 set_unique_reg_note (get_last_insn (), REG_EQUIV,
11717 gen_rtx_MINUS (Pmode, x, tp));
11720 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
11721 off = gen_rtx_CONST (Pmode, off);
11723 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
11725 if (TARGET_GNU2_TLS)
11727 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
11729 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11732 break;
11734 case TLS_MODEL_INITIAL_EXEC:
11735 if (TARGET_64BIT)
11737 pic = NULL;
11738 type = UNSPEC_GOTNTPOFF;
11740 else if (flag_pic)
11742 if (reload_in_progress)
11743 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11744 pic = pic_offset_table_rtx;
11745 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
11747 else if (!TARGET_ANY_GNU_TLS)
11749 pic = gen_reg_rtx (Pmode);
11750 emit_insn (gen_set_got (pic));
11751 type = UNSPEC_GOTTPOFF;
11753 else
11755 pic = NULL;
11756 type = UNSPEC_INDNTPOFF;
11759 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
11760 off = gen_rtx_CONST (Pmode, off);
11761 if (pic)
11762 off = gen_rtx_PLUS (Pmode, pic, off);
11763 off = gen_const_mem (Pmode, off);
11764 set_mem_alias_set (off, ix86_GOT_alias_set ());
11766 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11768 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11769 off = force_reg (Pmode, off);
11770 return gen_rtx_PLUS (Pmode, base, off);
11772 else
11774 base = get_thread_pointer (true);
11775 dest = gen_reg_rtx (Pmode);
11776 emit_insn (gen_subsi3 (dest, base, off));
11778 break;
11780 case TLS_MODEL_LOCAL_EXEC:
11781 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
11782 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11783 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
11784 off = gen_rtx_CONST (Pmode, off);
11786 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11788 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11789 return gen_rtx_PLUS (Pmode, base, off);
11791 else
11793 base = get_thread_pointer (true);
11794 dest = gen_reg_rtx (Pmode);
11795 emit_insn (gen_subsi3 (dest, base, off));
11797 break;
11799 default:
11800 gcc_unreachable ();
11803 return dest;
11806 /* Create or return the unique __imp_DECL dllimport symbol corresponding
11807 to symbol DECL. */
11809 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
11810 htab_t dllimport_map;
11812 static tree
11813 get_dllimport_decl (tree decl)
11815 struct tree_map *h, in;
11816 void **loc;
11817 const char *name;
11818 const char *prefix;
11819 size_t namelen, prefixlen;
11820 char *imp_name;
11821 tree to;
11822 rtx rtl;
11824 if (!dllimport_map)
11825 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
11827 in.hash = htab_hash_pointer (decl);
11828 in.base.from = decl;
11829 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
11830 h = (struct tree_map *) *loc;
11831 if (h)
11832 return h->to;
11834 *loc = h = ggc_alloc_tree_map ();
11835 h->hash = in.hash;
11836 h->base.from = decl;
11837 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11838 VAR_DECL, NULL, ptr_type_node);
11839 DECL_ARTIFICIAL (to) = 1;
11840 DECL_IGNORED_P (to) = 1;
11841 DECL_EXTERNAL (to) = 1;
11842 TREE_READONLY (to) = 1;
11844 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11845 name = targetm.strip_name_encoding (name);
11846 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11847 ? "*__imp_" : "*__imp__";
11848 namelen = strlen (name);
11849 prefixlen = strlen (prefix);
11850 imp_name = (char *) alloca (namelen + prefixlen + 1);
11851 memcpy (imp_name, prefix, prefixlen);
11852 memcpy (imp_name + prefixlen, name, namelen + 1);
11854 name = ggc_alloc_string (imp_name, namelen + prefixlen);
11855 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11856 SET_SYMBOL_REF_DECL (rtl, to);
11857 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
11859 rtl = gen_const_mem (Pmode, rtl);
11860 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
11862 SET_DECL_RTL (to, rtl);
11863 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
11865 return to;
11868 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11869 true if we require the result be a register. */
11871 static rtx
11872 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
11874 tree imp_decl;
11875 rtx x;
11877 gcc_assert (SYMBOL_REF_DECL (symbol));
11878 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
11880 x = DECL_RTL (imp_decl);
11881 if (want_reg)
11882 x = force_reg (Pmode, x);
11883 return x;
11886 /* Try machine-dependent ways of modifying an illegitimate address
11887 to be legitimate. If we find one, return the new, valid address.
11888 This macro is used in only one place: `memory_address' in explow.c.
11890 OLDX is the address as it was before break_out_memory_refs was called.
11891 In some cases it is useful to look at this to decide what needs to be done.
11893 It is always safe for this macro to do nothing. It exists to recognize
11894 opportunities to optimize the output.
11896 For the 80386, we handle X+REG by loading X into a register R and
11897 using R+REG. R will go in a general reg and indexing will be used.
11898 However, if REG is a broken-out memory address or multiplication,
11899 nothing needs to be done because REG can certainly go in a general reg.
11901 When -fpic is used, special handling is needed for symbolic references.
11902 See comments by legitimize_pic_address in i386.c for details. */
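/* For example, an (ashift (reg) (const_int 2)) term inside a PLUS is
   rewritten below into (mult (reg) (const_int 4)), which matches the
   scaled-index form of the hardware addressing modes.  */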
11904 static rtx
11905 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
11906 enum machine_mode mode)
11908 int changed = 0;
11909 unsigned log;
11911 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
11912 if (log)
11913 return legitimize_tls_address (x, (enum tls_model) log, false);
11914 if (GET_CODE (x) == CONST
11915 && GET_CODE (XEXP (x, 0)) == PLUS
11916 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11917 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
11919 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
11920 (enum tls_model) log, false);
11921 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11924 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11926 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
11927 return legitimize_dllimport_symbol (x, true);
11928 if (GET_CODE (x) == CONST
11929 && GET_CODE (XEXP (x, 0)) == PLUS
11930 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11931 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
11933 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
11934 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11938 if (flag_pic && SYMBOLIC_CONST (x))
11939 return legitimize_pic_address (x, 0);
11941 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
11942 if (GET_CODE (x) == ASHIFT
11943 && CONST_INT_P (XEXP (x, 1))
11944 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
11946 changed = 1;
11947 log = INTVAL (XEXP (x, 1));
11948 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
11949 GEN_INT (1 << log));
11952 if (GET_CODE (x) == PLUS)
11954 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11956 if (GET_CODE (XEXP (x, 0)) == ASHIFT
11957 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11958 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
11960 changed = 1;
11961 log = INTVAL (XEXP (XEXP (x, 0), 1));
11962 XEXP (x, 0) = gen_rtx_MULT (Pmode,
11963 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
11964 GEN_INT (1 << log));
11967 if (GET_CODE (XEXP (x, 1)) == ASHIFT
11968 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11969 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
11971 changed = 1;
11972 log = INTVAL (XEXP (XEXP (x, 1), 1));
11973 XEXP (x, 1) = gen_rtx_MULT (Pmode,
11974 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
11975 GEN_INT (1 << log));
11978 /* Put multiply first if it isn't already. */
11979 if (GET_CODE (XEXP (x, 1)) == MULT)
11981 rtx tmp = XEXP (x, 0);
11982 XEXP (x, 0) = XEXP (x, 1);
11983 XEXP (x, 1) = tmp;
11984 changed = 1;
11987 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
11988 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
11989 created by virtual register instantiation, register elimination, and
11990 similar optimizations. */
11991 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
11993 changed = 1;
11994 x = gen_rtx_PLUS (Pmode,
11995 gen_rtx_PLUS (Pmode, XEXP (x, 0),
11996 XEXP (XEXP (x, 1), 0)),
11997 XEXP (XEXP (x, 1), 1));
12000 /* Canonicalize
12001 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12002 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12003 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12004 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12005 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12006 && CONSTANT_P (XEXP (x, 1)))
12008 rtx constant;
12009 rtx other = NULL_RTX;
12011 if (CONST_INT_P (XEXP (x, 1)))
12013 constant = XEXP (x, 1);
12014 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12016 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12018 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12019 other = XEXP (x, 1);
12021 else
12022 constant = 0;
12024 if (constant)
12026 changed = 1;
12027 x = gen_rtx_PLUS (Pmode,
12028 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12029 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12030 plus_constant (other, INTVAL (constant)));
12034 if (changed && ix86_legitimate_address_p (mode, x, false))
12035 return x;
12037 if (GET_CODE (XEXP (x, 0)) == MULT)
12039 changed = 1;
12040 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
12043 if (GET_CODE (XEXP (x, 1)) == MULT)
12045 changed = 1;
12046 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
12049 if (changed
12050 && REG_P (XEXP (x, 1))
12051 && REG_P (XEXP (x, 0)))
12052 return x;
12054 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12056 changed = 1;
12057 x = legitimize_pic_address (x, 0);
12060 if (changed && ix86_legitimate_address_p (mode, x, false))
12061 return x;
12063 if (REG_P (XEXP (x, 0)))
12065 rtx temp = gen_reg_rtx (Pmode);
12066 rtx val = force_operand (XEXP (x, 1), temp);
12067 if (val != temp)
12068 emit_move_insn (temp, val);
12070 XEXP (x, 1) = temp;
12071 return x;
12074 else if (REG_P (XEXP (x, 1)))
12076 rtx temp = gen_reg_rtx (Pmode);
12077 rtx val = force_operand (XEXP (x, 0), temp);
12078 if (val != temp)
12079 emit_move_insn (temp, val);
12081 XEXP (x, 0) = temp;
12082 return x;
12086 return x;
12089 /* Print an integer constant expression in assembler syntax. Addition
12090 and subtraction are the only arithmetic that may appear in these
12091 expressions. FILE is the stdio stream to write to, X is the rtx, and
12092 CODE is the operand print code from the output string. */
12094 static void
12095 output_pic_addr_const (FILE *file, rtx x, int code)
12097 char buf[256];
12099 switch (GET_CODE (x))
12101 case PC:
12102 gcc_assert (flag_pic);
12103 putc ('.', file);
12104 break;
12106 case SYMBOL_REF:
12107 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
12108 output_addr_const (file, x);
12109 else
12111 const char *name = XSTR (x, 0);
12113 /* Mark the decl as referenced so that cgraph will
12114 output the function. */
12115 if (SYMBOL_REF_DECL (x))
12116 mark_decl_referenced (SYMBOL_REF_DECL (x));
12118 #if TARGET_MACHO
12119 if (MACHOPIC_INDIRECT
12120 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12121 name = machopic_indirection_name (x, /*stub_p=*/true);
12122 #endif
12123 assemble_name (file, name);
12125 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12126 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
12127 fputs ("@PLT", file);
12128 break;
12130 case LABEL_REF:
12131 x = XEXP (x, 0);
12132 /* FALLTHRU */
12133 case CODE_LABEL:
12134 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12135 assemble_name (asm_out_file, buf);
12136 break;
12138 case CONST_INT:
12139 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12140 break;
12142 case CONST:
12143 /* This used to output parentheses around the expression,
12144 but that does not work on the 386 (either ATT or BSD assembler). */
12145 output_pic_addr_const (file, XEXP (x, 0), code);
12146 break;
12148 case CONST_DOUBLE:
12149 if (GET_MODE (x) == VOIDmode)
12151 /* We can use %d if the number is <32 bits and positive. */
12152 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
12153 fprintf (file, "0x%lx%08lx",
12154 (unsigned long) CONST_DOUBLE_HIGH (x),
12155 (unsigned long) CONST_DOUBLE_LOW (x));
12156 else
12157 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
12159 else
12160 /* We can't handle floating point constants;
12161 TARGET_PRINT_OPERAND must handle them. */
12162 output_operand_lossage ("floating constant misused");
12163 break;
12165 case PLUS:
12166 /* Some assemblers need integer constants to appear first. */
12167 if (CONST_INT_P (XEXP (x, 0)))
12169 output_pic_addr_const (file, XEXP (x, 0), code);
12170 putc ('+', file);
12171 output_pic_addr_const (file, XEXP (x, 1), code);
12173 else
12175 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12176 output_pic_addr_const (file, XEXP (x, 1), code);
12177 putc ('+', file);
12178 output_pic_addr_const (file, XEXP (x, 0), code);
12180 break;
12182 case MINUS:
12183 if (!TARGET_MACHO)
12184 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12185 output_pic_addr_const (file, XEXP (x, 0), code);
12186 putc ('-', file);
12187 output_pic_addr_const (file, XEXP (x, 1), code);
12188 if (!TARGET_MACHO)
12189 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12190 break;
12192 case UNSPEC:
12193 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
12195 bool f = i386_asm_output_addr_const_extra (file, x);
12196 gcc_assert (f);
12197 break;
12200 gcc_assert (XVECLEN (x, 0) == 1);
12201 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12202 switch (XINT (x, 1))
12204 case UNSPEC_GOT:
12205 fputs ("@GOT", file);
12206 break;
12207 case UNSPEC_GOTOFF:
12208 fputs ("@GOTOFF", file);
12209 break;
12210 case UNSPEC_PLTOFF:
12211 fputs ("@PLTOFF", file);
12212 break;
12213 case UNSPEC_GOTPCREL:
12214 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12215 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12216 break;
12217 case UNSPEC_GOTTPOFF:
12218 /* FIXME: This might be @TPOFF in Sun ld too. */
12219 fputs ("@gottpoff", file);
12220 break;
12221 case UNSPEC_TPOFF:
12222 fputs ("@tpoff", file);
12223 break;
12224 case UNSPEC_NTPOFF:
12225 if (TARGET_64BIT)
12226 fputs ("@tpoff", file);
12227 else
12228 fputs ("@ntpoff", file);
12229 break;
12230 case UNSPEC_DTPOFF:
12231 fputs ("@dtpoff", file);
12232 break;
12233 case UNSPEC_GOTNTPOFF:
12234 if (TARGET_64BIT)
12235 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12236 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12237 else
12238 fputs ("@gotntpoff", file);
12239 break;
12240 case UNSPEC_INDNTPOFF:
12241 fputs ("@indntpoff", file);
12242 break;
12243 #if TARGET_MACHO
12244 case UNSPEC_MACHOPIC_OFFSET:
12245 putc ('-', file);
12246 machopic_output_function_base_name (file);
12247 break;
12248 #endif
12249 default:
12250 output_operand_lossage ("invalid UNSPEC as operand");
12251 break;
12253 break;
12255 default:
12256 output_operand_lossage ("invalid expression as operand");
12260 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12261 We need to emit DTP-relative relocations. */
12263 static void ATTRIBUTE_UNUSED
12264 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
12266 fputs (ASM_LONG, file);
12267 output_addr_const (file, x);
12268 fputs ("@dtpoff", file);
12269 switch (size)
12271 case 4:
12272 break;
12273 case 8:
12274 fputs (", 0", file);
12275 break;
12276 default:
12277 gcc_unreachable ();
12281 /* Return true if X is a representation of the PIC register. This copes
12282 with calls from ix86_find_base_term, where the register might have
12283 been replaced by a cselib value. */
12285 static bool
12286 ix86_pic_register_p (rtx x)
12288 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
12289 return (pic_offset_table_rtx
12290 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
12291 else
12292 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
12295 /* In the name of slightly smaller debug output, and to cater to
12296 general assembler lossage, recognize PIC+GOTOFF and turn it back
12297 into a direct symbol reference.
12299 On Darwin, this is necessary to avoid a crash, because Darwin
12300 has a different PIC label for each routine but the DWARF debugging
12301 information is not associated with any particular routine, so it's
12302 necessary to remove references to the PIC label from RTL stored by
12303 the DWARF output code. */
12305 static rtx
12306 ix86_delegitimize_address (rtx x)
12308 rtx orig_x = delegitimize_mem_from_attrs (x);
12309 /* addend is NULL or some rtx if x is something+GOTOFF where
12310 something doesn't include the PIC register. */
12311 rtx addend = NULL_RTX;
12312 /* reg_addend is NULL or a multiple of some register. */
12313 rtx reg_addend = NULL_RTX;
12314 /* const_addend is NULL or a const_int. */
12315 rtx const_addend = NULL_RTX;
12316 /* This is the result, or NULL. */
12317 rtx result = NULL_RTX;
12319 x = orig_x;
12321 if (MEM_P (x))
12322 x = XEXP (x, 0);
12324 if (TARGET_64BIT)
12326 if (GET_CODE (x) != CONST
12327 || GET_CODE (XEXP (x, 0)) != UNSPEC
12328 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
12329 || !MEM_P (orig_x))
12330 return orig_x;
12331 x = XVECEXP (XEXP (x, 0), 0, 0);
12332 if (GET_MODE (orig_x) != Pmode)
12333 return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
12334 return x;
12337 if (GET_CODE (x) != PLUS
12338 || GET_CODE (XEXP (x, 1)) != CONST)
12339 return orig_x;
12341 if (ix86_pic_register_p (XEXP (x, 0)))
12342 /* %ebx + GOT/GOTOFF */
12344 else if (GET_CODE (XEXP (x, 0)) == PLUS)
12346 /* %ebx + %reg * scale + GOT/GOTOFF */
12347 reg_addend = XEXP (x, 0);
12348 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
12349 reg_addend = XEXP (reg_addend, 1);
12350 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
12351 reg_addend = XEXP (reg_addend, 0);
12352 else
12354 reg_addend = NULL_RTX;
12355 addend = XEXP (x, 0);
12358 else
12359 addend = XEXP (x, 0);
12361 x = XEXP (XEXP (x, 1), 0);
12362 if (GET_CODE (x) == PLUS
12363 && CONST_INT_P (XEXP (x, 1)))
12365 const_addend = XEXP (x, 1);
12366 x = XEXP (x, 0);
12369 if (GET_CODE (x) == UNSPEC
12370 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
12371 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
12372 result = XVECEXP (x, 0, 0);
12374 if (TARGET_MACHO && darwin_local_data_pic (x)
12375 && !MEM_P (orig_x))
12376 result = XVECEXP (x, 0, 0);
12378 if (! result)
12379 return orig_x;
12381 if (const_addend)
12382 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
12383 if (reg_addend)
12384 result = gen_rtx_PLUS (Pmode, reg_addend, result);
12385 if (addend)
12387 /* If the rest of original X doesn't involve the PIC register, add
12388 addend and subtract pic_offset_table_rtx. This can happen e.g.
12389 for code like:
12390 leal (%ebx, %ecx, 4), %ecx
12392 movl foo@GOTOFF(%ecx), %edx
12393 in which case we return (%ecx - %ebx) + foo. */
12394 if (pic_offset_table_rtx)
12395 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
12396 pic_offset_table_rtx),
12397 result);
12398 else
12399 return orig_x;
12401 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
12402 return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
12403 return result;
12406 /* If X is a machine specific address (i.e. a symbol or label being
12407 referenced as a displacement from the GOT implemented using an
12408 UNSPEC), then return the base term. Otherwise return X. */
12411 ix86_find_base_term (rtx x)
12413 rtx term;
12415 if (TARGET_64BIT)
12417 if (GET_CODE (x) != CONST)
12418 return x;
12419 term = XEXP (x, 0);
12420 if (GET_CODE (term) == PLUS
12421 && (CONST_INT_P (XEXP (term, 1))
12422 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
12423 term = XEXP (term, 0);
12424 if (GET_CODE (term) != UNSPEC
12425 || XINT (term, 1) != UNSPEC_GOTPCREL)
12426 return x;
12428 return XVECEXP (term, 0, 0);
12431 return ix86_delegitimize_address (x);
12434 static void
12435 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
12436 int fp, FILE *file)
12438 const char *suffix;
12440 if (mode == CCFPmode || mode == CCFPUmode)
12442 code = ix86_fp_compare_code_to_integer (code);
12443 mode = CCmode;
12445 if (reverse)
12446 code = reverse_condition (code);
12448 switch (code)
12450 case EQ:
12451 switch (mode)
12453 case CCAmode:
12454 suffix = "a";
12455 break;
12457 case CCCmode:
12458 suffix = "c";
12459 break;
12461 case CCOmode:
12462 suffix = "o";
12463 break;
12465 case CCSmode:
12466 suffix = "s";
12467 break;
12469 default:
12470 suffix = "e";
12472 break;
12473 case NE:
12474 switch (mode)
12476 case CCAmode:
12477 suffix = "na";
12478 break;
12480 case CCCmode:
12481 suffix = "nc";
12482 break;
12484 case CCOmode:
12485 suffix = "no";
12486 break;
12488 case CCSmode:
12489 suffix = "ns";
12490 break;
12492 default:
12493 suffix = "ne";
12495 break;
12496 case GT:
12497 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
12498 suffix = "g";
12499 break;
12500 case GTU:
12501 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
12502 Those same assemblers have the same but opposite lossage on cmov. */
12503 if (mode == CCmode)
12504 suffix = fp ? "nbe" : "a";
12505 else if (mode == CCCmode)
12506 suffix = "b";
12507 else
12508 gcc_unreachable ();
12509 break;
12510 case LT:
12511 switch (mode)
12513 case CCNOmode:
12514 case CCGOCmode:
12515 suffix = "s";
12516 break;
12518 case CCmode:
12519 case CCGCmode:
12520 suffix = "l";
12521 break;
12523 default:
12524 gcc_unreachable ();
12526 break;
12527 case LTU:
12528 gcc_assert (mode == CCmode || mode == CCCmode);
12529 suffix = "b";
12530 break;
12531 case GE:
12532 switch (mode)
12534 case CCNOmode:
12535 case CCGOCmode:
12536 suffix = "ns";
12537 break;
12539 case CCmode:
12540 case CCGCmode:
12541 suffix = "ge";
12542 break;
12544 default:
12545 gcc_unreachable ();
12547 break;
12548 case GEU:
12549 /* ??? As above. */
12550 gcc_assert (mode == CCmode || mode == CCCmode);
12551 suffix = fp ? "nb" : "ae";
12552 break;
12553 case LE:
12554 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
12555 suffix = "le";
12556 break;
12557 case LEU:
12558 /* ??? As above. */
12559 if (mode == CCmode)
12560 suffix = "be";
12561 else if (mode == CCCmode)
12562 suffix = fp ? "nb" : "ae";
12563 else
12564 gcc_unreachable ();
12565 break;
12566 case UNORDERED:
12567 suffix = fp ? "u" : "p";
12568 break;
12569 case ORDERED:
12570 suffix = fp ? "nu" : "np";
12571 break;
12572 default:
12573 gcc_unreachable ();
12575 fputs (suffix, file);
12578 /* Print the name of register X to FILE based on its machine mode and number.
12579 If CODE is 'w', pretend the mode is HImode.
12580 If CODE is 'b', pretend the mode is QImode.
12581 If CODE is 'k', pretend the mode is SImode.
12582 If CODE is 'q', pretend the mode is DImode.
12583 If CODE is 'x', pretend the mode is V4SFmode.
12584 If CODE is 't', pretend the mode is V8SFmode.
12585 If CODE is 'h', pretend the reg is the 'high' byte register.
12586 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
12587 If CODE is 'd', duplicate the operand for AVX instruction.
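   For example, for the AX register, code 'k' prints "eax", 'w' prints "ax",
   'b' prints "al", 'h' prints "ah" and (in 64-bit mode) 'q' prints "rax".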
12590 void
12591 print_reg (rtx x, int code, FILE *file)
12593 const char *reg;
12594 bool duplicated = code == 'd' && TARGET_AVX;
12596 gcc_assert (x == pc_rtx
12597 || (REGNO (x) != ARG_POINTER_REGNUM
12598 && REGNO (x) != FRAME_POINTER_REGNUM
12599 && REGNO (x) != FLAGS_REG
12600 && REGNO (x) != FPSR_REG
12601 && REGNO (x) != FPCR_REG));
12603 if (ASSEMBLER_DIALECT == ASM_ATT)
12604 putc ('%', file);
12606 if (x == pc_rtx)
12608 gcc_assert (TARGET_64BIT);
12609 fputs ("rip", file);
12610 return;
12613 if (code == 'w' || MMX_REG_P (x))
12614 code = 2;
12615 else if (code == 'b')
12616 code = 1;
12617 else if (code == 'k')
12618 code = 4;
12619 else if (code == 'q')
12620 code = 8;
12621 else if (code == 'y')
12622 code = 3;
12623 else if (code == 'h')
12624 code = 0;
12625 else if (code == 'x')
12626 code = 16;
12627 else if (code == 't')
12628 code = 32;
12629 else
12630 code = GET_MODE_SIZE (GET_MODE (x));
12632 /* Irritatingly, AMD extended registers use a different naming convention
12633 from the normal registers. */
12634 if (REX_INT_REG_P (x))
12636 gcc_assert (TARGET_64BIT);
12637 switch (code)
12639 case 0:
12640 error ("extended registers have no high halves");
12641 break;
12642 case 1:
12643 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
12644 break;
12645 case 2:
12646 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
12647 break;
12648 case 4:
12649 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
12650 break;
12651 case 8:
12652 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
12653 break;
12654 default:
12655 error ("unsupported operand size for extended register");
12656 break;
12658 return;
12661 reg = NULL;
12662 switch (code)
12664 case 3:
12665 if (STACK_TOP_P (x))
12667 reg = "st(0)";
12668 break;
12670 /* FALLTHRU */
12671 case 8:
12672 case 4:
12673 case 12:
12674 if (! ANY_FP_REG_P (x))
12675 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
12676 /* FALLTHRU */
12677 case 16:
12678 case 2:
12679 normal:
12680 reg = hi_reg_name[REGNO (x)];
12681 break;
12682 case 1:
12683 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
12684 goto normal;
12685 reg = qi_reg_name[REGNO (x)];
12686 break;
12687 case 0:
12688 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
12689 goto normal;
12690 reg = qi_high_reg_name[REGNO (x)];
12691 break;
12692 case 32:
12693 if (SSE_REG_P (x))
12695 gcc_assert (!duplicated);
12696 putc ('y', file);
12697 fputs (hi_reg_name[REGNO (x)] + 1, file);
12698 return;
12700 break;
12701 default:
12702 gcc_unreachable ();
12705 fputs (reg, file);
12706 if (duplicated)
12708 if (ASSEMBLER_DIALECT == ASM_ATT)
12709 fprintf (file, ", %%%s", reg);
12710 else
12711 fprintf (file, ", %s", reg);
12715 /* Locate some local-dynamic symbol still in use by this function
12716 so that we can print its name in some tls_local_dynamic_base
12717 pattern. */
12719 static int
12720 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
12722 rtx x = *px;
12724 if (GET_CODE (x) == SYMBOL_REF
12725 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
12727 cfun->machine->some_ld_name = XSTR (x, 0);
12728 return 1;
12731 return 0;
12734 static const char *
12735 get_some_local_dynamic_name (void)
12737 rtx insn;
12739 if (cfun->machine->some_ld_name)
12740 return cfun->machine->some_ld_name;
12742 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
12743 if (NONDEBUG_INSN_P (insn)
12744 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
12745 return cfun->machine->some_ld_name;
12747 return NULL;
12750 /* Meaning of CODE:
12751 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12752 C -- print opcode suffix for set/cmov insn.
12753 c -- like C, but print reversed condition
12754 F,f -- likewise, but for floating-point.
12755 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
12756 otherwise nothing
12757 R -- print the prefix for register names.
12758 z -- print the opcode suffix for the size of the current operand.
12759 Z -- likewise, with special suffixes for x87 instructions.
12760 * -- print a star (in certain assembler syntax)
12761 A -- print an absolute memory reference.
12762 w -- print the operand as if it's a "word" (HImode) even if it isn't.
12763 s -- print a shift double count, followed by the assembler's argument
12764 delimiter.
12765 b -- print the QImode name of the register for the indicated operand.
12766 %b0 would print %al if operands[0] is reg 0.
12767 w -- likewise, print the HImode name of the register.
12768 k -- likewise, print the SImode name of the register.
12769 q -- likewise, print the DImode name of the register.
12770 x -- likewise, print the V4SFmode name of the register.
12771 t -- likewise, print the V8SFmode name of the register.
12772 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12773 y -- print "st(0)" instead of "st" as a register.
12774 d -- print duplicated register operand for AVX instruction.
12775 D -- print condition for SSE cmp instruction.
12776 P -- if PIC, print an @PLT suffix.
12777 X -- don't print any sort of PIC '@' suffix for a symbol.
12778 & -- print some in-use local-dynamic symbol name.
12779 H -- print a memory address offset by 8; used for sse high-parts
12780 Y -- print condition for XOP pcom* instruction.
12781 + -- print a branch hint as 'cs' or 'ds' prefix
12782 ; -- print a semicolon (after prefixes due to bug in older gas).
12783 @ -- print a segment register of thread base pointer load
12786 void
12787 ix86_print_operand (FILE *file, rtx x, int code)
12789 if (code)
12791 switch (code)
12793 case '*':
12794 if (ASSEMBLER_DIALECT == ASM_ATT)
12795 putc ('*', file);
12796 return;
12798 case '&':
12800 const char *name = get_some_local_dynamic_name ();
12801 if (name == NULL)
12802 output_operand_lossage ("'%%&' used without any "
12803 "local dynamic TLS references");
12804 else
12805 assemble_name (file, name);
12806 return;
12809 case 'A':
12810 switch (ASSEMBLER_DIALECT)
12812 case ASM_ATT:
12813 putc ('*', file);
12814 break;
12816 case ASM_INTEL:
12817 /* Intel syntax. For absolute addresses, registers should not
12818 be surrounded by brackets. */
12819 if (!REG_P (x))
12821 putc ('[', file);
12822 ix86_print_operand (file, x, 0);
12823 putc (']', file);
12824 return;
12826 break;
12828 default:
12829 gcc_unreachable ();
12832 ix86_print_operand (file, x, 0);
12833 return;
12836 case 'L':
12837 if (ASSEMBLER_DIALECT == ASM_ATT)
12838 putc ('l', file);
12839 return;
12841 case 'W':
12842 if (ASSEMBLER_DIALECT == ASM_ATT)
12843 putc ('w', file);
12844 return;
12846 case 'B':
12847 if (ASSEMBLER_DIALECT == ASM_ATT)
12848 putc ('b', file);
12849 return;
12851 case 'Q':
12852 if (ASSEMBLER_DIALECT == ASM_ATT)
12853 putc ('l', file);
12854 return;
12856 case 'S':
12857 if (ASSEMBLER_DIALECT == ASM_ATT)
12858 putc ('s', file);
12859 return;
12861 case 'T':
12862 if (ASSEMBLER_DIALECT == ASM_ATT)
12863 putc ('t', file);
12864 return;
12866 case 'z':
12867 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12869 /* Opcodes don't get size suffixes if using Intel opcodes. */
12870 if (ASSEMBLER_DIALECT == ASM_INTEL)
12871 return;
12873 switch (GET_MODE_SIZE (GET_MODE (x)))
12875 case 1:
12876 putc ('b', file);
12877 return;
12879 case 2:
12880 putc ('w', file);
12881 return;
12883 case 4:
12884 putc ('l', file);
12885 return;
12887 case 8:
12888 putc ('q', file);
12889 return;
12891 default:
12892 output_operand_lossage
12893 ("invalid operand size for operand code '%c'", code);
12894 return;
12898 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12899 warning
12900 (0, "non-integer operand used with operand code '%c'", code);
12901 /* FALLTHRU */
12903 case 'Z':
12904 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
12905 if (ASSEMBLER_DIALECT == ASM_INTEL)
12906 return;
12908 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12910 switch (GET_MODE_SIZE (GET_MODE (x)))
12912 case 2:
12913 #ifdef HAVE_AS_IX86_FILDS
12914 putc ('s', file);
12915 #endif
12916 return;
12918 case 4:
12919 putc ('l', file);
12920 return;
12922 case 8:
12923 #ifdef HAVE_AS_IX86_FILDQ
12924 putc ('q', file);
12925 #else
12926 fputs ("ll", file);
12927 #endif
12928 return;
12930 default:
12931 break;
12934 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12936 /* 387 opcodes don't get size suffixes
12937 if the operands are registers. */
12938 if (STACK_REG_P (x))
12939 return;
12941 switch (GET_MODE_SIZE (GET_MODE (x)))
12943 case 4:
12944 putc ('s', file);
12945 return;
12947 case 8:
12948 putc ('l', file);
12949 return;
12951 case 12:
12952 case 16:
12953 putc ('t', file);
12954 return;
12956 default:
12957 break;
12960 else
12962 output_operand_lossage
12963 ("invalid operand type used with operand code '%c'", code);
12964 return;
12967 output_operand_lossage
12968 ("invalid operand size for operand code '%c'", code);
12969 return;
12971 case 'd':
12972 case 'b':
12973 case 'w':
12974 case 'k':
12975 case 'q':
12976 case 'h':
12977 case 't':
12978 case 'y':
12979 case 'x':
12980 case 'X':
12981 case 'P':
12982 break;
12984 case 's':
12985 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
12987 ix86_print_operand (file, x, 0);
12988 fputs (", ", file);
12990 return;
12992 case 'D':
12993 /* Little bit of braindamage here. The SSE compare instructions
12994 use completely different names for the comparisons than the
12995 fp conditional moves do. */
12996 if (TARGET_AVX)
12998 switch (GET_CODE (x))
13000 case EQ:
13001 fputs ("eq", file);
13002 break;
13003 case UNEQ:
13004 fputs ("eq_us", file);
13005 break;
13006 case LT:
13007 fputs ("lt", file);
13008 break;
13009 case UNLT:
13010 fputs ("nge", file);
13011 break;
13012 case LE:
13013 fputs ("le", file);
13014 break;
13015 case UNLE:
13016 fputs ("ngt", file);
13017 break;
13018 case UNORDERED:
13019 fputs ("unord", file);
13020 break;
13021 case NE:
13022 fputs ("neq", file);
13023 break;
13024 case LTGT:
13025 fputs ("neq_oq", file);
13026 break;
13027 case GE:
13028 fputs ("ge", file);
13029 break;
13030 case UNGE:
13031 fputs ("nlt", file);
13032 break;
13033 case GT:
13034 fputs ("gt", file);
13035 break;
13036 case UNGT:
13037 fputs ("nle", file);
13038 break;
13039 case ORDERED:
13040 fputs ("ord", file);
13041 break;
13042 default:
13043 output_operand_lossage ("operand is not a condition code, "
13044 "invalid operand code 'D'");
13045 return;
13048 else
13050 switch (GET_CODE (x))
13052 case EQ:
13053 case UNEQ:
13054 fputs ("eq", file);
13055 break;
13056 case LT:
13057 case UNLT:
13058 fputs ("lt", file);
13059 break;
13060 case LE:
13061 case UNLE:
13062 fputs ("le", file);
13063 break;
13064 case UNORDERED:
13065 fputs ("unord", file);
13066 break;
13067 case NE:
13068 case LTGT:
13069 fputs ("neq", file);
13070 break;
13071 case UNGE:
13072 case GE:
13073 fputs ("nlt", file);
13074 break;
13075 case UNGT:
13076 case GT:
13077 fputs ("nle", file);
13078 break;
13079 case ORDERED:
13080 fputs ("ord", file);
13081 break;
13082 default:
13083 output_operand_lossage ("operand is not a condition code, "
13084 "invalid operand code 'D'");
13085 return;
13088 return;
13089 case 'O':
13090 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13091 if (ASSEMBLER_DIALECT == ASM_ATT)
13093 switch (GET_MODE (x))
13095 case HImode: putc ('w', file); break;
13096 case SImode:
13097 case SFmode: putc ('l', file); break;
13098 case DImode:
13099 case DFmode: putc ('q', file); break;
13100 default: gcc_unreachable ();
13102 putc ('.', file);
13104 #endif
13105 return;
13106 case 'C':
13107 if (!COMPARISON_P (x))
13109 output_operand_lossage ("operand is neither a constant nor a "
13110 "condition code, invalid operand code "
13111 "'C'");
13112 return;
13114 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
13115 return;
13116 case 'F':
13117 if (!COMPARISON_P (x))
13119 output_operand_lossage ("operand is neither a constant nor a "
13120 "condition code, invalid operand code "
13121 "'F'");
13122 return;
13124 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13125 if (ASSEMBLER_DIALECT == ASM_ATT)
13126 putc ('.', file);
13127 #endif
13128 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
13129 return;
13131 /* Like above, but reverse condition */
13132 case 'c':
13133 /* Check to see if argument to %c is really a constant
13134 and not a condition code which needs to be reversed. */
13135 if (!COMPARISON_P (x))
13137 output_operand_lossage ("operand is neither a constant nor a "
13138 "condition code, invalid operand "
13139 "code 'c'");
13140 return;
13142 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
13143 return;
13144 case 'f':
13145 if (!COMPARISON_P (x))
13147 output_operand_lossage ("operand is neither a constant nor a "
13148 "condition code, invalid operand "
13149 "code 'f'");
13150 return;
13152 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13153 if (ASSEMBLER_DIALECT == ASM_ATT)
13154 putc ('.', file);
13155 #endif
13156 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
13157 return;
13159 case 'H':
13160 /* It doesn't actually matter what mode we use here, as we're
13161 only going to use this for printing. */
13162 x = adjust_address_nv (x, DImode, 8);
13163 break;
13165 case '+':
13167 rtx x;
13169 if (!optimize
13170 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
13171 return;
13173 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13174 if (x)
13176 int pred_val = INTVAL (XEXP (x, 0));
13178 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13179 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13181 int taken = pred_val > REG_BR_PROB_BASE / 2;
13182 int cputaken = final_forward_branch_p (current_output_insn) == 0;
13184 /* Emit hints only in the case default branch prediction
13185 heuristics would fail. */
13186 if (taken != cputaken)
13188 /* We use 3e (DS) prefix for taken branches and
13189 2e (CS) prefix for not taken branches. */
13190 if (taken)
13191 fputs ("ds ; ", file);
13192 else
13193 fputs ("cs ; ", file);
13197 return;
13200 case 'Y':
13201 switch (GET_CODE (x))
13203 case NE:
13204 fputs ("neq", file);
13205 break;
13206 case EQ:
13207 fputs ("eq", file);
13208 break;
13209 case GE:
13210 case GEU:
13211 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
13212 break;
13213 case GT:
13214 case GTU:
13215 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
13216 break;
13217 case LE:
13218 case LEU:
13219 fputs ("le", file);
13220 break;
13221 case LT:
13222 case LTU:
13223 fputs ("lt", file);
13224 break;
13225 case UNORDERED:
13226 fputs ("unord", file);
13227 break;
13228 case ORDERED:
13229 fputs ("ord", file);
13230 break;
13231 case UNEQ:
13232 fputs ("ueq", file);
13233 break;
13234 case UNGE:
13235 fputs ("nlt", file);
13236 break;
13237 case UNGT:
13238 fputs ("nle", file);
13239 break;
13240 case UNLE:
13241 fputs ("ule", file);
13242 break;
13243 case UNLT:
13244 fputs ("ult", file);
13245 break;
13246 case LTGT:
13247 fputs ("une", file);
13248 break;
13249 default:
13250 output_operand_lossage ("operand is not a condition code, "
13251 "invalid operand code 'Y'");
13252 return;
13254 return;
13256 case ';':
13257 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13258 putc (';', file);
13259 #endif
13260 return;
13262 case '@':
13263 if (ASSEMBLER_DIALECT == ASM_ATT)
13264 putc ('%', file);
13266 /* The kernel uses a different segment register for performance
13267 reasons; a system call would not have to trash the userspace
13268 segment register, which would be expensive. */
13269 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
13270 fputs ("fs", file);
13271 else
13272 fputs ("gs", file);
13273 return;
13275 default:
13276 output_operand_lossage ("invalid operand code '%c'", code);
13280 if (REG_P (x))
13281 print_reg (x, code, file);
13283 else if (MEM_P (x))
13285 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
13286 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
13287 && GET_MODE (x) != BLKmode)
13289 const char * size;
13290 switch (GET_MODE_SIZE (GET_MODE (x)))
13292 case 1: size = "BYTE"; break;
13293 case 2: size = "WORD"; break;
13294 case 4: size = "DWORD"; break;
13295 case 8: size = "QWORD"; break;
13296 case 12: size = "TBYTE"; break;
13297 case 16:
13298 if (GET_MODE (x) == XFmode)
13299 size = "TBYTE";
13300 else
13301 size = "XMMWORD";
13302 break;
13303 case 32: size = "YMMWORD"; break;
13304 default:
13305 gcc_unreachable ();
13308 /* Check for explicit size override (codes 'b', 'w' and 'k') */
13309 if (code == 'b')
13310 size = "BYTE";
13311 else if (code == 'w')
13312 size = "WORD";
13313 else if (code == 'k')
13314 size = "DWORD";
13316 fputs (size, file);
13317 fputs (" PTR ", file);
13320 x = XEXP (x, 0);
13321 /* Avoid (%rip) for call operands. */
13322 if (CONSTANT_ADDRESS_P (x) && code == 'P'
13323 && !CONST_INT_P (x))
13324 output_addr_const (file, x);
13325 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
13326 output_operand_lossage ("invalid constraints for operand");
13327 else
13328 output_address (x);
13331 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
13333 REAL_VALUE_TYPE r;
13334 long l;
13336 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13337 REAL_VALUE_TO_TARGET_SINGLE (r, l);
13339 if (ASSEMBLER_DIALECT == ASM_ATT)
13340 putc ('$', file);
13341 /* Sign extend 32bit SFmode immediate to 8 bytes. */
13342 if (code == 'q')
13343 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
13344 else
13345 fprintf (file, "0x%08x", (unsigned int) l);
13348 /* These float cases don't actually occur as immediate operands. */
13349 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
13351 char dstr[30];
13353 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13354 fputs (dstr, file);
13357 else if (GET_CODE (x) == CONST_DOUBLE
13358 && GET_MODE (x) == XFmode)
13360 char dstr[30];
13362 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13363 fputs (dstr, file);
13366 else
13368 /* We have patterns that allow zero sets of memory, for instance.
13369 In 64-bit mode, we should probably support all 8-byte vectors,
13370 since we can in fact encode that into an immediate. */
13371 if (GET_CODE (x) == CONST_VECTOR)
13373 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
13374 x = const0_rtx;
13377 if (code != 'P')
13379 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
13381 if (ASSEMBLER_DIALECT == ASM_ATT)
13382 putc ('$', file);
13384 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
13385 || GET_CODE (x) == LABEL_REF)
13387 if (ASSEMBLER_DIALECT == ASM_ATT)
13388 putc ('$', file);
13389 else
13390 fputs ("OFFSET FLAT:", file);
13393 if (CONST_INT_P (x))
13394 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13395 else if (flag_pic)
13396 output_pic_addr_const (file, x, code);
13397 else
13398 output_addr_const (file, x);
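/* Illustrative note (not part of the original sources): as a hedged
   example of the operand codes handled above, "%z0" on an SImode integer
   operand prints the AT&T size suffix 'l' (and nothing under Intel
   syntax), while "%Z0" prints no suffix for a DFmode stack register but
   prints 'l' for a DFmode memory operand, matching the fild/fist and
   fld/fst conventions expected by 387 assemblers.  */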
13402 static bool
13403 ix86_print_operand_punct_valid_p (unsigned char code)
13405 return (code == '@' || code == '*' || code == '+'
13406 || code == '&' || code == ';');
13409 /* Print a memory operand whose address is ADDR. */
13411 static void
13412 ix86_print_operand_address (FILE *file, rtx addr)
13414 struct ix86_address parts;
13415 rtx base, index, disp;
13416 int scale;
13417 int ok = ix86_decompose_address (addr, &parts);
13419 gcc_assert (ok);
13421 base = parts.base;
13422 index = parts.index;
13423 disp = parts.disp;
13424 scale = parts.scale;
13426 switch (parts.seg)
13428 case SEG_DEFAULT:
13429 break;
13430 case SEG_FS:
13431 case SEG_GS:
13432 if (ASSEMBLER_DIALECT == ASM_ATT)
13433 putc ('%', file);
13434 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
13435 break;
13436 default:
13437 gcc_unreachable ();
13440 /* Use one byte shorter RIP relative addressing for 64bit mode. */
13441 if (TARGET_64BIT && !base && !index)
13443 rtx symbol = disp;
13445 if (GET_CODE (disp) == CONST
13446 && GET_CODE (XEXP (disp, 0)) == PLUS
13447 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13448 symbol = XEXP (XEXP (disp, 0), 0);
13450 if (GET_CODE (symbol) == LABEL_REF
13451 || (GET_CODE (symbol) == SYMBOL_REF
13452 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
13453 base = pc_rtx;
13455 if (!base && !index)
13457 /* A displacement-only address requires special attention. */
13459 if (CONST_INT_P (disp))
13461 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
13462 fputs ("ds:", file);
13463 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
13465 else if (flag_pic)
13466 output_pic_addr_const (file, disp, 0);
13467 else
13468 output_addr_const (file, disp);
13470 else
13472 if (ASSEMBLER_DIALECT == ASM_ATT)
13474 if (disp)
13476 if (flag_pic)
13477 output_pic_addr_const (file, disp, 0);
13478 else if (GET_CODE (disp) == LABEL_REF)
13479 output_asm_label (disp);
13480 else
13481 output_addr_const (file, disp);
13484 putc ('(', file);
13485 if (base)
13486 print_reg (base, 0, file);
13487 if (index)
13489 putc (',', file);
13490 print_reg (index, 0, file);
13491 if (scale != 1)
13492 fprintf (file, ",%d", scale);
13494 putc (')', file);
13496 else
13498 rtx offset = NULL_RTX;
13500 if (disp)
13502 /* Pull out the offset of a symbol; print any symbol itself. */
13503 if (GET_CODE (disp) == CONST
13504 && GET_CODE (XEXP (disp, 0)) == PLUS
13505 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13507 offset = XEXP (XEXP (disp, 0), 1);
13508 disp = gen_rtx_CONST (VOIDmode,
13509 XEXP (XEXP (disp, 0), 0));
13512 if (flag_pic)
13513 output_pic_addr_const (file, disp, 0);
13514 else if (GET_CODE (disp) == LABEL_REF)
13515 output_asm_label (disp);
13516 else if (CONST_INT_P (disp))
13517 offset = disp;
13518 else
13519 output_addr_const (file, disp);
13522 putc ('[', file);
13523 if (base)
13525 print_reg (base, 0, file);
13526 if (offset)
13528 if (INTVAL (offset) >= 0)
13529 putc ('+', file);
13530 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13533 else if (offset)
13534 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13535 else
13536 putc ('0', file);
13538 if (index)
13540 putc ('+', file);
13541 print_reg (index, 0, file);
13542 if (scale != 1)
13543 fprintf (file, "*%d", scale);
13545 putc (']', file);
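/* Illustrative note (not part of the original sources): for an address
   with base %rax, index %rbx, scale 4 and displacement 8, the code above
   prints "8(%rax,%rbx,4)" under AT&T syntax and "[rax+8+rbx*4]" under
   Intel syntax; a bare constant displacement additionally gets a "ds:"
   prefix under Intel syntax when no segment override applies.  */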
13550 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13552 static bool
13553 i386_asm_output_addr_const_extra (FILE *file, rtx x)
13555 rtx op;
13557 if (GET_CODE (x) != UNSPEC)
13558 return false;
13560 op = XVECEXP (x, 0, 0);
13561 switch (XINT (x, 1))
13563 case UNSPEC_GOTTPOFF:
13564 output_addr_const (file, op);
13565 /* FIXME: This might be @TPOFF in Sun ld. */
13566 fputs ("@gottpoff", file);
13567 break;
13568 case UNSPEC_TPOFF:
13569 output_addr_const (file, op);
13570 fputs ("@tpoff", file);
13571 break;
13572 case UNSPEC_NTPOFF:
13573 output_addr_const (file, op);
13574 if (TARGET_64BIT)
13575 fputs ("@tpoff", file);
13576 else
13577 fputs ("@ntpoff", file);
13578 break;
13579 case UNSPEC_DTPOFF:
13580 output_addr_const (file, op);
13581 fputs ("@dtpoff", file);
13582 break;
13583 case UNSPEC_GOTNTPOFF:
13584 output_addr_const (file, op);
13585 if (TARGET_64BIT)
13586 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13587 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
13588 else
13589 fputs ("@gotntpoff", file);
13590 break;
13591 case UNSPEC_INDNTPOFF:
13592 output_addr_const (file, op);
13593 fputs ("@indntpoff", file);
13594 break;
13595 #if TARGET_MACHO
13596 case UNSPEC_MACHOPIC_OFFSET:
13597 output_addr_const (file, op);
13598 putc ('-', file);
13599 machopic_output_function_base_name (file);
13600 break;
13601 #endif
13603 case UNSPEC_STACK_CHECK:
13605 int offset;
13607 gcc_assert (flag_split_stack);
13609 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
13610 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
13611 #else
13612 gcc_unreachable ();
13613 #endif
13615 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
13617 break;
13619 default:
13620 return false;
13623 return true;
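/* Illustrative note (not part of the original sources): for a TLS
   reference wrapped in UNSPEC_NTPOFF the code above emits "sym@tpoff"
   under TARGET_64BIT and "sym@ntpoff" otherwise, while UNSPEC_GOTNTPOFF
   emits "sym@gottpoff(%rip)" (AT&T) or "sym@gottpoff[rip]" (Intel) in
   64-bit mode and "sym@gotntpoff" in 32-bit mode.  */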
13626 /* Split one or more DImode RTL references into pairs of SImode
13627 references. The RTL can be REG, offsettable MEM, integer constant, or
13628 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
13629 split and "num" is its length. lo_half and hi_half are output arrays
13630 that parallel "operands". */
13632 void
13633 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
13635 while (num--)
13637 rtx op = operands[num];
13639 /* simplify_subreg refuses to split volatile memory addresses,
13640 but we still have to handle them. */
13641 if (MEM_P (op))
13643 lo_half[num] = adjust_address (op, SImode, 0);
13644 hi_half[num] = adjust_address (op, SImode, 4);
13646 else
13648 lo_half[num] = simplify_gen_subreg (SImode, op,
13649 GET_MODE (op) == VOIDmode
13650 ? DImode : GET_MODE (op), 0);
13651 hi_half[num] = simplify_gen_subreg (SImode, op,
13652 GET_MODE (op) == VOIDmode
13653 ? DImode : GET_MODE (op), 4);
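/* Hedged sketch (hypothetical, not compiled): how split_di is typically
   used to break one DImode operand into its two SImode halves.  */
#if 0
{
  rtx ops[1], lo[1], hi[1];

  ops[0] = some_dimode_operand;		/* hypothetical DImode rtx */
  split_di (ops, 1, lo, hi);
  /* For a MEM, lo[0] is the SImode word at offset 0 and hi[0] the word
     at offset 4; for a REG or constant the halves come from
     simplify_gen_subreg at byte offsets 0 and 4.  */
}
#endif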
13657 /* Split one or more TImode RTL references into pairs of DImode
13658 references. The RTL can be REG, offsettable MEM, integer constant, or
13659 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
13660 split and "num" is its length. lo_half and hi_half are output arrays
13661 that parallel "operands". */
13663 void
13664 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
13666 while (num--)
13668 rtx op = operands[num];
13670 /* simplify_subreg refuses to split volatile memory addresses, but we
13671 still have to handle them. */
13672 if (MEM_P (op))
13674 lo_half[num] = adjust_address (op, DImode, 0);
13675 hi_half[num] = adjust_address (op, DImode, 8);
13677 else
13679 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
13680 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
13685 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
13686 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
13687 is the expression of the binary operation. The output may either be
13688 emitted here, or returned to the caller, like all output_* functions.
13690 There is no guarantee that the operands are the same mode, as they
13691 might be within FLOAT or FLOAT_EXTEND expressions. */
13693 #ifndef SYSV386_COMPAT
13694 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
13695 wants to fix the assemblers because that causes incompatibility
13696 with gcc. No-one wants to fix gcc because that causes
13697 incompatibility with assemblers... You can use the option of
13698 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
13699 #define SYSV386_COMPAT 1
13700 #endif
13702 const char *
13703 output_387_binary_op (rtx insn, rtx *operands)
13705 static char buf[40];
13706 const char *p;
13707 const char *ssep;
13708 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
13710 #ifdef ENABLE_CHECKING
13711 /* Even if we do not want to check the inputs, this documents input
13712 constraints. Which helps in understanding the following code. */
13713 if (STACK_REG_P (operands[0])
13714 && ((REG_P (operands[1])
13715 && REGNO (operands[0]) == REGNO (operands[1])
13716 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
13717 || (REG_P (operands[2])
13718 && REGNO (operands[0]) == REGNO (operands[2])
13719 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
13720 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
13721 ; /* ok */
13722 else
13723 gcc_assert (is_sse);
13724 #endif
13726 switch (GET_CODE (operands[3]))
13728 case PLUS:
13729 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13730 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13731 p = "fiadd";
13732 else
13733 p = "fadd";
13734 ssep = "vadd";
13735 break;
13737 case MINUS:
13738 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13739 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13740 p = "fisub";
13741 else
13742 p = "fsub";
13743 ssep = "vsub";
13744 break;
13746 case MULT:
13747 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13748 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13749 p = "fimul";
13750 else
13751 p = "fmul";
13752 ssep = "vmul";
13753 break;
13755 case DIV:
13756 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13757 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13758 p = "fidiv";
13759 else
13760 p = "fdiv";
13761 ssep = "vdiv";
13762 break;
13764 default:
13765 gcc_unreachable ();
13768 if (is_sse)
13770 if (TARGET_AVX)
13772 strcpy (buf, ssep);
13773 if (GET_MODE (operands[0]) == SFmode)
13774 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
13775 else
13776 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
13778 else
13780 strcpy (buf, ssep + 1);
13781 if (GET_MODE (operands[0]) == SFmode)
13782 strcat (buf, "ss\t{%2, %0|%0, %2}");
13783 else
13784 strcat (buf, "sd\t{%2, %0|%0, %2}");
13786 return buf;
13788 strcpy (buf, p);
13790 switch (GET_CODE (operands[3]))
13792 case MULT:
13793 case PLUS:
13794 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
13796 rtx temp = operands[2];
13797 operands[2] = operands[1];
13798 operands[1] = temp;
13801 /* We now know operands[0] == operands[1]. */
13803 if (MEM_P (operands[2]))
13805 p = "%Z2\t%2";
13806 break;
13809 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13811 if (STACK_TOP_P (operands[0]))
13812 /* How is it that we are storing to a dead operand[2]?
13813 Well, presumably operands[1] is dead too. We can't
13814 store the result to st(0) as st(0) gets popped on this
13815 instruction. Instead store to operands[2] (which I
13816 think has to be st(1)). st(1) will be popped later.
13817 gcc <= 2.8.1 didn't have this check and generated
13818 assembly code that the Unixware assembler rejected. */
13819 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13820 else
13821 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13822 break;
13825 if (STACK_TOP_P (operands[0]))
13826 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13827 else
13828 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13829 break;
13831 case MINUS:
13832 case DIV:
13833 if (MEM_P (operands[1]))
13835 p = "r%Z1\t%1";
13836 break;
13839 if (MEM_P (operands[2]))
13841 p = "%Z2\t%2";
13842 break;
13845 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13847 #if SYSV386_COMPAT
13848 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13849 derived assemblers, confusingly reverse the direction of
13850 the operation for fsub{r} and fdiv{r} when the
13851 destination register is not st(0). The Intel assembler
13852 doesn't have this brain damage. Read !SYSV386_COMPAT to
13853 figure out what the hardware really does. */
13854 if (STACK_TOP_P (operands[0]))
13855 p = "{p\t%0, %2|rp\t%2, %0}";
13856 else
13857 p = "{rp\t%2, %0|p\t%0, %2}";
13858 #else
13859 if (STACK_TOP_P (operands[0]))
13860 /* As above for fmul/fadd, we can't store to st(0). */
13861 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13862 else
13863 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13864 #endif
13865 break;
13868 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
13870 #if SYSV386_COMPAT
13871 if (STACK_TOP_P (operands[0]))
13872 p = "{rp\t%0, %1|p\t%1, %0}";
13873 else
13874 p = "{p\t%1, %0|rp\t%0, %1}";
13875 #else
13876 if (STACK_TOP_P (operands[0]))
13877 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
13878 else
13879 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
13880 #endif
13881 break;
13884 if (STACK_TOP_P (operands[0]))
13886 if (STACK_TOP_P (operands[1]))
13887 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13888 else
13889 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
13890 break;
13892 else if (STACK_TOP_P (operands[1]))
13894 #if SYSV386_COMPAT
13895 p = "{\t%1, %0|r\t%0, %1}";
13896 #else
13897 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
13898 #endif
13900 else
13902 #if SYSV386_COMPAT
13903 p = "{r\t%2, %0|\t%0, %2}";
13904 #else
13905 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13906 #endif
13908 break;
13910 default:
13911 gcc_unreachable ();
13914 strcat (buf, p);
13915 return buf;
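/* Illustrative note (not part of the original sources): for a PLUS on
   SFmode SSE operands the code above returns the template
   "vaddss\t{%2, %1, %0|%0, %1, %2}" when TARGET_AVX and
   "addss\t{%2, %0|%0, %2}" otherwise; for the x87 path it appends a
   suffix string such as "p\t{%2, %0|%0, %2}" to "fadd" depending on
   which operand dies and whether a memory operand is involved.  */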
13918 /* Return needed mode for entity in optimize_mode_switching pass. */
13921 ix86_mode_needed (int entity, rtx insn)
13923 enum attr_i387_cw mode;
13925 /* The mode UNINITIALIZED is used to store the control word after a
13926 function call or ASM pattern. The mode ANY specifies that the function
13927 has no requirements on the control word and makes no changes in the
13928 bits we are interested in. */
13930 if (CALL_P (insn)
13931 || (NONJUMP_INSN_P (insn)
13932 && (asm_noperands (PATTERN (insn)) >= 0
13933 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
13934 return I387_CW_UNINITIALIZED;
13936 if (recog_memoized (insn) < 0)
13937 return I387_CW_ANY;
13939 mode = get_attr_i387_cw (insn);
13941 switch (entity)
13943 case I387_TRUNC:
13944 if (mode == I387_CW_TRUNC)
13945 return mode;
13946 break;
13948 case I387_FLOOR:
13949 if (mode == I387_CW_FLOOR)
13950 return mode;
13951 break;
13953 case I387_CEIL:
13954 if (mode == I387_CW_CEIL)
13955 return mode;
13956 break;
13958 case I387_MASK_PM:
13959 if (mode == I387_CW_MASK_PM)
13960 return mode;
13961 break;
13963 default:
13964 gcc_unreachable ();
13967 return I387_CW_ANY;
13970 /* Output code to initialize control word copies used by trunc?f?i and
13971 rounding patterns. CURRENT_MODE is set to the current control word,
13972 while NEW_MODE is set to the new control word. */
13974 void
13975 emit_i387_cw_initialization (int mode)
13977 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
13978 rtx new_mode;
13980 enum ix86_stack_slot slot;
13982 rtx reg = gen_reg_rtx (HImode);
13984 emit_insn (gen_x86_fnstcw_1 (stored_mode));
13985 emit_move_insn (reg, copy_rtx (stored_mode));
13987 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
13988 || optimize_function_for_size_p (cfun))
13990 switch (mode)
13992 case I387_CW_TRUNC:
13993 /* round toward zero (truncate) */
13994 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
13995 slot = SLOT_CW_TRUNC;
13996 break;
13998 case I387_CW_FLOOR:
13999 /* round down toward -oo */
14000 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14001 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14002 slot = SLOT_CW_FLOOR;
14003 break;
14005 case I387_CW_CEIL:
14006 /* round up toward +oo */
14007 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14008 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14009 slot = SLOT_CW_CEIL;
14010 break;
14012 case I387_CW_MASK_PM:
14013 /* mask precision exception for nearbyint() */
14014 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14015 slot = SLOT_CW_MASK_PM;
14016 break;
14018 default:
14019 gcc_unreachable ();
14022 else
14024 switch (mode)
14026 case I387_CW_TRUNC:
14027 /* round toward zero (truncate) */
14028 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
14029 slot = SLOT_CW_TRUNC;
14030 break;
14032 case I387_CW_FLOOR:
14033 /* round down toward -oo */
14034 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
14035 slot = SLOT_CW_FLOOR;
14036 break;
14038 case I387_CW_CEIL:
14039 /* round up toward +oo */
14040 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
14041 slot = SLOT_CW_CEIL;
14042 break;
14044 case I387_CW_MASK_PM:
14045 /* mask precision exception for nearbyint() */
14046 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14047 slot = SLOT_CW_MASK_PM;
14048 break;
14050 default:
14051 gcc_unreachable ();
14055 gcc_assert (slot < MAX_386_STACK_LOCALS);
14057 new_mode = assign_386_stack_local (HImode, slot);
14058 emit_move_insn (new_mode, reg);
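/* Illustrative note (not part of the original sources): the constants
   above manipulate the x87 control word rounding-control field (bits
   10-11) and exception masks: OR-ing 0x0c00 selects truncation, setting
   0x0400 rounds toward -inf, setting 0x0800 rounds toward +inf, and
   OR-ing 0x0020 sets the precision-exception mask bit used for
   nearbyint().  */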
14061 /* Output code for INSN to convert a float to a signed int. OPERANDS
14062 are the insn operands. The output may be [HSD]Imode and the input
14063 operand may be [SDX]Fmode. */
14065 const char *
14066 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
14068 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14069 int dimode_p = GET_MODE (operands[0]) == DImode;
14070 int round_mode = get_attr_i387_cw (insn);
14072 /* Jump through a hoop or two for DImode, since the hardware has no
14073 non-popping instruction. We used to do this a different way, but
14074 that was somewhat fragile and broke with post-reload splitters. */
14075 if ((dimode_p || fisttp) && !stack_top_dies)
14076 output_asm_insn ("fld\t%y1", operands);
14078 gcc_assert (STACK_TOP_P (operands[1]));
14079 gcc_assert (MEM_P (operands[0]));
14080 gcc_assert (GET_MODE (operands[1]) != TFmode);
14082 if (fisttp)
14083 output_asm_insn ("fisttp%Z0\t%0", operands);
14084 else
14086 if (round_mode != I387_CW_ANY)
14087 output_asm_insn ("fldcw\t%3", operands);
14088 if (stack_top_dies || dimode_p)
14089 output_asm_insn ("fistp%Z0\t%0", operands);
14090 else
14091 output_asm_insn ("fist%Z0\t%0", operands);
14092 if (round_mode != I387_CW_ANY)
14093 output_asm_insn ("fldcw\t%2", operands);
14096 return "";
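/* Illustrative note (not part of the original sources): with a
   non-default rounding mode the sequence emitted above is, in effect,
   "fldcw %3" / "fistp%Z0 %0" / "fldcw %2", i.e. switch the control word,
   store-and-pop the integer, then restore the original control word;
   fisttp avoids the control-word dance entirely.  */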
14099 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14100 have the values zero or one, indicates the ffreep insn's operand
14101 from the OPERANDS array. */
14103 static const char *
14104 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14106 if (TARGET_USE_FFREEP)
14107 #ifdef HAVE_AS_IX86_FFREEP
14108 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14109 #else
14111 static char retval[32];
14112 int regno = REGNO (operands[opno]);
14114 gcc_assert (FP_REGNO_P (regno));
14116 regno -= FIRST_STACK_REG;
14118 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
14119 return retval;
14121 #endif
14123 return opno ? "fstp\t%y1" : "fstp\t%y0";
14127 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14128 should be used. UNORDERED_P is true when fucom should be used. */
14130 const char *
14131 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
14133 int stack_top_dies;
14134 rtx cmp_op0, cmp_op1;
14135 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
14137 if (eflags_p)
14139 cmp_op0 = operands[0];
14140 cmp_op1 = operands[1];
14142 else
14144 cmp_op0 = operands[1];
14145 cmp_op1 = operands[2];
14148 if (is_sse)
14150 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
14151 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
14152 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
14153 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
14155 if (GET_MODE (operands[0]) == SFmode)
14156 if (unordered_p)
14157 return &ucomiss[TARGET_AVX ? 0 : 1];
14158 else
14159 return &comiss[TARGET_AVX ? 0 : 1];
14160 else
14161 if (unordered_p)
14162 return &ucomisd[TARGET_AVX ? 0 : 1];
14163 else
14164 return &comisd[TARGET_AVX ? 0 : 1];
14167 gcc_assert (STACK_TOP_P (cmp_op0));
14169 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14171 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
14173 if (stack_top_dies)
14175 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
14176 return output_387_ffreep (operands, 1);
14178 else
14179 return "ftst\n\tfnstsw\t%0";
14182 if (STACK_REG_P (cmp_op1)
14183 && stack_top_dies
14184 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
14185 && REGNO (cmp_op1) != FIRST_STACK_REG)
14187 /* If the top of the 387 stack dies, and the other operand
14188 is also a stack register that dies, then this must be a
14189 `fcompp' float compare. */
14191 if (eflags_p)
14193 /* There is no double popping fcomi variant. Fortunately,
14194 eflags is immune from the fstp's cc clobbering. */
14195 if (unordered_p)
14196 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
14197 else
14198 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
14199 return output_387_ffreep (operands, 0);
14201 else
14203 if (unordered_p)
14204 return "fucompp\n\tfnstsw\t%0";
14205 else
14206 return "fcompp\n\tfnstsw\t%0";
14209 else
14211 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
14213 static const char * const alt[16] =
14215 "fcom%Z2\t%y2\n\tfnstsw\t%0",
14216 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
14217 "fucom%Z2\t%y2\n\tfnstsw\t%0",
14218 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
14220 "ficom%Z2\t%y2\n\tfnstsw\t%0",
14221 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
14222 NULL,
14223 NULL,
14225 "fcomi\t{%y1, %0|%0, %y1}",
14226 "fcomip\t{%y1, %0|%0, %y1}",
14227 "fucomi\t{%y1, %0|%0, %y1}",
14228 "fucomip\t{%y1, %0|%0, %y1}",
14230 NULL,
14231 NULL,
14232 NULL,
14233 NULL
14236 int mask;
14237 const char *ret;
14239 mask = eflags_p << 3;
14240 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
14241 mask |= unordered_p << 1;
14242 mask |= stack_top_dies;
14244 gcc_assert (mask < 16);
14245 ret = alt[mask];
14246 gcc_assert (ret);
14248 return ret;
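/* Illustrative note (not part of the original sources): the ALT table
   above is indexed by the mask eflags_p<<3 | intmode<<2 | unordered_p<<1
   | stack_top_dies, so e.g. an unordered eflags compare whose stack top
   dies (mask 11) selects "fucomip\t{%y1, %0|%0, %y1}".  */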
14252 void
14253 ix86_output_addr_vec_elt (FILE *file, int value)
14255 const char *directive = ASM_LONG;
14257 #ifdef ASM_QUAD
14258 if (TARGET_64BIT)
14259 directive = ASM_QUAD;
14260 #else
14261 gcc_assert (!TARGET_64BIT);
14262 #endif
14264 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
14267 void
14268 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
14270 const char *directive = ASM_LONG;
14272 #ifdef ASM_QUAD
14273 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
14274 directive = ASM_QUAD;
14275 #else
14276 gcc_assert (!TARGET_64BIT);
14277 #endif
14278 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
14279 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
14280 fprintf (file, "%s%s%d-%s%d\n",
14281 directive, LPREFIX, value, LPREFIX, rel);
14282 else if (HAVE_AS_GOTOFF_IN_DATA)
14283 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
14284 #if TARGET_MACHO
14285 else if (TARGET_MACHO)
14287 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
14288 machopic_output_function_base_name (file);
14289 putc ('\n', file);
14291 #endif
14292 else
14293 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
14294 GOT_SYMBOL_NAME, LPREFIX, value);
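/* Illustrative note (not part of the original sources): assuming the
   usual ".L" local-label prefix, the function above emits e.g.
   ".quad .L5-.L2" for a 64-bit DImode case vector, ".long .L5@GOTOFF"
   when the assembler supports @GOTOFF in data, and otherwise the
   GOT-relative ".long _GLOBAL_OFFSET_TABLE_+[.-.L5]" form.  */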
14297 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
14298 for the target. */
14300 void
14301 ix86_expand_clear (rtx dest)
14303 rtx tmp;
14305 /* We play register width games, which are only valid after reload. */
14306 gcc_assert (reload_completed);
14308 /* Avoid HImode and its attendant prefix byte. */
14309 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
14310 dest = gen_rtx_REG (SImode, REGNO (dest));
14311 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
14313 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
14314 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
14316 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14317 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
14320 emit_insn (tmp);
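/* Hedged sketch (hypothetical, not compiled): clearing a hard register
   with ix86_expand_clear after reload.  */
#if 0
{
  rtx eax = gen_rtx_REG (SImode, AX_REG);	/* hypothetical usage */
  ix86_expand_clear (eax);
  /* Normally emits "xor %eax, %eax" together with a flags clobber; only
     with TARGET_USE_MOV0 and when not optimizing for speed does it fall
     back to "mov $0, %eax".  */
}
#endif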
14323 /* X is an unchanging MEM. If it is a constant pool reference, return
14324 the constant pool rtx, else NULL. */
14327 maybe_get_pool_constant (rtx x)
14329 x = ix86_delegitimize_address (XEXP (x, 0));
14331 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
14332 return get_pool_constant (x);
14334 return NULL_RTX;
14337 void
14338 ix86_expand_move (enum machine_mode mode, rtx operands[])
14340 rtx op0, op1;
14341 enum tls_model model;
14343 op0 = operands[0];
14344 op1 = operands[1];
14346 if (GET_CODE (op1) == SYMBOL_REF)
14348 model = SYMBOL_REF_TLS_MODEL (op1);
14349 if (model)
14351 op1 = legitimize_tls_address (op1, model, true);
14352 op1 = force_operand (op1, op0);
14353 if (op1 == op0)
14354 return;
14356 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14357 && SYMBOL_REF_DLLIMPORT_P (op1))
14358 op1 = legitimize_dllimport_symbol (op1, false);
14360 else if (GET_CODE (op1) == CONST
14361 && GET_CODE (XEXP (op1, 0)) == PLUS
14362 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
14364 rtx addend = XEXP (XEXP (op1, 0), 1);
14365 rtx symbol = XEXP (XEXP (op1, 0), 0);
14366 rtx tmp = NULL;
14368 model = SYMBOL_REF_TLS_MODEL (symbol);
14369 if (model)
14370 tmp = legitimize_tls_address (symbol, model, true);
14371 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14372 && SYMBOL_REF_DLLIMPORT_P (symbol))
14373 tmp = legitimize_dllimport_symbol (symbol, true);
14375 if (tmp)
14377 tmp = force_operand (tmp, NULL);
14378 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
14379 op0, 1, OPTAB_DIRECT);
14380 if (tmp == op0)
14381 return;
14385 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
14387 if (TARGET_MACHO && !TARGET_64BIT)
14389 #if TARGET_MACHO
14390 if (MACHOPIC_PURE)
14392 rtx temp = ((reload_in_progress
14393 || ((op0 && REG_P (op0))
14394 && mode == Pmode))
14395 ? op0 : gen_reg_rtx (Pmode));
14396 op1 = machopic_indirect_data_reference (op1, temp);
14397 op1 = machopic_legitimize_pic_address (op1, mode,
14398 temp == op1 ? 0 : temp);
14400 else if (MACHOPIC_INDIRECT)
14401 op1 = machopic_indirect_data_reference (op1, 0);
14402 if (op0 == op1)
14403 return;
14404 #endif
14406 else
14408 if (MEM_P (op0))
14409 op1 = force_reg (Pmode, op1);
14410 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
14412 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
14413 op1 = legitimize_pic_address (op1, reg);
14414 if (op0 == op1)
14415 return;
14419 else
14421 if (MEM_P (op0)
14422 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
14423 || !push_operand (op0, mode))
14424 && MEM_P (op1))
14425 op1 = force_reg (mode, op1);
14427 if (push_operand (op0, mode)
14428 && ! general_no_elim_operand (op1, mode))
14429 op1 = copy_to_mode_reg (mode, op1);
14431 /* Force large constants in 64bit compilation into a register
14432 to get them CSEed. */
14433 if (can_create_pseudo_p ()
14434 && (mode == DImode) && TARGET_64BIT
14435 && immediate_operand (op1, mode)
14436 && !x86_64_zext_immediate_operand (op1, VOIDmode)
14437 && !register_operand (op0, mode)
14438 && optimize)
14439 op1 = copy_to_mode_reg (mode, op1);
14441 if (can_create_pseudo_p ()
14442 && FLOAT_MODE_P (mode)
14443 && GET_CODE (op1) == CONST_DOUBLE)
14445 /* If we are loading a floating point constant to a register,
14446 force the value to memory now, since we'll get better code
14447 out of the back end. */
14449 op1 = validize_mem (force_const_mem (mode, op1));
14450 if (!register_operand (op0, mode))
14452 rtx temp = gen_reg_rtx (mode);
14453 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
14454 emit_move_insn (op0, temp);
14455 return;
14460 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
14463 void
14464 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
14466 rtx op0 = operands[0], op1 = operands[1];
14467 unsigned int align = GET_MODE_ALIGNMENT (mode);
14469 /* Force constants other than zero into memory. We do not know how
14470 the instructions used to build constants modify the upper 64 bits
14471 of the register; once we have that information we may be able
14472 to handle some of them more efficiently. */
14473 if (can_create_pseudo_p ()
14474 && register_operand (op0, mode)
14475 && (CONSTANT_P (op1)
14476 || (GET_CODE (op1) == SUBREG
14477 && CONSTANT_P (SUBREG_REG (op1))))
14478 && !standard_sse_constant_p (op1))
14479 op1 = validize_mem (force_const_mem (mode, op1));
14481 /* We need to check memory alignment for SSE modes since attributes
14482 can make operands unaligned. */
14483 if (can_create_pseudo_p ()
14484 && SSE_REG_MODE_P (mode)
14485 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
14486 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
14488 rtx tmp[2];
14490 /* ix86_expand_vector_move_misalign() does not like constants ... */
14491 if (CONSTANT_P (op1)
14492 || (GET_CODE (op1) == SUBREG
14493 && CONSTANT_P (SUBREG_REG (op1))))
14494 op1 = validize_mem (force_const_mem (mode, op1));
14496 /* ... nor both arguments in memory. */
14497 if (!register_operand (op0, mode)
14498 && !register_operand (op1, mode))
14499 op1 = force_reg (mode, op1);
14501 tmp[0] = op0; tmp[1] = op1;
14502 ix86_expand_vector_move_misalign (mode, tmp);
14503 return;
14506 /* Make operand1 a register if it isn't already. */
14507 if (can_create_pseudo_p ()
14508 && !register_operand (op0, mode)
14509 && !register_operand (op1, mode))
14511 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
14512 return;
14515 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
14518 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
14519 straight to ix86_expand_vector_move. */
14520 /* Code generation for scalar reg-reg moves of single and double precision data:
14521 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
14522 movaps reg, reg
14523 else
14524 movss reg, reg
14525 if (x86_sse_partial_reg_dependency == true)
14526 movapd reg, reg
14527 else
14528 movsd reg, reg
14530 Code generation for scalar loads of double precision data:
14531 if (x86_sse_split_regs == true)
14532 movlpd mem, reg (gas syntax)
14533 else
14534 movsd mem, reg
14536 Code generation for unaligned packed loads of single precision data
14537 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
14538 if (x86_sse_unaligned_move_optimal)
14539 movups mem, reg
14541 if (x86_sse_partial_reg_dependency == true)
14543 xorps reg, reg
14544 movlps mem, reg
14545 movhps mem+8, reg
14547 else
14549 movlps mem, reg
14550 movhps mem+8, reg
14553 Code generation for unaligned packed loads of double precision data
14554 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
14555 if (x86_sse_unaligned_move_optimal)
14556 movupd mem, reg
14558 if (x86_sse_split_regs == true)
14560 movlpd mem, reg
14561 movhpd mem+8, reg
14563 else
14565 movsd mem, reg
14566 movhpd mem+8, reg
14570 void
14571 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
14573 rtx op0, op1, m;
14575 op0 = operands[0];
14576 op1 = operands[1];
14578 if (TARGET_AVX)
14580 switch (GET_MODE_CLASS (mode))
14582 case MODE_VECTOR_INT:
14583 case MODE_INT:
14584 switch (GET_MODE_SIZE (mode))
14586 case 16:
14587 /* If we're optimizing for size, movups is the smallest. */
14588 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14590 op0 = gen_lowpart (V4SFmode, op0);
14591 op1 = gen_lowpart (V4SFmode, op1);
14592 emit_insn (gen_avx_movups (op0, op1));
14593 return;
14595 op0 = gen_lowpart (V16QImode, op0);
14596 op1 = gen_lowpart (V16QImode, op1);
14597 emit_insn (gen_avx_movdqu (op0, op1));
14598 break;
14599 case 32:
14600 op0 = gen_lowpart (V32QImode, op0);
14601 op1 = gen_lowpart (V32QImode, op1);
14602 emit_insn (gen_avx_movdqu256 (op0, op1));
14603 break;
14604 default:
14605 gcc_unreachable ();
14607 break;
14608 case MODE_VECTOR_FLOAT:
14609 op0 = gen_lowpart (mode, op0);
14610 op1 = gen_lowpart (mode, op1);
14612 switch (mode)
14614 case V4SFmode:
14615 emit_insn (gen_avx_movups (op0, op1));
14616 break;
14617 case V8SFmode:
14618 emit_insn (gen_avx_movups256 (op0, op1));
14619 break;
14620 case V2DFmode:
14621 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14623 op0 = gen_lowpart (V4SFmode, op0);
14624 op1 = gen_lowpart (V4SFmode, op1);
14625 emit_insn (gen_avx_movups (op0, op1));
14626 return;
14628 emit_insn (gen_avx_movupd (op0, op1));
14629 break;
14630 case V4DFmode:
14631 emit_insn (gen_avx_movupd256 (op0, op1));
14632 break;
14633 default:
14634 gcc_unreachable ();
14636 break;
14638 default:
14639 gcc_unreachable ();
14642 return;
14645 if (MEM_P (op1))
14647 /* If we're optimizing for size, movups is the smallest. */
14648 if (optimize_insn_for_size_p ()
14649 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14651 op0 = gen_lowpart (V4SFmode, op0);
14652 op1 = gen_lowpart (V4SFmode, op1);
14653 emit_insn (gen_sse_movups (op0, op1));
14654 return;
14657 /* ??? If we have typed data, then it would appear that using
14658 movdqu is the only way to get unaligned data loaded with
14659 integer type. */
14660 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14662 op0 = gen_lowpart (V16QImode, op0);
14663 op1 = gen_lowpart (V16QImode, op1);
14664 emit_insn (gen_sse2_movdqu (op0, op1));
14665 return;
14668 if (TARGET_SSE2 && mode == V2DFmode)
14670 rtx zero;
14672 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14674 op0 = gen_lowpart (V2DFmode, op0);
14675 op1 = gen_lowpart (V2DFmode, op1);
14676 emit_insn (gen_sse2_movupd (op0, op1));
14677 return;
14680 /* When SSE registers are split into halves, we can avoid
14681 writing to the top half twice. */
14682 if (TARGET_SSE_SPLIT_REGS)
14684 emit_clobber (op0);
14685 zero = op0;
14687 else
14689 /* ??? Not sure about the best option for the Intel chips.
14690 The following would seem to satisfy; the register is
14691 entirely cleared, breaking the dependency chain. We
14692 then store to the upper half, with a dependency depth
14693 of one. A rumor has it that Intel recommends two movsd
14694 followed by an unpacklpd, but this is unconfirmed. And
14695 given that the dependency depth of the unpacklpd would
14696 still be one, I'm not sure why this would be better. */
14697 zero = CONST0_RTX (V2DFmode);
14700 m = adjust_address (op1, DFmode, 0);
14701 emit_insn (gen_sse2_loadlpd (op0, zero, m));
14702 m = adjust_address (op1, DFmode, 8);
14703 emit_insn (gen_sse2_loadhpd (op0, op0, m));
14705 else
14707 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14709 op0 = gen_lowpart (V4SFmode, op0);
14710 op1 = gen_lowpart (V4SFmode, op1);
14711 emit_insn (gen_sse_movups (op0, op1));
14712 return;
14715 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
14716 emit_move_insn (op0, CONST0_RTX (mode));
14717 else
14718 emit_clobber (op0);
14720 if (mode != V4SFmode)
14721 op0 = gen_lowpart (V4SFmode, op0);
14722 m = adjust_address (op1, V2SFmode, 0);
14723 emit_insn (gen_sse_loadlps (op0, op0, m));
14724 m = adjust_address (op1, V2SFmode, 8);
14725 emit_insn (gen_sse_loadhps (op0, op0, m));
14728 else if (MEM_P (op0))
14730 /* If we're optimizing for size, movups is the smallest. */
14731 if (optimize_insn_for_size_p ()
14732 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14734 op0 = gen_lowpart (V4SFmode, op0);
14735 op1 = gen_lowpart (V4SFmode, op1);
14736 emit_insn (gen_sse_movups (op0, op1));
14737 return;
14740 /* ??? Similar to above, only less clear because of quote
14741 typeless stores unquote. */
14742 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
14743 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14745 op0 = gen_lowpart (V16QImode, op0);
14746 op1 = gen_lowpart (V16QImode, op1);
14747 emit_insn (gen_sse2_movdqu (op0, op1));
14748 return;
14751 if (TARGET_SSE2 && mode == V2DFmode)
14753 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14755 op0 = gen_lowpart (V2DFmode, op0);
14756 op1 = gen_lowpart (V2DFmode, op1);
14757 emit_insn (gen_sse2_movupd (op0, op1));
14759 else
14761 m = adjust_address (op0, DFmode, 0);
14762 emit_insn (gen_sse2_storelpd (m, op1));
14763 m = adjust_address (op0, DFmode, 8);
14764 emit_insn (gen_sse2_storehpd (m, op1));
14767 else
14769 if (mode != V4SFmode)
14770 op1 = gen_lowpart (V4SFmode, op1);
14772 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14774 op0 = gen_lowpart (V4SFmode, op0);
14775 emit_insn (gen_sse_movups (op0, op1));
14777 else
14779 m = adjust_address (op0, V2SFmode, 0);
14780 emit_insn (gen_sse_storelps (m, op1));
14781 m = adjust_address (op0, V2SFmode, 8);
14782 emit_insn (gen_sse_storehps (m, op1));
14786 else
14787 gcc_unreachable ();
14790 /* Expand a push in MODE. This is some mode for which we do not support
14791 proper push instructions, at least from the registers that we expect
14792 the value to live in. */
14794 void
14795 ix86_expand_push (enum machine_mode mode, rtx x)
14797 rtx tmp;
14799 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
14800 GEN_INT (-GET_MODE_SIZE (mode)),
14801 stack_pointer_rtx, 1, OPTAB_DIRECT);
14802 if (tmp != stack_pointer_rtx)
14803 emit_move_insn (stack_pointer_rtx, tmp);
14805 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
14807 /* When we push an operand onto the stack, it has to be aligned at least
14808 at the function argument boundary. However, since we don't have
14809 the argument type, we can't determine the actual argument
14810 boundary. */
14811 emit_move_insn (tmp, x);
14814 /* Helper function of ix86_fixup_binary_operands to canonicalize
14815 operand order. Returns true if the operands should be swapped. */
14817 static bool
14818 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
14819 rtx operands[])
14821 rtx dst = operands[0];
14822 rtx src1 = operands[1];
14823 rtx src2 = operands[2];
14825 /* If the operation is not commutative, we can't do anything. */
14826 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
14827 return false;
14829 /* Highest priority is that src1 should match dst. */
14830 if (rtx_equal_p (dst, src1))
14831 return false;
14832 if (rtx_equal_p (dst, src2))
14833 return true;
14835 /* Next highest priority is that immediate constants come second. */
14836 if (immediate_operand (src2, mode))
14837 return false;
14838 if (immediate_operand (src1, mode))
14839 return true;
14841 /* Lowest priority is that memory references should come second. */
14842 if (MEM_P (src2))
14843 return false;
14844 if (MEM_P (src1))
14845 return true;
14847 return false;
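/* Illustrative note (not part of the original sources): given a
   commutative PLUS where dst is the same register as operands[2], the
   predicate above returns true so that the matching operand becomes
   src1; likewise an immediate or memory src1 is swapped behind src2 when
   dst matches neither source.  */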
14851 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
14852 destination to use for the operation. If different from the true
14853 destination in operands[0], a copy operation will be required. */
14856 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
14857 rtx operands[])
14859 rtx dst = operands[0];
14860 rtx src1 = operands[1];
14861 rtx src2 = operands[2];
14863 /* Canonicalize operand order. */
14864 if (ix86_swap_binary_operands_p (code, mode, operands))
14866 rtx temp;
14868 /* It is invalid to swap operands of different modes. */
14869 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
14871 temp = src1;
14872 src1 = src2;
14873 src2 = temp;
14876 /* Both source operands cannot be in memory. */
14877 if (MEM_P (src1) && MEM_P (src2))
14879 /* Optimization: Only read from memory once. */
14880 if (rtx_equal_p (src1, src2))
14882 src2 = force_reg (mode, src2);
14883 src1 = src2;
14885 else
14886 src2 = force_reg (mode, src2);
14889 /* If the destination is memory, and we do not have matching source
14890 operands, do things in registers. */
14891 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14892 dst = gen_reg_rtx (mode);
14894 /* Source 1 cannot be a constant. */
14895 if (CONSTANT_P (src1))
14896 src1 = force_reg (mode, src1);
14898 /* Source 1 cannot be a non-matching memory. */
14899 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14900 src1 = force_reg (mode, src1);
14902 operands[1] = src1;
14903 operands[2] = src2;
14904 return dst;
14907 /* Similarly, but assume that the destination has already been
14908 set up properly. */
14910 void
14911 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
14912 enum machine_mode mode, rtx operands[])
14914 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
14915 gcc_assert (dst == operands[0]);
14918 /* Attempt to expand a binary operator. Make the expansion closer to the
14919 actual machine than just general_operand, which would allow 3 separate
14920 memory references (one output, two inputs) in a single insn. */
14922 void
14923 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
14924 rtx operands[])
14926 rtx src1, src2, dst, op, clob;
14928 dst = ix86_fixup_binary_operands (code, mode, operands);
14929 src1 = operands[1];
14930 src2 = operands[2];
14932 /* Emit the instruction. */
14934 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
14935 if (reload_in_progress)
14937 /* Reload doesn't know about the flags register, and doesn't know that
14938 it doesn't want to clobber it. We can only do this with PLUS. */
14939 gcc_assert (code == PLUS);
14940 emit_insn (op);
14942 else
14944 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14945 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
14948 /* Fix up the destination if needed. */
14949 if (dst != operands[0])
14950 emit_move_insn (operands[0], dst);
14953 /* Return TRUE or FALSE depending on whether the binary operator meets the
14954 appropriate constraints. */
14956 bool
14957 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
14958 rtx operands[3])
14960 rtx dst = operands[0];
14961 rtx src1 = operands[1];
14962 rtx src2 = operands[2];
14964 /* Both source operands cannot be in memory. */
14965 if (MEM_P (src1) && MEM_P (src2))
14966 return false;
14968 /* Canonicalize operand order for commutative operators. */
14969 if (ix86_swap_binary_operands_p (code, mode, operands))
14971 rtx temp = src1;
14972 src1 = src2;
14973 src2 = temp;
14976 /* If the destination is memory, we must have a matching source operand. */
14977 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14978 return false;
14980 /* Source 1 cannot be a constant. */
14981 if (CONSTANT_P (src1))
14982 return false;
14984 /* Source 1 cannot be a non-matching memory. */
14985 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14986 return false;
14988 return true;
14991 /* Attempt to expand a unary operator. Make the expansion closer to the
14992 actual machine than just general_operand, which would allow 2 separate
14993 memory references (one output, one input) in a single insn. */
14995 void
14996 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
14997 rtx operands[])
14999 int matching_memory;
15000 rtx src, dst, op, clob;
15002 dst = operands[0];
15003 src = operands[1];
15005 /* If the destination is memory, and we do not have matching source
15006 operands, do things in registers. */
15007 matching_memory = 0;
15008 if (MEM_P (dst))
15010 if (rtx_equal_p (dst, src))
15011 matching_memory = 1;
15012 else
15013 dst = gen_reg_rtx (mode);
15016 /* When source operand is memory, destination must match. */
15017 if (MEM_P (src) && !matching_memory)
15018 src = force_reg (mode, src);
15020 /* Emit the instruction. */
15022 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
15023 if (reload_in_progress || code == NOT)
15025 /* Reload doesn't know about the flags register, and doesn't know that
15026 it doesn't want to clobber it. */
15027 gcc_assert (code == NOT);
15028 emit_insn (op);
15030 else
15032 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15033 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15036 /* Fix up the destination if needed. */
15037 if (dst != operands[0])
15038 emit_move_insn (operands[0], dst);
15041 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
15042 divisor are within the range [0-255]. */
15044 void
15045 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
15046 bool signed_p)
15048 rtx end_label, qimode_label;
15049 rtx insn, div, mod;
15050 rtx scratch, tmp0, tmp1, tmp2;
15051 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
15052 rtx (*gen_zero_extend) (rtx, rtx);
15053 rtx (*gen_test_ccno_1) (rtx, rtx);
15055 switch (mode)
15057 case SImode:
15058 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
15059 gen_test_ccno_1 = gen_testsi_ccno_1;
15060 gen_zero_extend = gen_zero_extendqisi2;
15061 break;
15062 case DImode:
15063 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
15064 gen_test_ccno_1 = gen_testdi_ccno_1;
15065 gen_zero_extend = gen_zero_extendqidi2;
15066 break;
15067 default:
15068 gcc_unreachable ();
15071 end_label = gen_label_rtx ();
15072 qimode_label = gen_label_rtx ();
15074 scratch = gen_reg_rtx (mode);
15076 /* Use 8bit unsigned divmod if the dividend and divisor are within
15077 the range [0-255]. */
15078 emit_move_insn (scratch, operands[2]);
15079 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
15080 scratch, 1, OPTAB_DIRECT);
15081 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
15082 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
15083 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
15084 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
15085 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
15086 pc_rtx);
15087 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
15088 predict_jump (REG_BR_PROB_BASE * 50 / 100);
15089 JUMP_LABEL (insn) = qimode_label;
15091 /* Generate the original signed/unsigned divmod. */
15092 div = gen_divmod4_1 (operands[0], operands[1],
15093 operands[2], operands[3]);
15094 emit_insn (div);
15096 /* Branch to the end. */
15097 emit_jump_insn (gen_jump (end_label));
15098 emit_barrier ();
15100 /* Generate 8bit unsigned divide. */
15101 emit_label (qimode_label);
15102 /* Don't use operands[0] for result of 8bit divide since not all
15103 registers support QImode ZERO_EXTRACT. */
15104 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
15105 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
15106 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
15107 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
15109 if (signed_p)
15111 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
15112 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
15114 else
15116 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
15117 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
15120 /* Extract remainder from AH. */
15121 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
15122 if (REG_P (operands[1]))
15123 insn = emit_move_insn (operands[1], tmp1);
15124 else
15126 /* Need a new scratch register since the old one has result
15127 of 8bit divide. */
15128 scratch = gen_reg_rtx (mode);
15129 emit_move_insn (scratch, tmp1);
15130 insn = emit_move_insn (operands[1], scratch);
15132 set_unique_reg_note (insn, REG_EQUAL, mod);
15134 /* Zero extend quotient from AL. */
15135 tmp1 = gen_lowpart (QImode, tmp0);
15136 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
15137 set_unique_reg_note (insn, REG_EQUAL, div);
15139 emit_label (end_label);
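/* Illustrative note (not part of the original sources): the expansion
   above ORs the dividend and divisor into a scratch register and tests
   it against ~0xff; if both values fit in 8 bits it branches to the
   QImode label and uses the short HImode/QImode unsigned divmod
   (quotient in AL, remainder in AH), otherwise it falls through to the
   full signed/unsigned divmod pattern.  */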
15142 #define LEA_SEARCH_THRESHOLD 12
15144 /* Search backward for a non-agu definition of register number REGNO1
15145 or register number REGNO2 in INSN's basic block until we
15146 1. pass LEA_SEARCH_THRESHOLD instructions, or
15147 2. reach the BB boundary, or
15148 3. reach an agu definition.
15149 Returns the distance between the non-agu definition point and INSN.
15150 If there is no definition point, returns -1. */
15152 static int
15153 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
15154 rtx insn)
15156 basic_block bb = BLOCK_FOR_INSN (insn);
15157 int distance = 0;
15158 df_ref *def_rec;
15159 enum attr_type insn_type;
15161 if (insn != BB_HEAD (bb))
15163 rtx prev = PREV_INSN (insn);
15164 while (prev && distance < LEA_SEARCH_THRESHOLD)
15166 if (NONDEBUG_INSN_P (prev))
15168 distance++;
15169 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15170 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15171 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15172 && (regno1 == DF_REF_REGNO (*def_rec)
15173 || regno2 == DF_REF_REGNO (*def_rec)))
15175 insn_type = get_attr_type (prev);
15176 if (insn_type != TYPE_LEA)
15177 goto done;
15180 if (prev == BB_HEAD (bb))
15181 break;
15182 prev = PREV_INSN (prev);
15186 if (distance < LEA_SEARCH_THRESHOLD)
15188 edge e;
15189 edge_iterator ei;
15190 bool simple_loop = false;
15192 FOR_EACH_EDGE (e, ei, bb->preds)
15193 if (e->src == bb)
15195 simple_loop = true;
15196 break;
15199 if (simple_loop)
15201 rtx prev = BB_END (bb);
15202 while (prev
15203 && prev != insn
15204 && distance < LEA_SEARCH_THRESHOLD)
15206 if (NONDEBUG_INSN_P (prev))
15208 distance++;
15209 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15210 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15211 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15212 && (regno1 == DF_REF_REGNO (*def_rec)
15213 || regno2 == DF_REF_REGNO (*def_rec)))
15215 insn_type = get_attr_type (prev);
15216 if (insn_type != TYPE_LEA)
15217 goto done;
15220 prev = PREV_INSN (prev);
15225 distance = -1;
15227 done:
15228 /* get_attr_type may modify recog data. We want to make sure
15229 that recog data is valid for instruction INSN, on which
15230 distance_non_agu_define is called. INSN is unchanged here. */
15231 extract_insn_cached (insn);
15232 return distance;
15235 /* Return the distance between INSN and the next insn that uses
15236 register number REGNO0 in a memory address. Return -1 if no such
15237 use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set. */
15239 static int
15240 distance_agu_use (unsigned int regno0, rtx insn)
15242 basic_block bb = BLOCK_FOR_INSN (insn);
15243 int distance = 0;
15244 df_ref *def_rec;
15245 df_ref *use_rec;
15247 if (insn != BB_END (bb))
15249 rtx next = NEXT_INSN (insn);
15250 while (next && distance < LEA_SEARCH_THRESHOLD)
15252 if (NONDEBUG_INSN_P (next))
15254 distance++;
15256 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
15257 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
15258 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
15259 && regno0 == DF_REF_REGNO (*use_rec))
15261 /* Return DISTANCE if OP0 is used in memory
15262 address in NEXT. */
15263 return distance;
15266 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
15267 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15268 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15269 && regno0 == DF_REF_REGNO (*def_rec))
15271 /* Return -1 if OP0 is set in NEXT. */
15272 return -1;
15275 if (next == BB_END (bb))
15276 break;
15277 next = NEXT_INSN (next);
15281 if (distance < LEA_SEARCH_THRESHOLD)
15283 edge e;
15284 edge_iterator ei;
15285 bool simple_loop = false;
15287 FOR_EACH_EDGE (e, ei, bb->succs)
15288 if (e->dest == bb)
15290 simple_loop = true;
15291 break;
15294 if (simple_loop)
15296 rtx next = BB_HEAD (bb);
15297 while (next
15298 && next != insn
15299 && distance < LEA_SEARCH_THRESHOLD)
15301 if (NONDEBUG_INSN_P (next))
15303 distance++;
15305 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
15306 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
15307 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
15308 && regno0 == DF_REF_REGNO (*use_rec))
15310 /* Return DISTANCE if OP0 is used in memory
15311 address in NEXT. */
15312 return distance;
15315 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
15316 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15317 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15318 && regno0 == DF_REF_REGNO (*def_rec))
15320 /* Return -1 if OP0 is set in NEXT. */
15321 return -1;
15325 next = NEXT_INSN (next);
15330 return -1;
15333 /* Define this macro to tune LEA priority vs ADD; it takes effect when
15334 there is a dilemma between choosing LEA or ADD.
15335 Negative value: ADD is preferred over LEA
15336 Zero: Neutral
15337 Positive value: LEA is preferred over ADD. */
15338 #define IX86_LEA_PRIORITY 2
15340 /* Return true if it is ok to optimize an ADD operation to an LEA
15341 operation to avoid flag register consumption. For most processors,
15342 ADD is faster than LEA. For processors like ATOM, if the
15343 destination register of the LEA holds an actual address which will
15344 be used soon, LEA is better; otherwise ADD is better. */
15346 bool
15347 ix86_lea_for_add_ok (rtx insn, rtx operands[])
15349 unsigned int regno0 = true_regnum (operands[0]);
15350 unsigned int regno1 = true_regnum (operands[1]);
15351 unsigned int regno2 = true_regnum (operands[2]);
15353 /* If a = b + c, (a!=b && a!=c), must use lea form. */
15354 if (regno0 != regno1 && regno0 != regno2)
15355 return true;
15357 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15358 return false;
15359 else
15361 int dist_define, dist_use;
15363 /* Return false if REGNO0 isn't used in a memory address. */
15364 dist_use = distance_agu_use (regno0, insn);
15365 if (dist_use <= 0)
15366 return false;
15368 dist_define = distance_non_agu_define (regno1, regno2, insn);
15369 if (dist_define <= 0)
15370 return true;
15372 /* If this insn has both a backward non-agu dependence and a forward
15373 agu dependence, the one with the shorter distance takes effect. */
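/* For instance, with IX86_LEA_PRIORITY == 2, a non-AGU definition 1 insn
   back and an address use 4 insns ahead gives 1 + 2 < 4, so the plain ADD
   form is chosen; were the use only 3 insns ahead, LEA would be kept.  */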
15374 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
15375 return false;
15377 return true;
15381 /* Return true if the destination reg of SET_BODY is the shift count of
15382 USE_BODY. */
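/* PARALLEL bodies are handled by recursing over their elements, so e.g. a
   shift whose pattern also clobbers the flags register is still matched.  */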
15384 static bool
15385 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
15387 rtx set_dest;
15388 rtx shift_rtx;
15389 int i;
15391 /* Retrieve destination of SET_BODY. */
15392 switch (GET_CODE (set_body))
15394 case SET:
15395 set_dest = SET_DEST (set_body);
15396 if (!set_dest || !REG_P (set_dest))
15397 return false;
15398 break;
15399 case PARALLEL:
15400 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
15401 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
15402 use_body))
15403 return true;
15404 default:
15405 return false;
15406 break;
15409 /* Retrieve shift count of USE_BODY. */
15410 switch (GET_CODE (use_body))
15412 case SET:
15413 shift_rtx = XEXP (use_body, 1);
15414 break;
15415 case PARALLEL:
15416 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
15417 if (ix86_dep_by_shift_count_body (set_body,
15418 XVECEXP (use_body, 0, i)))
15419 return true;
15420 default:
15421 return false;
15422 break;
15425 if (shift_rtx
15426 && (GET_CODE (shift_rtx) == ASHIFT
15427 || GET_CODE (shift_rtx) == LSHIFTRT
15428 || GET_CODE (shift_rtx) == ASHIFTRT
15429 || GET_CODE (shift_rtx) == ROTATE
15430 || GET_CODE (shift_rtx) == ROTATERT))
15432 rtx shift_count = XEXP (shift_rtx, 1);
15434 /* Return true if shift count is dest of SET_BODY. */
15435 if (REG_P (shift_count)
15436 && true_regnum (set_dest) == true_regnum (shift_count))
15437 return true;
15440 return false;
15443 /* Return true if the destination reg of SET_INSN is the shift count of
15444 USE_INSN. */
15446 bool
15447 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
15449 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
15450 PATTERN (use_insn));
15453 /* Return TRUE or FALSE depending on whether the unary operator meets the
15454 appropriate constraints. */
15456 bool
15457 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
15458 enum machine_mode mode ATTRIBUTE_UNUSED,
15459 rtx operands[2] ATTRIBUTE_UNUSED)
15461 /* If one of operands is memory, source and destination must match. */
15462 if ((MEM_P (operands[0])
15463 || MEM_P (operands[1]))
15464 && ! rtx_equal_p (operands[0], operands[1]))
15465 return false;
15466 return true;
15469 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
15470 are ok, keeping in mind the possible movddup alternative. */
15472 bool
15473 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
15475 if (MEM_P (operands[0]))
15476 return rtx_equal_p (operands[0], operands[1 + high]);
15477 if (MEM_P (operands[1]) && MEM_P (operands[2]))
15478 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
15479 return true;
15482 /* Post-reload splitter for converting an SF or DFmode value in an
15483 SSE register into an unsigned SImode value. */
15485 void
15486 ix86_split_convert_uns_si_sse (rtx operands[])
15488 enum machine_mode vecmode;
15489 rtx value, large, zero_or_two31, input, two31, x;
15491 large = operands[1];
15492 zero_or_two31 = operands[2];
15493 input = operands[3];
15494 two31 = operands[4];
15495 vecmode = GET_MODE (large);
15496 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
15498 /* Load up the value into the low element. We must ensure that the other
15499 elements are valid floats -- zero is the easiest such value. */
15500 if (MEM_P (input))
15502 if (vecmode == V4SFmode)
15503 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
15504 else
15505 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
15507 else
15509 input = gen_rtx_REG (vecmode, REGNO (input));
15510 emit_move_insn (value, CONST0_RTX (vecmode));
15511 if (vecmode == V4SFmode)
15512 emit_insn (gen_sse_movss (value, value, input));
15513 else
15514 emit_insn (gen_sse2_movsd (value, value, input));
15517 emit_move_insn (large, two31);
15518 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
15520 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
15521 emit_insn (gen_rtx_SET (VOIDmode, large, x));
15523 x = gen_rtx_AND (vecmode, zero_or_two31, large);
15524 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
15526 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
15527 emit_insn (gen_rtx_SET (VOIDmode, value, x));
15529 large = gen_rtx_REG (V4SImode, REGNO (large));
15530 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
15532 x = gen_rtx_REG (V4SImode, REGNO (value));
15533 if (vecmode == V4SFmode)
15534 emit_insn (gen_sse2_cvttps2dq (x, value));
15535 else
15536 emit_insn (gen_sse2_cvttpd2dq (x, value));
15537 value = x;
15539 emit_insn (gen_xorv4si3 (value, value, large));
15542 /* Convert an unsigned DImode value into a DFmode, using only SSE.
15543 Expects the 64-bit DImode to be supplied in a pair of integral
15544 registers. Requires SSE2; will use SSE3 if available. For x86_32,
15545 -mfpmath=sse, !optimize_size only. */
15547 void
15548 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
15550 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
15551 rtx int_xmm, fp_xmm;
15552 rtx biases, exponents;
15553 rtx x;
15555 int_xmm = gen_reg_rtx (V4SImode);
15556 if (TARGET_INTER_UNIT_MOVES)
15557 emit_insn (gen_movdi_to_sse (int_xmm, input));
15558 else if (TARGET_SSE_SPLIT_REGS)
15560 emit_clobber (int_xmm);
15561 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
15563 else
15565 x = gen_reg_rtx (V2DImode);
15566 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
15567 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
15570 x = gen_rtx_CONST_VECTOR (V4SImode,
15571 gen_rtvec (4, GEN_INT (0x43300000UL),
15572 GEN_INT (0x45300000UL),
15573 const0_rtx, const0_rtx));
15574 exponents = validize_mem (force_const_mem (V4SImode, x));
15576 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
15577 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
15579 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_lo_xmm)
15580 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
15581 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
15582 (0x1.0p84 + double(fp_value_hi_xmm)).
15583 Note these exponents differ by 32. */
15585 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
15587 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
15588 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
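/* For instance, for the input 0x0000000300000002 the two biased doubles
   are 0x1.0p52 + 2 and 0x1.0p84 + 3*0x1.0p32; after the bias subtraction
   below and the final add the result is 2 + 3*2^32, exactly the input
   value.  */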
15589 real_ldexp (&bias_lo_rvt, &dconst1, 52);
15590 real_ldexp (&bias_hi_rvt, &dconst1, 84);
15591 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
15592 x = const_double_from_real_value (bias_hi_rvt, DFmode);
15593 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
15594 biases = validize_mem (force_const_mem (V2DFmode, biases));
15595 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
15597 /* Add the upper and lower DFmode values together. */
15598 if (TARGET_SSE3)
15599 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
15600 else
15602 x = copy_to_mode_reg (V2DFmode, fp_xmm);
15603 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
15604 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
15607 ix86_expand_vector_extract (false, target, fp_xmm, 0);
15610 /* Not used, but eases macroization of patterns. */
15611 void
15612 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
15613 rtx input ATTRIBUTE_UNUSED)
15615 gcc_unreachable ();
15618 /* Convert an unsigned SImode value into a DFmode. Only currently used
15619 for SSE, but applicable anywhere. */
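/* The value is biased by adding INT_MIN (i.e. subtracting 2^31 with
   wraparound), converted as a signed SImode number, and then 2^31 is
   added back as a DFmode constant; every step is exact in double
   precision, so the result equals the original unsigned value.  */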
15621 void
15622 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
15624 REAL_VALUE_TYPE TWO31r;
15625 rtx x, fp;
15627 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
15628 NULL, 1, OPTAB_DIRECT);
15630 fp = gen_reg_rtx (DFmode);
15631 emit_insn (gen_floatsidf2 (fp, x));
15633 real_ldexp (&TWO31r, &dconst1, 31);
15634 x = const_double_from_real_value (TWO31r, DFmode);
15636 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
15637 if (x != target)
15638 emit_move_insn (target, x);
15641 /* Convert a signed DImode value into a DFmode. Only used for SSE in
15642 32-bit mode; otherwise we have a direct convert instruction. */
15644 void
15645 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
15647 REAL_VALUE_TYPE TWO32r;
15648 rtx fp_lo, fp_hi, x;
15650 fp_lo = gen_reg_rtx (DFmode);
15651 fp_hi = gen_reg_rtx (DFmode);
15653 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
15655 real_ldexp (&TWO32r, &dconst1, 32);
15656 x = const_double_from_real_value (TWO32r, DFmode);
15657 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
15659 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
15661 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
15662 0, OPTAB_DIRECT);
15663 if (x != target)
15664 emit_move_insn (target, x);
15667 /* Convert an unsigned SImode value into an SFmode value, using only
15668 SSE. For x86_32, -mfpmath=sse, !optimize_size only. */
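/* The 32-bit value is split into its 16-bit halves, each of which
   converts to SFmode exactly; the result is hi * 2^16 + lo, and only the
   final addition can round.  */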
15669 void
15670 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
15672 REAL_VALUE_TYPE ONE16r;
15673 rtx fp_hi, fp_lo, int_hi, int_lo, x;
15675 real_ldexp (&ONE16r, &dconst1, 16);
15676 x = const_double_from_real_value (ONE16r, SFmode);
15677 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
15678 NULL, 0, OPTAB_DIRECT);
15679 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
15680 NULL, 0, OPTAB_DIRECT);
15681 fp_hi = gen_reg_rtx (SFmode);
15682 fp_lo = gen_reg_rtx (SFmode);
15683 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
15684 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
15685 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
15686 0, OPTAB_DIRECT);
15687 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
15688 0, OPTAB_DIRECT);
15689 if (!rtx_equal_p (target, fp_hi))
15690 emit_move_insn (target, fp_hi);
15693 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
15694 then replicate the value for all elements of the vector
15695 register. */
15698 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
15700 rtvec v;
15701 switch (mode)
15703 case SImode:
15704 gcc_assert (vect);
15705 v = gen_rtvec (4, value, value, value, value);
15706 return gen_rtx_CONST_VECTOR (V4SImode, v);
15708 case DImode:
15709 gcc_assert (vect);
15710 v = gen_rtvec (2, value, value);
15711 return gen_rtx_CONST_VECTOR (V2DImode, v);
15713 case SFmode:
15714 if (vect)
15715 v = gen_rtvec (4, value, value, value, value);
15716 else
15717 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
15718 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
15719 return gen_rtx_CONST_VECTOR (V4SFmode, v);
15721 case DFmode:
15722 if (vect)
15723 v = gen_rtvec (2, value, value);
15724 else
15725 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
15726 return gen_rtx_CONST_VECTOR (V2DFmode, v);
15728 default:
15729 gcc_unreachable ();
15733 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
15734 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
15735 for an SSE register. If VECT is true, then replicate the mask for
15736 all elements of the vector register. If INVERT is true, then create
15737 a mask excluding the sign bit. */
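/* For example, for DFmode with !VECT and !INVERT the result is the V2DF
   constant { -0.0, 0.0 }, i.e. only bit 63 of the low element is set.  */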
15740 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
15742 enum machine_mode vec_mode, imode;
15743 HOST_WIDE_INT hi, lo;
15744 int shift = 63;
15745 rtx v;
15746 rtx mask;
15748 /* Find the sign bit, sign extended to 2*HWI. */
15749 switch (mode)
15751 case SImode:
15752 case SFmode:
15753 imode = SImode;
15754 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
15755 lo = 0x80000000, hi = lo < 0;
15756 break;
15758 case DImode:
15759 case DFmode:
15760 imode = DImode;
15761 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
15762 if (HOST_BITS_PER_WIDE_INT >= 64)
15763 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
15764 else
15765 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15766 break;
15768 case TImode:
15769 case TFmode:
15770 vec_mode = VOIDmode;
15771 if (HOST_BITS_PER_WIDE_INT >= 64)
15773 imode = TImode;
15774 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
15776 else
15778 rtvec vec;
15780 imode = DImode;
15781 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15783 if (invert)
15785 lo = ~lo, hi = ~hi;
15786 v = constm1_rtx;
15788 else
15789 v = const0_rtx;
15791 mask = immed_double_const (lo, hi, imode);
15793 vec = gen_rtvec (2, v, mask);
15794 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
15795 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
15797 return v;
15799 break;
15801 default:
15802 gcc_unreachable ();
15805 if (invert)
15806 lo = ~lo, hi = ~hi;
15808 /* Force this value into the low part of a fp vector constant. */
15809 mask = immed_double_const (lo, hi, imode);
15810 mask = gen_lowpart (mode, mask);
15812 if (vec_mode == VOIDmode)
15813 return force_reg (mode, mask);
15815 v = ix86_build_const_vector (mode, vect, mask);
15816 return force_reg (vec_mode, v);
15819 /* Generate code for floating point ABS or NEG. */
15821 void
15822 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
15823 rtx operands[])
15825 rtx mask, set, use, clob, dst, src;
15826 bool use_sse = false;
15827 bool vector_mode = VECTOR_MODE_P (mode);
15828 enum machine_mode elt_mode = mode;
15830 if (vector_mode)
15832 elt_mode = GET_MODE_INNER (mode);
15833 use_sse = true;
15835 else if (mode == TFmode)
15836 use_sse = true;
15837 else if (TARGET_SSE_MATH)
15838 use_sse = SSE_FLOAT_MODE_P (mode);
15840 /* NEG and ABS performed with SSE use bitwise mask operations.
15841 Create the appropriate mask now. */
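/* NEG flips the sign bit, so it XORs with a mask that has only the sign
   bit set; ABS clears it, so it ANDs with the complement of that mask,
   which is why the mask is built inverted for ABS.  */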
15842 if (use_sse)
15843 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
15844 else
15845 mask = NULL_RTX;
15847 dst = operands[0];
15848 src = operands[1];
15850 if (vector_mode)
15852 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
15853 set = gen_rtx_SET (VOIDmode, dst, set);
15854 emit_insn (set);
15856 else
15858 set = gen_rtx_fmt_e (code, mode, src);
15859 set = gen_rtx_SET (VOIDmode, dst, set);
15860 if (mask)
15862 use = gen_rtx_USE (VOIDmode, mask);
15863 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15864 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15865 gen_rtvec (3, set, use, clob)));
15867 else
15868 emit_insn (set);
15872 /* Expand a copysign operation. Special case operand 0 being a constant. */
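/* copysign (x, y) is computed as (magnitude bits of x) | (y & sign_mask).
   When x is a constant its magnitude is folded at compile time and the
   *_const patterns below are used; otherwise both masks are applied at
   run time through the *_var patterns.  */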
15874 void
15875 ix86_expand_copysign (rtx operands[])
15877 enum machine_mode mode;
15878 rtx dest, op0, op1, mask, nmask;
15880 dest = operands[0];
15881 op0 = operands[1];
15882 op1 = operands[2];
15884 mode = GET_MODE (dest);
15886 if (GET_CODE (op0) == CONST_DOUBLE)
15888 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
15890 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
15891 op0 = simplify_unary_operation (ABS, mode, op0, mode);
15893 if (mode == SFmode || mode == DFmode)
15895 enum machine_mode vmode;
15897 vmode = mode == SFmode ? V4SFmode : V2DFmode;
15899 if (op0 == CONST0_RTX (mode))
15900 op0 = CONST0_RTX (vmode);
15901 else
15903 rtx v = ix86_build_const_vector (mode, false, op0);
15905 op0 = force_reg (vmode, v);
15908 else if (op0 != CONST0_RTX (mode))
15909 op0 = force_reg (mode, op0);
15911 mask = ix86_build_signbit_mask (mode, 0, 0);
15913 if (mode == SFmode)
15914 copysign_insn = gen_copysignsf3_const;
15915 else if (mode == DFmode)
15916 copysign_insn = gen_copysigndf3_const;
15917 else
15918 copysign_insn = gen_copysigntf3_const;
15920 emit_insn (copysign_insn (dest, op0, op1, mask));
15922 else
15924 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
15926 nmask = ix86_build_signbit_mask (mode, 0, 1);
15927 mask = ix86_build_signbit_mask (mode, 0, 0);
15929 if (mode == SFmode)
15930 copysign_insn = gen_copysignsf3_var;
15931 else if (mode == DFmode)
15932 copysign_insn = gen_copysigndf3_var;
15933 else
15934 copysign_insn = gen_copysigntf3_var;
15936 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
15940 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
15941 be a constant, and so has already been expanded into a vector constant. */
15943 void
15944 ix86_split_copysign_const (rtx operands[])
15946 enum machine_mode mode, vmode;
15947 rtx dest, op0, mask, x;
15949 dest = operands[0];
15950 op0 = operands[1];
15951 mask = operands[3];
15953 mode = GET_MODE (dest);
15954 vmode = GET_MODE (mask);
15956 dest = simplify_gen_subreg (vmode, dest, mode, 0);
15957 x = gen_rtx_AND (vmode, dest, mask);
15958 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15960 if (op0 != CONST0_RTX (vmode))
15962 x = gen_rtx_IOR (vmode, dest, op0);
15963 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15967 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
15968 so we have to do two masks. */
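/* NMASK has every bit except the sign bit set and MASK only the sign bit;
   the result is (op0 & nmask) | (op1 & mask).  The register alternatives
   below only decide which operand may be clobbered in place.  */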
15970 void
15971 ix86_split_copysign_var (rtx operands[])
15973 enum machine_mode mode, vmode;
15974 rtx dest, scratch, op0, op1, mask, nmask, x;
15976 dest = operands[0];
15977 scratch = operands[1];
15978 op0 = operands[2];
15979 op1 = operands[3];
15980 nmask = operands[4];
15981 mask = operands[5];
15983 mode = GET_MODE (dest);
15984 vmode = GET_MODE (mask);
15986 if (rtx_equal_p (op0, op1))
15988 /* Shouldn't happen often (it's useless, obviously), but when it does
15989 we'd generate incorrect code if we continue below. */
15990 emit_move_insn (dest, op0);
15991 return;
15994 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
15996 gcc_assert (REGNO (op1) == REGNO (scratch));
15998 x = gen_rtx_AND (vmode, scratch, mask);
15999 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16001 dest = mask;
16002 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16003 x = gen_rtx_NOT (vmode, dest);
16004 x = gen_rtx_AND (vmode, x, op0);
16005 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16007 else
16009 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
16011 x = gen_rtx_AND (vmode, scratch, mask);
16013 else /* alternative 2,4 */
16015 gcc_assert (REGNO (mask) == REGNO (scratch));
16016 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
16017 x = gen_rtx_AND (vmode, scratch, op1);
16019 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16021 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
16023 dest = simplify_gen_subreg (vmode, op0, mode, 0);
16024 x = gen_rtx_AND (vmode, dest, nmask);
16026 else /* alternative 3,4 */
16028 gcc_assert (REGNO (nmask) == REGNO (dest));
16029 dest = nmask;
16030 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16031 x = gen_rtx_AND (vmode, dest, op0);
16033 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16036 x = gen_rtx_IOR (vmode, dest, scratch);
16037 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16040 /* Return TRUE or FALSE depending on whether the first SET in INSN
16041 has source and destination with matching CC modes and whether the
16042 CC mode is at least as constrained as REQ_MODE. */
16044 bool
16045 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
16047 rtx set;
16048 enum machine_mode set_mode;
16050 set = PATTERN (insn);
16051 if (GET_CODE (set) == PARALLEL)
16052 set = XVECEXP (set, 0, 0);
16053 gcc_assert (GET_CODE (set) == SET);
16054 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
16056 set_mode = GET_MODE (SET_DEST (set));
16057 switch (set_mode)
16059 case CCNOmode:
16060 if (req_mode != CCNOmode
16061 && (req_mode != CCmode
16062 || XEXP (SET_SRC (set), 1) != const0_rtx))
16063 return false;
16064 break;
16065 case CCmode:
16066 if (req_mode == CCGCmode)
16067 return false;
16068 /* FALLTHRU */
16069 case CCGCmode:
16070 if (req_mode == CCGOCmode || req_mode == CCNOmode)
16071 return false;
16072 /* FALLTHRU */
16073 case CCGOCmode:
16074 if (req_mode == CCZmode)
16075 return false;
16076 /* FALLTHRU */
16077 case CCAmode:
16078 case CCCmode:
16079 case CCOmode:
16080 case CCSmode:
16081 case CCZmode:
16082 break;
16084 default:
16085 gcc_unreachable ();
16088 return GET_MODE (SET_SRC (set)) == set_mode;
16091 /* Generate insn patterns to do an integer compare of OPERANDS. */
16093 static rtx
16094 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
16096 enum machine_mode cmpmode;
16097 rtx tmp, flags;
16099 cmpmode = SELECT_CC_MODE (code, op0, op1);
16100 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
16102 /* This is very simple, but making the interface the same as in the
16103 FP case makes the rest of the code easier. */
16104 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
16105 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
16107 /* Return the test that should be put into the flags user, i.e.
16108 the bcc, scc, or cmov instruction. */
16109 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
16112 /* Figure out whether to use ordered or unordered fp comparisons.
16113 Return the appropriate mode to use. */
16115 enum machine_mode
16116 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
16118 /* ??? In order to make all comparisons reversible, we do all comparisons
16119 non-trapping when compiling for IEEE. Once gcc is able to distinguish
16120 all forms of trapping and non-trapping comparisons, we can make inequality
16121 comparisons trapping again, since it results in better code when using
16122 FCOM based compares. */
16123 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
16126 enum machine_mode
16127 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
16129 enum machine_mode mode = GET_MODE (op0);
16131 if (SCALAR_FLOAT_MODE_P (mode))
16133 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16134 return ix86_fp_compare_mode (code);
16137 switch (code)
16139 /* Only zero flag is needed. */
16140 case EQ: /* ZF=0 */
16141 case NE: /* ZF!=0 */
16142 return CCZmode;
16143 /* Codes needing carry flag. */
16144 case GEU: /* CF=0 */
16145 case LTU: /* CF=1 */
16146 /* Detect overflow checks. They need just the carry flag. */
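/* E.g. the comparison (a + b) LTU a is the canonical unsigned
   add-overflow test; it is decided by the carry flag alone.  */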
16147 if (GET_CODE (op0) == PLUS
16148 && rtx_equal_p (op1, XEXP (op0, 0)))
16149 return CCCmode;
16150 else
16151 return CCmode;
16152 case GTU: /* CF=0 & ZF=0 */
16153 case LEU: /* CF=1 | ZF=1 */
16154 /* Detect overflow checks. They need just the carry flag. */
16155 if (GET_CODE (op0) == MINUS
16156 && rtx_equal_p (op1, XEXP (op0, 0)))
16157 return CCCmode;
16158 else
16159 return CCmode;
16160 /* Codes possibly doable only with sign flag when
16161 comparing against zero. */
16162 case GE: /* SF=OF or SF=0 */
16163 case LT: /* SF<>OF or SF=1 */
16164 if (op1 == const0_rtx)
16165 return CCGOCmode;
16166 else
16167 /* For other cases Carry flag is not required. */
16168 return CCGCmode;
16169 /* Codes doable only with the sign flag when comparing
16170 against zero, but we lack a jump instruction for that,
16171 so we need to use relational tests against overflow,
16172 which thus needs to be zero. */
16173 case GT: /* ZF=0 & SF=OF */
16174 case LE: /* ZF=1 | SF<>OF */
16175 if (op1 == const0_rtx)
16176 return CCNOmode;
16177 else
16178 return CCGCmode;
16179 /* The strcmp pattern does (use flags), and combine may ask us for the
16180 proper mode. */
16181 case USE:
16182 return CCmode;
16183 default:
16184 gcc_unreachable ();
16188 /* Return the fixed registers used for condition codes. */
16190 static bool
16191 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
16193 *p1 = FLAGS_REG;
16194 *p2 = FPSR_REG;
16195 return true;
16198 /* If two condition code modes are compatible, return a condition code
16199 mode which is compatible with both. Otherwise, return
16200 VOIDmode. */
16202 static enum machine_mode
16203 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
16205 if (m1 == m2)
16206 return m1;
16208 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
16209 return VOIDmode;
16211 if ((m1 == CCGCmode && m2 == CCGOCmode)
16212 || (m1 == CCGOCmode && m2 == CCGCmode))
16213 return CCGCmode;
16215 switch (m1)
16217 default:
16218 gcc_unreachable ();
16220 case CCmode:
16221 case CCGCmode:
16222 case CCGOCmode:
16223 case CCNOmode:
16224 case CCAmode:
16225 case CCCmode:
16226 case CCOmode:
16227 case CCSmode:
16228 case CCZmode:
16229 switch (m2)
16231 default:
16232 return VOIDmode;
16234 case CCmode:
16235 case CCGCmode:
16236 case CCGOCmode:
16237 case CCNOmode:
16238 case CCAmode:
16239 case CCCmode:
16240 case CCOmode:
16241 case CCSmode:
16242 case CCZmode:
16243 return CCmode;
16246 case CCFPmode:
16247 case CCFPUmode:
16248 /* These are only compatible with themselves, which we already
16249 checked above. */
16250 return VOIDmode;
16255 /* Return a comparison we can do that is equivalent to
16256 swap_condition (code), apart possibly from orderedness.
16257 But never change orderedness if TARGET_IEEE_FP, returning
16258 UNKNOWN in that case if necessary. */
16260 static enum rtx_code
16261 ix86_fp_swap_condition (enum rtx_code code)
16263 switch (code)
16265 case GT: /* GTU - CF=0 & ZF=0 */
16266 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
16267 case GE: /* GEU - CF=0 */
16268 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
16269 case UNLT: /* LTU - CF=1 */
16270 return TARGET_IEEE_FP ? UNKNOWN : GT;
16271 case UNLE: /* LEU - CF=1 | ZF=1 */
16272 return TARGET_IEEE_FP ? UNKNOWN : GE;
16273 default:
16274 return swap_condition (code);
16278 /* Return the cost of comparison CODE using the best strategy for performance.
16279 All following functions use the number of instructions as the cost metric.
16280 In the future this should be tweaked to compute bytes for optimize_size and
16281 take into account the performance of various instructions on various CPUs. */
16283 static int
16284 ix86_fp_comparison_cost (enum rtx_code code)
16286 int arith_cost;
16288 /* The cost of code using bit-twiddling on %ah. */
16289 switch (code)
16291 case UNLE:
16292 case UNLT:
16293 case LTGT:
16294 case GT:
16295 case GE:
16296 case UNORDERED:
16297 case ORDERED:
16298 case UNEQ:
16299 arith_cost = 4;
16300 break;
16301 case LT:
16302 case NE:
16303 case EQ:
16304 case UNGE:
16305 arith_cost = TARGET_IEEE_FP ? 5 : 4;
16306 break;
16307 case LE:
16308 case UNGT:
16309 arith_cost = TARGET_IEEE_FP ? 6 : 4;
16310 break;
16311 default:
16312 gcc_unreachable ();
16315 switch (ix86_fp_comparison_strategy (code))
16317 case IX86_FPCMP_COMI:
16318 return arith_cost > 4 ? 3 : 2;
16319 case IX86_FPCMP_SAHF:
16320 return arith_cost > 4 ? 4 : 3;
16321 default:
16322 return arith_cost;
16326 /* Return the strategy to use for floating-point comparisons. We assume that
16327 fcomi is always preferable where available, since that is also true when
16328 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
16330 enum ix86_fpcmp_strategy
16331 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
16333 /* Do fcomi/sahf based test when profitable. */
16335 if (TARGET_CMOVE)
16336 return IX86_FPCMP_COMI;
16338 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
16339 return IX86_FPCMP_SAHF;
16341 return IX86_FPCMP_ARITH;
16344 /* Swap, force into registers, or otherwise massage the two operands
16345 to a fp comparison. The operands are updated in place; the new
16346 comparison code is returned. */
16348 static enum rtx_code
16349 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
16351 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
16352 rtx op0 = *pop0, op1 = *pop1;
16353 enum machine_mode op_mode = GET_MODE (op0);
16354 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
16356 /* All of the unordered compare instructions only work on registers.
16357 The same is true of the fcomi compare instructions. The XFmode
16358 compare instructions require registers except when comparing
16359 against zero or when converting operand 1 from fixed point to
16360 floating point. */
16362 if (!is_sse
16363 && (fpcmp_mode == CCFPUmode
16364 || (op_mode == XFmode
16365 && ! (standard_80387_constant_p (op0) == 1
16366 || standard_80387_constant_p (op1) == 1)
16367 && GET_CODE (op1) != FLOAT)
16368 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
16370 op0 = force_reg (op_mode, op0);
16371 op1 = force_reg (op_mode, op1);
16373 else
16375 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
16376 things around if that appears profitable; otherwise force op0
16377 into a register. */
16379 if (standard_80387_constant_p (op0) == 0
16380 || (MEM_P (op0)
16381 && ! (standard_80387_constant_p (op1) == 0
16382 || MEM_P (op1))))
16384 enum rtx_code new_code = ix86_fp_swap_condition (code);
16385 if (new_code != UNKNOWN)
16387 rtx tmp;
16388 tmp = op0, op0 = op1, op1 = tmp;
16389 code = new_code;
16393 if (!REG_P (op0))
16394 op0 = force_reg (op_mode, op0);
16396 if (CONSTANT_P (op1))
16398 int tmp = standard_80387_constant_p (op1);
16399 if (tmp == 0)
16400 op1 = validize_mem (force_const_mem (op_mode, op1));
16401 else if (tmp == 1)
16403 if (TARGET_CMOVE)
16404 op1 = force_reg (op_mode, op1);
16406 else
16407 op1 = force_reg (op_mode, op1);
16411 /* Try to rearrange the comparison to make it cheaper. */
16412 if (ix86_fp_comparison_cost (code)
16413 > ix86_fp_comparison_cost (swap_condition (code))
16414 && (REG_P (op1) || can_create_pseudo_p ()))
16416 rtx tmp;
16417 tmp = op0, op0 = op1, op1 = tmp;
16418 code = swap_condition (code);
16419 if (!REG_P (op0))
16420 op0 = force_reg (op_mode, op0);
16423 *pop0 = op0;
16424 *pop1 = op1;
16425 return code;
16428 /* Convert the comparison codes we use to represent FP comparisons to the
16429 integer code that will result in a proper branch. Return UNKNOWN if no
16430 such code is available. */
16432 enum rtx_code
16433 ix86_fp_compare_code_to_integer (enum rtx_code code)
16435 switch (code)
16437 case GT:
16438 return GTU;
16439 case GE:
16440 return GEU;
16441 case ORDERED:
16442 case UNORDERED:
16443 return code;
16444 break;
16445 case UNEQ:
16446 return EQ;
16447 break;
16448 case UNLT:
16449 return LTU;
16450 break;
16451 case UNLE:
16452 return LEU;
16453 break;
16454 case LTGT:
16455 return NE;
16456 break;
16457 default:
16458 return UNKNOWN;
16462 /* Generate insn patterns to do a floating point compare of OPERANDS. */
16464 static rtx
16465 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
16467 enum machine_mode fpcmp_mode, intcmp_mode;
16468 rtx tmp, tmp2;
16470 fpcmp_mode = ix86_fp_compare_mode (code);
16471 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
16473 /* Do fcomi/sahf based test when profitable. */
16474 switch (ix86_fp_comparison_strategy (code))
16476 case IX86_FPCMP_COMI:
16477 intcmp_mode = fpcmp_mode;
16478 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16479 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
16480 tmp);
16481 emit_insn (tmp);
16482 break;
16484 case IX86_FPCMP_SAHF:
16485 intcmp_mode = fpcmp_mode;
16486 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16487 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
16488 tmp);
16490 if (!scratch)
16491 scratch = gen_reg_rtx (HImode);
16492 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
16493 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
16494 break;
16496 case IX86_FPCMP_ARITH:
16497 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
16498 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16499 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
16500 if (!scratch)
16501 scratch = gen_reg_rtx (HImode);
16502 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
16504 /* In the unordered case, we have to check C2 for NaNs, which
16505 doesn't happen to work out to anything nice combination-wise.
16506 So do some bit twiddling on the value we've got in AH to come
16507 up with an appropriate set of condition codes. */
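/* In AH (the high byte of the FPU status word), C0 is bit 0, C2 is bit 2
   and C3 is bit 6; hence the masks 0x45 (C3|C2|C0), 0x05 (C2|C0),
   0x40 (C3) and 0x04 (C2) used below.  */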
16509 intcmp_mode = CCNOmode;
16510 switch (code)
16512 case GT:
16513 case UNGT:
16514 if (code == GT || !TARGET_IEEE_FP)
16516 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
16517 code = EQ;
16519 else
16521 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16522 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
16523 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
16524 intcmp_mode = CCmode;
16525 code = GEU;
16527 break;
16528 case LT:
16529 case UNLT:
16530 if (code == LT && TARGET_IEEE_FP)
16532 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16533 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
16534 intcmp_mode = CCmode;
16535 code = EQ;
16537 else
16539 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
16540 code = NE;
16542 break;
16543 case GE:
16544 case UNGE:
16545 if (code == GE || !TARGET_IEEE_FP)
16547 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
16548 code = EQ;
16550 else
16552 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16553 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
16554 code = NE;
16556 break;
16557 case LE:
16558 case UNLE:
16559 if (code == LE && TARGET_IEEE_FP)
16561 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16562 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
16563 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
16564 intcmp_mode = CCmode;
16565 code = LTU;
16567 else
16569 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
16570 code = NE;
16572 break;
16573 case EQ:
16574 case UNEQ:
16575 if (code == EQ && TARGET_IEEE_FP)
16577 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16578 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
16579 intcmp_mode = CCmode;
16580 code = EQ;
16582 else
16584 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
16585 code = NE;
16587 break;
16588 case NE:
16589 case LTGT:
16590 if (code == NE && TARGET_IEEE_FP)
16592 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16593 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
16594 GEN_INT (0x40)));
16595 code = NE;
16597 else
16599 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
16600 code = EQ;
16602 break;
16604 case UNORDERED:
16605 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
16606 code = NE;
16607 break;
16608 case ORDERED:
16609 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
16610 code = EQ;
16611 break;
16613 default:
16614 gcc_unreachable ();
16616 break;
16618 default:
16619 gcc_unreachable();
16622 /* Return the test that should be put into the flags user, i.e.
16623 the bcc, scc, or cmov instruction. */
16624 return gen_rtx_fmt_ee (code, VOIDmode,
16625 gen_rtx_REG (intcmp_mode, FLAGS_REG),
16626 const0_rtx);
16629 static rtx
16630 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
16632 rtx ret;
16634 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
16635 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
16637 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
16639 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
16640 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16642 else
16643 ret = ix86_expand_int_compare (code, op0, op1);
16645 return ret;
16648 void
16649 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
16651 rtx tmp;
16653 switch (GET_MODE (op0))
16655 case SFmode:
16656 case DFmode:
16657 case XFmode:
16658 case QImode:
16659 case HImode:
16660 case SImode:
16661 simple:
16662 tmp = ix86_expand_compare (code, op0, op1);
16663 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16664 gen_rtx_LABEL_REF (VOIDmode, label),
16665 pc_rtx);
16666 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16667 return;
16669 case DImode:
16670 if (TARGET_64BIT)
16671 goto simple;
16672 case TImode:
16673 /* Expand DImode branch into multiple compare+branch. */
16675 rtx lo[2], hi[2], label2;
16676 enum rtx_code code1, code2, code3;
16677 enum machine_mode submode;
16679 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
16681 tmp = op0, op0 = op1, op1 = tmp;
16682 code = swap_condition (code);
16684 if (GET_MODE (op0) == DImode)
16686 split_di (&op0, 1, lo+0, hi+0);
16687 split_di (&op1, 1, lo+1, hi+1);
16688 submode = SImode;
16690 else
16692 split_ti (&op0, 1, lo+0, hi+0);
16693 split_ti (&op1, 1, lo+1, hi+1);
16694 submode = DImode;
16697 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
16698 avoid two branches. This costs one extra insn, so disable when
16699 optimizing for size. */
16701 if ((code == EQ || code == NE)
16702 && (!optimize_insn_for_size_p ()
16703 || hi[1] == const0_rtx || lo[1] == const0_rtx))
16705 rtx xor0, xor1;
16707 xor1 = hi[0];
16708 if (hi[1] != const0_rtx)
16709 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
16710 NULL_RTX, 0, OPTAB_WIDEN);
16712 xor0 = lo[0];
16713 if (lo[1] != const0_rtx)
16714 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
16715 NULL_RTX, 0, OPTAB_WIDEN);
16717 tmp = expand_binop (submode, ior_optab, xor1, xor0,
16718 NULL_RTX, 0, OPTAB_WIDEN);
16720 ix86_expand_branch (code, tmp, const0_rtx, label);
16721 return;
16724 /* Otherwise, if we are doing a less-than or greater-or-equal-than
16725 comparison, op1 is a constant, and the low word is zero, then we
16726 can just examine the high word. Similarly for a low word of -1 and
16727 less-or-equal-than or greater-than. */
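/* For instance, an unsigned a < 0x500000000 on a 32-bit target has a low
   constant word of zero, so it reduces to the single compare hi(a) < 5.  */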
16729 if (CONST_INT_P (hi[1]))
16730 switch (code)
16732 case LT: case LTU: case GE: case GEU:
16733 if (lo[1] == const0_rtx)
16735 ix86_expand_branch (code, hi[0], hi[1], label);
16736 return;
16738 break;
16739 case LE: case LEU: case GT: case GTU:
16740 if (lo[1] == constm1_rtx)
16742 ix86_expand_branch (code, hi[0], hi[1], label);
16743 return;
16745 break;
16746 default:
16747 break;
16750 /* Otherwise, we need two or three jumps. */
16752 label2 = gen_label_rtx ();
16754 code1 = code;
16755 code2 = swap_condition (code);
16756 code3 = unsigned_condition (code);
16758 switch (code)
16760 case LT: case GT: case LTU: case GTU:
16761 break;
16763 case LE: code1 = LT; code2 = GT; break;
16764 case GE: code1 = GT; code2 = LT; break;
16765 case LEU: code1 = LTU; code2 = GTU; break;
16766 case GEU: code1 = GTU; code2 = LTU; break;
16768 case EQ: code1 = UNKNOWN; code2 = NE; break;
16769 case NE: code2 = UNKNOWN; break;
16771 default:
16772 gcc_unreachable ();
16776 * a < b =>
16777 * if (hi(a) < hi(b)) goto true;
16778 * if (hi(a) > hi(b)) goto false;
16779 * if (lo(a) < lo(b)) goto true;
16780 * false:
16783 if (code1 != UNKNOWN)
16784 ix86_expand_branch (code1, hi[0], hi[1], label);
16785 if (code2 != UNKNOWN)
16786 ix86_expand_branch (code2, hi[0], hi[1], label2);
16788 ix86_expand_branch (code3, lo[0], lo[1], label);
16790 if (code2 != UNKNOWN)
16791 emit_label (label2);
16792 return;
16795 default:
16796 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
16797 goto simple;
16801 /* Split branch based on floating point condition. */
16802 void
16803 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
16804 rtx target1, rtx target2, rtx tmp, rtx pushed)
16806 rtx condition;
16807 rtx i;
16809 if (target2 != pc_rtx)
16811 rtx tmp = target2;
16812 code = reverse_condition_maybe_unordered (code);
16813 target2 = target1;
16814 target1 = tmp;
16817 condition = ix86_expand_fp_compare (code, op1, op2,
16818 tmp);
16820 /* Remove pushed operand from stack. */
16821 if (pushed)
16822 ix86_free_from_memory (GET_MODE (pushed));
16824 i = emit_jump_insn (gen_rtx_SET
16825 (VOIDmode, pc_rtx,
16826 gen_rtx_IF_THEN_ELSE (VOIDmode,
16827 condition, target1, target2)));
16828 if (split_branch_probability >= 0)
16829 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
16832 void
16833 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16835 rtx ret;
16837 gcc_assert (GET_MODE (dest) == QImode);
16839 ret = ix86_expand_compare (code, op0, op1);
16840 PUT_MODE (ret, QImode);
16841 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
16844 /* Expand a comparison setting or clearing the carry flag. Return true
16845 when successful and set *POP for the operation. */
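/* On success *POP is an LTU or GEU test of the flags register, which the
   sbb-based sequences in ix86_expand_int_movcc below rely on.  */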
16846 static bool
16847 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
16849 enum machine_mode mode =
16850 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
16852 /* Do not handle DImode compares that go through the special path. */
16853 if (mode == (TARGET_64BIT ? TImode : DImode))
16854 return false;
16856 if (SCALAR_FLOAT_MODE_P (mode))
16858 rtx compare_op, compare_seq;
16860 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16862 /* Shortcut: the following common codes never translate
16863 into carry-flag compares. */
16864 if (code == EQ || code == NE || code == UNEQ || code == LTGT
16865 || code == ORDERED || code == UNORDERED)
16866 return false;
16868 /* These comparisons require zero flag; swap operands so they won't. */
16869 if ((code == GT || code == UNLE || code == LE || code == UNGT)
16870 && !TARGET_IEEE_FP)
16872 rtx tmp = op0;
16873 op0 = op1;
16874 op1 = tmp;
16875 code = swap_condition (code);
16878 /* Try to expand the comparison and verify that we end up with a
16879 carry-flag-based comparison. This fails to be true only when
16880 we decide to expand the comparison using arithmetic, which is
16881 not a common scenario. */
16882 start_sequence ();
16883 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16884 compare_seq = get_insns ();
16885 end_sequence ();
16887 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16888 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16889 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
16890 else
16891 code = GET_CODE (compare_op);
16893 if (code != LTU && code != GEU)
16894 return false;
16896 emit_insn (compare_seq);
16897 *pop = compare_op;
16898 return true;
16901 if (!INTEGRAL_MODE_P (mode))
16902 return false;
16904 switch (code)
16906 case LTU:
16907 case GEU:
16908 break;
16910 /* Convert a==0 into (unsigned)a<1. */
16911 case EQ:
16912 case NE:
16913 if (op1 != const0_rtx)
16914 return false;
16915 op1 = const1_rtx;
16916 code = (code == EQ ? LTU : GEU);
16917 break;
16919 /* Convert a>b into b<a or a>=b+1. */
16920 case GTU:
16921 case LEU:
16922 if (CONST_INT_P (op1))
16924 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
16925 /* Bail out on overflow. We could still swap the operands, but that
16926 would force loading of the constant into a register. */
16927 if (op1 == const0_rtx
16928 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
16929 return false;
16930 code = (code == GTU ? GEU : LTU);
16932 else
16934 rtx tmp = op1;
16935 op1 = op0;
16936 op0 = tmp;
16937 code = (code == GTU ? LTU : GEU);
16939 break;
16941 /* Convert a>=0 into (unsigned)a<0x80000000. */
16942 case LT:
16943 case GE:
16944 if (mode == DImode || op1 != const0_rtx)
16945 return false;
16946 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16947 code = (code == LT ? GEU : LTU);
16948 break;
16949 case LE:
16950 case GT:
16951 if (mode == DImode || op1 != constm1_rtx)
16952 return false;
16953 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16954 code = (code == LE ? GEU : LTU);
16955 break;
16957 default:
16958 return false;
16960 /* Swapping operands may cause a constant to appear as the first operand. */
16961 if (!nonimmediate_operand (op0, VOIDmode))
16963 if (!can_create_pseudo_p ())
16964 return false;
16965 op0 = force_reg (mode, op0);
16967 *pop = ix86_expand_compare (code, op0, op1);
16968 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
16969 return true;
16972 bool
16973 ix86_expand_int_movcc (rtx operands[])
16975 enum rtx_code code = GET_CODE (operands[1]), compare_code;
16976 rtx compare_seq, compare_op;
16977 enum machine_mode mode = GET_MODE (operands[0]);
16978 bool sign_bit_compare_p = false;
16979 rtx op0 = XEXP (operands[1], 0);
16980 rtx op1 = XEXP (operands[1], 1);
16982 start_sequence ();
16983 compare_op = ix86_expand_compare (code, op0, op1);
16984 compare_seq = get_insns ();
16985 end_sequence ();
16987 compare_code = GET_CODE (compare_op);
16989 if ((op1 == const0_rtx && (code == GE || code == LT))
16990 || (op1 == constm1_rtx && (code == GT || code == LE)))
16991 sign_bit_compare_p = true;
16993 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
16994 HImode insns, we'd be swallowed in word prefix ops. */
16996 if ((mode != HImode || TARGET_FAST_PREFIX)
16997 && (mode != (TARGET_64BIT ? TImode : DImode))
16998 && CONST_INT_P (operands[2])
16999 && CONST_INT_P (operands[3]))
17001 rtx out = operands[0];
17002 HOST_WIDE_INT ct = INTVAL (operands[2]);
17003 HOST_WIDE_INT cf = INTVAL (operands[3]);
17004 HOST_WIDE_INT diff;
17006 diff = ct - cf;
17007 /* Sign bit compares are better done using shifts than by using
17008 sbb. */
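/* The sbb trick: after a compare that sets the carry flag, sbb dest,dest
   computes dest - dest - CF, i.e. 0 or -1 in every bit, which the
   sequences below then adjust into CT or CF with at most an add, or,
   not or and.  */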
17009 if (sign_bit_compare_p
17010 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17012 /* Detect overlap between destination and compare sources. */
17013 rtx tmp = out;
17015 if (!sign_bit_compare_p)
17017 rtx flags;
17018 bool fpcmp = false;
17020 compare_code = GET_CODE (compare_op);
17022 flags = XEXP (compare_op, 0);
17024 if (GET_MODE (flags) == CCFPmode
17025 || GET_MODE (flags) == CCFPUmode)
17027 fpcmp = true;
17028 compare_code
17029 = ix86_fp_compare_code_to_integer (compare_code);
17032 /* To simplify the rest of the code, restrict to the GEU case. */
17033 if (compare_code == LTU)
17035 HOST_WIDE_INT tmp = ct;
17036 ct = cf;
17037 cf = tmp;
17038 compare_code = reverse_condition (compare_code);
17039 code = reverse_condition (code);
17041 else
17043 if (fpcmp)
17044 PUT_CODE (compare_op,
17045 reverse_condition_maybe_unordered
17046 (GET_CODE (compare_op)));
17047 else
17048 PUT_CODE (compare_op,
17049 reverse_condition (GET_CODE (compare_op)));
17051 diff = ct - cf;
17053 if (reg_overlap_mentioned_p (out, op0)
17054 || reg_overlap_mentioned_p (out, op1))
17055 tmp = gen_reg_rtx (mode);
17057 if (mode == DImode)
17058 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
17059 else
17060 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
17061 flags, compare_op));
17063 else
17065 if (code == GT || code == GE)
17066 code = reverse_condition (code);
17067 else
17069 HOST_WIDE_INT tmp = ct;
17070 ct = cf;
17071 cf = tmp;
17072 diff = ct - cf;
17074 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
17077 if (diff == 1)
17080 * cmpl op0,op1
17081 * sbbl dest,dest
17082 * [addl dest, ct]
17084 * Size 5 - 8.
17086 if (ct)
17087 tmp = expand_simple_binop (mode, PLUS,
17088 tmp, GEN_INT (ct),
17089 copy_rtx (tmp), 1, OPTAB_DIRECT);
17091 else if (cf == -1)
17094 * cmpl op0,op1
17095 * sbbl dest,dest
17096 * orl $ct, dest
17098 * Size 8.
17100 tmp = expand_simple_binop (mode, IOR,
17101 tmp, GEN_INT (ct),
17102 copy_rtx (tmp), 1, OPTAB_DIRECT);
17104 else if (diff == -1 && ct)
17107 * cmpl op0,op1
17108 * sbbl dest,dest
17109 * notl dest
17110 * [addl dest, cf]
17112 * Size 8 - 11.
17114 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17115 if (cf)
17116 tmp = expand_simple_binop (mode, PLUS,
17117 copy_rtx (tmp), GEN_INT (cf),
17118 copy_rtx (tmp), 1, OPTAB_DIRECT);
17120 else
17123 * cmpl op0,op1
17124 * sbbl dest,dest
17125 * [notl dest]
17126 * andl cf - ct, dest
17127 * [addl dest, ct]
17129 * Size 8 - 11.
17132 if (cf == 0)
17134 cf = ct;
17135 ct = 0;
17136 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17139 tmp = expand_simple_binop (mode, AND,
17140 copy_rtx (tmp),
17141 gen_int_mode (cf - ct, mode),
17142 copy_rtx (tmp), 1, OPTAB_DIRECT);
17143 if (ct)
17144 tmp = expand_simple_binop (mode, PLUS,
17145 copy_rtx (tmp), GEN_INT (ct),
17146 copy_rtx (tmp), 1, OPTAB_DIRECT);
17149 if (!rtx_equal_p (tmp, out))
17150 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
17152 return true;
17155 if (diff < 0)
17157 enum machine_mode cmp_mode = GET_MODE (op0);
17159 HOST_WIDE_INT tmp;
17160 tmp = ct, ct = cf, cf = tmp;
17161 diff = -diff;
17163 if (SCALAR_FLOAT_MODE_P (cmp_mode))
17165 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
17167 /* We may be reversing unordered compare to normal compare, that
17168 is not valid in general (we may convert non-trapping condition
17169 to trapping one), however on i386 we currently emit all
17170 comparisons unordered. */
17171 compare_code = reverse_condition_maybe_unordered (compare_code);
17172 code = reverse_condition_maybe_unordered (code);
17174 else
17176 compare_code = reverse_condition (compare_code);
17177 code = reverse_condition (code);
17181 compare_code = UNKNOWN;
17182 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
17183 && CONST_INT_P (op1))
17185 if (op1 == const0_rtx
17186 && (code == LT || code == GE))
17187 compare_code = code;
17188 else if (op1 == constm1_rtx)
17190 if (code == LE)
17191 compare_code = LT;
17192 else if (code == GT)
17193 compare_code = GE;
17197 /* Optimize dest = (op0 < 0) ? -1 : cf. */
17198 if (compare_code != UNKNOWN
17199 && GET_MODE (op0) == GET_MODE (out)
17200 && (cf == -1 || ct == -1))
17202 /* If the lea code below could be used, only optimize
17203 if it results in a 2-insn sequence. */
17205 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
17206 || diff == 3 || diff == 5 || diff == 9)
17207 || (compare_code == LT && ct == -1)
17208 || (compare_code == GE && cf == -1))
17211 * notl op1 (if necessary)
17212 * sarl $31, op1
17213 * orl cf, op1
17215 if (ct != -1)
17217 cf = ct;
17218 ct = -1;
17219 code = reverse_condition (code);
17222 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
17224 out = expand_simple_binop (mode, IOR,
17225 out, GEN_INT (cf),
17226 out, 1, OPTAB_DIRECT);
17227 if (out != operands[0])
17228 emit_move_insn (operands[0], out);
17230 return true;
17235 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
17236 || diff == 3 || diff == 5 || diff == 9)
17237 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
17238 && (mode != DImode
17239 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
17242 * xorl dest,dest
17243 * cmpl op1,op2
17244 * setcc dest
17245 * lea cf(dest*(ct-cf)),dest
17247 * Size 14.
17249 * This also catches the degenerate setcc-only case.
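* For instance, (x < y) ? 7 : 3 has diff == 4 and cf == 3, so the result
* is computed as a setcc followed by lea 3(,dest,4),dest.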
17252 rtx tmp;
17253 int nops;
17255 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
17257 nops = 0;
17258 /* On x86_64 the lea instruction operates on Pmode, so we need
17259 to do the arithmetic in the proper mode to match. */
17260 if (diff == 1)
17261 tmp = copy_rtx (out);
17262 else
17264 rtx out1;
17265 out1 = copy_rtx (out);
17266 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
17267 nops++;
17268 if (diff & 1)
17270 tmp = gen_rtx_PLUS (mode, tmp, out1);
17271 nops++;
17274 if (cf != 0)
17276 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
17277 nops++;
17279 if (!rtx_equal_p (tmp, out))
17281 if (nops == 1)
17282 out = force_operand (tmp, copy_rtx (out));
17283 else
17284 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
17286 if (!rtx_equal_p (out, operands[0]))
17287 emit_move_insn (operands[0], copy_rtx (out));
17289 return true;
17293 * General case: Jumpful:
17294 * xorl dest,dest cmpl op1, op2
17295 * cmpl op1, op2 movl ct, dest
17296 * setcc dest jcc 1f
17297 * decl dest movl cf, dest
17298 * andl (cf-ct),dest 1:
17299 * addl ct,dest
17301 * Size 20. Size 14.
17303 * This is reasonably steep, but branch mispredict costs are
17304 * high on modern CPUs, so consider failing only if optimizing
17305 * for space.
17308 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
17309 && BRANCH_COST (optimize_insn_for_speed_p (),
17310 false) >= 2)
17312 if (cf == 0)
17314 enum machine_mode cmp_mode = GET_MODE (op0);
17316 cf = ct;
17317 ct = 0;
17319 if (SCALAR_FLOAT_MODE_P (cmp_mode))
17321 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
17323 /* We may be reversing an unordered compare to a normal compare;
17324 that is not valid in general (we may convert a non-trapping
17325 condition into a trapping one), but on i386 we currently
17326 emit all comparisons unordered. */
17327 code = reverse_condition_maybe_unordered (code);
17329 else
17331 code = reverse_condition (code);
17332 if (compare_code != UNKNOWN)
17333 compare_code = reverse_condition (compare_code);
17337 if (compare_code != UNKNOWN)
17339 /* notl op1 (if needed)
17340 sarl $31, op1
17341 andl (cf-ct), op1
17342 addl ct, op1
17344 For x < 0 (resp. x <= -1) there will be no notl,
17345 so if possible swap the constants to get rid of the
17346 complement.
17347 True/false will be -1/0 while code below (store flag
17348 followed by decrement) is 0/-1, so the constants need
17349 to be exchanged once more. */
17351 if (compare_code == GE || !cf)
17353 code = reverse_condition (code);
17354 compare_code = LT;
17356 else
17358 HOST_WIDE_INT tmp = cf;
17359 cf = ct;
17360 ct = tmp;
17363 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
17365 else
17367 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
17369 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
17370 constm1_rtx,
17371 copy_rtx (out), 1, OPTAB_DIRECT);
17374 out = expand_simple_binop (mode, AND, copy_rtx (out),
17375 gen_int_mode (cf - ct, mode),
17376 copy_rtx (out), 1, OPTAB_DIRECT);
17377 if (ct)
17378 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
17379 copy_rtx (out), 1, OPTAB_DIRECT);
17380 if (!rtx_equal_p (out, operands[0]))
17381 emit_move_insn (operands[0], copy_rtx (out));
17383 return true;
17387 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
17389 /* Try a few things more with specific constants and a variable. */
17391 optab op;
17392 rtx var, orig_out, out, tmp;
17394 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
17395 return false;
17397 /* If one of the two operands is an interesting constant, load a
17398 constant with the above and mask it in with a logical operation. */
17400 if (CONST_INT_P (operands[2]))
17402 var = operands[3];
17403 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
17404 operands[3] = constm1_rtx, op = and_optab;
17405 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
17406 operands[3] = const0_rtx, op = ior_optab;
17407 else
17408 return false;
17410 else if (CONST_INT_P (operands[3]))
17412 var = operands[2];
17413 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
17414 operands[2] = constm1_rtx, op = and_optab;
17415 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
17416 operands[2] = const0_rtx, op = ior_optab;
17417 else
17418 return false;
17420 else
17421 return false;
17423 orig_out = operands[0];
17424 tmp = gen_reg_rtx (mode);
17425 operands[0] = tmp;
17427 /* Recurse to get the constant loaded. */
17428 if (ix86_expand_int_movcc (operands) == 0)
17429 return false;
17431 /* Mask in the interesting variable. */
17432 out = expand_binop (mode, op, var, tmp, orig_out, 0,
17433 OPTAB_WIDEN);
17434 if (!rtx_equal_p (out, orig_out))
17435 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
17437 return true;
17441 * For comparison with above,
17443 * movl cf,dest
17444 * movl ct,tmp
17445 * cmpl op1,op2
17446 * cmovcc tmp,dest
17448 * Size 15.
17451 if (! nonimmediate_operand (operands[2], mode))
17452 operands[2] = force_reg (mode, operands[2]);
17453 if (! nonimmediate_operand (operands[3], mode))
17454 operands[3] = force_reg (mode, operands[3]);
17456 if (! register_operand (operands[2], VOIDmode)
17457 && (mode == QImode
17458 || ! register_operand (operands[3], VOIDmode)))
17459 operands[2] = force_reg (mode, operands[2]);
17461 if (mode == QImode
17462 && ! register_operand (operands[3], VOIDmode))
17463 operands[3] = force_reg (mode, operands[3]);
17465 emit_insn (compare_seq);
17466 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17467 gen_rtx_IF_THEN_ELSE (mode,
17468 compare_op, operands[2],
17469 operands[3])));
17470 return true;
17473 /* Swap, force into registers, or otherwise massage the two operands
17474 to an sse comparison with a mask result. Thus we differ a bit from
17475 ix86_prepare_fp_compare_args which expects to produce a flags result.
17477 The DEST operand exists to help determine whether to commute commutative
17478 operators. The POP0/POP1 operands are updated in place. The new
17479 comparison code is returned, or UNKNOWN if not implementable. */
17481 static enum rtx_code
17482 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
17483 rtx *pop0, rtx *pop1)
17485 rtx tmp;
17487 switch (code)
17489 case LTGT:
17490 case UNEQ:
17491 /* We have no LTGT as an operator. We could implement it with
17492 NE & ORDERED, but this requires an extra temporary. It's
17493 not clear that it's worth it. */
17494 return UNKNOWN;
17496 case LT:
17497 case LE:
17498 case UNGT:
17499 case UNGE:
17500 /* These are supported directly. */
17501 break;
17503 case EQ:
17504 case NE:
17505 case UNORDERED:
17506 case ORDERED:
17507 /* For commutative operators, try to canonicalize the destination
17508 operand to be first in the comparison - this helps reload to
17509 avoid extra moves. */
17510 if (!dest || !rtx_equal_p (dest, *pop1))
17511 break;
17512 /* FALLTHRU */
17514 case GE:
17515 case GT:
17516 case UNLE:
17517 case UNLT:
17518 /* These are not supported directly. Swap the comparison operands
17519 to transform into something that is supported. */
17520 tmp = *pop0;
17521 *pop0 = *pop1;
17522 *pop1 = tmp;
17523 code = swap_condition (code);
17524 break;
17526 default:
17527 gcc_unreachable ();
17530 return code;
17533 /* Detect conditional moves that exactly match min/max operational
17534 semantics. Note that this is IEEE safe, as long as we don't
17535 interchange the operands.
17537 Returns FALSE if this conditional move doesn't match a MIN/MAX,
17538 and TRUE if the operation is successful and instructions are emitted. */
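/* Note (assumed hardware behaviour, stated only as an illustration): SSE
   minps/maxps are not commutative in the presence of NaNs or signed zeros -
   minss, for instance, returns its second source operand when the compare
   is unordered - so dest = (a < b) ? a : b may be emitted as MIN(a, b) but
   not as MIN(b, a).  That is why the operand order is preserved unless
   -ffinite-math-only and -funsafe-math-optimizations permit otherwise.  */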
17540 static bool
17541 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
17542 rtx cmp_op1, rtx if_true, rtx if_false)
17544 enum machine_mode mode;
17545 bool is_min;
17546 rtx tmp;
17548 if (code == LT)
17550 else if (code == UNGE)
17552 tmp = if_true;
17553 if_true = if_false;
17554 if_false = tmp;
17556 else
17557 return false;
17559 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
17560 is_min = true;
17561 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
17562 is_min = false;
17563 else
17564 return false;
17566 mode = GET_MODE (dest);
17568 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
17569 but MODE may be a vector mode and thus not appropriate. */
17570 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
17572 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
17573 rtvec v;
17575 if_true = force_reg (mode, if_true);
17576 v = gen_rtvec (2, if_true, if_false);
17577 tmp = gen_rtx_UNSPEC (mode, v, u);
17579 else
17581 code = is_min ? SMIN : SMAX;
17582 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
17585 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
17586 return true;
17589 /* Expand an sse vector comparison. Return the register with the result. */
17591 static rtx
17592 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
17593 rtx op_true, rtx op_false)
17595 enum machine_mode mode = GET_MODE (dest);
17596 rtx x;
17598 cmp_op0 = force_reg (mode, cmp_op0);
17599 if (!nonimmediate_operand (cmp_op1, mode))
17600 cmp_op1 = force_reg (mode, cmp_op1);
17602 if (optimize
17603 || reg_overlap_mentioned_p (dest, op_true)
17604 || reg_overlap_mentioned_p (dest, op_false))
17605 dest = gen_reg_rtx (mode);
17607 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
17608 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17610 return dest;
17613 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
17614 operations. This is used for both scalar and vector conditional moves. */
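/* Illustration (sketch, assuming CMP is an element-wise all-ones/all-zeros
   mask as produced by the SSE compare above): the general case below
   computes the classic branch-free blend

     dest = (cmp & op_true) | (~cmp & op_false);

   XOP targets can use a single vpcmov instead, and when one arm is zero a
   single AND or AND-NOT suffices.  */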
17616 static void
17617 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
17619 enum machine_mode mode = GET_MODE (dest);
17620 rtx t2, t3, x;
17622 if (op_false == CONST0_RTX (mode))
17624 op_true = force_reg (mode, op_true);
17625 x = gen_rtx_AND (mode, cmp, op_true);
17626 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17628 else if (op_true == CONST0_RTX (mode))
17630 op_false = force_reg (mode, op_false);
17631 x = gen_rtx_NOT (mode, cmp);
17632 x = gen_rtx_AND (mode, x, op_false);
17633 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17635 else if (TARGET_XOP)
17637 rtx pcmov = gen_rtx_SET (mode, dest,
17638 gen_rtx_IF_THEN_ELSE (mode, cmp,
17639 op_true,
17640 op_false));
17641 emit_insn (pcmov);
17643 else
17645 op_true = force_reg (mode, op_true);
17646 op_false = force_reg (mode, op_false);
17648 t2 = gen_reg_rtx (mode);
17649 if (optimize)
17650 t3 = gen_reg_rtx (mode);
17651 else
17652 t3 = dest;
17654 x = gen_rtx_AND (mode, op_true, cmp);
17655 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
17657 x = gen_rtx_NOT (mode, cmp);
17658 x = gen_rtx_AND (mode, x, op_false);
17659 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
17661 x = gen_rtx_IOR (mode, t3, t2);
17662 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17666 /* Expand a floating-point conditional move. Return true if successful. */
17668 bool
17669 ix86_expand_fp_movcc (rtx operands[])
17671 enum machine_mode mode = GET_MODE (operands[0]);
17672 enum rtx_code code = GET_CODE (operands[1]);
17673 rtx tmp, compare_op;
17674 rtx op0 = XEXP (operands[1], 0);
17675 rtx op1 = XEXP (operands[1], 1);
17677 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17679 enum machine_mode cmode;
17681 /* Since we have no cmov for SSE registers, don't force bad register
17682 allocation just to gain access to it. Deny movcc when the
17683 comparison mode doesn't match the move mode. */
17684 cmode = GET_MODE (op0);
17685 if (cmode == VOIDmode)
17686 cmode = GET_MODE (op1);
17687 if (cmode != mode)
17688 return false;
17690 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
17691 if (code == UNKNOWN)
17692 return false;
17694 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
17695 operands[2], operands[3]))
17696 return true;
17698 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
17699 operands[2], operands[3]);
17700 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
17701 return true;
17704 /* The floating point conditional move instructions don't directly
17705 support conditions resulting from a signed integer comparison. */
17707 compare_op = ix86_expand_compare (code, op0, op1);
17708 if (!fcmov_comparison_operator (compare_op, VOIDmode))
17710 tmp = gen_reg_rtx (QImode);
17711 ix86_expand_setcc (tmp, code, op0, op1);
17713 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
17716 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17717 gen_rtx_IF_THEN_ELSE (mode, compare_op,
17718 operands[2], operands[3])));
17720 return true;
17723 /* Expand a floating-point vector conditional move; a vcond operation
17724 rather than a movcc operation. */
17726 bool
17727 ix86_expand_fp_vcond (rtx operands[])
17729 enum rtx_code code = GET_CODE (operands[3]);
17730 rtx cmp;
17732 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
17733 &operands[4], &operands[5]);
17734 if (code == UNKNOWN)
17735 return false;
17737 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
17738 operands[5], operands[1], operands[2]))
17739 return true;
17741 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
17742 operands[1], operands[2]);
17743 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
17744 return true;
17747 /* Expand a signed/unsigned integral vector conditional move. */
17749 bool
17750 ix86_expand_int_vcond (rtx operands[])
17752 enum machine_mode mode = GET_MODE (operands[0]);
17753 enum rtx_code code = GET_CODE (operands[3]);
17754 bool negate = false;
17755 rtx x, cop0, cop1;
17757 cop0 = operands[4];
17758 cop1 = operands[5];
17760 /* XOP supports all of the comparisons on all vector int types. */
17761 if (!TARGET_XOP)
17763 /* Canonicalize the comparison to EQ, GT, GTU. */
17764 switch (code)
17766 case EQ:
17767 case GT:
17768 case GTU:
17769 break;
17771 case NE:
17772 case LE:
17773 case LEU:
17774 code = reverse_condition (code);
17775 negate = true;
17776 break;
17778 case GE:
17779 case GEU:
17780 code = reverse_condition (code);
17781 negate = true;
17782 /* FALLTHRU */
17784 case LT:
17785 case LTU:
17786 code = swap_condition (code);
17787 x = cop0, cop0 = cop1, cop1 = x;
17788 break;
17790 default:
17791 gcc_unreachable ();
17794 /* Only SSE4.1/SSE4.2 supports V2DImode. */
17795 if (mode == V2DImode)
17797 switch (code)
17799 case EQ:
17800 /* SSE4.1 supports EQ. */
17801 if (!TARGET_SSE4_1)
17802 return false;
17803 break;
17805 case GT:
17806 case GTU:
17807 /* SSE4.2 supports GT/GTU. */
17808 if (!TARGET_SSE4_2)
17809 return false;
17810 break;
17812 default:
17813 gcc_unreachable ();
17817 /* Unsigned parallel compare is not supported by the hardware.
17818 Play some tricks to turn this into a signed comparison
17819 against 0. */
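/* Illustration (sketch only) of the two tricks used below:

   1) For V4SImode/V2DImode, x >u y is equivalent to
        (x - 0x80000000) >s (y - 0x80000000)
      i.e. flipping the sign bit of both operands turns the unsigned
      compare into a signed one; e.g. 0xFFFFFFFF >u 1 becomes
      0x7FFFFFFF >s 0x80000001, which is true as a signed compare.

   2) For V16QImode/V8HImode, x >u y is equivalent to (x -us y) != 0,
      where -us is unsigned saturating subtraction, so the compare becomes
      an EQ against zero with the result negated.  */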
17820 if (code == GTU)
17822 cop0 = force_reg (mode, cop0);
17824 switch (mode)
17826 case V4SImode:
17827 case V2DImode:
17829 rtx t1, t2, mask;
17830 rtx (*gen_sub3) (rtx, rtx, rtx);
17832 /* Subtract (-(INT MAX) - 1) from both operands to make
17833 them signed. */
17834 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
17835 true, false);
17836 gen_sub3 = (mode == V4SImode
17837 ? gen_subv4si3 : gen_subv2di3);
17838 t1 = gen_reg_rtx (mode);
17839 emit_insn (gen_sub3 (t1, cop0, mask));
17841 t2 = gen_reg_rtx (mode);
17842 emit_insn (gen_sub3 (t2, cop1, mask));
17844 cop0 = t1;
17845 cop1 = t2;
17846 code = GT;
17848 break;
17850 case V16QImode:
17851 case V8HImode:
17852 /* Perform a parallel unsigned saturating subtraction. */
17853 x = gen_reg_rtx (mode);
17854 emit_insn (gen_rtx_SET (VOIDmode, x,
17855 gen_rtx_US_MINUS (mode, cop0, cop1)));
17857 cop0 = x;
17858 cop1 = CONST0_RTX (mode);
17859 code = EQ;
17860 negate = !negate;
17861 break;
17863 default:
17864 gcc_unreachable ();
17869 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
17870 operands[1+negate], operands[2-negate]);
17872 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
17873 operands[2-negate]);
17874 return true;
17877 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
17878 true if we should do zero extension, else sign extension. HIGH_P is
17879 true if we want the N/2 high elements, else the low elements. */
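/* Illustration (sketch only): without SSE4.1 the widening is done by
   interleaving the source with either a zero vector (zero extension) or a
   vector of per-element sign masks obtained from a GT compare of zero
   against the source (sign extension).  For instance a V8HImode element
   0xFF85 interleaved with its sign mask 0xFFFF yields the V4SImode element
   0xFFFFFF85, while interleaving with zero would yield 0x0000FF85.  */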
17881 void
17882 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17884 enum machine_mode imode = GET_MODE (operands[1]);
17885 rtx (*unpack)(rtx, rtx, rtx);
17886 rtx se, dest;
17888 switch (imode)
17890 case V16QImode:
17891 if (high_p)
17892 unpack = gen_vec_interleave_highv16qi;
17893 else
17894 unpack = gen_vec_interleave_lowv16qi;
17895 break;
17896 case V8HImode:
17897 if (high_p)
17898 unpack = gen_vec_interleave_highv8hi;
17899 else
17900 unpack = gen_vec_interleave_lowv8hi;
17901 break;
17902 case V4SImode:
17903 if (high_p)
17904 unpack = gen_vec_interleave_highv4si;
17905 else
17906 unpack = gen_vec_interleave_lowv4si;
17907 break;
17908 default:
17909 gcc_unreachable ();
17912 dest = gen_lowpart (imode, operands[0]);
17914 if (unsigned_p)
17915 se = force_reg (imode, CONST0_RTX (imode));
17916 else
17917 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
17918 operands[1], pc_rtx, pc_rtx);
17920 emit_insn (unpack (dest, operands[1], se));
17923 /* This function performs the same task as ix86_expand_sse_unpack,
17924 but with SSE4.1 instructions. */
17926 void
17927 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17929 enum machine_mode imode = GET_MODE (operands[1]);
17930 rtx (*unpack)(rtx, rtx);
17931 rtx src, dest;
17933 switch (imode)
17935 case V16QImode:
17936 if (unsigned_p)
17937 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
17938 else
17939 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
17940 break;
17941 case V8HImode:
17942 if (unsigned_p)
17943 unpack = gen_sse4_1_zero_extendv4hiv4si2;
17944 else
17945 unpack = gen_sse4_1_sign_extendv4hiv4si2;
17946 break;
17947 case V4SImode:
17948 if (unsigned_p)
17949 unpack = gen_sse4_1_zero_extendv2siv2di2;
17950 else
17951 unpack = gen_sse4_1_sign_extendv2siv2di2;
17952 break;
17953 default:
17954 gcc_unreachable ();
17957 dest = operands[0];
17958 if (high_p)
17960 /* Shift higher 8 bytes to lower 8 bytes. */
17961 src = gen_reg_rtx (imode);
17962 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
17963 gen_lowpart (V1TImode, operands[1]),
17964 GEN_INT (64)));
17966 else
17967 src = operands[1];
17969 emit_insn (unpack (dest, src));
17972 /* Expand a conditional increment or decrement using adc/sbb instructions.
17973 The default case, using setcc followed by a conditional move, can be
17974 done by generic code. */
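/* Illustration (sketch only): a conditional +/-1 such as
     dest = op2 + (a < b)
   can be emitted branch-free because an unsigned compare leaves exactly
   that 0/1 value in the carry flag, e.g. (AT&T syntax, an assumed
   encoding):

     cmpl  %ebx, %eax       -- sets CF iff a <u b
     adcl  $0, %ecx         -- ecx += CF

   The sbb form handles the decrement case, and the condition is reversed
   when it cannot be expressed through the carry flag directly.  */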
17975 bool
17976 ix86_expand_int_addcc (rtx operands[])
17978 enum rtx_code code = GET_CODE (operands[1]);
17979 rtx flags;
17980 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
17981 rtx compare_op;
17982 rtx val = const0_rtx;
17983 bool fpcmp = false;
17984 enum machine_mode mode;
17985 rtx op0 = XEXP (operands[1], 0);
17986 rtx op1 = XEXP (operands[1], 1);
17988 if (operands[3] != const1_rtx
17989 && operands[3] != constm1_rtx)
17990 return false;
17991 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17992 return false;
17993 code = GET_CODE (compare_op);
17995 flags = XEXP (compare_op, 0);
17997 if (GET_MODE (flags) == CCFPmode
17998 || GET_MODE (flags) == CCFPUmode)
18000 fpcmp = true;
18001 code = ix86_fp_compare_code_to_integer (code);
18004 if (code != LTU)
18006 val = constm1_rtx;
18007 if (fpcmp)
18008 PUT_CODE (compare_op,
18009 reverse_condition_maybe_unordered
18010 (GET_CODE (compare_op)));
18011 else
18012 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
18015 mode = GET_MODE (operands[0]);
18017 /* Construct either adc or sbb insn. */
18018 if ((code == LTU) == (operands[3] == constm1_rtx))
18020 switch (mode)
18022 case QImode:
18023 insn = gen_subqi3_carry;
18024 break;
18025 case HImode:
18026 insn = gen_subhi3_carry;
18027 break;
18028 case SImode:
18029 insn = gen_subsi3_carry;
18030 break;
18031 case DImode:
18032 insn = gen_subdi3_carry;
18033 break;
18034 default:
18035 gcc_unreachable ();
18038 else
18040 switch (mode)
18042 case QImode:
18043 insn = gen_addqi3_carry;
18044 break;
18045 case HImode:
18046 insn = gen_addhi3_carry;
18047 break;
18048 case SImode:
18049 insn = gen_addsi3_carry;
18050 break;
18051 case DImode:
18052 insn = gen_adddi3_carry;
18053 break;
18054 default:
18055 gcc_unreachable ();
18058 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
18060 return true;
18064 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
18065 works for floating-point parameters and non-offsettable memories.
18066 For pushes, it returns just stack offsets; the values will be saved
18067 in the right order. At most four parts are generated. */
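/* Illustration (sketch only): on a 32-bit target a DFmode value splits into
   two SImode parts, XFmode into three and TFmode into four; on a 64-bit
   target the value splits into a DImode part plus an SImode (XFmode) or
   DImode (TFmode) upper part.  For example the double 1.0, bit pattern
   0x3FF0000000000000, yields parts[0] = 0x00000000 and parts[1] = 0x3FF00000
   on a 32-bit little-endian target.  */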
18069 static int
18070 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
18072 int size;
18074 if (!TARGET_64BIT)
18075 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
18076 else
18077 size = (GET_MODE_SIZE (mode) + 4) / 8;
18079 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
18080 gcc_assert (size >= 2 && size <= 4);
18082 /* Optimize constant pool reference to immediates. This is used by fp
18083 moves, that force all constants to memory to allow combining. */
18084 if (MEM_P (operand) && MEM_READONLY_P (operand))
18086 rtx tmp = maybe_get_pool_constant (operand);
18087 if (tmp)
18088 operand = tmp;
18091 if (MEM_P (operand) && !offsettable_memref_p (operand))
18093 /* The only non-offsettable memories we handle are pushes. */
18094 int ok = push_operand (operand, VOIDmode);
18096 gcc_assert (ok);
18098 operand = copy_rtx (operand);
18099 PUT_MODE (operand, Pmode);
18100 parts[0] = parts[1] = parts[2] = parts[3] = operand;
18101 return size;
18104 if (GET_CODE (operand) == CONST_VECTOR)
18106 enum machine_mode imode = int_mode_for_mode (mode);
18107 /* Caution: if we looked through a constant pool memory above,
18108 the operand may actually have a different mode now. That's
18109 ok, since we want to pun this all the way back to an integer. */
18110 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
18111 gcc_assert (operand != NULL);
18112 mode = imode;
18115 if (!TARGET_64BIT)
18117 if (mode == DImode)
18118 split_di (&operand, 1, &parts[0], &parts[1]);
18119 else
18121 int i;
18123 if (REG_P (operand))
18125 gcc_assert (reload_completed);
18126 for (i = 0; i < size; i++)
18127 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
18129 else if (offsettable_memref_p (operand))
18131 operand = adjust_address (operand, SImode, 0);
18132 parts[0] = operand;
18133 for (i = 1; i < size; i++)
18134 parts[i] = adjust_address (operand, SImode, 4 * i);
18136 else if (GET_CODE (operand) == CONST_DOUBLE)
18138 REAL_VALUE_TYPE r;
18139 long l[4];
18141 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18142 switch (mode)
18144 case TFmode:
18145 real_to_target (l, &r, mode);
18146 parts[3] = gen_int_mode (l[3], SImode);
18147 parts[2] = gen_int_mode (l[2], SImode);
18148 break;
18149 case XFmode:
18150 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
18151 parts[2] = gen_int_mode (l[2], SImode);
18152 break;
18153 case DFmode:
18154 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
18155 break;
18156 default:
18157 gcc_unreachable ();
18159 parts[1] = gen_int_mode (l[1], SImode);
18160 parts[0] = gen_int_mode (l[0], SImode);
18162 else
18163 gcc_unreachable ();
18166 else
18168 if (mode == TImode)
18169 split_ti (&operand, 1, &parts[0], &parts[1]);
18170 if (mode == XFmode || mode == TFmode)
18172 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
18173 if (REG_P (operand))
18175 gcc_assert (reload_completed);
18176 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
18177 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
18179 else if (offsettable_memref_p (operand))
18181 operand = adjust_address (operand, DImode, 0);
18182 parts[0] = operand;
18183 parts[1] = adjust_address (operand, upper_mode, 8);
18185 else if (GET_CODE (operand) == CONST_DOUBLE)
18187 REAL_VALUE_TYPE r;
18188 long l[4];
18190 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18191 real_to_target (l, &r, mode);
18193 /* Do not use shift by 32 to avoid warning on 32bit systems. */
18194 if (HOST_BITS_PER_WIDE_INT >= 64)
18195 parts[0]
18196 = gen_int_mode
18197 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
18198 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
18199 DImode);
18200 else
18201 parts[0] = immed_double_const (l[0], l[1], DImode);
18203 if (upper_mode == SImode)
18204 parts[1] = gen_int_mode (l[2], SImode);
18205 else if (HOST_BITS_PER_WIDE_INT >= 64)
18206 parts[1]
18207 = gen_int_mode
18208 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
18209 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
18210 DImode);
18211 else
18212 parts[1] = immed_double_const (l[2], l[3], DImode);
18214 else
18215 gcc_unreachable ();
18219 return size;
18222 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
18223 Return false when normal moves are needed; true when all required
18224 insns have been emitted. Operands 2-4 contain the input values
18225 in the correct order; operands 5-7 contain the output values. */
18227 void
18228 ix86_split_long_move (rtx operands[])
18230 rtx part[2][4];
18231 int nparts, i, j;
18232 int push = 0;
18233 int collisions = 0;
18234 enum machine_mode mode = GET_MODE (operands[0]);
18235 bool collisionparts[4];
18237 /* The DFmode expanders may ask us to move a double.
18238 For a 64-bit target this is a single move. By hiding the fact
18239 here we simplify the i386.md splitters. */
18240 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
18242 /* Optimize constant pool reference to immediates. This is used by
18243 fp moves, that force all constants to memory to allow combining. */
18245 if (MEM_P (operands[1])
18246 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
18247 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
18248 operands[1] = get_pool_constant (XEXP (operands[1], 0));
18249 if (push_operand (operands[0], VOIDmode))
18251 operands[0] = copy_rtx (operands[0]);
18252 PUT_MODE (operands[0], Pmode);
18254 else
18255 operands[0] = gen_lowpart (DImode, operands[0]);
18256 operands[1] = gen_lowpart (DImode, operands[1]);
18257 emit_move_insn (operands[0], operands[1]);
18258 return;
18261 /* The only non-offsettable memory we handle is a push. */
18262 if (push_operand (operands[0], VOIDmode))
18263 push = 1;
18264 else
18265 gcc_assert (!MEM_P (operands[0])
18266 || offsettable_memref_p (operands[0]));
18268 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
18269 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
18271 /* When emitting a push, take care of source operands on the stack. */
18272 if (push && MEM_P (operands[1])
18273 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
18275 rtx src_base = XEXP (part[1][nparts - 1], 0);
18277 /* Compensate for the stack decrement by 4. */
18278 if (!TARGET_64BIT && nparts == 3
18279 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
18280 src_base = plus_constant (src_base, 4);
18282 /* src_base refers to the stack pointer and is
18283 automatically decremented by each emitted push. */
18284 for (i = 0; i < nparts; i++)
18285 part[1][i] = change_address (part[1][i],
18286 GET_MODE (part[1][i]), src_base);
18289 /* We need to do copy in the right order in case an address register
18290 of the source overlaps the destination. */
18291 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
18293 rtx tmp;
18295 for (i = 0; i < nparts; i++)
18297 collisionparts[i]
18298 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
18299 if (collisionparts[i])
18300 collisions++;
18303 /* Collision in the middle part can be handled by reordering. */
18304 if (collisions == 1 && nparts == 3 && collisionparts [1])
18306 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
18307 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
18309 else if (collisions == 1
18310 && nparts == 4
18311 && (collisionparts [1] || collisionparts [2]))
18313 if (collisionparts [1])
18315 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
18316 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
18318 else
18320 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
18321 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
18325 /* If there are more collisions, we can't handle it by reordering.
18326 Do an lea to the last part and use only one colliding move. */
18327 else if (collisions > 1)
18329 rtx base;
18331 collisions = 1;
18333 base = part[0][nparts - 1];
18335 /* Handle the case when the last part isn't valid for lea.
18336 Happens in 64-bit mode storing the 12-byte XFmode. */
18337 if (GET_MODE (base) != Pmode)
18338 base = gen_rtx_REG (Pmode, REGNO (base));
18340 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
18341 part[1][0] = replace_equiv_address (part[1][0], base);
18342 for (i = 1; i < nparts; i++)
18344 tmp = plus_constant (base, UNITS_PER_WORD * i);
18345 part[1][i] = replace_equiv_address (part[1][i], tmp);
18350 if (push)
18352 if (!TARGET_64BIT)
18354 if (nparts == 3)
18356 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
18357 emit_insn (gen_addsi3 (stack_pointer_rtx,
18358 stack_pointer_rtx, GEN_INT (-4)));
18359 emit_move_insn (part[0][2], part[1][2]);
18361 else if (nparts == 4)
18363 emit_move_insn (part[0][3], part[1][3]);
18364 emit_move_insn (part[0][2], part[1][2]);
18367 else
18369 /* In 64-bit mode we don't have a 32-bit push available. In case this is a
18370 register, that is OK - we just use the larger counterpart. We also
18371 retype memory - this comes from an attempt to avoid a REX prefix on
18372 moving of the second half of a TFmode value. */
18373 if (GET_MODE (part[1][1]) == SImode)
18375 switch (GET_CODE (part[1][1]))
18377 case MEM:
18378 part[1][1] = adjust_address (part[1][1], DImode, 0);
18379 break;
18381 case REG:
18382 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
18383 break;
18385 default:
18386 gcc_unreachable ();
18389 if (GET_MODE (part[1][0]) == SImode)
18390 part[1][0] = part[1][1];
18393 emit_move_insn (part[0][1], part[1][1]);
18394 emit_move_insn (part[0][0], part[1][0]);
18395 return;
18398 /* Choose correct order to not overwrite the source before it is copied. */
18399 if ((REG_P (part[0][0])
18400 && REG_P (part[1][1])
18401 && (REGNO (part[0][0]) == REGNO (part[1][1])
18402 || (nparts == 3
18403 && REGNO (part[0][0]) == REGNO (part[1][2]))
18404 || (nparts == 4
18405 && REGNO (part[0][0]) == REGNO (part[1][3]))))
18406 || (collisions > 0
18407 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
18409 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
18411 operands[2 + i] = part[0][j];
18412 operands[6 + i] = part[1][j];
18415 else
18417 for (i = 0; i < nparts; i++)
18419 operands[2 + i] = part[0][i];
18420 operands[6 + i] = part[1][i];
18424 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
18425 if (optimize_insn_for_size_p ())
18427 for (j = 0; j < nparts - 1; j++)
18428 if (CONST_INT_P (operands[6 + j])
18429 && operands[6 + j] != const0_rtx
18430 && REG_P (operands[2 + j]))
18431 for (i = j; i < nparts - 1; i++)
18432 if (CONST_INT_P (operands[7 + i])
18433 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
18434 operands[7 + i] = operands[2 + j];
18437 for (i = 0; i < nparts; i++)
18438 emit_move_insn (operands[2 + i], operands[6 + i]);
18440 return;
18443 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
18444 left shift by a constant, either using a single shift or
18445 a sequence of add instructions. */
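/* Illustration (sketch only): since x << 1 == x + x, a shift by one is
   emitted as a single add, and a small count may be expanded into repeated
   adds when count * add_cost is no more than the cost of one constant
   shift; otherwise a plain shift instruction is used.  For example, with
   count == 2 this could become

     addl %eax, %eax
     addl %eax, %eax

   instead of sall $2, %eax (assumed encodings, for illustration only).  */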
18447 static void
18448 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
18450 if (count == 1)
18452 emit_insn ((mode == DImode
18453 ? gen_addsi3
18454 : gen_adddi3) (operand, operand, operand));
18456 else if (!optimize_insn_for_size_p ()
18457 && count * ix86_cost->add <= ix86_cost->shift_const)
18459 int i;
18460 for (i=0; i<count; i++)
18462 emit_insn ((mode == DImode
18463 ? gen_addsi3
18464 : gen_adddi3) (operand, operand, operand));
18467 else
18468 emit_insn ((mode == DImode
18469 ? gen_ashlsi3
18470 : gen_ashldi3) (operand, operand, GEN_INT (count)));
18473 void
18474 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
18476 rtx low[2], high[2];
18477 int count;
18478 const int single_width = mode == DImode ? 32 : 64;
18480 if (CONST_INT_P (operands[2]))
18482 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
18483 count = INTVAL (operands[2]) & (single_width * 2 - 1);
18485 if (count >= single_width)
18487 emit_move_insn (high[0], low[1]);
18488 emit_move_insn (low[0], const0_rtx);
18490 if (count > single_width)
18491 ix86_expand_ashl_const (high[0], count - single_width, mode);
18493 else
18495 if (!rtx_equal_p (operands[0], operands[1]))
18496 emit_move_insn (operands[0], operands[1]);
18497 emit_insn ((mode == DImode
18498 ? gen_x86_shld
18499 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
18500 ix86_expand_ashl_const (low[0], count, mode);
18502 return;
18505 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
18507 if (operands[1] == const1_rtx)
18509 /* Assuming we've chosen QImode-capable registers, 1 << N
18510 can be done with two 32/64-bit shifts, no branches, no cmoves. */
18511 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
18513 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
18515 ix86_expand_clear (low[0]);
18516 ix86_expand_clear (high[0]);
18517 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
18519 d = gen_lowpart (QImode, low[0]);
18520 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
18521 s = gen_rtx_EQ (QImode, flags, const0_rtx);
18522 emit_insn (gen_rtx_SET (VOIDmode, d, s));
18524 d = gen_lowpart (QImode, high[0]);
18525 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
18526 s = gen_rtx_NE (QImode, flags, const0_rtx);
18527 emit_insn (gen_rtx_SET (VOIDmode, d, s));
18530 /* Otherwise, we can get the same results by manually performing
18531 a bit extract operation on bit 5/6, and then performing the two
18532 shifts. The two methods of getting 0/1 into low/high are exactly
18533 the same size. Avoiding the shift in the bit extract case helps
18534 pentium4 a bit; no one else seems to care much either way. */
18535 else
18537 rtx x;
18539 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
18540 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
18541 else
18542 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
18543 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
18545 emit_insn ((mode == DImode
18546 ? gen_lshrsi3
18547 : gen_lshrdi3) (high[0], high[0],
18548 GEN_INT (mode == DImode ? 5 : 6)));
18549 emit_insn ((mode == DImode
18550 ? gen_andsi3
18551 : gen_anddi3) (high[0], high[0], const1_rtx));
18552 emit_move_insn (low[0], high[0]);
18553 emit_insn ((mode == DImode
18554 ? gen_xorsi3
18555 : gen_xordi3) (low[0], low[0], const1_rtx));
18558 emit_insn ((mode == DImode
18559 ? gen_ashlsi3
18560 : gen_ashldi3) (low[0], low[0], operands[2]));
18561 emit_insn ((mode == DImode
18562 ? gen_ashlsi3
18563 : gen_ashldi3) (high[0], high[0], operands[2]));
18564 return;
18567 if (operands[1] == constm1_rtx)
18569 /* For -1 << N, we can avoid the shld instruction, because we
18570 know that we're shifting 0...31/63 ones into a -1. */
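/* Illustration (sketch only): for a 64-bit -1 shifted left by N < 32 on a
   32-bit target, the low word becomes -1 << N and the high word stays -1;
   e.g. N = 4 gives low = 0xFFFFFFF0, high = 0xFFFFFFFF.  The usual shld
   step is unnecessary because only 1-bits are shifted into an already
   all-ones high word; counts of 32 or more are fixed up by the adjustment
   patterns emitted below.  */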
18571 emit_move_insn (low[0], constm1_rtx);
18572 if (optimize_insn_for_size_p ())
18573 emit_move_insn (high[0], low[0]);
18574 else
18575 emit_move_insn (high[0], constm1_rtx);
18577 else
18579 if (!rtx_equal_p (operands[0], operands[1]))
18580 emit_move_insn (operands[0], operands[1]);
18582 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
18583 emit_insn ((mode == DImode
18584 ? gen_x86_shld
18585 : gen_x86_64_shld) (high[0], low[0], operands[2]));
18588 emit_insn ((mode == DImode
18589 ? gen_ashlsi3
18590 : gen_ashldi3) (low[0], low[0], operands[2]));
18592 if (TARGET_CMOVE && scratch)
18594 ix86_expand_clear (scratch);
18595 emit_insn ((mode == DImode
18596 ? gen_x86_shiftsi_adj_1
18597 : gen_x86_shiftdi_adj_1) (high[0], low[0], operands[2],
18598 scratch));
18600 else
18601 emit_insn ((mode == DImode
18602 ? gen_x86_shiftsi_adj_2
18603 : gen_x86_shiftdi_adj_2) (high[0], low[0], operands[2]));
18606 void
18607 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
18609 rtx low[2], high[2];
18610 int count;
18611 const int single_width = mode == DImode ? 32 : 64;
18613 if (CONST_INT_P (operands[2]))
18615 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
18616 count = INTVAL (operands[2]) & (single_width * 2 - 1);
18618 if (count == single_width * 2 - 1)
18620 emit_move_insn (high[0], high[1]);
18621 emit_insn ((mode == DImode
18622 ? gen_ashrsi3
18623 : gen_ashrdi3) (high[0], high[0],
18624 GEN_INT (single_width - 1)));
18625 emit_move_insn (low[0], high[0]);
18628 else if (count >= single_width)
18630 emit_move_insn (low[0], high[1]);
18631 emit_move_insn (high[0], low[0]);
18632 emit_insn ((mode == DImode
18633 ? gen_ashrsi3
18634 : gen_ashrdi3) (high[0], high[0],
18635 GEN_INT (single_width - 1)));
18636 if (count > single_width)
18637 emit_insn ((mode == DImode
18638 ? gen_ashrsi3
18639 : gen_ashrdi3) (low[0], low[0],
18640 GEN_INT (count - single_width)));
18642 else
18644 if (!rtx_equal_p (operands[0], operands[1]))
18645 emit_move_insn (operands[0], operands[1]);
18646 emit_insn ((mode == DImode
18647 ? gen_x86_shrd
18648 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
18649 emit_insn ((mode == DImode
18650 ? gen_ashrsi3
18651 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
18654 else
18656 if (!rtx_equal_p (operands[0], operands[1]))
18657 emit_move_insn (operands[0], operands[1]);
18659 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
18661 emit_insn ((mode == DImode
18662 ? gen_x86_shrd
18663 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
18664 emit_insn ((mode == DImode
18665 ? gen_ashrsi3
18666 : gen_ashrdi3) (high[0], high[0], operands[2]));
18668 if (TARGET_CMOVE && scratch)
18670 emit_move_insn (scratch, high[0]);
18671 emit_insn ((mode == DImode
18672 ? gen_ashrsi3
18673 : gen_ashrdi3) (scratch, scratch,
18674 GEN_INT (single_width - 1)));
18675 emit_insn ((mode == DImode
18676 ? gen_x86_shiftsi_adj_1
18677 : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
18678 scratch));
18680 else
18681 emit_insn ((mode == DImode
18682 ? gen_x86_shiftsi_adj_3
18683 : gen_x86_shiftdi_adj_3) (low[0], high[0], operands[2]));
18687 void
18688 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
18690 rtx low[2], high[2];
18691 int count;
18692 const int single_width = mode == DImode ? 32 : 64;
18694 if (CONST_INT_P (operands[2]))
18696 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
18697 count = INTVAL (operands[2]) & (single_width * 2 - 1);
18699 if (count >= single_width)
18701 emit_move_insn (low[0], high[1]);
18702 ix86_expand_clear (high[0]);
18704 if (count > single_width)
18705 emit_insn ((mode == DImode
18706 ? gen_lshrsi3
18707 : gen_lshrdi3) (low[0], low[0],
18708 GEN_INT (count - single_width)));
18710 else
18712 if (!rtx_equal_p (operands[0], operands[1]))
18713 emit_move_insn (operands[0], operands[1]);
18714 emit_insn ((mode == DImode
18715 ? gen_x86_shrd
18716 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
18717 emit_insn ((mode == DImode
18718 ? gen_lshrsi3
18719 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
18722 else
18724 if (!rtx_equal_p (operands[0], operands[1]))
18725 emit_move_insn (operands[0], operands[1]);
18727 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
18729 emit_insn ((mode == DImode
18730 ? gen_x86_shrd
18731 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
18732 emit_insn ((mode == DImode
18733 ? gen_lshrsi3
18734 : gen_lshrdi3) (high[0], high[0], operands[2]));
18736 /* Heh. By reversing the arguments, we can reuse this pattern. */
18737 if (TARGET_CMOVE && scratch)
18739 ix86_expand_clear (scratch);
18740 emit_insn ((mode == DImode
18741 ? gen_x86_shiftsi_adj_1
18742 : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
18743 scratch));
18745 else
18746 emit_insn ((mode == DImode
18747 ? gen_x86_shiftsi_adj_2
18748 : gen_x86_shiftdi_adj_2) (low[0], high[0], operands[2]));
18752 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
18753 static void
18754 predict_jump (int prob)
18756 rtx insn = get_last_insn ();
18757 gcc_assert (JUMP_P (insn));
18758 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
18761 /* Helper function for the string operations below. Test whether VARIABLE
18762 is aligned to VALUE bytes; if so, jump to the label that is returned. */
18763 static rtx
18764 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
18766 rtx label = gen_label_rtx ();
18767 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
18768 if (GET_MODE (variable) == DImode)
18769 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
18770 else
18771 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
18772 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
18773 1, label);
18774 if (epilogue)
18775 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18776 else
18777 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18778 return label;
18781 /* Decrease COUNTREG by VALUE. */
18782 static void
18783 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
18785 if (GET_MODE (countreg) == DImode)
18786 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
18787 else
18788 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
18791 /* Zero-extend the possibly SImode EXP to a Pmode register. */
18792 rtx
18793 ix86_zero_extend_to_Pmode (rtx exp)
18795 rtx r;
18796 if (GET_MODE (exp) == VOIDmode)
18797 return force_reg (Pmode, exp);
18798 if (GET_MODE (exp) == Pmode)
18799 return copy_to_mode_reg (Pmode, exp);
18800 r = gen_reg_rtx (Pmode);
18801 emit_insn (gen_zero_extendsidi2 (r, exp));
18802 return r;
18805 /* Divide COUNTREG by SCALE. */
18806 static rtx
18807 scale_counter (rtx countreg, int scale)
18809 rtx sc;
18811 if (scale == 1)
18812 return countreg;
18813 if (CONST_INT_P (countreg))
18814 return GEN_INT (INTVAL (countreg) / scale);
18815 gcc_assert (REG_P (countreg));
18817 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
18818 GEN_INT (exact_log2 (scale)),
18819 NULL, 1, OPTAB_DIRECT);
18820 return sc;
18823 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
18824 DImode for constant loop counts. */
18826 static enum machine_mode
18827 counter_mode (rtx count_exp)
18829 if (GET_MODE (count_exp) != VOIDmode)
18830 return GET_MODE (count_exp);
18831 if (!CONST_INT_P (count_exp))
18832 return Pmode;
18833 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
18834 return DImode;
18835 return SImode;
18838 /* When SRCPTR is non-NULL, output a simple loop that moves the memory
18839 pointed to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
18840 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output
18841 the equivalent loop that sets memory to VALUE (assumed to be in MODE).
18843 The size is rounded down to a whole number of chunks moved at once.
18844 SRCMEM and DESTMEM provide the MEM rtx to feed proper aliasing info. */
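/* Illustration (sketch only, ignoring the unrolling and alias details): the
   emitted RTL behaves roughly like

     size = count & ~(piece - 1);      -- piece = chunk size * unroll
     for (iter = 0; iter < size; iter += piece)
       copy (or store) one piece at dest + iter [and src + iter];
     dest += iter;  src += iter;       -- advance past the processed part

   so any remainder smaller than one piece is left for the epilogue code.  */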
18847 static void
18848 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
18849 rtx destptr, rtx srcptr, rtx value,
18850 rtx count, enum machine_mode mode, int unroll,
18851 int expected_size)
18853 rtx out_label, top_label, iter, tmp;
18854 enum machine_mode iter_mode = counter_mode (count);
18855 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
18856 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
18857 rtx size;
18858 rtx x_addr;
18859 rtx y_addr;
18860 int i;
18862 top_label = gen_label_rtx ();
18863 out_label = gen_label_rtx ();
18864 iter = gen_reg_rtx (iter_mode);
18866 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
18867 NULL, 1, OPTAB_DIRECT);
18868 /* Those two should combine. */
18869 if (piece_size == const1_rtx)
18871 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
18872 true, out_label);
18873 predict_jump (REG_BR_PROB_BASE * 10 / 100);
18875 emit_move_insn (iter, const0_rtx);
18877 emit_label (top_label);
18879 tmp = convert_modes (Pmode, iter_mode, iter, true);
18880 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
18881 destmem = change_address (destmem, mode, x_addr);
18883 if (srcmem)
18885 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
18886 srcmem = change_address (srcmem, mode, y_addr);
18888 /* When unrolling for chips that reorder memory reads and writes,
18889 we can save registers by using a single temporary.
18890 Using 4 temporaries is also overkill in 32-bit mode. */
18891 if (!TARGET_64BIT && 0)
18893 for (i = 0; i < unroll; i++)
18895 if (i)
18897 destmem =
18898 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18899 srcmem =
18900 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
18902 emit_move_insn (destmem, srcmem);
18905 else
18907 rtx tmpreg[4];
18908 gcc_assert (unroll <= 4);
18909 for (i = 0; i < unroll; i++)
18911 tmpreg[i] = gen_reg_rtx (mode);
18912 if (i)
18914 srcmem =
18915 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
18917 emit_move_insn (tmpreg[i], srcmem);
18919 for (i = 0; i < unroll; i++)
18921 if (i)
18923 destmem =
18924 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18926 emit_move_insn (destmem, tmpreg[i]);
18930 else
18931 for (i = 0; i < unroll; i++)
18933 if (i)
18934 destmem =
18935 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18936 emit_move_insn (destmem, value);
18939 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
18940 true, OPTAB_LIB_WIDEN);
18941 if (tmp != iter)
18942 emit_move_insn (iter, tmp);
18944 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
18945 true, top_label);
18946 if (expected_size != -1)
18948 expected_size /= GET_MODE_SIZE (mode) * unroll;
18949 if (expected_size == 0)
18950 predict_jump (0);
18951 else if (expected_size > REG_BR_PROB_BASE)
18952 predict_jump (REG_BR_PROB_BASE - 1);
18953 else
18954 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
18956 else
18957 predict_jump (REG_BR_PROB_BASE * 80 / 100);
18958 iter = ix86_zero_extend_to_Pmode (iter);
18959 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
18960 true, OPTAB_LIB_WIDEN);
18961 if (tmp != destptr)
18962 emit_move_insn (destptr, tmp);
18963 if (srcptr)
18965 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
18966 true, OPTAB_LIB_WIDEN);
18967 if (tmp != srcptr)
18968 emit_move_insn (srcptr, tmp);
18970 emit_label (out_label);
18973 /* Output "rep; mov" instruction.
18974 Arguments have same meaning as for previous function */
18975 static void
18976 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
18977 rtx destptr, rtx srcptr,
18978 rtx count,
18979 enum machine_mode mode)
18981 rtx destexp;
18982 rtx srcexp;
18983 rtx countreg;
18985 /* If the size is known and is a multiple of 4, it is shorter to use rep movs in SImode chunks. */
18986 if (mode == QImode && CONST_INT_P (count)
18987 && !(INTVAL (count) & 3))
18988 mode = SImode;
18990 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
18991 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
18992 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
18993 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
18994 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
18995 if (mode != QImode)
18997 destexp = gen_rtx_ASHIFT (Pmode, countreg,
18998 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
18999 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19000 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
19001 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19002 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
19004 else
19006 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19007 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
19009 if (CONST_INT_P (count))
19011 count = GEN_INT (INTVAL (count)
19012 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19013 destmem = shallow_copy_rtx (destmem);
19014 srcmem = shallow_copy_rtx (srcmem);
19015 set_mem_size (destmem, count);
19016 set_mem_size (srcmem, count);
19018 else
19020 if (MEM_SIZE (destmem))
19021 set_mem_size (destmem, NULL_RTX);
19022 if (MEM_SIZE (srcmem))
19023 set_mem_size (srcmem, NULL_RTX);
19025 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
19026 destexp, srcexp));
19029 /* Output "rep; stos" instruction.
19030 Arguments have same meaning as for previous function */
19031 static void
19032 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
19033 rtx count, enum machine_mode mode,
19034 rtx orig_value)
19036 rtx destexp;
19037 rtx countreg;
19039 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19040 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19041 value = force_reg (mode, gen_lowpart (mode, value));
19042 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19043 if (mode != QImode)
19045 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19046 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19047 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19049 else
19050 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19051 if (orig_value == const0_rtx && CONST_INT_P (count))
19053 count = GEN_INT (INTVAL (count)
19054 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19055 destmem = shallow_copy_rtx (destmem);
19056 set_mem_size (destmem, count);
19058 else if (MEM_SIZE (destmem))
19059 set_mem_size (destmem, NULL_RTX);
19060 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
19063 static void
19064 emit_strmov (rtx destmem, rtx srcmem,
19065 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
19067 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
19068 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
19069 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19072 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
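/* Illustration (sketch only): for a compile-time COUNT the epilogue simply
   peels off chunks according to the bits of the remainder, e.g.
   count & (max_size - 1) == 13 copies 8 + 4 + 1 bytes with three plain
   moves and no loop; for an unknown COUNT it falls back to either a byte
   loop or a series of alignment-style bit tests.  */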
19073 static void
19074 expand_movmem_epilogue (rtx destmem, rtx srcmem,
19075 rtx destptr, rtx srcptr, rtx count, int max_size)
19077 rtx src, dest;
19078 if (CONST_INT_P (count))
19080 HOST_WIDE_INT countval = INTVAL (count);
19081 int offset = 0;
19083 if ((countval & 0x10) && max_size > 16)
19085 if (TARGET_64BIT)
19087 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19088 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
19090 else
19091 gcc_unreachable ();
19092 offset += 16;
19094 if ((countval & 0x08) && max_size > 8)
19096 if (TARGET_64BIT)
19097 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19098 else
19100 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19101 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
19103 offset += 8;
19105 if ((countval & 0x04) && max_size > 4)
19107 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19108 offset += 4;
19110 if ((countval & 0x02) && max_size > 2)
19112 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
19113 offset += 2;
19115 if ((countval & 0x01) && max_size > 1)
19117 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
19118 offset += 1;
19120 return;
19122 if (max_size > 8)
19124 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
19125 count, 1, OPTAB_DIRECT);
19126 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
19127 count, QImode, 1, 4);
19128 return;
19131 /* When single-instruction stringops are available, we can cheaply advance the
19132 dest and src pointers. Otherwise we save code size by maintaining an offset
19133 (zero is readily available from the preceding rep operation) and using x86 addressing modes.
19135 if (TARGET_SINGLE_STRINGOP)
19137 if (max_size > 4)
19139 rtx label = ix86_expand_aligntest (count, 4, true);
19140 src = change_address (srcmem, SImode, srcptr);
19141 dest = change_address (destmem, SImode, destptr);
19142 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19143 emit_label (label);
19144 LABEL_NUSES (label) = 1;
19146 if (max_size > 2)
19148 rtx label = ix86_expand_aligntest (count, 2, true);
19149 src = change_address (srcmem, HImode, srcptr);
19150 dest = change_address (destmem, HImode, destptr);
19151 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19152 emit_label (label);
19153 LABEL_NUSES (label) = 1;
19155 if (max_size > 1)
19157 rtx label = ix86_expand_aligntest (count, 1, true);
19158 src = change_address (srcmem, QImode, srcptr);
19159 dest = change_address (destmem, QImode, destptr);
19160 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19161 emit_label (label);
19162 LABEL_NUSES (label) = 1;
19165 else
19167 rtx offset = force_reg (Pmode, const0_rtx);
19168 rtx tmp;
19170 if (max_size > 4)
19172 rtx label = ix86_expand_aligntest (count, 4, true);
19173 src = change_address (srcmem, SImode, srcptr);
19174 dest = change_address (destmem, SImode, destptr);
19175 emit_move_insn (dest, src);
19176 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
19177 true, OPTAB_LIB_WIDEN);
19178 if (tmp != offset)
19179 emit_move_insn (offset, tmp);
19180 emit_label (label);
19181 LABEL_NUSES (label) = 1;
19183 if (max_size > 2)
19185 rtx label = ix86_expand_aligntest (count, 2, true);
19186 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19187 src = change_address (srcmem, HImode, tmp);
19188 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19189 dest = change_address (destmem, HImode, tmp);
19190 emit_move_insn (dest, src);
19191 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
19192 true, OPTAB_LIB_WIDEN);
19193 if (tmp != offset)
19194 emit_move_insn (offset, tmp);
19195 emit_label (label);
19196 LABEL_NUSES (label) = 1;
19198 if (max_size > 1)
19200 rtx label = ix86_expand_aligntest (count, 1, true);
19201 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19202 src = change_address (srcmem, QImode, tmp);
19203 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19204 dest = change_address (destmem, QImode, tmp);
19205 emit_move_insn (dest, src);
19206 emit_label (label);
19207 LABEL_NUSES (label) = 1;
19212 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
19213 static void
19214 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
19215 rtx count, int max_size)
19217 count =
19218 expand_simple_binop (counter_mode (count), AND, count,
19219 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
19220 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
19221 gen_lowpart (QImode, value), count, QImode,
19222 1, max_size / 2);
19225 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
19226 static void
19227 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
19229 rtx dest;
19231 if (CONST_INT_P (count))
19233 HOST_WIDE_INT countval = INTVAL (count);
19234 int offset = 0;
19236 if ((countval & 0x10) && max_size > 16)
19238 if (TARGET_64BIT)
19240 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
19241 emit_insn (gen_strset (destptr, dest, value));
19242 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
19243 emit_insn (gen_strset (destptr, dest, value));
19245 else
19246 gcc_unreachable ();
19247 offset += 16;
19249 if ((countval & 0x08) && max_size > 8)
19251 if (TARGET_64BIT)
19253 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
19254 emit_insn (gen_strset (destptr, dest, value));
19256 else
19258 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
19259 emit_insn (gen_strset (destptr, dest, value));
19260 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
19261 emit_insn (gen_strset (destptr, dest, value));
19263 offset += 8;
19265 if ((countval & 0x04) && max_size > 4)
19267 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
19268 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
19269 offset += 4;
19271 if ((countval & 0x02) && max_size > 2)
19273 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
19274 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
19275 offset += 2;
19277 if ((countval & 0x01) && max_size > 1)
19279 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
19280 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
19281 offset += 1;
19283 return;
19285 if (max_size > 32)
19287 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
19288 return;
19290 if (max_size > 16)
19292 rtx label = ix86_expand_aligntest (count, 16, true);
19293 if (TARGET_64BIT)
19295 dest = change_address (destmem, DImode, destptr);
19296 emit_insn (gen_strset (destptr, dest, value));
19297 emit_insn (gen_strset (destptr, dest, value));
19299 else
19301 dest = change_address (destmem, SImode, destptr);
19302 emit_insn (gen_strset (destptr, dest, value));
19303 emit_insn (gen_strset (destptr, dest, value));
19304 emit_insn (gen_strset (destptr, dest, value));
19305 emit_insn (gen_strset (destptr, dest, value));
19307 emit_label (label);
19308 LABEL_NUSES (label) = 1;
19310 if (max_size > 8)
19312 rtx label = ix86_expand_aligntest (count, 8, true);
19313 if (TARGET_64BIT)
19315 dest = change_address (destmem, DImode, destptr);
19316 emit_insn (gen_strset (destptr, dest, value));
19318 else
19320 dest = change_address (destmem, SImode, destptr);
19321 emit_insn (gen_strset (destptr, dest, value));
19322 emit_insn (gen_strset (destptr, dest, value));
19324 emit_label (label);
19325 LABEL_NUSES (label) = 1;
19327 if (max_size > 4)
19329 rtx label = ix86_expand_aligntest (count, 4, true);
19330 dest = change_address (destmem, SImode, destptr);
19331 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
19332 emit_label (label);
19333 LABEL_NUSES (label) = 1;
19335 if (max_size > 2)
19337 rtx label = ix86_expand_aligntest (count, 2, true);
19338 dest = change_address (destmem, HImode, destptr);
19339 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
19340 emit_label (label);
19341 LABEL_NUSES (label) = 1;
19343 if (max_size > 1)
19345 rtx label = ix86_expand_aligntest (count, 1, true);
19346 dest = change_address (destmem, QImode, destptr);
19347 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
19348 emit_label (label);
19349 LABEL_NUSES (label) = 1;
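/* Illustrative example (not from the original source): with a constant
   count of 13 (binary 1101) and max_size of 16 on a 64-bit target, the
   constant branch above emits one 8-byte store, one 4-byte store and one
   1-byte store, each writing the promoted VALUE (or its low part in the
   matching mode), for 13 bytes total with no branches.  */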
19353 /* Copy enough bytes from SRC to DEST to align DEST, which is known to be
19354 aligned to ALIGN, up to DESIRED_ALIGNMENT. */
19355 static void
19356 expand_movmem_prologue (rtx destmem, rtx srcmem,
19357 rtx destptr, rtx srcptr, rtx count,
19358 int align, int desired_alignment)
19360 if (align <= 1 && desired_alignment > 1)
19362 rtx label = ix86_expand_aligntest (destptr, 1, false);
19363 srcmem = change_address (srcmem, QImode, srcptr);
19364 destmem = change_address (destmem, QImode, destptr);
19365 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
19366 ix86_adjust_counter (count, 1);
19367 emit_label (label);
19368 LABEL_NUSES (label) = 1;
19370 if (align <= 2 && desired_alignment > 2)
19372 rtx label = ix86_expand_aligntest (destptr, 2, false);
19373 srcmem = change_address (srcmem, HImode, srcptr);
19374 destmem = change_address (destmem, HImode, destptr);
19375 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
19376 ix86_adjust_counter (count, 2);
19377 emit_label (label);
19378 LABEL_NUSES (label) = 1;
19380 if (align <= 4 && desired_alignment > 4)
19382 rtx label = ix86_expand_aligntest (destptr, 4, false);
19383 srcmem = change_address (srcmem, SImode, srcptr);
19384 destmem = change_address (destmem, SImode, destptr);
19385 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
19386 ix86_adjust_counter (count, 4);
19387 emit_label (label);
19388 LABEL_NUSES (label) = 1;
19390 gcc_assert (desired_alignment <= 8);
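/* Sketch of the emitted prologue, assuming ALIGN == 1 and
   DESIRED_ALIGNMENT == 4 (illustrative only): test bit 0 of DESTPTR and,
   if set, copy one byte and decrement COUNT; then test bit 1 and, if set,
   copy two bytes and decrement COUNT by 2.  Each copy is skipped, via the
   label produced by ix86_expand_aligntest, when the corresponding address
   bit is already clear.  After both conditional copies DEST is 4-byte
   aligned.  */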
19393 /* Copy enough bytes from SRC to DST to align DST to DESIRED_ALIGN.
19394 ALIGN_BYTES is how many bytes need to be copied to reach that alignment. */
19395 static rtx
19396 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
19397 int desired_align, int align_bytes)
19399 rtx src = *srcp;
19400 rtx src_size, dst_size;
19401 int off = 0;
19402 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
19403 if (src_align_bytes >= 0)
19404 src_align_bytes = desired_align - src_align_bytes;
19405 src_size = MEM_SIZE (src);
19406 dst_size = MEM_SIZE (dst);
19407 if (align_bytes & 1)
19409 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
19410 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
19411 off = 1;
19412 emit_insn (gen_strmov (destreg, dst, srcreg, src));
19414 if (align_bytes & 2)
19416 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
19417 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
19418 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
19419 set_mem_align (dst, 2 * BITS_PER_UNIT);
19420 if (src_align_bytes >= 0
19421 && (src_align_bytes & 1) == (align_bytes & 1)
19422 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
19423 set_mem_align (src, 2 * BITS_PER_UNIT);
19424 off = 2;
19425 emit_insn (gen_strmov (destreg, dst, srcreg, src));
19427 if (align_bytes & 4)
19429 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
19430 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
19431 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
19432 set_mem_align (dst, 4 * BITS_PER_UNIT);
19433 if (src_align_bytes >= 0)
19435 unsigned int src_align = 0;
19436 if ((src_align_bytes & 3) == (align_bytes & 3))
19437 src_align = 4;
19438 else if ((src_align_bytes & 1) == (align_bytes & 1))
19439 src_align = 2;
19440 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
19441 set_mem_align (src, src_align * BITS_PER_UNIT);
19443 off = 4;
19444 emit_insn (gen_strmov (destreg, dst, srcreg, src));
19446 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
19447 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
19448 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
19449 set_mem_align (dst, desired_align * BITS_PER_UNIT);
19450 if (src_align_bytes >= 0)
19452 unsigned int src_align = 0;
19453 if ((src_align_bytes & 7) == (align_bytes & 7))
19454 src_align = 8;
19455 else if ((src_align_bytes & 3) == (align_bytes & 3))
19456 src_align = 4;
19457 else if ((src_align_bytes & 1) == (align_bytes & 1))
19458 src_align = 2;
19459 if (src_align > (unsigned int) desired_align)
19460 src_align = desired_align;
19461 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
19462 set_mem_align (src, src_align * BITS_PER_UNIT);
19464 if (dst_size)
19465 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
19466 if (src_size)
19467 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
19468 *srcp = src;
19469 return dst;
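/* Worked example (illustrative): if DST starts 5 bytes past an 8-byte
   boundary and DESIRED_ALIGN is 8, then ALIGN_BYTES is 3 (binary 011),
   so the code above emits one QImode move followed by one HImode move,
   after which DST is 8-byte aligned and the recorded sizes of SRC and
   DST are reduced by 3.  */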
19472 /* Store enough bytes at DEST to align DEST, which is known to be aligned
19473 to ALIGN, up to DESIRED_ALIGNMENT. */
19474 static void
19475 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
19476 int align, int desired_alignment)
19478 if (align <= 1 && desired_alignment > 1)
19480 rtx label = ix86_expand_aligntest (destptr, 1, false);
19481 destmem = change_address (destmem, QImode, destptr);
19482 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
19483 ix86_adjust_counter (count, 1);
19484 emit_label (label);
19485 LABEL_NUSES (label) = 1;
19487 if (align <= 2 && desired_alignment > 2)
19489 rtx label = ix86_expand_aligntest (destptr, 2, false);
19490 destmem = change_address (destmem, HImode, destptr);
19491 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
19492 ix86_adjust_counter (count, 2);
19493 emit_label (label);
19494 LABEL_NUSES (label) = 1;
19496 if (align <= 4 && desired_alignment > 4)
19498 rtx label = ix86_expand_aligntest (destptr, 4, false);
19499 destmem = change_address (destmem, SImode, destptr);
19500 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
19501 ix86_adjust_counter (count, 4);
19502 emit_label (label);
19503 LABEL_NUSES (label) = 1;
19505 gcc_assert (desired_alignment <= 8);
19508 /* Store enough bytes at DST to align DST, which is known to be aligned
19509 to ALIGN, up to DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
19510 static rtx
19511 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
19512 int desired_align, int align_bytes)
19514 int off = 0;
19515 rtx dst_size = MEM_SIZE (dst);
19516 if (align_bytes & 1)
19518 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
19519 off = 1;
19520 emit_insn (gen_strset (destreg, dst,
19521 gen_lowpart (QImode, value)));
19523 if (align_bytes & 2)
19525 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
19526 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
19527 set_mem_align (dst, 2 * BITS_PER_UNIT);
19528 off = 2;
19529 emit_insn (gen_strset (destreg, dst,
19530 gen_lowpart (HImode, value)));
19532 if (align_bytes & 4)
19534 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
19535 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
19536 set_mem_align (dst, 4 * BITS_PER_UNIT);
19537 off = 4;
19538 emit_insn (gen_strset (destreg, dst,
19539 gen_lowpart (SImode, value)));
19541 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
19542 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
19543 set_mem_align (dst, desired_align * BITS_PER_UNIT);
19544 if (dst_size)
19545 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
19546 return dst;
19549 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
19550 static enum stringop_alg
19551 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
19552 int *dynamic_check)
19554 const struct stringop_algs * algs;
19555 bool optimize_for_speed;
19556 /* Algorithms using the rep prefix want at least edi and ecx;
19557 additionally, memset wants eax and memcpy wants esi. Don't
19558 consider such algorithms if the user has appropriated those
19559 registers for their own purposes. */
19560 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
19561 || (memset
19562 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
19564 #define ALG_USABLE_P(alg) (rep_prefix_usable \
19565 || (alg != rep_prefix_1_byte \
19566 && alg != rep_prefix_4_byte \
19567 && alg != rep_prefix_8_byte))
19568 const struct processor_costs *cost;
19570 /* Even if the string operation call is cold, we still might spend a lot
19571 of time processing large blocks. */
19572 if (optimize_function_for_size_p (cfun)
19573 || (optimize_insn_for_size_p ()
19574 && expected_size != -1 && expected_size < 256))
19575 optimize_for_speed = false;
19576 else
19577 optimize_for_speed = true;
19579 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
19581 *dynamic_check = -1;
19582 if (memset)
19583 algs = &cost->memset[TARGET_64BIT != 0];
19584 else
19585 algs = &cost->memcpy[TARGET_64BIT != 0];
19586 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
19587 return stringop_alg;
19588 /* rep; movq or rep; movl is the smallest variant. */
19589 else if (!optimize_for_speed)
19591 if (!count || (count & 3))
19592 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
19593 else
19594 return rep_prefix_usable ? rep_prefix_4_byte : loop;
19596 /* Very tiny blocks are best handled via the loop; REP is expensive to set up. */
19598 else if (expected_size != -1 && expected_size < 4)
19599 return loop_1_byte;
19600 else if (expected_size != -1)
19602 unsigned int i;
19603 enum stringop_alg alg = libcall;
19604 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
19606 /* We get here if the algorithms that were not libcall-based
19607 were rep-prefix based and we are unable to use rep prefixes
19608 based on global register usage. Break out of the loop and
19609 use the heuristic below. */
19610 if (algs->size[i].max == 0)
19611 break;
19612 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
19614 enum stringop_alg candidate = algs->size[i].alg;
19616 if (candidate != libcall && ALG_USABLE_P (candidate))
19617 alg = candidate;
19618 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
19619 last non-libcall inline algorithm. */
19620 if (TARGET_INLINE_ALL_STRINGOPS)
19622 /* When the current size is best to be copied by a libcall,
19623 but we are still forced to inline, run the heuristic below
19624 that will pick code for medium sized blocks. */
19625 if (alg != libcall)
19626 return alg;
19627 break;
19629 else if (ALG_USABLE_P (candidate))
19630 return candidate;
19633 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
19635 /* When asked to inline the call anyway, try to pick a meaningful choice.
19636 We look for the maximal size of block that is faster to copy by hand and
19637 take blocks of at most that size, guessing that the average size will
19638 be roughly half of the block.
19640 If this turns out to be bad, we might simply specify the preferred
19641 choice in ix86_costs. */
19642 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19643 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
19645 int max = -1;
19646 enum stringop_alg alg;
19647 int i;
19648 bool any_alg_usable_p = true;
19650 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
19652 enum stringop_alg candidate = algs->size[i].alg;
19653 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
19655 if (candidate != libcall && candidate
19656 && ALG_USABLE_P (candidate))
19657 max = algs->size[i].max;
19659 /* If there aren't any usable algorithms, then recursing on
19660 smaller sizes isn't going to find anything. Just return the
19661 simple byte-at-a-time copy loop. */
19662 if (!any_alg_usable_p)
19664 /* Pick something reasonable. */
19665 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19666 *dynamic_check = 128;
19667 return loop_1_byte;
19669 if (max == -1)
19670 max = 4096;
19671 alg = decide_alg (count, max / 2, memset, dynamic_check);
19672 gcc_assert (*dynamic_check == -1);
19673 gcc_assert (alg != libcall);
19674 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19675 *dynamic_check = max;
19676 return alg;
19678 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
19679 #undef ALG_USABLE_P
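/* Illustrative decision walk (hypothetical cost table, not the real one):
   if the per-size table were {{256, unrolled_loop}, {8192, rep_prefix_4_byte},
   {-1, libcall}} and EXPECTED_SIZE were 1000, the loop above would stop at
   the first entry whose max covers 1000 and return rep_prefix_4_byte,
   provided the rep prefix is usable.  With an unknown size and inlining
   forced, the function instead recurses with max/2 as the expected size
   and, under -minline-stringops-dynamically, arranges a runtime size check
   via *DYNAMIC_CHECK.  */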
19682 /* Decide on alignment. We know that the operand is already aligned to ALIGN
19683 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
19684 static int
19685 decide_alignment (int align,
19686 enum stringop_alg alg,
19687 int expected_size)
19689 int desired_align = 0;
19690 switch (alg)
19692 case no_stringop:
19693 gcc_unreachable ();
19694 case loop:
19695 case unrolled_loop:
19696 desired_align = GET_MODE_SIZE (Pmode);
19697 break;
19698 case rep_prefix_8_byte:
19699 desired_align = 8;
19700 break;
19701 case rep_prefix_4_byte:
19702 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
19703 copying a whole cacheline at once. */
19704 if (TARGET_PENTIUMPRO)
19705 desired_align = 8;
19706 else
19707 desired_align = 4;
19708 break;
19709 case rep_prefix_1_byte:
19710 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
19711 copying a whole cacheline at once. */
19712 if (TARGET_PENTIUMPRO)
19713 desired_align = 8;
19714 else
19715 desired_align = 1;
19716 break;
19717 case loop_1_byte:
19718 desired_align = 1;
19719 break;
19720 case libcall:
19721 return 0;
19724 if (optimize_size)
19725 desired_align = 1;
19726 if (desired_align < align)
19727 desired_align = align;
19728 if (expected_size != -1 && expected_size < 4)
19729 desired_align = align;
19730 return desired_align;
19733 /* Return the smallest power of 2 greater than VAL. */
19734 static int
19735 smallest_pow2_greater_than (int val)
19737 int ret = 1;
19738 while (ret <= val)
19739 ret <<= 1;
19740 return ret;
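/* Note that the result is strictly greater than VAL, e.g.
   smallest_pow2_greater_than (7) == 8 and smallest_pow2_greater_than (8) == 16.  */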
19743 /* Expand string move (memcpy) operation. Use i386 string operations when
19744 profitable. expand_setmem contains similar code. The code depends upon
19745 architecture, block size and alignment, but always has the same
19746 overall structure:
19748 1) Prologue guard: a conditional that jumps to the epilogue for small
19749 blocks that can be handled by the epilogue alone. This is faster but
19750 also needed for correctness, since the prologue assumes the block is larger
19751 than the desired alignment.
19753 An optional dynamic check for size and a libcall for large
19754 blocks are emitted here too, with -minline-stringops-dynamically.
19756 2) Prologue: copy the first few bytes in order to get the destination
19757 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less than
19758 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
19759 We emit either a jump tree for power-of-two sized blocks, or a byte loop.
19761 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
19762 with the specified algorithm.
19764 4) Epilogue: code copying the tail of the block that is too small to be
19765 handled by the main body (or up to the size guarded by the prologue guard). */
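/* Rough shape of the emitted code for a looping algorithm (illustrative
   sketch only; register choices and labels are arbitrary):

        cmp     $epilogue_size_needed, %rcx
        jb      .Lepilogue                      ; 1) prologue guard
        ...conditional byte/word/dword copies...; 2) alignment prologue
   .Lloop:
        ...copy size_needed bytes...            ; 3) main body
        jne     .Lloop
   .Lepilogue:
        ...copy count & (epilogue_size_needed - 1) bytes...  ; 4) epilogue  */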
19767 bool
19768 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
19769 rtx expected_align_exp, rtx expected_size_exp)
19771 rtx destreg;
19772 rtx srcreg;
19773 rtx label = NULL;
19774 rtx tmp;
19775 rtx jump_around_label = NULL;
19776 HOST_WIDE_INT align = 1;
19777 unsigned HOST_WIDE_INT count = 0;
19778 HOST_WIDE_INT expected_size = -1;
19779 int size_needed = 0, epilogue_size_needed;
19780 int desired_align = 0, align_bytes = 0;
19781 enum stringop_alg alg;
19782 int dynamic_check;
19783 bool need_zero_guard = false;
19785 if (CONST_INT_P (align_exp))
19786 align = INTVAL (align_exp);
19787 /* i386 can do misaligned access at reasonably increased cost. */
19788 if (CONST_INT_P (expected_align_exp)
19789 && INTVAL (expected_align_exp) > align)
19790 align = INTVAL (expected_align_exp);
19791 /* ALIGN is the minimum of destination and source alignment, but we care here
19792 just about destination alignment. */
19793 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
19794 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
19796 if (CONST_INT_P (count_exp))
19797 count = expected_size = INTVAL (count_exp);
19798 if (CONST_INT_P (expected_size_exp) && count == 0)
19799 expected_size = INTVAL (expected_size_exp);
19801 /* Make sure we don't need to care about overflow later on. */
19802 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
19803 return false;
19805 /* Step 0: Decide on preferred algorithm, desired alignment and
19806 size of chunks to be copied by main loop. */
19808 alg = decide_alg (count, expected_size, false, &dynamic_check);
19809 desired_align = decide_alignment (align, alg, expected_size);
19811 if (!TARGET_ALIGN_STRINGOPS)
19812 align = desired_align;
19814 if (alg == libcall)
19815 return false;
19816 gcc_assert (alg != no_stringop);
19817 if (!count)
19818 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
19819 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
19820 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
19821 switch (alg)
19823 case libcall:
19824 case no_stringop:
19825 gcc_unreachable ();
19826 case loop:
19827 need_zero_guard = true;
19828 size_needed = GET_MODE_SIZE (Pmode);
19829 break;
19830 case unrolled_loop:
19831 need_zero_guard = true;
19832 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
19833 break;
19834 case rep_prefix_8_byte:
19835 size_needed = 8;
19836 break;
19837 case rep_prefix_4_byte:
19838 size_needed = 4;
19839 break;
19840 case rep_prefix_1_byte:
19841 size_needed = 1;
19842 break;
19843 case loop_1_byte:
19844 need_zero_guard = true;
19845 size_needed = 1;
19846 break;
19849 epilogue_size_needed = size_needed;
19851 /* Step 1: Prologue guard. */
19853 /* Alignment code needs count to be in register. */
19854 if (CONST_INT_P (count_exp) && desired_align > align)
19856 if (INTVAL (count_exp) > desired_align
19857 && INTVAL (count_exp) > size_needed)
19859 align_bytes
19860 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
19861 if (align_bytes <= 0)
19862 align_bytes = 0;
19863 else
19864 align_bytes = desired_align - align_bytes;
19866 if (align_bytes == 0)
19867 count_exp = force_reg (counter_mode (count_exp), count_exp);
19869 gcc_assert (desired_align >= 1 && align >= 1);
19871 /* Ensure that alignment prologue won't copy past end of block. */
19872 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
19874 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
19875 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
19876 Make sure it is a power of 2. */
19877 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
19879 if (count)
19881 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
19883 /* If main algorithm works on QImode, no epilogue is needed.
19884 For small sizes just don't align anything. */
19885 if (size_needed == 1)
19886 desired_align = align;
19887 else
19888 goto epilogue;
19891 else
19893 label = gen_label_rtx ();
19894 emit_cmp_and_jump_insns (count_exp,
19895 GEN_INT (epilogue_size_needed),
19896 LTU, 0, counter_mode (count_exp), 1, label);
19897 if (expected_size == -1 || expected_size < epilogue_size_needed)
19898 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19899 else
19900 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19904 /* Emit code to decide on runtime whether library call or inline should be
19905 used. */
19906 if (dynamic_check != -1)
19908 if (CONST_INT_P (count_exp))
19910 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
19912 emit_block_move_via_libcall (dst, src, count_exp, false);
19913 count_exp = const0_rtx;
19914 goto epilogue;
19917 else
19919 rtx hot_label = gen_label_rtx ();
19920 jump_around_label = gen_label_rtx ();
19921 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
19922 LEU, 0, GET_MODE (count_exp), 1, hot_label);
19923 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19924 emit_block_move_via_libcall (dst, src, count_exp, false);
19925 emit_jump (jump_around_label);
19926 emit_label (hot_label);
19930 /* Step 2: Alignment prologue. */
19932 if (desired_align > align)
19934 if (align_bytes == 0)
19936 /* Except for the first move in epilogue, we no longer know
19937 the constant offset in aliasing info. It doesn't seem worth
19938 the pain to maintain it for the first move, so throw away
19939 the info early. */
19940 src = change_address (src, BLKmode, srcreg);
19941 dst = change_address (dst, BLKmode, destreg);
19942 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
19943 desired_align);
19945 else
19947 /* If we know how many bytes need to be stored before dst is
19948 sufficiently aligned, maintain aliasing info accurately. */
19949 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
19950 desired_align, align_bytes);
19951 count_exp = plus_constant (count_exp, -align_bytes);
19952 count -= align_bytes;
19954 if (need_zero_guard
19955 && (count < (unsigned HOST_WIDE_INT) size_needed
19956 || (align_bytes == 0
19957 && count < ((unsigned HOST_WIDE_INT) size_needed
19958 + desired_align - align))))
19960 /* It is possible that we copied enough so the main loop will not
19961 execute. */
19962 gcc_assert (size_needed > 1);
19963 if (label == NULL_RTX)
19964 label = gen_label_rtx ();
19965 emit_cmp_and_jump_insns (count_exp,
19966 GEN_INT (size_needed),
19967 LTU, 0, counter_mode (count_exp), 1, label);
19968 if (expected_size == -1
19969 || expected_size < (desired_align - align) / 2 + size_needed)
19970 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19971 else
19972 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19975 if (label && size_needed == 1)
19977 emit_label (label);
19978 LABEL_NUSES (label) = 1;
19979 label = NULL;
19980 epilogue_size_needed = 1;
19982 else if (label == NULL_RTX)
19983 epilogue_size_needed = size_needed;
19985 /* Step 3: Main loop. */
19987 switch (alg)
19989 case libcall:
19990 case no_stringop:
19991 gcc_unreachable ();
19992 case loop_1_byte:
19993 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
19994 count_exp, QImode, 1, expected_size);
19995 break;
19996 case loop:
19997 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
19998 count_exp, Pmode, 1, expected_size);
19999 break;
20000 case unrolled_loop:
20001 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
20002 registers for 4 temporaries anyway. */
20003 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20004 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
20005 expected_size);
20006 break;
20007 case rep_prefix_8_byte:
20008 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20009 DImode);
20010 break;
20011 case rep_prefix_4_byte:
20012 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20013 SImode);
20014 break;
20015 case rep_prefix_1_byte:
20016 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20017 QImode);
20018 break;
20020 /* Properly adjust the offsets of the src and dest memory for aliasing. */
20021 if (CONST_INT_P (count_exp))
20023 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
20024 (count / size_needed) * size_needed);
20025 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20026 (count / size_needed) * size_needed);
20028 else
20030 src = change_address (src, BLKmode, srcreg);
20031 dst = change_address (dst, BLKmode, destreg);
20034 /* Step 4: Epilogue to copy the remaining bytes. */
20035 epilogue:
20036 if (label)
20038 /* When the main loop is done, COUNT_EXP might hold the original count,
20039 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
20040 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
20041 bytes. Compensate if needed. */
20043 if (size_needed < epilogue_size_needed)
20045 tmp =
20046 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20047 GEN_INT (size_needed - 1), count_exp, 1,
20048 OPTAB_DIRECT);
20049 if (tmp != count_exp)
20050 emit_move_insn (count_exp, tmp);
20052 emit_label (label);
20053 LABEL_NUSES (label) = 1;
20056 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20057 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
20058 epilogue_size_needed);
20059 if (jump_around_label)
20060 emit_label (jump_around_label);
20061 return true;
20064 /* Helper function for memset. For QImode value 0xXY produce
20065 0xXYXYXYXY of the width specified by MODE. This is essentially
20066 a * 0x01010101, but we can do slightly better than
20067 synth_mult by unwinding the sequence by hand on CPUs with
20068 slow multiply. */
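/* For example (illustrative), promoting the constant 0x41 to SImode yields
   0x41414141: v |= v << 8 gives 0x4141, then v |= v << 16 gives 0x41414141;
   the DImode case applies one more (v << 16) << 16 step.  For a non-constant
   value the code below either multiplies by the promoted constant 0x01010101
   or builds the same pattern with insv/shift/or, whichever the cost tables
   say is cheaper.  */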
20069 static rtx
20070 promote_duplicated_reg (enum machine_mode mode, rtx val)
20072 enum machine_mode valmode = GET_MODE (val);
20073 rtx tmp;
20074 int nops = mode == DImode ? 3 : 2;
20076 gcc_assert (mode == SImode || mode == DImode);
20077 if (val == const0_rtx)
20078 return copy_to_mode_reg (mode, const0_rtx);
20079 if (CONST_INT_P (val))
20081 HOST_WIDE_INT v = INTVAL (val) & 255;
20083 v |= v << 8;
20084 v |= v << 16;
20085 if (mode == DImode)
20086 v |= (v << 16) << 16;
20087 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
20090 if (valmode == VOIDmode)
20091 valmode = QImode;
20092 if (valmode != QImode)
20093 val = gen_lowpart (QImode, val);
20094 if (mode == QImode)
20095 return val;
20096 if (!TARGET_PARTIAL_REG_STALL)
20097 nops--;
20098 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
20099 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
20100 <= (ix86_cost->shift_const + ix86_cost->add) * nops
20101 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
20103 rtx reg = convert_modes (mode, QImode, val, true);
20104 tmp = promote_duplicated_reg (mode, const1_rtx);
20105 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
20106 OPTAB_DIRECT);
20108 else
20110 rtx reg = convert_modes (mode, QImode, val, true);
20112 if (!TARGET_PARTIAL_REG_STALL)
20113 if (mode == SImode)
20114 emit_insn (gen_movsi_insv_1 (reg, reg));
20115 else
20116 emit_insn (gen_movdi_insv_1 (reg, reg));
20117 else
20119 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
20120 NULL, 1, OPTAB_DIRECT);
20121 reg =
20122 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20124 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
20125 NULL, 1, OPTAB_DIRECT);
20126 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20127 if (mode == SImode)
20128 return reg;
20129 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
20130 NULL, 1, OPTAB_DIRECT);
20131 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20132 return reg;
20136 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that will
20137 be needed by the main loop copying SIZE_NEEDED chunks and by the prologue getting
20138 alignment from ALIGN to DESIRED_ALIGN. */
20139 static rtx
20140 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
20142 rtx promoted_val;
20144 if (TARGET_64BIT
20145 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
20146 promoted_val = promote_duplicated_reg (DImode, val);
20147 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
20148 promoted_val = promote_duplicated_reg (SImode, val);
20149 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
20150 promoted_val = promote_duplicated_reg (HImode, val);
20151 else
20152 promoted_val = val;
20154 return promoted_val;
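/* Example (illustrative): on a 64-bit target with SIZE_NEEDED == 8 the value
   is promoted to DImode; with SIZE_NEEDED == 4 and no extra alignment work it
   is promoted to SImode; and when both SIZE_NEEDED and the alignment gap are 1
   the original value is returned unchanged.  */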
20157 /* Expand string clear operation (bzero). Use i386 string operations when
20158 profitable. See the expand_movmem comment for an explanation of the
20159 individual steps performed. */
20160 bool
20161 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
20162 rtx expected_align_exp, rtx expected_size_exp)
20164 rtx destreg;
20165 rtx label = NULL;
20166 rtx tmp;
20167 rtx jump_around_label = NULL;
20168 HOST_WIDE_INT align = 1;
20169 unsigned HOST_WIDE_INT count = 0;
20170 HOST_WIDE_INT expected_size = -1;
20171 int size_needed = 0, epilogue_size_needed;
20172 int desired_align = 0, align_bytes = 0;
20173 enum stringop_alg alg;
20174 rtx promoted_val = NULL;
20175 bool force_loopy_epilogue = false;
20176 int dynamic_check;
20177 bool need_zero_guard = false;
20179 if (CONST_INT_P (align_exp))
20180 align = INTVAL (align_exp);
20181 /* i386 can do misaligned access at reasonably increased cost. */
20182 if (CONST_INT_P (expected_align_exp)
20183 && INTVAL (expected_align_exp) > align)
20184 align = INTVAL (expected_align_exp);
20185 if (CONST_INT_P (count_exp))
20186 count = expected_size = INTVAL (count_exp);
20187 if (CONST_INT_P (expected_size_exp) && count == 0)
20188 expected_size = INTVAL (expected_size_exp);
20190 /* Make sure we don't need to care about overflow later on. */
20191 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20192 return false;
20194 /* Step 0: Decide on preferred algorithm, desired alignment and
20195 size of chunks to be copied by main loop. */
20197 alg = decide_alg (count, expected_size, true, &dynamic_check);
20198 desired_align = decide_alignment (align, alg, expected_size);
20200 if (!TARGET_ALIGN_STRINGOPS)
20201 align = desired_align;
20203 if (alg == libcall)
20204 return false;
20205 gcc_assert (alg != no_stringop);
20206 if (!count)
20207 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
20208 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20209 switch (alg)
20211 case libcall:
20212 case no_stringop:
20213 gcc_unreachable ();
20214 case loop:
20215 need_zero_guard = true;
20216 size_needed = GET_MODE_SIZE (Pmode);
20217 break;
20218 case unrolled_loop:
20219 need_zero_guard = true;
20220 size_needed = GET_MODE_SIZE (Pmode) * 4;
20221 break;
20222 case rep_prefix_8_byte:
20223 size_needed = 8;
20224 break;
20225 case rep_prefix_4_byte:
20226 size_needed = 4;
20227 break;
20228 case rep_prefix_1_byte:
20229 size_needed = 1;
20230 break;
20231 case loop_1_byte:
20232 need_zero_guard = true;
20233 size_needed = 1;
20234 break;
20236 epilogue_size_needed = size_needed;
20238 /* Step 1: Prologue guard. */
20240 /* Alignment code needs count to be in register. */
20241 if (CONST_INT_P (count_exp) && desired_align > align)
20243 if (INTVAL (count_exp) > desired_align
20244 && INTVAL (count_exp) > size_needed)
20246 align_bytes
20247 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
20248 if (align_bytes <= 0)
20249 align_bytes = 0;
20250 else
20251 align_bytes = desired_align - align_bytes;
20253 if (align_bytes == 0)
20255 enum machine_mode mode = SImode;
20256 if (TARGET_64BIT && (count & ~0xffffffff))
20257 mode = DImode;
20258 count_exp = force_reg (mode, count_exp);
20261 /* Do the cheap promotion to allow better CSE across the
20262 main loop and epilogue (i.e. one load of the big constant in
20263 front of all code). */
20264 if (CONST_INT_P (val_exp))
20265 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
20266 desired_align, align);
20267 /* Ensure that alignment prologue won't copy past end of block. */
20268 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
20270 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
20271 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
20272 Make sure it is a power of 2. */
20273 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
20275 /* To improve performance of small blocks, we jump around the VAL
20276 promoting code. This means that if the promoted VAL is not constant,
20277 we might not use it in the epilogue and have to use the byte
20278 loop variant. */
20279 if (epilogue_size_needed > 2 && !promoted_val)
20280 force_loopy_epilogue = true;
20281 if (count)
20283 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
20285 /* If main algorithm works on QImode, no epilogue is needed.
20286 For small sizes just don't align anything. */
20287 if (size_needed == 1)
20288 desired_align = align;
20289 else
20290 goto epilogue;
20293 else
20295 label = gen_label_rtx ();
20296 emit_cmp_and_jump_insns (count_exp,
20297 GEN_INT (epilogue_size_needed),
20298 LTU, 0, counter_mode (count_exp), 1, label);
20299 if (expected_size == -1 || expected_size <= epilogue_size_needed)
20300 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20301 else
20302 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20305 if (dynamic_check != -1)
20307 rtx hot_label = gen_label_rtx ();
20308 jump_around_label = gen_label_rtx ();
20309 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
20310 LEU, 0, counter_mode (count_exp), 1, hot_label);
20311 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20312 set_storage_via_libcall (dst, count_exp, val_exp, false);
20313 emit_jump (jump_around_label);
20314 emit_label (hot_label);
20317 /* Step 2: Alignment prologue. */
20319 /* Do the expensive promotion once we branched off the small blocks. */
20320 if (!promoted_val)
20321 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
20322 desired_align, align);
20323 gcc_assert (desired_align >= 1 && align >= 1);
20325 if (desired_align > align)
20327 if (align_bytes == 0)
20329 /* Except for the first move in epilogue, we no longer know
20330 the constant offset in aliasing info. It doesn't seem worth
20331 the pain to maintain it for the first move, so throw away
20332 the info early. */
20333 dst = change_address (dst, BLKmode, destreg);
20334 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
20335 desired_align);
20337 else
20339 /* If we know how many bytes need to be stored before dst is
20340 sufficiently aligned, maintain aliasing info accurately. */
20341 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
20342 desired_align, align_bytes);
20343 count_exp = plus_constant (count_exp, -align_bytes);
20344 count -= align_bytes;
20346 if (need_zero_guard
20347 && (count < (unsigned HOST_WIDE_INT) size_needed
20348 || (align_bytes == 0
20349 && count < ((unsigned HOST_WIDE_INT) size_needed
20350 + desired_align - align))))
20352 /* It is possible that we copied enough so the main loop will not
20353 execute. */
20354 gcc_assert (size_needed > 1);
20355 if (label == NULL_RTX)
20356 label = gen_label_rtx ();
20357 emit_cmp_and_jump_insns (count_exp,
20358 GEN_INT (size_needed),
20359 LTU, 0, counter_mode (count_exp), 1, label);
20360 if (expected_size == -1
20361 || expected_size < (desired_align - align) / 2 + size_needed)
20362 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20363 else
20364 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20367 if (label && size_needed == 1)
20369 emit_label (label);
20370 LABEL_NUSES (label) = 1;
20371 label = NULL;
20372 promoted_val = val_exp;
20373 epilogue_size_needed = 1;
20375 else if (label == NULL_RTX)
20376 epilogue_size_needed = size_needed;
20378 /* Step 3: Main loop. */
20380 switch (alg)
20382 case libcall:
20383 case no_stringop:
20384 gcc_unreachable ();
20385 case loop_1_byte:
20386 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20387 count_exp, QImode, 1, expected_size);
20388 break;
20389 case loop:
20390 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20391 count_exp, Pmode, 1, expected_size);
20392 break;
20393 case unrolled_loop:
20394 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20395 count_exp, Pmode, 4, expected_size);
20396 break;
20397 case rep_prefix_8_byte:
20398 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20399 DImode, val_exp);
20400 break;
20401 case rep_prefix_4_byte:
20402 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20403 SImode, val_exp);
20404 break;
20405 case rep_prefix_1_byte:
20406 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20407 QImode, val_exp);
20408 break;
20410 /* Properly adjust the offset of the dest memory for aliasing. */
20411 if (CONST_INT_P (count_exp))
20412 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20413 (count / size_needed) * size_needed);
20414 else
20415 dst = change_address (dst, BLKmode, destreg);
20417 /* Step 4: Epilogue to copy the remaining bytes. */
20419 if (label)
20421 /* When the main loop is done, COUNT_EXP might hold the original count,
20422 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
20423 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
20424 bytes. Compensate if needed. */
20426 if (size_needed < epilogue_size_needed)
20428 tmp =
20429 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20430 GEN_INT (size_needed - 1), count_exp, 1,
20431 OPTAB_DIRECT);
20432 if (tmp != count_exp)
20433 emit_move_insn (count_exp, tmp);
20435 emit_label (label);
20436 LABEL_NUSES (label) = 1;
20438 epilogue:
20439 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20441 if (force_loopy_epilogue)
20442 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
20443 epilogue_size_needed);
20444 else
20445 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
20446 epilogue_size_needed);
20448 if (jump_around_label)
20449 emit_label (jump_around_label);
20450 return true;
20453 /* Expand the appropriate insns for doing strlen if not just doing
20454 repnz; scasb
20456 out = result, initialized with the start address
20457 align_rtx = alignment of the address.
20458 scratch = scratch register, initialized with the start address when
20459 not aligned, otherwise undefined
20461 This is just the body. It needs the initializations mentioned above and
20462 some address computing at the end. These things are done in i386.md. */
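/* Overview of the emitted body (illustrative): up to three byte-wise null
   checks first bring OUT to a 4-byte boundary, then a loop loads 4 bytes at
   a time and applies the "has a zero byte" bit trick below, and finally the
   position of the zero byte within the last word is fixed up, using cmov
   when the target has it.  */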
20464 static void
20465 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
20467 int align;
20468 rtx tmp;
20469 rtx align_2_label = NULL_RTX;
20470 rtx align_3_label = NULL_RTX;
20471 rtx align_4_label = gen_label_rtx ();
20472 rtx end_0_label = gen_label_rtx ();
20473 rtx mem;
20474 rtx tmpreg = gen_reg_rtx (SImode);
20475 rtx scratch = gen_reg_rtx (SImode);
20476 rtx cmp;
20478 align = 0;
20479 if (CONST_INT_P (align_rtx))
20480 align = INTVAL (align_rtx);
20482 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
20484 /* Is there a known alignment and is it less than 4? */
20485 if (align < 4)
20487 rtx scratch1 = gen_reg_rtx (Pmode);
20488 emit_move_insn (scratch1, out);
20489 /* Is there a known alignment and is it not 2? */
20490 if (align != 2)
20492 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
20493 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
20495 /* Leave just the 3 lower bits. */
20496 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
20497 NULL_RTX, 0, OPTAB_WIDEN);
20499 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
20500 Pmode, 1, align_4_label);
20501 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
20502 Pmode, 1, align_2_label);
20503 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
20504 Pmode, 1, align_3_label);
20506 else
20508 /* Since the alignment is 2, we have to check 2 or 0 bytes;
20509 check if it is aligned to 4 bytes. */
20511 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
20512 NULL_RTX, 0, OPTAB_WIDEN);
20514 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
20515 Pmode, 1, align_4_label);
20518 mem = change_address (src, QImode, out);
20520 /* Now compare the bytes. */
20522 /* Compare the first n unaligned byte on a byte per byte basis. */
20523 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
20524 QImode, 1, end_0_label);
20526 /* Increment the address. */
20527 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20529 /* Not needed with an alignment of 2 */
20530 if (align != 2)
20532 emit_label (align_2_label);
20534 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
20535 end_0_label);
20537 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20539 emit_label (align_3_label);
20542 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
20543 end_0_label);
20545 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20548 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
20549 align this loop; it only makes programs bigger and does not help
20550 speed. */
20551 emit_label (align_4_label);
20553 mem = change_address (src, SImode, out);
20554 emit_move_insn (scratch, mem);
20555 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
20557 /* This formula yields a nonzero result iff one of the bytes is zero.
20558 This saves three branches inside the loop and many cycles. */
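/* Worked example (illustrative): for the word 0x12003456, scratch - 0x01010101
   is 0x10FF3355 and ~scratch is 0xEDFFCBA9; their AND is 0x00FF0301, and
   masking with 0x80808080 leaves 0x00800000, which is nonzero because the
   word contains a 0x00 byte.  A word with no zero byte yields zero here, so
   the loop falls through only when a terminator is present.  */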
20560 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
20561 emit_insn (gen_one_cmplsi2 (scratch, scratch));
20562 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
20563 emit_insn (gen_andsi3 (tmpreg, tmpreg,
20564 gen_int_mode (0x80808080, SImode)));
20565 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
20566 align_4_label);
20568 if (TARGET_CMOVE)
20570 rtx reg = gen_reg_rtx (SImode);
20571 rtx reg2 = gen_reg_rtx (Pmode);
20572 emit_move_insn (reg, tmpreg);
20573 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
20575 /* If zero is not in the first two bytes, move two bytes forward. */
20576 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
20577 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20578 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
20579 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
20580 gen_rtx_IF_THEN_ELSE (SImode, tmp,
20581 reg,
20582 tmpreg)));
20583 /* Emit lea manually to avoid clobbering of flags. */
20584 emit_insn (gen_rtx_SET (SImode, reg2,
20585 gen_rtx_PLUS (Pmode, out, const2_rtx)));
20587 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20588 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
20589 emit_insn (gen_rtx_SET (VOIDmode, out,
20590 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
20591 reg2,
20592 out)));
20594 else
20596 rtx end_2_label = gen_label_rtx ();
20597 /* Is zero in the first two bytes? */
20599 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
20600 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20601 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
20602 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20603 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
20604 pc_rtx);
20605 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20606 JUMP_LABEL (tmp) = end_2_label;
20608 /* Not in the first two. Move two bytes forward. */
20609 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
20610 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
20612 emit_label (end_2_label);
20616 /* Avoid branch in fixing the byte. */
20617 tmpreg = gen_lowpart (QImode, tmpreg);
20618 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
20619 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
20620 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
20621 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
20623 emit_label (end_0_label);
20626 /* Expand strlen. */
20628 bool
20629 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
20631 rtx addr, scratch1, scratch2, scratch3, scratch4;
20633 /* The generic case of the strlen expander is long. Avoid
20634 expanding it unless TARGET_INLINE_ALL_STRINGOPS. */
20636 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
20637 && !TARGET_INLINE_ALL_STRINGOPS
20638 && !optimize_insn_for_size_p ()
20639 && (!CONST_INT_P (align) || INTVAL (align) < 4))
20640 return false;
20642 addr = force_reg (Pmode, XEXP (src, 0));
20643 scratch1 = gen_reg_rtx (Pmode);
20645 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
20646 && !optimize_insn_for_size_p ())
20648 /* Well it seems that some optimizer does not combine a call like
20649 foo(strlen(bar), strlen(bar));
20650 when the move and the subtraction are done here. It does calculate
20651 the length just once when these instructions are done inside
20652 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
20653 often used and I use one fewer register for the lifetime of
20654 output_strlen_unroll() this is better. */
20656 emit_move_insn (out, addr);
20658 ix86_expand_strlensi_unroll_1 (out, src, align);
20660 /* strlensi_unroll_1 returns the address of the zero at the end of
20661 the string, like memchr(), so compute the length by subtracting
20662 the start address. */
20663 emit_insn (ix86_gen_sub3 (out, out, addr));
20665 else
20667 rtx unspec;
20669 /* Can't use this if the user has appropriated eax, ecx, or edi. */
20670 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
20671 return false;
20673 scratch2 = gen_reg_rtx (Pmode);
20674 scratch3 = gen_reg_rtx (Pmode);
20675 scratch4 = force_reg (Pmode, constm1_rtx);
20677 emit_move_insn (scratch3, addr);
20678 eoschar = force_reg (QImode, eoschar);
20680 src = replace_equiv_address_nv (src, scratch3);
20682 /* If .md starts supporting :P, this can be done in .md. */
20683 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
20684 scratch4), UNSPEC_SCAS);
20685 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
20686 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
20687 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
20689 return true;
20692 /* For a given symbol (function) construct code to compute the address of its PLT
20693 entry in the large x86-64 PIC model. */
20695 construct_plt_address (rtx symbol)
20697 rtx tmp = gen_reg_rtx (Pmode);
20698 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
20700 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
20701 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
20703 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
20704 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
20705 return tmp;
20709 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
20710 rtx callarg2,
20711 rtx pop, int sibcall)
20713 rtx use = NULL, call;
20715 if (pop == const0_rtx)
20716 pop = NULL;
20717 gcc_assert (!TARGET_64BIT || !pop);
20719 if (TARGET_MACHO && !TARGET_64BIT)
20721 #if TARGET_MACHO
20722 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
20723 fnaddr = machopic_indirect_call_target (fnaddr);
20724 #endif
20726 else
20728 /* Static functions and indirect calls don't need the pic register. */
20729 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
20730 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20731 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
20732 use_reg (&use, pic_offset_table_rtx);
20735 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
20737 rtx al = gen_rtx_REG (QImode, AX_REG);
20738 emit_move_insn (al, callarg2);
20739 use_reg (&use, al);
20742 if (ix86_cmodel == CM_LARGE_PIC
20743 && MEM_P (fnaddr)
20744 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20745 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
20746 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
20747 else if (sibcall
20748 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
20749 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
20751 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
20752 fnaddr = gen_rtx_MEM (QImode, fnaddr);
20755 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
20756 if (retval)
20757 call = gen_rtx_SET (VOIDmode, retval, call);
20758 if (pop)
20760 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
20761 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
20762 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
20764 if (TARGET_64BIT
20765 && ix86_cfun_abi () == MS_ABI
20766 && (!callarg2 || INTVAL (callarg2) != -2))
20768 /* We need to represent that SI and DI registers are clobbered
20769 by SYSV calls. */
20770 static int clobbered_registers[] = {
20771 XMM6_REG, XMM7_REG, XMM8_REG,
20772 XMM9_REG, XMM10_REG, XMM11_REG,
20773 XMM12_REG, XMM13_REG, XMM14_REG,
20774 XMM15_REG, SI_REG, DI_REG
20776 unsigned int i;
20777 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
20778 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
20779 UNSPEC_MS_TO_SYSV_CALL);
20781 vec[0] = call;
20782 vec[1] = unspec;
20783 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
20784 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
20785 ? TImode : DImode,
20786 gen_rtx_REG
20787 (SSE_REGNO_P (clobbered_registers[i])
20788 ? TImode : DImode,
20789 clobbered_registers[i]));
20791 call = gen_rtx_PARALLEL (VOIDmode,
20792 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
20793 + 2, vec));
20796 call = emit_call_insn (call);
20797 if (use)
20798 CALL_INSN_FUNCTION_USAGE (call) = use;
20800 return call;
20804 /* Clear stack slot assignments remembered from previous functions.
20805 This is called from INIT_EXPANDERS once before RTL is emitted for each
20806 function. */
20808 static struct machine_function *
20809 ix86_init_machine_status (void)
20811 struct machine_function *f;
20813 f = ggc_alloc_cleared_machine_function ();
20814 f->use_fast_prologue_epilogue_nregs = -1;
20815 f->tls_descriptor_call_expanded_p = 0;
20816 f->call_abi = ix86_abi;
20818 return f;
20821 /* Return a MEM corresponding to a stack slot with mode MODE.
20822 Allocate a new slot if necessary.
20824 The RTL for a function can have several slots available: N is
20825 which slot to use. */
20828 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
20830 struct stack_local_entry *s;
20832 gcc_assert (n < MAX_386_STACK_LOCALS);
20834 /* Virtual slot is valid only before vregs are instantiated. */
20835 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
20837 for (s = ix86_stack_locals; s; s = s->next)
20838 if (s->mode == mode && s->n == n)
20839 return copy_rtx (s->rtl);
20841 s = ggc_alloc_stack_local_entry ();
20842 s->n = n;
20843 s->mode = mode;
20844 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
20846 s->next = ix86_stack_locals;
20847 ix86_stack_locals = s;
20848 return s->rtl;
20851 /* Construct the SYMBOL_REF for the tls_get_addr function. */
20853 static GTY(()) rtx ix86_tls_symbol;
20855 ix86_tls_get_addr (void)
20858 if (!ix86_tls_symbol)
20860 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
20861 (TARGET_ANY_GNU_TLS
20862 && !TARGET_64BIT)
20863 ? "___tls_get_addr"
20864 : "__tls_get_addr");
20867 return ix86_tls_symbol;
20870 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
20872 static GTY(()) rtx ix86_tls_module_base_symbol;
20874 ix86_tls_module_base (void)
20877 if (!ix86_tls_module_base_symbol)
20879 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
20880 "_TLS_MODULE_BASE_");
20881 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
20882 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
20885 return ix86_tls_module_base_symbol;
20888 /* Calculate the length of the memory address in the instruction
20889 encoding. Does not include the one-byte modrm, opcode, or prefix. */
20892 memory_address_length (rtx addr)
20894 struct ix86_address parts;
20895 rtx base, index, disp;
20896 int len;
20897 int ok;
20899 if (GET_CODE (addr) == PRE_DEC
20900 || GET_CODE (addr) == POST_INC
20901 || GET_CODE (addr) == PRE_MODIFY
20902 || GET_CODE (addr) == POST_MODIFY)
20903 return 0;
20905 ok = ix86_decompose_address (addr, &parts);
20906 gcc_assert (ok);
20908 if (parts.base && GET_CODE (parts.base) == SUBREG)
20909 parts.base = SUBREG_REG (parts.base);
20910 if (parts.index && GET_CODE (parts.index) == SUBREG)
20911 parts.index = SUBREG_REG (parts.index);
20913 base = parts.base;
20914 index = parts.index;
20915 disp = parts.disp;
20916 len = 0;
20918 /* Rule of thumb:
20919 - esp as the base always wants an index,
20920 - ebp as the base always wants a displacement,
20921 - r12 as the base always wants an index,
20922 - r13 as the base always wants a displacement. */
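/* Illustrative byte counts returned below (modrm, opcode and prefixes are
   excluded): (%eax) -> 0; (%esp) -> 1 (SIB byte); (%ebp) -> 1 (disp8);
   16(%eax) -> 1 (disp8); 0x12345678(%eax) -> 4; (%eax,%ebx,4) -> 1 (SIB);
   an absolute 32-bit address -> 4, plus one more byte in 64-bit mode when a
   SIB byte is required instead of %rip-relative addressing.  */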
20924 /* Register Indirect. */
20925 if (base && !index && !disp)
20927 /* esp (for its index) and ebp (for its displacement) need
20928 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
20929 code. */
20930 if (REG_P (addr)
20931 && (addr == arg_pointer_rtx
20932 || addr == frame_pointer_rtx
20933 || REGNO (addr) == SP_REG
20934 || REGNO (addr) == BP_REG
20935 || REGNO (addr) == R12_REG
20936 || REGNO (addr) == R13_REG))
20937 len = 1;
20940 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
20941 is not disp32, but disp32(%rip), so for disp32
20942 SIB byte is needed, unless print_operand_address
20943 optimizes it into disp32(%rip) or (%rip) is implied
20944 by UNSPEC. */
20945 else if (disp && !base && !index)
20947 len = 4;
20948 if (TARGET_64BIT)
20950 rtx symbol = disp;
20952 if (GET_CODE (disp) == CONST)
20953 symbol = XEXP (disp, 0);
20954 if (GET_CODE (symbol) == PLUS
20955 && CONST_INT_P (XEXP (symbol, 1)))
20956 symbol = XEXP (symbol, 0);
20958 if (GET_CODE (symbol) != LABEL_REF
20959 && (GET_CODE (symbol) != SYMBOL_REF
20960 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
20961 && (GET_CODE (symbol) != UNSPEC
20962 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
20963 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
20964 len += 1;
20968 else
20970 /* Find the length of the displacement constant. */
20971 if (disp)
20973 if (base && satisfies_constraint_K (disp))
20974 len = 1;
20975 else
20976 len = 4;
20978 /* ebp always wants a displacement. Similarly r13. */
20979 else if (base && REG_P (base)
20980 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
20981 len = 1;
20983 /* An index requires the two-byte modrm form.... */
20984 if (index
20985 /* ...like esp (or r12), which always wants an index. */
20986 || base == arg_pointer_rtx
20987 || base == frame_pointer_rtx
20988 || (base && REG_P (base)
20989 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
20990 len += 1;
20993 switch (parts.seg)
20995 case SEG_FS:
20996 case SEG_GS:
20997 len += 1;
20998 break;
20999 default:
21000 break;
21003 return len;
21006 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
21007 is set, expect that the insn has an 8-bit immediate alternative. */
21009 ix86_attr_length_immediate_default (rtx insn, int shortform)
21011 int len = 0;
21012 int i;
21013 extract_insn_cached (insn);
21014 for (i = recog_data.n_operands - 1; i >= 0; --i)
21015 if (CONSTANT_P (recog_data.operand[i]))
21017 enum attr_mode mode = get_attr_mode (insn);
21019 gcc_assert (!len);
21020 if (shortform && CONST_INT_P (recog_data.operand[i]))
21022 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
21023 switch (mode)
21025 case MODE_QI:
21026 len = 1;
21027 continue;
21028 case MODE_HI:
21029 ival = trunc_int_for_mode (ival, HImode);
21030 break;
21031 case MODE_SI:
21032 ival = trunc_int_for_mode (ival, SImode);
21033 break;
21034 default:
21035 break;
21037 if (IN_RANGE (ival, -128, 127))
21039 len = 1;
21040 continue;
21043 switch (mode)
21045 case MODE_QI:
21046 len = 1;
21047 break;
21048 case MODE_HI:
21049 len = 2;
21050 break;
21051 case MODE_SI:
21052 len = 4;
21053 break;
21054 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
21055 case MODE_DI:
21056 len = 4;
21057 break;
21058 default:
21059 fatal_insn ("unknown insn mode", insn);
21062 return len;
21064 /* Compute default value for "length_address" attribute. */
21066 ix86_attr_length_address_default (rtx insn)
21068 int i;
21070 if (get_attr_type (insn) == TYPE_LEA)
21072 rtx set = PATTERN (insn), addr;
21074 if (GET_CODE (set) == PARALLEL)
21075 set = XVECEXP (set, 0, 0);
21077 gcc_assert (GET_CODE (set) == SET);
21079 addr = SET_SRC (set);
21080 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
21082 if (GET_CODE (addr) == ZERO_EXTEND)
21083 addr = XEXP (addr, 0);
21084 if (GET_CODE (addr) == SUBREG)
21085 addr = SUBREG_REG (addr);
21088 return memory_address_length (addr);
21091 extract_insn_cached (insn);
21092 for (i = recog_data.n_operands - 1; i >= 0; --i)
21093 if (MEM_P (recog_data.operand[i]))
21095 constrain_operands_cached (reload_completed);
21096 if (which_alternative != -1)
21098 const char *constraints = recog_data.constraints[i];
21099 int alt = which_alternative;
21101 while (*constraints == '=' || *constraints == '+')
21102 constraints++;
21103 while (alt-- > 0)
21104 while (*constraints++ != ',')
21106 /* Skip ignored operands. */
21107 if (*constraints == 'X')
21108 continue;
21110 return memory_address_length (XEXP (recog_data.operand[i], 0));
21112 return 0;
21115 /* Compute the default value for the "length_vex" attribute. It includes
21116 the 2- or 3-byte VEX prefix and 1 opcode byte. */
21119 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
21120 int has_vex_w)
21122 int i;
21124 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX W bit requires
21125 the 3-byte VEX prefix. */
21126 if (!has_0f_opcode || has_vex_w)
21127 return 3 + 1;
21129 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
21130 if (!TARGET_64BIT)
21131 return 2 + 1;
21133 extract_insn_cached (insn);
21135 for (i = recog_data.n_operands - 1; i >= 0; --i)
21136 if (REG_P (recog_data.operand[i]))
21138 /* REX.W bit uses 3 byte VEX prefix. */
21139 if (GET_MODE (recog_data.operand[i]) == DImode
21140 && GENERAL_REG_P (recog_data.operand[i]))
21141 return 3 + 1;
21143 else
21145 /* REX.X or REX.B bits use 3 byte VEX prefix. */
21146 if (MEM_P (recog_data.operand[i])
21147 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
21148 return 3 + 1;
21151 return 2 + 1;
21154 /* Return the maximum number of instructions a cpu can issue. */
21156 static int
21157 ix86_issue_rate (void)
21159 switch (ix86_tune)
21161 case PROCESSOR_PENTIUM:
21162 case PROCESSOR_ATOM:
21163 case PROCESSOR_K6:
21164 return 2;
21166 case PROCESSOR_PENTIUMPRO:
21167 case PROCESSOR_PENTIUM4:
21168 case PROCESSOR_ATHLON:
21169 case PROCESSOR_K8:
21170 case PROCESSOR_AMDFAM10:
21171 case PROCESSOR_NOCONA:
21172 case PROCESSOR_GENERIC32:
21173 case PROCESSOR_GENERIC64:
21174 case PROCESSOR_BDVER1:
21175 return 3;
21177 case PROCESSOR_CORE2:
21178 return 4;
21180 default:
21181 return 1;
21185 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
21186 by DEP_INSN and nothing else set by DEP_INSN. */
21188 static int
21189 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
21191 rtx set, set2;
21193 /* Simplify the test for uninteresting insns. */
21194 if (insn_type != TYPE_SETCC
21195 && insn_type != TYPE_ICMOV
21196 && insn_type != TYPE_FCMOV
21197 && insn_type != TYPE_IBR)
21198 return 0;
21200 if ((set = single_set (dep_insn)) != 0)
21202 set = SET_DEST (set);
21203 set2 = NULL_RTX;
21205 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
21206 && XVECLEN (PATTERN (dep_insn), 0) == 2
21207 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
21208 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
21210 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
21211 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
21213 else
21214 return 0;
21216 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
21217 return 0;
21219 /* This test is true if the dependent insn reads the flags but
21220 not any other potentially set register. */
21221 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
21222 return 0;
21224 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
21225 return 0;
21227 return 1;
21230 /* Return true iff USE_INSN has a memory address with operands set by
21231 SET_INSN. */
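/* (An "AGI", address generation interlock, arises when the address of a
   memory operand in USE_INSN depends on a result produced by SET_INSN;
   on the in-order Pentium this costs an extra cycle, as accounted for in
   ix86_adjust_cost below.)  */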
21233 bool
21234 ix86_agi_dependent (rtx set_insn, rtx use_insn)
21236 int i;
21237 extract_insn_cached (use_insn);
21238 for (i = recog_data.n_operands - 1; i >= 0; --i)
21239 if (MEM_P (recog_data.operand[i]))
21241 rtx addr = XEXP (recog_data.operand[i], 0);
21242 return modified_in_p (addr, set_insn) != 0;
21244 return false;
21247 static int
21248 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
21250 enum attr_type insn_type, dep_insn_type;
21251 enum attr_memory memory;
21252 rtx set, set2;
21253 int dep_insn_code_number;
21255 /* Anti and output dependencies have zero cost on all CPUs. */
21256 if (REG_NOTE_KIND (link) != 0)
21257 return 0;
21259 dep_insn_code_number = recog_memoized (dep_insn);
21261 /* If we can't recognize the insns, we can't really do anything. */
21262 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
21263 return cost;
21265 insn_type = get_attr_type (insn);
21266 dep_insn_type = get_attr_type (dep_insn);
21268 switch (ix86_tune)
21270 case PROCESSOR_PENTIUM:
21271 /* Address Generation Interlock adds a cycle of latency. */
21272 if (insn_type == TYPE_LEA)
21274 rtx addr = PATTERN (insn);
21276 if (GET_CODE (addr) == PARALLEL)
21277 addr = XVECEXP (addr, 0, 0);
21279 gcc_assert (GET_CODE (addr) == SET);
21281 addr = SET_SRC (addr);
21282 if (modified_in_p (addr, dep_insn))
21283 cost += 1;
21285 else if (ix86_agi_dependent (dep_insn, insn))
21286 cost += 1;
21288 /* ??? Compares pair with jump/setcc. */
21289 if (ix86_flags_dependent (insn, dep_insn, insn_type))
21290 cost = 0;
21292 /* Floating point stores require value to be ready one cycle earlier. */
21293 if (insn_type == TYPE_FMOV
21294 && get_attr_memory (insn) == MEMORY_STORE
21295 && !ix86_agi_dependent (dep_insn, insn))
21296 cost += 1;
21297 break;
21299 case PROCESSOR_PENTIUMPRO:
21300 memory = get_attr_memory (insn);
21302 /* INT->FP conversion is expensive. */
21303 if (get_attr_fp_int_src (dep_insn))
21304 cost += 5;
21306 /* There is one cycle extra latency between an FP op and a store. */
21307 if (insn_type == TYPE_FMOV
21308 && (set = single_set (dep_insn)) != NULL_RTX
21309 && (set2 = single_set (insn)) != NULL_RTX
21310 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
21311 && MEM_P (SET_DEST (set2)))
21312 cost += 1;
21314 /* Show the ability of the reorder buffer to hide the latency of a load
21315 by executing it in parallel with the previous instruction, when that
21316 instruction is not needed to compute the address. */
21317 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
21318 && !ix86_agi_dependent (dep_insn, insn))
21320 /* Claim moves to take one cycle, as the core can issue one load
21321 at a time and the next load can start a cycle later. */
21322 if (dep_insn_type == TYPE_IMOV
21323 || dep_insn_type == TYPE_FMOV)
21324 cost = 1;
21325 else if (cost > 1)
21326 cost--;
21328 break;
21330 case PROCESSOR_K6:
21331 memory = get_attr_memory (insn);
21333 /* The esp dependency is resolved before the instruction is really
21334 finished. */
21335 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
21336 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
21337 return 1;
21339 /* INT->FP conversion is expensive. */
21340 if (get_attr_fp_int_src (dep_insn))
21341 cost += 5;
21343 /* Show the ability of the reorder buffer to hide the latency of a load
21344 by executing it in parallel with the previous instruction, when that
21345 instruction is not needed to compute the address. */
21346 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
21347 && !ix86_agi_dependent (dep_insn, insn))
21349 /* Claim moves to take one cycle, as the core can issue one load
21350 at a time and the next load can start a cycle later. */
21351 if (dep_insn_type == TYPE_IMOV
21352 || dep_insn_type == TYPE_FMOV)
21353 cost = 1;
21354 else if (cost > 2)
21355 cost -= 2;
21356 else
21357 cost = 1;
21359 break;
21361 case PROCESSOR_ATHLON:
21362 case PROCESSOR_K8:
21363 case PROCESSOR_AMDFAM10:
21364 case PROCESSOR_BDVER1:
21365 case PROCESSOR_ATOM:
21366 case PROCESSOR_GENERIC32:
21367 case PROCESSOR_GENERIC64:
21368 memory = get_attr_memory (insn);
21370 /* Show the ability of the reorder buffer to hide the latency of a load
21371 by executing it in parallel with the previous instruction, when that
21372 instruction is not needed to compute the address. */
21373 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
21374 && !ix86_agi_dependent (dep_insn, insn))
21376 enum attr_unit unit = get_attr_unit (insn);
21377 int loadcost = 3;
21379 /* Because of the difference between the length of integer and
21380 floating unit pipeline preparation stages, the memory operands
21381 for floating point are cheaper.
21383 ??? For Athlon the difference is most probably 2. */
21384 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
21385 loadcost = 3;
21386 else
21387 loadcost = TARGET_ATHLON ? 2 : 0;
21389 if (cost >= loadcost)
21390 cost -= loadcost;
21391 else
21392 cost = 0;
21395 default:
21396 break;
21399 return cost;
21402 /* How many alternative schedules to try. This should be as wide as the
21403 scheduling freedom in the DFA, but no wider. Making this value too
21404 large results in extra work for the scheduler. */
21406 static int
21407 ia32_multipass_dfa_lookahead (void)
21409 switch (ix86_tune)
21411 case PROCESSOR_PENTIUM:
21412 return 2;
21414 case PROCESSOR_PENTIUMPRO:
21415 case PROCESSOR_K6:
21416 return 1;
21418 default:
21419 return 0;
21424 /* Compute the alignment given to a constant that is being placed in memory.
21425 EXP is the constant and ALIGN is the alignment that the object would
21426 ordinarily have.
21427 The value of this function is used instead of that alignment to align
21428 the object. */
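/* For example (following the checks below), a DFmode (double) constant is
   given 64-bit alignment even if its type would only require 32 bits, and
   constants whose mode satisfies ALIGN_MODE_128 get 128-bit alignment, so
   they can be loaded without misalignment penalties.  */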
21431 ix86_constant_alignment (tree exp, int align)
21433 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
21434 || TREE_CODE (exp) == INTEGER_CST)
21436 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
21437 return 64;
21438 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
21439 return 128;
21441 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
21442 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
21443 return BITS_PER_WORD;
21445 return align;
21448 /* Compute the alignment for a static variable.
21449 TYPE is the data type, and ALIGN is the alignment that
21450 the object would ordinarily have. The value of this function is used
21451 instead of that alignment to align the object. */
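/* For example (when not optimizing for size, and subject to
   MAX_OFILE_ALIGNMENT), a 32-byte static array is bumped to 256-bit
   alignment, and on x86-64 any aggregate of at least 16 bytes gets at
   least 128-bit alignment as required by the ABI.  */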
21454 ix86_data_alignment (tree type, int align)
21456 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
21458 if (AGGREGATE_TYPE_P (type)
21459 && TYPE_SIZE (type)
21460 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21461 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
21462 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
21463 && align < max_align)
21464 align = max_align;
21466 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
21467 to a 16-byte boundary. */
21468 if (TARGET_64BIT)
21470 if (AGGREGATE_TYPE_P (type)
21471 && TYPE_SIZE (type)
21472 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21473 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
21474 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
21475 return 128;
21478 if (TREE_CODE (type) == ARRAY_TYPE)
21480 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
21481 return 64;
21482 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
21483 return 128;
21485 else if (TREE_CODE (type) == COMPLEX_TYPE)
21488 if (TYPE_MODE (type) == DCmode && align < 64)
21489 return 64;
21490 if ((TYPE_MODE (type) == XCmode
21491 || TYPE_MODE (type) == TCmode) && align < 128)
21492 return 128;
21494 else if ((TREE_CODE (type) == RECORD_TYPE
21495 || TREE_CODE (type) == UNION_TYPE
21496 || TREE_CODE (type) == QUAL_UNION_TYPE)
21497 && TYPE_FIELDS (type))
21499 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
21500 return 64;
21501 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
21502 return 128;
21504 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
21505 || TREE_CODE (type) == INTEGER_TYPE)
21507 if (TYPE_MODE (type) == DFmode && align < 64)
21508 return 64;
21509 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
21510 return 128;
21513 return align;
21516 /* Compute the alignment for a local variable or a stack slot. EXP is
21517 the data type or decl itself, MODE is the widest mode available and
21518 ALIGN is the alignment that the object would ordinarily have. The
21519 value of this macro is used instead of that alignment to align the
21520 object. */
21522 unsigned int
21523 ix86_local_alignment (tree exp, enum machine_mode mode,
21524 unsigned int align)
21526 tree type, decl;
21528 if (exp && DECL_P (exp))
21530 type = TREE_TYPE (exp);
21531 decl = exp;
21533 else
21535 type = exp;
21536 decl = NULL;
21539 /* Don't do dynamic stack realignment for long long objects with
21540 -mpreferred-stack-boundary=2. */
21541 if (!TARGET_64BIT
21542 && align == 64
21543 && ix86_preferred_stack_boundary < 64
21544 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
21545 && (!type || !TYPE_USER_ALIGN (type))
21546 && (!decl || !DECL_USER_ALIGN (decl)))
21547 align = 32;
21549 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
21550 register in MODE. We will return the largest alignment of XF
21551 and DF. */
21552 if (!type)
21554 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
21555 align = GET_MODE_ALIGNMENT (DFmode);
21556 return align;
21559 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
21560 to a 16-byte boundary. The exact wording is:
21562 An array uses the same alignment as its elements, except that a local or
21563 global array variable of length at least 16 bytes or
21564 a C99 variable-length array variable always has alignment of at least 16 bytes.
21566 This was added to allow use of aligned SSE instructions on arrays. The
21567 rule is meant for static storage (where the compiler cannot do the
21568 analysis by itself). We follow it for automatic variables only when it
21569 is convenient; we fully control everything in the function being
21570 compiled, and functions from other units cannot rely on the alignment.
21572 Exclude the va_list type. It is the common case of a local array where
21573 we cannot benefit from the alignment. */
21574 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
21575 && TARGET_SSE)
21577 if (AGGREGATE_TYPE_P (type)
21578 && (TYPE_MAIN_VARIANT (type)
21579 != TYPE_MAIN_VARIANT (va_list_type_node))
21580 && TYPE_SIZE (type)
21581 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21582 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
21583 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
21584 return 128;
21586 if (TREE_CODE (type) == ARRAY_TYPE)
21588 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
21589 return 64;
21590 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
21591 return 128;
21593 else if (TREE_CODE (type) == COMPLEX_TYPE)
21595 if (TYPE_MODE (type) == DCmode && align < 64)
21596 return 64;
21597 if ((TYPE_MODE (type) == XCmode
21598 || TYPE_MODE (type) == TCmode) && align < 128)
21599 return 128;
21601 else if ((TREE_CODE (type) == RECORD_TYPE
21602 || TREE_CODE (type) == UNION_TYPE
21603 || TREE_CODE (type) == QUAL_UNION_TYPE)
21604 && TYPE_FIELDS (type))
21606 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
21607 return 64;
21608 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
21609 return 128;
21611 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
21612 || TREE_CODE (type) == INTEGER_TYPE)
21615 if (TYPE_MODE (type) == DFmode && align < 64)
21616 return 64;
21617 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
21618 return 128;
21620 return align;
21623 /* Compute the minimum required alignment for dynamic stack realignment
21624 purposes for a local variable, parameter or a stack slot. EXP is
21625 the data type or decl itself, MODE is its mode and ALIGN is the
21626 alignment that the object would ordinarily have. */
21628 unsigned int
21629 ix86_minimum_alignment (tree exp, enum machine_mode mode,
21630 unsigned int align)
21632 tree type, decl;
21634 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
21635 return align;
21637 if (exp && DECL_P (exp))
21639 type = TREE_TYPE (exp);
21640 decl = exp;
21642 else
21644 type = exp;
21645 decl = NULL;
21648 /* Don't do dynamic stack realignment for long long objects with
21649 -mpreferred-stack-boundary=2. */
21650 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
21651 && (!type || !TYPE_USER_ALIGN (type))
21652 && (!decl || !DECL_USER_ALIGN (decl)))
21653 return 32;
21655 return align;
21658 /* Find a location for the static chain incoming to a nested function.
21659 This is a register, unless all free registers are used by arguments. */
21661 static rtx
21662 ix86_static_chain (const_tree fndecl, bool incoming_p)
21664 unsigned regno;
21666 if (!DECL_STATIC_CHAIN (fndecl))
21667 return NULL;
21669 if (TARGET_64BIT)
21671 /* We always use R10 in 64-bit mode. */
21672 regno = R10_REG;
21674 else
21676 tree fntype;
21677 /* By default in 32-bit mode we use ECX to pass the static chain. */
21678 regno = CX_REG;
21680 fntype = TREE_TYPE (fndecl);
21681 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
21683 /* Fastcall functions use ecx/edx for arguments, which leaves
21684 us with EAX for the static chain. */
21685 regno = AX_REG;
21687 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
21689 /* Thiscall functions use ecx for arguments, which leaves
21690 us with EAX for the static chain. */
21691 regno = AX_REG;
21693 else if (ix86_function_regparm (fntype, fndecl) == 3)
21695 /* For regparm 3, we have no free call-clobbered registers in
21696 which to store the static chain. In order to implement this,
21697 we have the trampoline push the static chain to the stack.
21698 However, we can't push a value below the return address when
21699 we call the nested function directly, so we have to use an
21700 alternate entry point. For this we use ESI, and have the
21701 alternate entry point push ESI, so that things appear the
21702 same once we're executing the nested function. */
21703 if (incoming_p)
21705 if (fndecl == current_function_decl)
21706 ix86_static_chain_on_stack = true;
21707 return gen_frame_mem (SImode,
21708 plus_constant (arg_pointer_rtx, -8));
21710 regno = SI_REG;
21714 return gen_rtx_REG (Pmode, regno);
21717 /* Emit RTL insns to initialize the variable parts of a trampoline.
21718 FNDECL is the decl of the target address; M_TRAMP is a MEM for
21719 the trampoline, and CHAIN_VALUE is an RTX for the static chain
21720 to be passed to the target function. */
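/* A rough sketch of the bytes emitted below (derived from the opcode
   constants used in this function, not an authoritative layout):
     32-bit:  b9/b8/68 <chain imm32>          mov %ecx / mov %eax / pushl chain
              e9 <rel32>                      jmp <target>
     64-bit:  41 bb <imm32> or 49 bb <imm64>  load the target into %r11
              49 ba <imm64>                   movabs the static chain into %r10
              49 ff e3 90                     jmp *%r11; nop padding  */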
21722 static void
21723 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
21725 rtx mem, fnaddr;
21727 fnaddr = XEXP (DECL_RTL (fndecl), 0);
21729 if (!TARGET_64BIT)
21731 rtx disp, chain;
21732 int opcode;
21734 /* Depending on the static chain location, either load a register
21735 with a constant, or push the constant to the stack. All of the
21736 instructions are the same size. */
21737 chain = ix86_static_chain (fndecl, true);
21738 if (REG_P (chain))
21740 if (REGNO (chain) == CX_REG)
21741 opcode = 0xb9;
21742 else if (REGNO (chain) == AX_REG)
21743 opcode = 0xb8;
21744 else
21745 gcc_unreachable ();
21747 else
21748 opcode = 0x68;
21750 mem = adjust_address (m_tramp, QImode, 0);
21751 emit_move_insn (mem, gen_int_mode (opcode, QImode));
21753 mem = adjust_address (m_tramp, SImode, 1);
21754 emit_move_insn (mem, chain_value);
21756 /* Compute offset from the end of the jmp to the target function.
21757 In the case in which the trampoline stores the static chain on
21758 the stack, we need to skip the first insn which pushes the
21759 (call-saved) register static chain; this push is 1 byte. */
21760 disp = expand_binop (SImode, sub_optab, fnaddr,
21761 plus_constant (XEXP (m_tramp, 0),
21762 MEM_P (chain) ? 9 : 10),
21763 NULL_RTX, 1, OPTAB_DIRECT);
21765 mem = adjust_address (m_tramp, QImode, 5);
21766 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
21768 mem = adjust_address (m_tramp, SImode, 6);
21769 emit_move_insn (mem, disp);
21771 else
21773 int offset = 0;
21775 /* Load the function address to r11. Try to load address using
21776 the shorter movl instead of movabs. We may want to support
21777 movq for kernel mode, but kernel does not use trampolines at
21778 the moment. */
21779 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
21781 fnaddr = copy_to_mode_reg (DImode, fnaddr);
21783 mem = adjust_address (m_tramp, HImode, offset);
21784 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
21786 mem = adjust_address (m_tramp, SImode, offset + 2);
21787 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
21788 offset += 6;
21790 else
21792 mem = adjust_address (m_tramp, HImode, offset);
21793 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
21795 mem = adjust_address (m_tramp, DImode, offset + 2);
21796 emit_move_insn (mem, fnaddr);
21797 offset += 10;
21800 /* Load static chain using movabs to r10. */
21801 mem = adjust_address (m_tramp, HImode, offset);
21802 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
21804 mem = adjust_address (m_tramp, DImode, offset + 2);
21805 emit_move_insn (mem, chain_value);
21806 offset += 10;
21808 /* Jump to r11; the last (unused) byte is a nop, only there to
21809 pad the write out to a single 32-bit store. */
21810 mem = adjust_address (m_tramp, SImode, offset);
21811 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
21812 offset += 4;
21814 gcc_assert (offset <= TRAMPOLINE_SIZE);
21817 #ifdef ENABLE_EXECUTE_STACK
21818 #ifdef CHECK_EXECUTE_STACK_ENABLED
21819 if (CHECK_EXECUTE_STACK_ENABLED)
21820 #endif
21821 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
21822 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
21823 #endif
21826 /* The following file contains several enumerations and data structures
21827 built from the definitions in i386-builtin-types.def. */
21829 #include "i386-builtin-types.inc"
21831 /* Table for the ix86 builtin non-function types. */
21832 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
21834 /* Retrieve an element from the above table, building some of
21835 the types lazily. */
21837 static tree
21838 ix86_get_builtin_type (enum ix86_builtin_type tcode)
21840 unsigned int index;
21841 tree type, itype;
21843 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
21845 type = ix86_builtin_type_tab[(int) tcode];
21846 if (type != NULL)
21847 return type;
21849 gcc_assert (tcode > IX86_BT_LAST_PRIM);
21850 if (tcode <= IX86_BT_LAST_VECT)
21852 enum machine_mode mode;
21854 index = tcode - IX86_BT_LAST_PRIM - 1;
21855 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
21856 mode = ix86_builtin_type_vect_mode[index];
21858 type = build_vector_type_for_mode (itype, mode);
21860 else
21862 int quals;
21864 index = tcode - IX86_BT_LAST_VECT - 1;
21865 if (tcode <= IX86_BT_LAST_PTR)
21866 quals = TYPE_UNQUALIFIED;
21867 else
21868 quals = TYPE_QUAL_CONST;
21870 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
21871 if (quals != TYPE_UNQUALIFIED)
21872 itype = build_qualified_type (itype, quals);
21874 type = build_pointer_type (itype);
21877 ix86_builtin_type_tab[(int) tcode] = type;
21878 return type;
21881 /* Table for the ix86 builtin function types. */
21882 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
21884 /* Retrieve an element from the above table, building some of
21885 the types lazily. */
21887 static tree
21888 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
21890 tree type;
21892 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
21894 type = ix86_builtin_func_type_tab[(int) tcode];
21895 if (type != NULL)
21896 return type;
21898 if (tcode <= IX86_BT_LAST_FUNC)
21900 unsigned start = ix86_builtin_func_start[(int) tcode];
21901 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
21902 tree rtype, atype, args = void_list_node;
21903 unsigned i;
21905 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
21906 for (i = after - 1; i > start; --i)
21908 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
21909 args = tree_cons (NULL, atype, args);
21912 type = build_function_type (rtype, args);
21914 else
21916 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
21917 enum ix86_builtin_func_type icode;
21919 icode = ix86_builtin_func_alias_base[index];
21920 type = ix86_get_builtin_func_type (icode);
21923 ix86_builtin_func_type_tab[(int) tcode] = type;
21924 return type;
21928 /* Codes for all the SSE/MMX builtins. */
21929 enum ix86_builtins
21931 IX86_BUILTIN_ADDPS,
21932 IX86_BUILTIN_ADDSS,
21933 IX86_BUILTIN_DIVPS,
21934 IX86_BUILTIN_DIVSS,
21935 IX86_BUILTIN_MULPS,
21936 IX86_BUILTIN_MULSS,
21937 IX86_BUILTIN_SUBPS,
21938 IX86_BUILTIN_SUBSS,
21940 IX86_BUILTIN_CMPEQPS,
21941 IX86_BUILTIN_CMPLTPS,
21942 IX86_BUILTIN_CMPLEPS,
21943 IX86_BUILTIN_CMPGTPS,
21944 IX86_BUILTIN_CMPGEPS,
21945 IX86_BUILTIN_CMPNEQPS,
21946 IX86_BUILTIN_CMPNLTPS,
21947 IX86_BUILTIN_CMPNLEPS,
21948 IX86_BUILTIN_CMPNGTPS,
21949 IX86_BUILTIN_CMPNGEPS,
21950 IX86_BUILTIN_CMPORDPS,
21951 IX86_BUILTIN_CMPUNORDPS,
21952 IX86_BUILTIN_CMPEQSS,
21953 IX86_BUILTIN_CMPLTSS,
21954 IX86_BUILTIN_CMPLESS,
21955 IX86_BUILTIN_CMPNEQSS,
21956 IX86_BUILTIN_CMPNLTSS,
21957 IX86_BUILTIN_CMPNLESS,
21958 IX86_BUILTIN_CMPNGTSS,
21959 IX86_BUILTIN_CMPNGESS,
21960 IX86_BUILTIN_CMPORDSS,
21961 IX86_BUILTIN_CMPUNORDSS,
21963 IX86_BUILTIN_COMIEQSS,
21964 IX86_BUILTIN_COMILTSS,
21965 IX86_BUILTIN_COMILESS,
21966 IX86_BUILTIN_COMIGTSS,
21967 IX86_BUILTIN_COMIGESS,
21968 IX86_BUILTIN_COMINEQSS,
21969 IX86_BUILTIN_UCOMIEQSS,
21970 IX86_BUILTIN_UCOMILTSS,
21971 IX86_BUILTIN_UCOMILESS,
21972 IX86_BUILTIN_UCOMIGTSS,
21973 IX86_BUILTIN_UCOMIGESS,
21974 IX86_BUILTIN_UCOMINEQSS,
21976 IX86_BUILTIN_CVTPI2PS,
21977 IX86_BUILTIN_CVTPS2PI,
21978 IX86_BUILTIN_CVTSI2SS,
21979 IX86_BUILTIN_CVTSI642SS,
21980 IX86_BUILTIN_CVTSS2SI,
21981 IX86_BUILTIN_CVTSS2SI64,
21982 IX86_BUILTIN_CVTTPS2PI,
21983 IX86_BUILTIN_CVTTSS2SI,
21984 IX86_BUILTIN_CVTTSS2SI64,
21986 IX86_BUILTIN_MAXPS,
21987 IX86_BUILTIN_MAXSS,
21988 IX86_BUILTIN_MINPS,
21989 IX86_BUILTIN_MINSS,
21991 IX86_BUILTIN_LOADUPS,
21992 IX86_BUILTIN_STOREUPS,
21993 IX86_BUILTIN_MOVSS,
21995 IX86_BUILTIN_MOVHLPS,
21996 IX86_BUILTIN_MOVLHPS,
21997 IX86_BUILTIN_LOADHPS,
21998 IX86_BUILTIN_LOADLPS,
21999 IX86_BUILTIN_STOREHPS,
22000 IX86_BUILTIN_STORELPS,
22002 IX86_BUILTIN_MASKMOVQ,
22003 IX86_BUILTIN_MOVMSKPS,
22004 IX86_BUILTIN_PMOVMSKB,
22006 IX86_BUILTIN_MOVNTPS,
22007 IX86_BUILTIN_MOVNTQ,
22009 IX86_BUILTIN_LOADDQU,
22010 IX86_BUILTIN_STOREDQU,
22012 IX86_BUILTIN_PACKSSWB,
22013 IX86_BUILTIN_PACKSSDW,
22014 IX86_BUILTIN_PACKUSWB,
22016 IX86_BUILTIN_PADDB,
22017 IX86_BUILTIN_PADDW,
22018 IX86_BUILTIN_PADDD,
22019 IX86_BUILTIN_PADDQ,
22020 IX86_BUILTIN_PADDSB,
22021 IX86_BUILTIN_PADDSW,
22022 IX86_BUILTIN_PADDUSB,
22023 IX86_BUILTIN_PADDUSW,
22024 IX86_BUILTIN_PSUBB,
22025 IX86_BUILTIN_PSUBW,
22026 IX86_BUILTIN_PSUBD,
22027 IX86_BUILTIN_PSUBQ,
22028 IX86_BUILTIN_PSUBSB,
22029 IX86_BUILTIN_PSUBSW,
22030 IX86_BUILTIN_PSUBUSB,
22031 IX86_BUILTIN_PSUBUSW,
22033 IX86_BUILTIN_PAND,
22034 IX86_BUILTIN_PANDN,
22035 IX86_BUILTIN_POR,
22036 IX86_BUILTIN_PXOR,
22038 IX86_BUILTIN_PAVGB,
22039 IX86_BUILTIN_PAVGW,
22041 IX86_BUILTIN_PCMPEQB,
22042 IX86_BUILTIN_PCMPEQW,
22043 IX86_BUILTIN_PCMPEQD,
22044 IX86_BUILTIN_PCMPGTB,
22045 IX86_BUILTIN_PCMPGTW,
22046 IX86_BUILTIN_PCMPGTD,
22048 IX86_BUILTIN_PMADDWD,
22050 IX86_BUILTIN_PMAXSW,
22051 IX86_BUILTIN_PMAXUB,
22052 IX86_BUILTIN_PMINSW,
22053 IX86_BUILTIN_PMINUB,
22055 IX86_BUILTIN_PMULHUW,
22056 IX86_BUILTIN_PMULHW,
22057 IX86_BUILTIN_PMULLW,
22059 IX86_BUILTIN_PSADBW,
22060 IX86_BUILTIN_PSHUFW,
22062 IX86_BUILTIN_PSLLW,
22063 IX86_BUILTIN_PSLLD,
22064 IX86_BUILTIN_PSLLQ,
22065 IX86_BUILTIN_PSRAW,
22066 IX86_BUILTIN_PSRAD,
22067 IX86_BUILTIN_PSRLW,
22068 IX86_BUILTIN_PSRLD,
22069 IX86_BUILTIN_PSRLQ,
22070 IX86_BUILTIN_PSLLWI,
22071 IX86_BUILTIN_PSLLDI,
22072 IX86_BUILTIN_PSLLQI,
22073 IX86_BUILTIN_PSRAWI,
22074 IX86_BUILTIN_PSRADI,
22075 IX86_BUILTIN_PSRLWI,
22076 IX86_BUILTIN_PSRLDI,
22077 IX86_BUILTIN_PSRLQI,
22079 IX86_BUILTIN_PUNPCKHBW,
22080 IX86_BUILTIN_PUNPCKHWD,
22081 IX86_BUILTIN_PUNPCKHDQ,
22082 IX86_BUILTIN_PUNPCKLBW,
22083 IX86_BUILTIN_PUNPCKLWD,
22084 IX86_BUILTIN_PUNPCKLDQ,
22086 IX86_BUILTIN_SHUFPS,
22088 IX86_BUILTIN_RCPPS,
22089 IX86_BUILTIN_RCPSS,
22090 IX86_BUILTIN_RSQRTPS,
22091 IX86_BUILTIN_RSQRTPS_NR,
22092 IX86_BUILTIN_RSQRTSS,
22093 IX86_BUILTIN_RSQRTF,
22094 IX86_BUILTIN_SQRTPS,
22095 IX86_BUILTIN_SQRTPS_NR,
22096 IX86_BUILTIN_SQRTSS,
22098 IX86_BUILTIN_UNPCKHPS,
22099 IX86_BUILTIN_UNPCKLPS,
22101 IX86_BUILTIN_ANDPS,
22102 IX86_BUILTIN_ANDNPS,
22103 IX86_BUILTIN_ORPS,
22104 IX86_BUILTIN_XORPS,
22106 IX86_BUILTIN_EMMS,
22107 IX86_BUILTIN_LDMXCSR,
22108 IX86_BUILTIN_STMXCSR,
22109 IX86_BUILTIN_SFENCE,
22111 /* 3DNow! Original */
22112 IX86_BUILTIN_FEMMS,
22113 IX86_BUILTIN_PAVGUSB,
22114 IX86_BUILTIN_PF2ID,
22115 IX86_BUILTIN_PFACC,
22116 IX86_BUILTIN_PFADD,
22117 IX86_BUILTIN_PFCMPEQ,
22118 IX86_BUILTIN_PFCMPGE,
22119 IX86_BUILTIN_PFCMPGT,
22120 IX86_BUILTIN_PFMAX,
22121 IX86_BUILTIN_PFMIN,
22122 IX86_BUILTIN_PFMUL,
22123 IX86_BUILTIN_PFRCP,
22124 IX86_BUILTIN_PFRCPIT1,
22125 IX86_BUILTIN_PFRCPIT2,
22126 IX86_BUILTIN_PFRSQIT1,
22127 IX86_BUILTIN_PFRSQRT,
22128 IX86_BUILTIN_PFSUB,
22129 IX86_BUILTIN_PFSUBR,
22130 IX86_BUILTIN_PI2FD,
22131 IX86_BUILTIN_PMULHRW,
22133 /* 3DNow! Athlon Extensions */
22134 IX86_BUILTIN_PF2IW,
22135 IX86_BUILTIN_PFNACC,
22136 IX86_BUILTIN_PFPNACC,
22137 IX86_BUILTIN_PI2FW,
22138 IX86_BUILTIN_PSWAPDSI,
22139 IX86_BUILTIN_PSWAPDSF,
22141 /* SSE2 */
22142 IX86_BUILTIN_ADDPD,
22143 IX86_BUILTIN_ADDSD,
22144 IX86_BUILTIN_DIVPD,
22145 IX86_BUILTIN_DIVSD,
22146 IX86_BUILTIN_MULPD,
22147 IX86_BUILTIN_MULSD,
22148 IX86_BUILTIN_SUBPD,
22149 IX86_BUILTIN_SUBSD,
22151 IX86_BUILTIN_CMPEQPD,
22152 IX86_BUILTIN_CMPLTPD,
22153 IX86_BUILTIN_CMPLEPD,
22154 IX86_BUILTIN_CMPGTPD,
22155 IX86_BUILTIN_CMPGEPD,
22156 IX86_BUILTIN_CMPNEQPD,
22157 IX86_BUILTIN_CMPNLTPD,
22158 IX86_BUILTIN_CMPNLEPD,
22159 IX86_BUILTIN_CMPNGTPD,
22160 IX86_BUILTIN_CMPNGEPD,
22161 IX86_BUILTIN_CMPORDPD,
22162 IX86_BUILTIN_CMPUNORDPD,
22163 IX86_BUILTIN_CMPEQSD,
22164 IX86_BUILTIN_CMPLTSD,
22165 IX86_BUILTIN_CMPLESD,
22166 IX86_BUILTIN_CMPNEQSD,
22167 IX86_BUILTIN_CMPNLTSD,
22168 IX86_BUILTIN_CMPNLESD,
22169 IX86_BUILTIN_CMPORDSD,
22170 IX86_BUILTIN_CMPUNORDSD,
22172 IX86_BUILTIN_COMIEQSD,
22173 IX86_BUILTIN_COMILTSD,
22174 IX86_BUILTIN_COMILESD,
22175 IX86_BUILTIN_COMIGTSD,
22176 IX86_BUILTIN_COMIGESD,
22177 IX86_BUILTIN_COMINEQSD,
22178 IX86_BUILTIN_UCOMIEQSD,
22179 IX86_BUILTIN_UCOMILTSD,
22180 IX86_BUILTIN_UCOMILESD,
22181 IX86_BUILTIN_UCOMIGTSD,
22182 IX86_BUILTIN_UCOMIGESD,
22183 IX86_BUILTIN_UCOMINEQSD,
22185 IX86_BUILTIN_MAXPD,
22186 IX86_BUILTIN_MAXSD,
22187 IX86_BUILTIN_MINPD,
22188 IX86_BUILTIN_MINSD,
22190 IX86_BUILTIN_ANDPD,
22191 IX86_BUILTIN_ANDNPD,
22192 IX86_BUILTIN_ORPD,
22193 IX86_BUILTIN_XORPD,
22195 IX86_BUILTIN_SQRTPD,
22196 IX86_BUILTIN_SQRTSD,
22198 IX86_BUILTIN_UNPCKHPD,
22199 IX86_BUILTIN_UNPCKLPD,
22201 IX86_BUILTIN_SHUFPD,
22203 IX86_BUILTIN_LOADUPD,
22204 IX86_BUILTIN_STOREUPD,
22205 IX86_BUILTIN_MOVSD,
22207 IX86_BUILTIN_LOADHPD,
22208 IX86_BUILTIN_LOADLPD,
22210 IX86_BUILTIN_CVTDQ2PD,
22211 IX86_BUILTIN_CVTDQ2PS,
22213 IX86_BUILTIN_CVTPD2DQ,
22214 IX86_BUILTIN_CVTPD2PI,
22215 IX86_BUILTIN_CVTPD2PS,
22216 IX86_BUILTIN_CVTTPD2DQ,
22217 IX86_BUILTIN_CVTTPD2PI,
22219 IX86_BUILTIN_CVTPI2PD,
22220 IX86_BUILTIN_CVTSI2SD,
22221 IX86_BUILTIN_CVTSI642SD,
22223 IX86_BUILTIN_CVTSD2SI,
22224 IX86_BUILTIN_CVTSD2SI64,
22225 IX86_BUILTIN_CVTSD2SS,
22226 IX86_BUILTIN_CVTSS2SD,
22227 IX86_BUILTIN_CVTTSD2SI,
22228 IX86_BUILTIN_CVTTSD2SI64,
22230 IX86_BUILTIN_CVTPS2DQ,
22231 IX86_BUILTIN_CVTPS2PD,
22232 IX86_BUILTIN_CVTTPS2DQ,
22234 IX86_BUILTIN_MOVNTI,
22235 IX86_BUILTIN_MOVNTPD,
22236 IX86_BUILTIN_MOVNTDQ,
22238 IX86_BUILTIN_MOVQ128,
22240 /* SSE2 MMX */
22241 IX86_BUILTIN_MASKMOVDQU,
22242 IX86_BUILTIN_MOVMSKPD,
22243 IX86_BUILTIN_PMOVMSKB128,
22245 IX86_BUILTIN_PACKSSWB128,
22246 IX86_BUILTIN_PACKSSDW128,
22247 IX86_BUILTIN_PACKUSWB128,
22249 IX86_BUILTIN_PADDB128,
22250 IX86_BUILTIN_PADDW128,
22251 IX86_BUILTIN_PADDD128,
22252 IX86_BUILTIN_PADDQ128,
22253 IX86_BUILTIN_PADDSB128,
22254 IX86_BUILTIN_PADDSW128,
22255 IX86_BUILTIN_PADDUSB128,
22256 IX86_BUILTIN_PADDUSW128,
22257 IX86_BUILTIN_PSUBB128,
22258 IX86_BUILTIN_PSUBW128,
22259 IX86_BUILTIN_PSUBD128,
22260 IX86_BUILTIN_PSUBQ128,
22261 IX86_BUILTIN_PSUBSB128,
22262 IX86_BUILTIN_PSUBSW128,
22263 IX86_BUILTIN_PSUBUSB128,
22264 IX86_BUILTIN_PSUBUSW128,
22266 IX86_BUILTIN_PAND128,
22267 IX86_BUILTIN_PANDN128,
22268 IX86_BUILTIN_POR128,
22269 IX86_BUILTIN_PXOR128,
22271 IX86_BUILTIN_PAVGB128,
22272 IX86_BUILTIN_PAVGW128,
22274 IX86_BUILTIN_PCMPEQB128,
22275 IX86_BUILTIN_PCMPEQW128,
22276 IX86_BUILTIN_PCMPEQD128,
22277 IX86_BUILTIN_PCMPGTB128,
22278 IX86_BUILTIN_PCMPGTW128,
22279 IX86_BUILTIN_PCMPGTD128,
22281 IX86_BUILTIN_PMADDWD128,
22283 IX86_BUILTIN_PMAXSW128,
22284 IX86_BUILTIN_PMAXUB128,
22285 IX86_BUILTIN_PMINSW128,
22286 IX86_BUILTIN_PMINUB128,
22288 IX86_BUILTIN_PMULUDQ,
22289 IX86_BUILTIN_PMULUDQ128,
22290 IX86_BUILTIN_PMULHUW128,
22291 IX86_BUILTIN_PMULHW128,
22292 IX86_BUILTIN_PMULLW128,
22294 IX86_BUILTIN_PSADBW128,
22295 IX86_BUILTIN_PSHUFHW,
22296 IX86_BUILTIN_PSHUFLW,
22297 IX86_BUILTIN_PSHUFD,
22299 IX86_BUILTIN_PSLLDQI128,
22300 IX86_BUILTIN_PSLLWI128,
22301 IX86_BUILTIN_PSLLDI128,
22302 IX86_BUILTIN_PSLLQI128,
22303 IX86_BUILTIN_PSRAWI128,
22304 IX86_BUILTIN_PSRADI128,
22305 IX86_BUILTIN_PSRLDQI128,
22306 IX86_BUILTIN_PSRLWI128,
22307 IX86_BUILTIN_PSRLDI128,
22308 IX86_BUILTIN_PSRLQI128,
22310 IX86_BUILTIN_PSLLDQ128,
22311 IX86_BUILTIN_PSLLW128,
22312 IX86_BUILTIN_PSLLD128,
22313 IX86_BUILTIN_PSLLQ128,
22314 IX86_BUILTIN_PSRAW128,
22315 IX86_BUILTIN_PSRAD128,
22316 IX86_BUILTIN_PSRLW128,
22317 IX86_BUILTIN_PSRLD128,
22318 IX86_BUILTIN_PSRLQ128,
22320 IX86_BUILTIN_PUNPCKHBW128,
22321 IX86_BUILTIN_PUNPCKHWD128,
22322 IX86_BUILTIN_PUNPCKHDQ128,
22323 IX86_BUILTIN_PUNPCKHQDQ128,
22324 IX86_BUILTIN_PUNPCKLBW128,
22325 IX86_BUILTIN_PUNPCKLWD128,
22326 IX86_BUILTIN_PUNPCKLDQ128,
22327 IX86_BUILTIN_PUNPCKLQDQ128,
22329 IX86_BUILTIN_CLFLUSH,
22330 IX86_BUILTIN_MFENCE,
22331 IX86_BUILTIN_LFENCE,
22333 IX86_BUILTIN_BSRSI,
22334 IX86_BUILTIN_BSRDI,
22335 IX86_BUILTIN_RDPMC,
22336 IX86_BUILTIN_RDTSC,
22337 IX86_BUILTIN_RDTSCP,
22338 IX86_BUILTIN_ROLQI,
22339 IX86_BUILTIN_ROLHI,
22340 IX86_BUILTIN_RORQI,
22341 IX86_BUILTIN_RORHI,
22343 /* SSE3. */
22344 IX86_BUILTIN_ADDSUBPS,
22345 IX86_BUILTIN_HADDPS,
22346 IX86_BUILTIN_HSUBPS,
22347 IX86_BUILTIN_MOVSHDUP,
22348 IX86_BUILTIN_MOVSLDUP,
22349 IX86_BUILTIN_ADDSUBPD,
22350 IX86_BUILTIN_HADDPD,
22351 IX86_BUILTIN_HSUBPD,
22352 IX86_BUILTIN_LDDQU,
22354 IX86_BUILTIN_MONITOR,
22355 IX86_BUILTIN_MWAIT,
22357 /* SSSE3. */
22358 IX86_BUILTIN_PHADDW,
22359 IX86_BUILTIN_PHADDD,
22360 IX86_BUILTIN_PHADDSW,
22361 IX86_BUILTIN_PHSUBW,
22362 IX86_BUILTIN_PHSUBD,
22363 IX86_BUILTIN_PHSUBSW,
22364 IX86_BUILTIN_PMADDUBSW,
22365 IX86_BUILTIN_PMULHRSW,
22366 IX86_BUILTIN_PSHUFB,
22367 IX86_BUILTIN_PSIGNB,
22368 IX86_BUILTIN_PSIGNW,
22369 IX86_BUILTIN_PSIGND,
22370 IX86_BUILTIN_PALIGNR,
22371 IX86_BUILTIN_PABSB,
22372 IX86_BUILTIN_PABSW,
22373 IX86_BUILTIN_PABSD,
22375 IX86_BUILTIN_PHADDW128,
22376 IX86_BUILTIN_PHADDD128,
22377 IX86_BUILTIN_PHADDSW128,
22378 IX86_BUILTIN_PHSUBW128,
22379 IX86_BUILTIN_PHSUBD128,
22380 IX86_BUILTIN_PHSUBSW128,
22381 IX86_BUILTIN_PMADDUBSW128,
22382 IX86_BUILTIN_PMULHRSW128,
22383 IX86_BUILTIN_PSHUFB128,
22384 IX86_BUILTIN_PSIGNB128,
22385 IX86_BUILTIN_PSIGNW128,
22386 IX86_BUILTIN_PSIGND128,
22387 IX86_BUILTIN_PALIGNR128,
22388 IX86_BUILTIN_PABSB128,
22389 IX86_BUILTIN_PABSW128,
22390 IX86_BUILTIN_PABSD128,
22392 /* AMDFAM10 - SSE4A New Instructions. */
22393 IX86_BUILTIN_MOVNTSD,
22394 IX86_BUILTIN_MOVNTSS,
22395 IX86_BUILTIN_EXTRQI,
22396 IX86_BUILTIN_EXTRQ,
22397 IX86_BUILTIN_INSERTQI,
22398 IX86_BUILTIN_INSERTQ,
22400 /* SSE4.1. */
22401 IX86_BUILTIN_BLENDPD,
22402 IX86_BUILTIN_BLENDPS,
22403 IX86_BUILTIN_BLENDVPD,
22404 IX86_BUILTIN_BLENDVPS,
22405 IX86_BUILTIN_PBLENDVB128,
22406 IX86_BUILTIN_PBLENDW128,
22408 IX86_BUILTIN_DPPD,
22409 IX86_BUILTIN_DPPS,
22411 IX86_BUILTIN_INSERTPS128,
22413 IX86_BUILTIN_MOVNTDQA,
22414 IX86_BUILTIN_MPSADBW128,
22415 IX86_BUILTIN_PACKUSDW128,
22416 IX86_BUILTIN_PCMPEQQ,
22417 IX86_BUILTIN_PHMINPOSUW128,
22419 IX86_BUILTIN_PMAXSB128,
22420 IX86_BUILTIN_PMAXSD128,
22421 IX86_BUILTIN_PMAXUD128,
22422 IX86_BUILTIN_PMAXUW128,
22424 IX86_BUILTIN_PMINSB128,
22425 IX86_BUILTIN_PMINSD128,
22426 IX86_BUILTIN_PMINUD128,
22427 IX86_BUILTIN_PMINUW128,
22429 IX86_BUILTIN_PMOVSXBW128,
22430 IX86_BUILTIN_PMOVSXBD128,
22431 IX86_BUILTIN_PMOVSXBQ128,
22432 IX86_BUILTIN_PMOVSXWD128,
22433 IX86_BUILTIN_PMOVSXWQ128,
22434 IX86_BUILTIN_PMOVSXDQ128,
22436 IX86_BUILTIN_PMOVZXBW128,
22437 IX86_BUILTIN_PMOVZXBD128,
22438 IX86_BUILTIN_PMOVZXBQ128,
22439 IX86_BUILTIN_PMOVZXWD128,
22440 IX86_BUILTIN_PMOVZXWQ128,
22441 IX86_BUILTIN_PMOVZXDQ128,
22443 IX86_BUILTIN_PMULDQ128,
22444 IX86_BUILTIN_PMULLD128,
22446 IX86_BUILTIN_ROUNDPD,
22447 IX86_BUILTIN_ROUNDPS,
22448 IX86_BUILTIN_ROUNDSD,
22449 IX86_BUILTIN_ROUNDSS,
22451 IX86_BUILTIN_PTESTZ,
22452 IX86_BUILTIN_PTESTC,
22453 IX86_BUILTIN_PTESTNZC,
22455 IX86_BUILTIN_VEC_INIT_V2SI,
22456 IX86_BUILTIN_VEC_INIT_V4HI,
22457 IX86_BUILTIN_VEC_INIT_V8QI,
22458 IX86_BUILTIN_VEC_EXT_V2DF,
22459 IX86_BUILTIN_VEC_EXT_V2DI,
22460 IX86_BUILTIN_VEC_EXT_V4SF,
22461 IX86_BUILTIN_VEC_EXT_V4SI,
22462 IX86_BUILTIN_VEC_EXT_V8HI,
22463 IX86_BUILTIN_VEC_EXT_V2SI,
22464 IX86_BUILTIN_VEC_EXT_V4HI,
22465 IX86_BUILTIN_VEC_EXT_V16QI,
22466 IX86_BUILTIN_VEC_SET_V2DI,
22467 IX86_BUILTIN_VEC_SET_V4SF,
22468 IX86_BUILTIN_VEC_SET_V4SI,
22469 IX86_BUILTIN_VEC_SET_V8HI,
22470 IX86_BUILTIN_VEC_SET_V4HI,
22471 IX86_BUILTIN_VEC_SET_V16QI,
22473 IX86_BUILTIN_VEC_PACK_SFIX,
22475 /* SSE4.2. */
22476 IX86_BUILTIN_CRC32QI,
22477 IX86_BUILTIN_CRC32HI,
22478 IX86_BUILTIN_CRC32SI,
22479 IX86_BUILTIN_CRC32DI,
22481 IX86_BUILTIN_PCMPESTRI128,
22482 IX86_BUILTIN_PCMPESTRM128,
22483 IX86_BUILTIN_PCMPESTRA128,
22484 IX86_BUILTIN_PCMPESTRC128,
22485 IX86_BUILTIN_PCMPESTRO128,
22486 IX86_BUILTIN_PCMPESTRS128,
22487 IX86_BUILTIN_PCMPESTRZ128,
22488 IX86_BUILTIN_PCMPISTRI128,
22489 IX86_BUILTIN_PCMPISTRM128,
22490 IX86_BUILTIN_PCMPISTRA128,
22491 IX86_BUILTIN_PCMPISTRC128,
22492 IX86_BUILTIN_PCMPISTRO128,
22493 IX86_BUILTIN_PCMPISTRS128,
22494 IX86_BUILTIN_PCMPISTRZ128,
22496 IX86_BUILTIN_PCMPGTQ,
22498 /* AES instructions */
22499 IX86_BUILTIN_AESENC128,
22500 IX86_BUILTIN_AESENCLAST128,
22501 IX86_BUILTIN_AESDEC128,
22502 IX86_BUILTIN_AESDECLAST128,
22503 IX86_BUILTIN_AESIMC128,
22504 IX86_BUILTIN_AESKEYGENASSIST128,
22506 /* PCLMUL instruction */
22507 IX86_BUILTIN_PCLMULQDQ128,
22509 /* AVX */
22510 IX86_BUILTIN_ADDPD256,
22511 IX86_BUILTIN_ADDPS256,
22512 IX86_BUILTIN_ADDSUBPD256,
22513 IX86_BUILTIN_ADDSUBPS256,
22514 IX86_BUILTIN_ANDPD256,
22515 IX86_BUILTIN_ANDPS256,
22516 IX86_BUILTIN_ANDNPD256,
22517 IX86_BUILTIN_ANDNPS256,
22518 IX86_BUILTIN_BLENDPD256,
22519 IX86_BUILTIN_BLENDPS256,
22520 IX86_BUILTIN_BLENDVPD256,
22521 IX86_BUILTIN_BLENDVPS256,
22522 IX86_BUILTIN_DIVPD256,
22523 IX86_BUILTIN_DIVPS256,
22524 IX86_BUILTIN_DPPS256,
22525 IX86_BUILTIN_HADDPD256,
22526 IX86_BUILTIN_HADDPS256,
22527 IX86_BUILTIN_HSUBPD256,
22528 IX86_BUILTIN_HSUBPS256,
22529 IX86_BUILTIN_MAXPD256,
22530 IX86_BUILTIN_MAXPS256,
22531 IX86_BUILTIN_MINPD256,
22532 IX86_BUILTIN_MINPS256,
22533 IX86_BUILTIN_MULPD256,
22534 IX86_BUILTIN_MULPS256,
22535 IX86_BUILTIN_ORPD256,
22536 IX86_BUILTIN_ORPS256,
22537 IX86_BUILTIN_SHUFPD256,
22538 IX86_BUILTIN_SHUFPS256,
22539 IX86_BUILTIN_SUBPD256,
22540 IX86_BUILTIN_SUBPS256,
22541 IX86_BUILTIN_XORPD256,
22542 IX86_BUILTIN_XORPS256,
22543 IX86_BUILTIN_CMPSD,
22544 IX86_BUILTIN_CMPSS,
22545 IX86_BUILTIN_CMPPD,
22546 IX86_BUILTIN_CMPPS,
22547 IX86_BUILTIN_CMPPD256,
22548 IX86_BUILTIN_CMPPS256,
22549 IX86_BUILTIN_CVTDQ2PD256,
22550 IX86_BUILTIN_CVTDQ2PS256,
22551 IX86_BUILTIN_CVTPD2PS256,
22552 IX86_BUILTIN_CVTPS2DQ256,
22553 IX86_BUILTIN_CVTPS2PD256,
22554 IX86_BUILTIN_CVTTPD2DQ256,
22555 IX86_BUILTIN_CVTPD2DQ256,
22556 IX86_BUILTIN_CVTTPS2DQ256,
22557 IX86_BUILTIN_EXTRACTF128PD256,
22558 IX86_BUILTIN_EXTRACTF128PS256,
22559 IX86_BUILTIN_EXTRACTF128SI256,
22560 IX86_BUILTIN_VZEROALL,
22561 IX86_BUILTIN_VZEROUPPER,
22562 IX86_BUILTIN_VPERMILVARPD,
22563 IX86_BUILTIN_VPERMILVARPS,
22564 IX86_BUILTIN_VPERMILVARPD256,
22565 IX86_BUILTIN_VPERMILVARPS256,
22566 IX86_BUILTIN_VPERMILPD,
22567 IX86_BUILTIN_VPERMILPS,
22568 IX86_BUILTIN_VPERMILPD256,
22569 IX86_BUILTIN_VPERMILPS256,
22570 IX86_BUILTIN_VPERMIL2PD,
22571 IX86_BUILTIN_VPERMIL2PS,
22572 IX86_BUILTIN_VPERMIL2PD256,
22573 IX86_BUILTIN_VPERMIL2PS256,
22574 IX86_BUILTIN_VPERM2F128PD256,
22575 IX86_BUILTIN_VPERM2F128PS256,
22576 IX86_BUILTIN_VPERM2F128SI256,
22577 IX86_BUILTIN_VBROADCASTSS,
22578 IX86_BUILTIN_VBROADCASTSD256,
22579 IX86_BUILTIN_VBROADCASTSS256,
22580 IX86_BUILTIN_VBROADCASTPD256,
22581 IX86_BUILTIN_VBROADCASTPS256,
22582 IX86_BUILTIN_VINSERTF128PD256,
22583 IX86_BUILTIN_VINSERTF128PS256,
22584 IX86_BUILTIN_VINSERTF128SI256,
22585 IX86_BUILTIN_LOADUPD256,
22586 IX86_BUILTIN_LOADUPS256,
22587 IX86_BUILTIN_STOREUPD256,
22588 IX86_BUILTIN_STOREUPS256,
22589 IX86_BUILTIN_LDDQU256,
22590 IX86_BUILTIN_MOVNTDQ256,
22591 IX86_BUILTIN_MOVNTPD256,
22592 IX86_BUILTIN_MOVNTPS256,
22593 IX86_BUILTIN_LOADDQU256,
22594 IX86_BUILTIN_STOREDQU256,
22595 IX86_BUILTIN_MASKLOADPD,
22596 IX86_BUILTIN_MASKLOADPS,
22597 IX86_BUILTIN_MASKSTOREPD,
22598 IX86_BUILTIN_MASKSTOREPS,
22599 IX86_BUILTIN_MASKLOADPD256,
22600 IX86_BUILTIN_MASKLOADPS256,
22601 IX86_BUILTIN_MASKSTOREPD256,
22602 IX86_BUILTIN_MASKSTOREPS256,
22603 IX86_BUILTIN_MOVSHDUP256,
22604 IX86_BUILTIN_MOVSLDUP256,
22605 IX86_BUILTIN_MOVDDUP256,
22607 IX86_BUILTIN_SQRTPD256,
22608 IX86_BUILTIN_SQRTPS256,
22609 IX86_BUILTIN_SQRTPS_NR256,
22610 IX86_BUILTIN_RSQRTPS256,
22611 IX86_BUILTIN_RSQRTPS_NR256,
22613 IX86_BUILTIN_RCPPS256,
22615 IX86_BUILTIN_ROUNDPD256,
22616 IX86_BUILTIN_ROUNDPS256,
22618 IX86_BUILTIN_UNPCKHPD256,
22619 IX86_BUILTIN_UNPCKLPD256,
22620 IX86_BUILTIN_UNPCKHPS256,
22621 IX86_BUILTIN_UNPCKLPS256,
22623 IX86_BUILTIN_SI256_SI,
22624 IX86_BUILTIN_PS256_PS,
22625 IX86_BUILTIN_PD256_PD,
22626 IX86_BUILTIN_SI_SI256,
22627 IX86_BUILTIN_PS_PS256,
22628 IX86_BUILTIN_PD_PD256,
22630 IX86_BUILTIN_VTESTZPD,
22631 IX86_BUILTIN_VTESTCPD,
22632 IX86_BUILTIN_VTESTNZCPD,
22633 IX86_BUILTIN_VTESTZPS,
22634 IX86_BUILTIN_VTESTCPS,
22635 IX86_BUILTIN_VTESTNZCPS,
22636 IX86_BUILTIN_VTESTZPD256,
22637 IX86_BUILTIN_VTESTCPD256,
22638 IX86_BUILTIN_VTESTNZCPD256,
22639 IX86_BUILTIN_VTESTZPS256,
22640 IX86_BUILTIN_VTESTCPS256,
22641 IX86_BUILTIN_VTESTNZCPS256,
22642 IX86_BUILTIN_PTESTZ256,
22643 IX86_BUILTIN_PTESTC256,
22644 IX86_BUILTIN_PTESTNZC256,
22646 IX86_BUILTIN_MOVMSKPD256,
22647 IX86_BUILTIN_MOVMSKPS256,
22649 /* TFmode support builtins. */
22650 IX86_BUILTIN_INFQ,
22651 IX86_BUILTIN_HUGE_VALQ,
22652 IX86_BUILTIN_FABSQ,
22653 IX86_BUILTIN_COPYSIGNQ,
22655 /* Vectorizer support builtins. */
22656 IX86_BUILTIN_CPYSGNPS,
22657 IX86_BUILTIN_CPYSGNPD,
22659 IX86_BUILTIN_CVTUDQ2PS,
22661 IX86_BUILTIN_VEC_PERM_V2DF,
22662 IX86_BUILTIN_VEC_PERM_V4SF,
22663 IX86_BUILTIN_VEC_PERM_V2DI,
22664 IX86_BUILTIN_VEC_PERM_V4SI,
22665 IX86_BUILTIN_VEC_PERM_V8HI,
22666 IX86_BUILTIN_VEC_PERM_V16QI,
22667 IX86_BUILTIN_VEC_PERM_V2DI_U,
22668 IX86_BUILTIN_VEC_PERM_V4SI_U,
22669 IX86_BUILTIN_VEC_PERM_V8HI_U,
22670 IX86_BUILTIN_VEC_PERM_V16QI_U,
22671 IX86_BUILTIN_VEC_PERM_V4DF,
22672 IX86_BUILTIN_VEC_PERM_V8SF,
22674 /* FMA4 and XOP instructions. */
22675 IX86_BUILTIN_VFMADDSS,
22676 IX86_BUILTIN_VFMADDSD,
22677 IX86_BUILTIN_VFMADDPS,
22678 IX86_BUILTIN_VFMADDPD,
22679 IX86_BUILTIN_VFMSUBSS,
22680 IX86_BUILTIN_VFMSUBSD,
22681 IX86_BUILTIN_VFMSUBPS,
22682 IX86_BUILTIN_VFMSUBPD,
22683 IX86_BUILTIN_VFMADDSUBPS,
22684 IX86_BUILTIN_VFMADDSUBPD,
22685 IX86_BUILTIN_VFMSUBADDPS,
22686 IX86_BUILTIN_VFMSUBADDPD,
22687 IX86_BUILTIN_VFNMADDSS,
22688 IX86_BUILTIN_VFNMADDSD,
22689 IX86_BUILTIN_VFNMADDPS,
22690 IX86_BUILTIN_VFNMADDPD,
22691 IX86_BUILTIN_VFNMSUBSS,
22692 IX86_BUILTIN_VFNMSUBSD,
22693 IX86_BUILTIN_VFNMSUBPS,
22694 IX86_BUILTIN_VFNMSUBPD,
22695 IX86_BUILTIN_VFMADDPS256,
22696 IX86_BUILTIN_VFMADDPD256,
22697 IX86_BUILTIN_VFMSUBPS256,
22698 IX86_BUILTIN_VFMSUBPD256,
22699 IX86_BUILTIN_VFMADDSUBPS256,
22700 IX86_BUILTIN_VFMADDSUBPD256,
22701 IX86_BUILTIN_VFMSUBADDPS256,
22702 IX86_BUILTIN_VFMSUBADDPD256,
22703 IX86_BUILTIN_VFNMADDPS256,
22704 IX86_BUILTIN_VFNMADDPD256,
22705 IX86_BUILTIN_VFNMSUBPS256,
22706 IX86_BUILTIN_VFNMSUBPD256,
22708 IX86_BUILTIN_VPCMOV,
22709 IX86_BUILTIN_VPCMOV_V2DI,
22710 IX86_BUILTIN_VPCMOV_V4SI,
22711 IX86_BUILTIN_VPCMOV_V8HI,
22712 IX86_BUILTIN_VPCMOV_V16QI,
22713 IX86_BUILTIN_VPCMOV_V4SF,
22714 IX86_BUILTIN_VPCMOV_V2DF,
22715 IX86_BUILTIN_VPCMOV256,
22716 IX86_BUILTIN_VPCMOV_V4DI256,
22717 IX86_BUILTIN_VPCMOV_V8SI256,
22718 IX86_BUILTIN_VPCMOV_V16HI256,
22719 IX86_BUILTIN_VPCMOV_V32QI256,
22720 IX86_BUILTIN_VPCMOV_V8SF256,
22721 IX86_BUILTIN_VPCMOV_V4DF256,
22723 IX86_BUILTIN_VPPERM,
22725 IX86_BUILTIN_VPMACSSWW,
22726 IX86_BUILTIN_VPMACSWW,
22727 IX86_BUILTIN_VPMACSSWD,
22728 IX86_BUILTIN_VPMACSWD,
22729 IX86_BUILTIN_VPMACSSDD,
22730 IX86_BUILTIN_VPMACSDD,
22731 IX86_BUILTIN_VPMACSSDQL,
22732 IX86_BUILTIN_VPMACSSDQH,
22733 IX86_BUILTIN_VPMACSDQL,
22734 IX86_BUILTIN_VPMACSDQH,
22735 IX86_BUILTIN_VPMADCSSWD,
22736 IX86_BUILTIN_VPMADCSWD,
22738 IX86_BUILTIN_VPHADDBW,
22739 IX86_BUILTIN_VPHADDBD,
22740 IX86_BUILTIN_VPHADDBQ,
22741 IX86_BUILTIN_VPHADDWD,
22742 IX86_BUILTIN_VPHADDWQ,
22743 IX86_BUILTIN_VPHADDDQ,
22744 IX86_BUILTIN_VPHADDUBW,
22745 IX86_BUILTIN_VPHADDUBD,
22746 IX86_BUILTIN_VPHADDUBQ,
22747 IX86_BUILTIN_VPHADDUWD,
22748 IX86_BUILTIN_VPHADDUWQ,
22749 IX86_BUILTIN_VPHADDUDQ,
22750 IX86_BUILTIN_VPHSUBBW,
22751 IX86_BUILTIN_VPHSUBWD,
22752 IX86_BUILTIN_VPHSUBDQ,
22754 IX86_BUILTIN_VPROTB,
22755 IX86_BUILTIN_VPROTW,
22756 IX86_BUILTIN_VPROTD,
22757 IX86_BUILTIN_VPROTQ,
22758 IX86_BUILTIN_VPROTB_IMM,
22759 IX86_BUILTIN_VPROTW_IMM,
22760 IX86_BUILTIN_VPROTD_IMM,
22761 IX86_BUILTIN_VPROTQ_IMM,
22763 IX86_BUILTIN_VPSHLB,
22764 IX86_BUILTIN_VPSHLW,
22765 IX86_BUILTIN_VPSHLD,
22766 IX86_BUILTIN_VPSHLQ,
22767 IX86_BUILTIN_VPSHAB,
22768 IX86_BUILTIN_VPSHAW,
22769 IX86_BUILTIN_VPSHAD,
22770 IX86_BUILTIN_VPSHAQ,
22772 IX86_BUILTIN_VFRCZSS,
22773 IX86_BUILTIN_VFRCZSD,
22774 IX86_BUILTIN_VFRCZPS,
22775 IX86_BUILTIN_VFRCZPD,
22776 IX86_BUILTIN_VFRCZPS256,
22777 IX86_BUILTIN_VFRCZPD256,
22779 IX86_BUILTIN_VPCOMEQUB,
22780 IX86_BUILTIN_VPCOMNEUB,
22781 IX86_BUILTIN_VPCOMLTUB,
22782 IX86_BUILTIN_VPCOMLEUB,
22783 IX86_BUILTIN_VPCOMGTUB,
22784 IX86_BUILTIN_VPCOMGEUB,
22785 IX86_BUILTIN_VPCOMFALSEUB,
22786 IX86_BUILTIN_VPCOMTRUEUB,
22788 IX86_BUILTIN_VPCOMEQUW,
22789 IX86_BUILTIN_VPCOMNEUW,
22790 IX86_BUILTIN_VPCOMLTUW,
22791 IX86_BUILTIN_VPCOMLEUW,
22792 IX86_BUILTIN_VPCOMGTUW,
22793 IX86_BUILTIN_VPCOMGEUW,
22794 IX86_BUILTIN_VPCOMFALSEUW,
22795 IX86_BUILTIN_VPCOMTRUEUW,
22797 IX86_BUILTIN_VPCOMEQUD,
22798 IX86_BUILTIN_VPCOMNEUD,
22799 IX86_BUILTIN_VPCOMLTUD,
22800 IX86_BUILTIN_VPCOMLEUD,
22801 IX86_BUILTIN_VPCOMGTUD,
22802 IX86_BUILTIN_VPCOMGEUD,
22803 IX86_BUILTIN_VPCOMFALSEUD,
22804 IX86_BUILTIN_VPCOMTRUEUD,
22806 IX86_BUILTIN_VPCOMEQUQ,
22807 IX86_BUILTIN_VPCOMNEUQ,
22808 IX86_BUILTIN_VPCOMLTUQ,
22809 IX86_BUILTIN_VPCOMLEUQ,
22810 IX86_BUILTIN_VPCOMGTUQ,
22811 IX86_BUILTIN_VPCOMGEUQ,
22812 IX86_BUILTIN_VPCOMFALSEUQ,
22813 IX86_BUILTIN_VPCOMTRUEUQ,
22815 IX86_BUILTIN_VPCOMEQB,
22816 IX86_BUILTIN_VPCOMNEB,
22817 IX86_BUILTIN_VPCOMLTB,
22818 IX86_BUILTIN_VPCOMLEB,
22819 IX86_BUILTIN_VPCOMGTB,
22820 IX86_BUILTIN_VPCOMGEB,
22821 IX86_BUILTIN_VPCOMFALSEB,
22822 IX86_BUILTIN_VPCOMTRUEB,
22824 IX86_BUILTIN_VPCOMEQW,
22825 IX86_BUILTIN_VPCOMNEW,
22826 IX86_BUILTIN_VPCOMLTW,
22827 IX86_BUILTIN_VPCOMLEW,
22828 IX86_BUILTIN_VPCOMGTW,
22829 IX86_BUILTIN_VPCOMGEW,
22830 IX86_BUILTIN_VPCOMFALSEW,
22831 IX86_BUILTIN_VPCOMTRUEW,
22833 IX86_BUILTIN_VPCOMEQD,
22834 IX86_BUILTIN_VPCOMNED,
22835 IX86_BUILTIN_VPCOMLTD,
22836 IX86_BUILTIN_VPCOMLED,
22837 IX86_BUILTIN_VPCOMGTD,
22838 IX86_BUILTIN_VPCOMGED,
22839 IX86_BUILTIN_VPCOMFALSED,
22840 IX86_BUILTIN_VPCOMTRUED,
22842 IX86_BUILTIN_VPCOMEQQ,
22843 IX86_BUILTIN_VPCOMNEQ,
22844 IX86_BUILTIN_VPCOMLTQ,
22845 IX86_BUILTIN_VPCOMLEQ,
22846 IX86_BUILTIN_VPCOMGTQ,
22847 IX86_BUILTIN_VPCOMGEQ,
22848 IX86_BUILTIN_VPCOMFALSEQ,
22849 IX86_BUILTIN_VPCOMTRUEQ,
22851 /* LWP instructions. */
22852 IX86_BUILTIN_LLWPCB,
22853 IX86_BUILTIN_SLWPCB,
22854 IX86_BUILTIN_LWPVAL32,
22855 IX86_BUILTIN_LWPVAL64,
22856 IX86_BUILTIN_LWPINS32,
22857 IX86_BUILTIN_LWPINS64,
22859 IX86_BUILTIN_CLZS,
22861 /* FSGSBASE instructions. */
22862 IX86_BUILTIN_RDFSBASE32,
22863 IX86_BUILTIN_RDFSBASE64,
22864 IX86_BUILTIN_RDGSBASE32,
22865 IX86_BUILTIN_RDGSBASE64,
22866 IX86_BUILTIN_WRFSBASE32,
22867 IX86_BUILTIN_WRFSBASE64,
22868 IX86_BUILTIN_WRGSBASE32,
22869 IX86_BUILTIN_WRGSBASE64,
22871 /* RDRND instructions. */
22872 IX86_BUILTIN_RDRAND16,
22873 IX86_BUILTIN_RDRAND32,
22874 IX86_BUILTIN_RDRAND64,
22876 /* F16C instructions. */
22877 IX86_BUILTIN_CVTPH2PS,
22878 IX86_BUILTIN_CVTPH2PS256,
22879 IX86_BUILTIN_CVTPS2PH,
22880 IX86_BUILTIN_CVTPS2PH256,
22882 IX86_BUILTIN_MAX
22885 /* Table for the ix86 builtin decls. */
22886 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
22888 /* Table of all of the builtin functions that are possible with different ISAs
22889 but are waiting to be built until a function is declared to use that
22890 ISA. */
22891 struct builtin_isa {
22892 const char *name; /* function name */
22893 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
22894 int isa; /* isa_flags this builtin is defined for */
22895 bool const_p; /* true if the declaration is constant */
22896 bool set_and_not_built_p;
22899 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
22902 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
22903 of which isa_flags to use in the ix86_builtins_isa array. Stores the
22904 function decl in the ix86_builtins array. Returns the function decl or
22905 NULL_TREE, if the builtin was not added.
22907 If the front end has a special hook for builtin functions, delay adding
22908 builtin functions that aren't in the current ISA until the ISA is changed
22909 with function-specific optimization. Doing so can save about 300K for the
22910 default compiler. When the builtin is expanded, check at that time whether
22911 it is valid.
22913 If the front end doesn't have a special hook, record all builtins, even if
22914 they aren't in the current ISA, in case the user uses function-specific
22915 options for a different ISA, so that we don't get scope errors if a
22916 builtin is added in the middle of a function scope. */
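/* A hypothetical use (the builtin name and enum value below are made up
   purely for illustration):
     def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
                        V2DF_FTYPE_V2DF_V2DF, IX86_BUILTIN_EXAMPLE);
   creates the decl immediately when SSE2 is already enabled; otherwise the
   request is recorded in ix86_builtins_isa and materialized later by
   ix86_add_new_builtins.  */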
22918 static inline tree
22919 def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
22920 enum ix86_builtins code)
22922 tree decl = NULL_TREE;
22924 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
22926 ix86_builtins_isa[(int) code].isa = mask;
22928 mask &= ~OPTION_MASK_ISA_64BIT;
22929 if (mask == 0
22930 || (mask & ix86_isa_flags) != 0
22931 || (lang_hooks.builtin_function
22932 == lang_hooks.builtin_function_ext_scope))
22935 tree type = ix86_get_builtin_func_type (tcode);
22936 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
22937 NULL, NULL_TREE);
22938 ix86_builtins[(int) code] = decl;
22939 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
22941 else
22943 ix86_builtins[(int) code] = NULL_TREE;
22944 ix86_builtins_isa[(int) code].tcode = tcode;
22945 ix86_builtins_isa[(int) code].name = name;
22946 ix86_builtins_isa[(int) code].const_p = false;
22947 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
22951 return decl;
22954 /* Like def_builtin, but also marks the function decl "const". */
22956 static inline tree
22957 def_builtin_const (int mask, const char *name,
22958 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
22960 tree decl = def_builtin (mask, name, tcode, code);
22961 if (decl)
22962 TREE_READONLY (decl) = 1;
22963 else
22964 ix86_builtins_isa[(int) code].const_p = true;
22966 return decl;
22969 /* Add any new builtin functions for a given ISA that may not have been
22970 declared. This saves a bit of space compared to adding all of the
22971 declarations to the tree, even if we didn't use them. */
22973 static void
22974 ix86_add_new_builtins (int isa)
22976 int i;
22978 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
22980 if ((ix86_builtins_isa[i].isa & isa) != 0
22981 && ix86_builtins_isa[i].set_and_not_built_p)
22983 tree decl, type;
22985 /* Don't define the builtin again. */
22986 ix86_builtins_isa[i].set_and_not_built_p = false;
22988 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
22989 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
22990 type, i, BUILT_IN_MD, NULL,
22991 NULL_TREE);
22993 ix86_builtins[i] = decl;
22994 if (ix86_builtins_isa[i].const_p)
22995 TREE_READONLY (decl) = 1;
23000 /* Bits for builtin_description.flag. */
23002 /* Set when we don't support the comparison natively, and should
23003 swap_comparison in order to support it. */
23004 #define BUILTIN_DESC_SWAP_OPERANDS 1
23006 struct builtin_description
23008 const unsigned int mask;
23009 const enum insn_code icode;
23010 const char *const name;
23011 const enum ix86_builtins code;
23012 const enum rtx_code comparison;
23013 const int flag;
23016 static const struct builtin_description bdesc_comi[] =
23018 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
23019 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
23020 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
23021 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
23022 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
23023 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
23024 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
23025 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
23026 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
23027 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
23028 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
23029 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
23030 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
23031 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
23032 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
23033 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
23034 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
23035 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
23036 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
23037 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
23038 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
23039 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
23040 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
23041 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
23044 static const struct builtin_description bdesc_pcmpestr[] =
23046 /* SSE4.2 */
23047 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
23048 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
23049 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
23050 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
23051 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
23052 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
23053 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
23056 static const struct builtin_description bdesc_pcmpistr[] =
23058 /* SSE4.2 */
23059 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
23060 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
23061 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
23062 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
23063 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
23064 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
23065 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
23066 };
23067 
23068 /* Special builtins with variable number of arguments. */
23069 static const struct builtin_description bdesc_special_args[] =
23070 {
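  /* Added note: each builtin_description entry below is
     { ISA option mask, insn code, builtin name (0 when no
       __builtin_ia32_* name is registered here), IX86_BUILTIN_*
       enumerator, rtx comparison code or UNKNOWN, prototype
       enumerator cast to int }.  */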
23071 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
23072 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
23074 /* MMX */
23075 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
23077 /* 3DNow! */
23078 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
23080 /* SSE */
23081 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
23082 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
23083 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
23085 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
23086 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
23087 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
23088 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
23090 /* SSE or 3DNow!A */
23091 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
23092 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
23094 /* SSE2 */
23095 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
23096 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
23097 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
23098 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
23099 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
23100 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
23101 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
23102 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
23103 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
23105 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
23106 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
23108 /* SSE3 */
23109 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
23111 /* SSE4.1 */
23112 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
23114 /* SSE4A */
23115 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
23116 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
23118 /* AVX */
23119 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
23120 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
23122 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
23123 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
23124 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
23125 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
23126 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
23128 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
23129 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
23130 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
23131 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
23132 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
23133 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
23134 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
23136 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
23137 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
23138 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
23140 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
23141 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
23142 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
23143 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
23144 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
23145 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
23146 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
23147 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
23149 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
23150 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
23151 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
23152 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
23153 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
23154 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
23156 /* FSGSBASE */
23157 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
23158 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
23159 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
23160 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
23161 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
23162 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
23163 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
23164 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
23166 /* RDRND */
23167 { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandhi, "__builtin_ia32_rdrand16", IX86_BUILTIN_RDRAND16, UNKNOWN, (int) UINT16_FTYPE_VOID },
23168 { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandsi, "__builtin_ia32_rdrand32", IX86_BUILTIN_RDRAND32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
23169 { OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, CODE_FOR_rdranddi, "__builtin_ia32_rdrand64", IX86_BUILTIN_RDRAND64, UNKNOWN, (int) UINT64_FTYPE_VOID },
23170 };
23171 
23172 /* Builtins with variable number of arguments. */
23173 static const struct builtin_description bdesc_args[] =
23174 {
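  /* Added note: for these general builtins the arguments map onto the
     operands of the insn pattern named in each entry, subject to the
     _COUNT and _SWAP adjustments noted further down.  Entries using
     CODE_FOR_nothing (e.g. the vec_perm builtins below) have no insn
     pattern and are handled specially during expansion.  */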
23175 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
23176 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
23177 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
23178 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
23179 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
23180 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
23181 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
23183 /* MMX */
23184 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23185 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23186 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23187 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23188 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23189 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23191 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23192 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23193 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23194 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23195 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23196 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23197 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23198 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23200 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23201 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23203 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23204 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23205 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23206 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23208 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23209 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23210 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23211 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23212 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23213 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23215 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23216 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23217 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23218 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23219 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23220 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23222 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
23223 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
23224 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
23226 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
23228 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
23229 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
23230 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
23231 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
23232 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
23233 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
23235 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
23236 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
23237 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
23238 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
23239 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
23240 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
23242 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
23243 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
23244 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
23245 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
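  /* Added note: the _COUNT suffix marks shift builtins whose last
     argument is the shift count, supplied either as a plain integer
     (..._SI_COUNT) or as a vector operand (..._V4HI_COUNT,
     ..._V1DI_COUNT, etc.).  */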
23247 /* 3DNow! */
23248 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
23249 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
23250 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
23251 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
23253 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23254 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23255 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23256 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
23257 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
23258 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
23259 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23260 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23261 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23262 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23263 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23264 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23265 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23266 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23267 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23269 /* 3DNow!A */
23270 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
23271 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
23272 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
23273 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
23274 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23275 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23277 /* SSE */
23278 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
23279 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
23280 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
23281 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
23282 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
23283 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
23284 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
23285 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
23286 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
23287 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
23288 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
23289 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
23291 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
23293 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23294 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23295 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23296 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23297 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23298 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23299 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23300 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23302 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
23303 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
23304 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
23305 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
23306 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
23307 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
23308 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
23309 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
23310 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
23311 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
23312 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
23313 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
23314 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
23315 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
23316 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
23317 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
23318 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
23319 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
23320 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
23321 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
23322 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
23323 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
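  /* Added note: the _SWAP suffix marks comparisons implemented by
     exchanging the two inputs, e.g. cmpgtps is expanded as cmpltps
     with swapped operands.  */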
23325 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23326 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23327 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23328 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23330 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23331 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23332 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23333 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23335 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23337 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23338 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23339 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23340 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23341 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23343 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
23344 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
23345 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
23347 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
23349 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
23350 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
23351 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
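  /* Added note: the _VEC_MERGE suffix marks scalar insns (sqrtss,
     rsqrtss, rcpss) that compute only element 0 and merge the
     remaining elements from the source vector.  */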
23353 /* SSE MMX or 3DNow!A */
23354 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23355 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23356 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23358 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23359 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23360 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23361 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23363 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
23364 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
23366 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
23368 /* SSE2 */
23369 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
23371 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
23372 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
23373 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
23374 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
23375 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
23376 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
23377 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
23378 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
23379 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
23380 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
23381 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
23382 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
23384 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
23385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
23386 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
23387 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
23388 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
23389 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
23391 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
23392 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
23393 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
23394 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
23395 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
23397 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
23399 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
23400 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
23401 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
23402 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
23404 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
23405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
23406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
23408 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23409 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23410 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23411 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23412 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23415 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23417 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
23418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
23419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
23420 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
23421 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
23422 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
23423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
23424 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
23425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
23426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
23427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
23428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
23429 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
23430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
23431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
23432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
23433 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
23434 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
23435 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
23436 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
23438 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23439 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23440 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23441 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23443 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23444 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23445 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23446 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23448 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23450 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23451 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23452 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23454 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
23456 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23457 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23458 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23459 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23460 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23461 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23462 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23463 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23465 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23466 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23467 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23468 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23469 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23470 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23471 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23472 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23474 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23475 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23477 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23478 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23479 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23480 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23482 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23483 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23485 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23486 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23487 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23488 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23489 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23490 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23492 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23493 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23494 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23495 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23497 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23498 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23499 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23500 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23501 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23502 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23503 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23504 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23506 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
23507 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
23508 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
23510 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23511 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
23513 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
23514 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
23516 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
23518 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
23519 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
23520 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
23521 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
23523 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
23524 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
23525 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
23526 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
23527 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
23528 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
23529 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
23531 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
23532 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
23533 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
23534 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
23535 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
23536 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
23537 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
23539 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
23540 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
23541 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
23542 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
23544 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
23545 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
23546 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
23548 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
23550 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
23551 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
23553 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
23555 /* SSE2 MMX */
23556 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
23557 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
23559 /* SSE3 */
23560 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
23561 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
23563 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23564 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23565 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23566 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23567 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23568 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23570 /* SSSE3 */
23571 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
23572 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
23573 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
23574 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
23575 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
23576 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
23578 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23579 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23580 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23581 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23582 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23583 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23584 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23585 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23586 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23587 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23588 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23589 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23590 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
23591 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
23592 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23593 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23594 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23595 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23596 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23597 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23598 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23599 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23600 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23601 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23603 /* SSSE3. */
23604 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
23605 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
23607 /* SSE4.1 */
23608 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
23609 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
23610 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
23611 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
23612 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
23613 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
23614 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
23615 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
23616 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
23617 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
23619 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
23620 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
23621 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
23622 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
23623 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
23624 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
23625 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
23626 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
23627 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
23628 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
23629 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
23630 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
23631 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
23633 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
23634 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23635 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23636 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23637 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23638 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23639 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23640 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23641 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23642 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23643 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
23644 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23646 /* SSE4.1 */
23647 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
23648 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
23649 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
23650 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
23652 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
23653 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
23654 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
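/* For the three ptest entries above, the rtx_code field is not a vector
   comparison: EQ selects the ZF result (ptestz), LTU the CF result (ptestc)
   and GTU the "neither flag set" result (ptestnzc).  The AVX vtest and
   ptest256 entries further down use the same convention.  */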
23656 /* SSE4.2 */
23657 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23658 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
23659 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
23660 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
23661 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
23663 /* SSE4A */
23664 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
23665 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
23666 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
23667 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23669 /* AES */
23670 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
23671 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
23673 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23674 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23675 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23676 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23678 /* PCLMUL */
23679 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
23681 /* AVX */
23682 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23683 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23684 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23685 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23686 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23687 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23688 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23689 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23690 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23691 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23692 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23693 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23694 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23695 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23696 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23697 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23698 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23699 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23700 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23701 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23702 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23703 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23704 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23705 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23706 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23707 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23709 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
23710 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
23711 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
23712 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
23714 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
23715 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
23716 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
23717 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
23718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
23719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
23720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
23721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
23722 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
23723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
23724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
23725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
23726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
23727 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
23728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
23729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
23730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
23731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
23732 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
23733 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
23734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
23735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
23736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
23737 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
23738 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
23739 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
23740 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
23741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
23742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
23743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
23744 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
23745 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
23746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
23747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
23749 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
23750 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
23751 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
23753 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
23754 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
23755 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
23756 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
23757 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
23759 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
23761 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
23762 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
23764 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23765 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23766 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23767 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23769 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
23770 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
23771 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
23772 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
23773 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
23774 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
23776 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
23777 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
23778 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
23779 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
23780 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
23781 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
23782 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
23783 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
23784 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
23785 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
23786 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
23787 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
23788 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
23789 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
23790 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
23792 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
23793 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
23795 { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
23797 /* F16C */
23798 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
23799 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
23800 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
23801 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
23802 };
23804 /* FMA4 and XOP. */
23805 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
23806 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
23807 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
23808 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
23809 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
23810 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
23811 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
23812 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
23813 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
23814 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
23815 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
23816 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
23817 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
23818 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
23819 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
23820 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
23821 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
23822 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
23823 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
23824 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
23825 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
23826 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
23827 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
23828 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
23829 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
23830 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
23831 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
23832 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
23833 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
23834 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
23835 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
23836 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
23837 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
23838 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
23839 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
23840 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
23841 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
23842 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
23843 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
23844 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
23845 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
23846 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
23847 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
23848 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
23849 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
23850 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
23851 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
23852 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
23853 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
23854 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
23855 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
23856 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
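/* The MULTI_ARG_* macros above are simply shorthand aliases for
   ix86_builtin_func_type values.  Each row of bdesc_multi_arg below maps one
   FMA4/XOP builtin to its ISA mask, insn pattern, user-visible name, builtin
   code, optional comparison code and function prototype.  */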
23858 static const struct builtin_description bdesc_multi_arg[] =
23859 {
23860 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv4sf4, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
23861 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv2df4, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
23862 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4sf4, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23863 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv2df4, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23864 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv4sf4, "__builtin_ia32_vfmsubss", IX86_BUILTIN_VFMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
23865 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv2df4, "__builtin_ia32_vfmsubsd", IX86_BUILTIN_VFMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
23866 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4sf4, "__builtin_ia32_vfmsubps", IX86_BUILTIN_VFMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23867 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv2df4, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23869 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv4sf4, "__builtin_ia32_vfnmaddss", IX86_BUILTIN_VFNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
23870 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv2df4, "__builtin_ia32_vfnmaddsd", IX86_BUILTIN_VFNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
23871 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4sf4, "__builtin_ia32_vfnmaddps", IX86_BUILTIN_VFNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23872 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv2df4, "__builtin_ia32_vfnmaddpd", IX86_BUILTIN_VFNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23873 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv4sf4, "__builtin_ia32_vfnmsubss", IX86_BUILTIN_VFNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
23874 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv2df4, "__builtin_ia32_vfnmsubsd", IX86_BUILTIN_VFNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
23875 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4sf4, "__builtin_ia32_vfnmsubps", IX86_BUILTIN_VFNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23876 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv2df4, "__builtin_ia32_vfnmsubpd", IX86_BUILTIN_VFNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23878 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4sf4, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23879 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv2df4, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23880 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4sf4, "__builtin_ia32_vfmsubaddps", IX86_BUILTIN_VFMSUBADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23881 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv2df4, "__builtin_ia32_vfmsubaddpd", IX86_BUILTIN_VFMSUBADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23883 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv8sf4256, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23884 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4df4256, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23885 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv8sf4256, "__builtin_ia32_vfmsubps256", IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23886 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4df4256, "__builtin_ia32_vfmsubpd256", IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23888 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv8sf4256, "__builtin_ia32_vfnmaddps256", IX86_BUILTIN_VFNMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23889 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4df4256, "__builtin_ia32_vfnmaddpd256", IX86_BUILTIN_VFNMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23890 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv8sf4256, "__builtin_ia32_vfnmsubps256", IX86_BUILTIN_VFNMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23891 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4df4256, "__builtin_ia32_vfnmsubpd256", IX86_BUILTIN_VFNMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23893 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23894 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23895 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4, "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23896 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
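/* In the FMA4 rows above, the fma4i_vmfmadd/vmfmsub patterns are the scalar
   (ss/sd) forms that operate on the low element only; the remaining patterns
   operate on whole vectors.  All take the three operands described by their
   MULTI_ARG_3_* prototype.  */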
23898 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
23899 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
23900 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
23901 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
23902 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
23903 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
23904 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
23906 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
23907 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
23908 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
23909 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
23910 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
23911 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23912 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23914 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
23916 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
23917 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
23918 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
23919 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
23920 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
23921 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
23922 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
23923 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
23924 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
23925 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
23926 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
23927 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
23929 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
23930 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
23931 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
23932 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
23933 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
23934 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
23935 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
23936 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
23937 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
23938 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
23939 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
23940 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
23941 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
23942 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
23943 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
23944 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
23946 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
23947 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
23948 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
23949 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
23950 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
23951 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
23953 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
23954 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
23955 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
23956 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
23957 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
23958 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
23959 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
23960 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
23961 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
23962 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
23963 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
23964 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
23965 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
23966 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
23967 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
23969 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
23970 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
23971 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
23972 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
23973 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
23974 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
23975 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
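/* "__builtin_ia32_vpcomneqb" above is just an alternate spelling of
   "__builtin_ia32_vpcomneb"; both rows share IX86_BUILTIN_VPCOMNEB.  The
   word, dword, qword and unsigned groups that follow repeat the same
   aliasing for their "ne"/"neq" pairs.  */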
23977 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
23978 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
23979 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
23980 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
23981 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
23982 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
23983 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
23985 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
23986 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
23987 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
23988 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
23989 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
23990 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
23991 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
23993 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
23994 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
23995 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
23996 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
23997 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
23998 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
23999 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
24001 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
24002 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
24003 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
24004 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
24005 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
24006 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
24007 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
24009 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
24010 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
24011 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
24012 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
24013 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
24014 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
24015 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
24017 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
24018 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
24019 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
24020 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
24021 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
24022 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
24023 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
24025 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
24026 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
24027 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
24028 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
24029 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
24030 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
24031 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
24033 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
24034 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
24035 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
24036 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
24037 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
24038 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
24039 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
24040 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
24042 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
24043 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
24044 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
24045 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
24046 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
24047 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
24048 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
24049 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
24051 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
24052 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
24053 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
24054 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
24055 };
24058 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
24059 not in the current target ISA, so that the user can compile particular
24060 modules with target-specific options that differ from the command-line
24061 options. */
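/* Illustrative sketch, not part of this file: once the tables above have
   been registered by the function below, user code built with the matching
   ISA option can call a builtin directly.  A hypothetical example using
   -mavx:

       typedef float v8sf __attribute__ ((vector_size (32)));

       v8sf
       add256 (v8sf a, v8sf b)
       {
         return __builtin_ia32_addps256 (a, b);
       }

   The ia32 intrinsic headers are essentially thin wrappers around the
   builtins registered here.  */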
24062 static void
24063 ix86_init_mmx_sse_builtins (void)
24064 {
24065 const struct builtin_description * d;
24066 enum ix86_builtin_func_type ftype;
24067 size_t i;
24069 /* Add all special builtins with a variable number of operands. */
24070 for (i = 0, d = bdesc_special_args;
24071 i < ARRAY_SIZE (bdesc_special_args);
24072 i++, d++)
24074 if (d->name == 0)
24075 continue;
24077 ftype = (enum ix86_builtin_func_type) d->flag;
24078 def_builtin (d->mask, d->name, ftype, d->code);
24081 /* Add all builtins with a variable number of operands. */
24082 for (i = 0, d = bdesc_args;
24083 i < ARRAY_SIZE (bdesc_args);
24084 i++, d++)
24086 if (d->name == 0)
24087 continue;
24089 ftype = (enum ix86_builtin_func_type) d->flag;
24090 def_builtin_const (d->mask, d->name, ftype, d->code);
24093 /* pcmpestr[im] insns. */
24094 for (i = 0, d = bdesc_pcmpestr;
24095 i < ARRAY_SIZE (bdesc_pcmpestr);
24096 i++, d++)
24098 if (d->code == IX86_BUILTIN_PCMPESTRM128)
24099 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
24100 else
24101 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
24102 def_builtin_const (d->mask, d->name, ftype, d->code);
24105 /* pcmpistr[im] insns. */
24106 for (i = 0, d = bdesc_pcmpistr;
24107 i < ARRAY_SIZE (bdesc_pcmpistr);
24108 i++, d++)
24110 if (d->code == IX86_BUILTIN_PCMPISTRM128)
24111 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
24112 else
24113 ftype = INT_FTYPE_V16QI_V16QI_INT;
24114 def_builtin_const (d->mask, d->name, ftype, d->code);
24117 /* comi/ucomi insns. */
24118 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
24120 if (d->mask == OPTION_MASK_ISA_SSE2)
24121 ftype = INT_FTYPE_V2DF_V2DF;
24122 else
24123 ftype = INT_FTYPE_V4SF_V4SF;
24124 def_builtin_const (d->mask, d->name, ftype, d->code);
24127 /* SSE */
24128 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
24129 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
24130 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
24131 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
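/* Illustrative sketch, not part of this file: these two builtins are what
   xmmintrin.h uses to implement the MXCSR intrinsics, roughly

     unsigned int _mm_getcsr (void)        { return __builtin_ia32_stmxcsr (); }
     void         _mm_setcsr (unsigned int u) { __builtin_ia32_ldmxcsr (u); }

   (exact header wording varies between releases; shown only to make the
   def_builtin calls above concrete).  */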
24133 /* SSE or 3DNow!A */
24134 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
24135 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
24136 IX86_BUILTIN_MASKMOVQ);
24138 /* SSE2 */
24139 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
24140 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
24142 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
24143 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
24144 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
24145 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
24147 /* SSE3. */
24148 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
24149 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
24150 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
24151 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
24153 /* AES */
24154 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
24155 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
24156 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
24157 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
24158 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
24159 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
24160 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
24161 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
24162 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
24163 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
24164 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
24165 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
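/* Illustrative sketch, not part of this file: wmmintrin.h wraps these AES
   builtins, e.g. (assuming the usual __m128i/__v2di typedefs)

     __m128i _mm_aesenc_si128 (__m128i x, __m128i k)
     { return (__m128i) __builtin_ia32_aesenc128 ((__v2di) x, (__v2di) k); }

   so each def_builtin_const above backs one _mm_aes* intrinsic.  */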
24167 /* PCLMUL */
24168 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
24169 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
24171 /* MMX access to the vec_init patterns. */
24172 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
24173 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
24175 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
24176 V4HI_FTYPE_HI_HI_HI_HI,
24177 IX86_BUILTIN_VEC_INIT_V4HI);
24179 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
24180 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
24181 IX86_BUILTIN_VEC_INIT_V8QI);
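/* Illustrative sketch, not part of this file: mmintrin.h reaches these
   vec_init builtins instead of using vector initializers, e.g.

     __m64 _mm_set_pi32 (int i1, int i0)
     { return (__m64) __builtin_ia32_vec_init_v2si (i0, i1); }

   which keeps MMX register use under control of the intrinsics, as the
   comment before ix86_expand_vec_init_builtin below explains.  */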
24183 /* Access to the vec_extract patterns. */
24184 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
24185 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
24186 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
24187 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
24188 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
24189 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
24190 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
24191 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
24192 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
24193 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
24195 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
24196 "__builtin_ia32_vec_ext_v4hi",
24197 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
24199 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
24200 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
24202 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
24203 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
24205 /* Access to the vec_set patterns. */
24206 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
24207 "__builtin_ia32_vec_set_v2di",
24208 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
24210 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
24211 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
24213 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
24214 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
24216 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
24217 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
24219 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
24220 "__builtin_ia32_vec_set_v4hi",
24221 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
24223 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
24224 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
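/* Illustrative sketch, not part of this file: the vec_set builtins back the
   element-insert intrinsics, e.g. in emmintrin.h roughly

     __m128i _mm_insert_epi16 (__m128i a, int d, const int n)
     { return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi) a, d, n); }

   (the header details are an assumption here; see
   ix86_expand_vec_set_builtin below for how the call is expanded).  */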
24226 /* Add FMA4 multi-arg instructions. */
24227 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
24229 if (d->name == 0)
24230 continue;
24232 ftype = (enum ix86_builtin_func_type) d->flag;
24233 def_builtin_const (d->mask, d->name, ftype, d->code);
24237 /* Internal method for ix86_init_builtins. */
24239 static void
24240 ix86_init_builtins_va_builtins_abi (void)
24242 tree ms_va_ref, sysv_va_ref;
24243 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
24244 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
24245 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
24246 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
24248 if (!TARGET_64BIT)
24249 return;
24250 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
24251 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
24252 ms_va_ref = build_reference_type (ms_va_list_type_node);
24253 sysv_va_ref =
24254 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
24256 fnvoid_va_end_ms =
24257 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
24258 fnvoid_va_start_ms =
24259 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
24260 fnvoid_va_end_sysv =
24261 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
24262 fnvoid_va_start_sysv =
24263 build_varargs_function_type_list (void_type_node, sysv_va_ref,
24264 NULL_TREE);
24265 fnvoid_va_copy_ms =
24266 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
24267 NULL_TREE);
24268 fnvoid_va_copy_sysv =
24269 build_function_type_list (void_type_node, sysv_va_ref,
24270 sysv_va_ref, NULL_TREE);
24272 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
24273 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
24274 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
24275 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
24276 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
24277 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
24278 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
24279 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24280 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
24281 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24282 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
24283 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
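/* Illustrative sketch (user-level usage is an assumption, not taken from
   this file): on x86_64 these builtins let code written for one ABI consume
   varargs of the other, e.g.

     __attribute__ ((ms_abi)) int first_arg (int n, ...)
     {
       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, n);
       int v = __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return v;
     }

   Plain va_start/va_end from <stdarg.h> are handled the same way inside an
   ms_abi function; the explicit spellings above just make the
   add_builtin_function calls concrete.  */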
24286 static void
24287 ix86_init_builtin_types (void)
24289 tree float128_type_node, float80_type_node;
24291 /* The __float80 type. */
24292 float80_type_node = long_double_type_node;
24293 if (TYPE_MODE (float80_type_node) != XFmode)
24295 /* The __float80 type. */
24296 float80_type_node = make_node (REAL_TYPE);
24298 TYPE_PRECISION (float80_type_node) = 80;
24299 layout_type (float80_type_node);
24301 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
24303 /* The __float128 type. */
24304 float128_type_node = make_node (REAL_TYPE);
24305 TYPE_PRECISION (float128_type_node) = 128;
24306 layout_type (float128_type_node);
24307 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
24309 /* This macro is built by i386-builtin-types.awk. */
24310 DEFINE_BUILTIN_PRIMITIVE_TYPES;
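/* Illustrative sketch, not part of this file: once registered, the two
   types are usable directly from C, e.g.

     __float128 q = 1.0Q / 3.0Q;   // TFmode, 128-bit quad precision
     __float80  w = 1.0W;          // XFmode, 80-bit extended precision

   (the Q and W constant suffixes are a GCC extension; on targets where
   long double is already XFmode, __float80 is simply long double, as the
   code above shows).  */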
24313 static void
24314 ix86_init_builtins (void)
24316 tree t;
24318 ix86_init_builtin_types ();
24320 /* TFmode support builtins. */
24321 def_builtin_const (0, "__builtin_infq",
24322 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
24323 def_builtin_const (0, "__builtin_huge_valq",
24324 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
24326 /* We will expand them to a normal call if SSE2 isn't available, since
24327 they are used by libgcc. */
24328 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
24329 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
24330 BUILT_IN_MD, "__fabstf2", NULL_TREE);
24331 TREE_READONLY (t) = 1;
24332 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
24334 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
24335 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
24336 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
24337 TREE_READONLY (t) = 1;
24338 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
24340 ix86_init_mmx_sse_builtins ();
24342 if (TARGET_64BIT)
24343 ix86_init_builtins_va_builtins_abi ();
24346 /* Return the ix86 builtin for CODE. */
24348 static tree
24349 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
24351 if (code >= IX86_BUILTIN_MAX)
24352 return error_mark_node;
24354 return ix86_builtins[code];
24357 /* Errors in the source file can cause expand_expr to return const0_rtx
24358 where we expect a vector. To avoid crashing, use one of the vector
24359 clear instructions. */
24360 static rtx
24361 safe_vector_operand (rtx x, enum machine_mode mode)
24363 if (x == const0_rtx)
24364 x = CONST0_RTX (mode);
24365 return x;
24368 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
24370 static rtx
24371 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
24373 rtx pat;
24374 tree arg0 = CALL_EXPR_ARG (exp, 0);
24375 tree arg1 = CALL_EXPR_ARG (exp, 1);
24376 rtx op0 = expand_normal (arg0);
24377 rtx op1 = expand_normal (arg1);
24378 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24379 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24380 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24382 if (VECTOR_MODE_P (mode0))
24383 op0 = safe_vector_operand (op0, mode0);
24384 if (VECTOR_MODE_P (mode1))
24385 op1 = safe_vector_operand (op1, mode1);
24387 if (optimize || !target
24388 || GET_MODE (target) != tmode
24389 || !insn_data[icode].operand[0].predicate (target, tmode))
24390 target = gen_reg_rtx (tmode);
24392 if (GET_MODE (op1) == SImode && mode1 == TImode)
24394 rtx x = gen_reg_rtx (V4SImode);
24395 emit_insn (gen_sse2_loadd (x, op1));
24396 op1 = gen_lowpart (TImode, x);
24399 if (!insn_data[icode].operand[1].predicate (op0, mode0))
24400 op0 = copy_to_mode_reg (mode0, op0);
24401 if (!insn_data[icode].operand[2].predicate (op1, mode1))
24402 op1 = copy_to_mode_reg (mode1, op1);
24404 pat = GEN_FCN (icode) (target, op0, op1);
24405 if (! pat)
24406 return 0;
24408 emit_insn (pat);
24410 return target;
24413 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
24415 static rtx
24416 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
24417 enum ix86_builtin_func_type m_type,
24418 enum rtx_code sub_code)
24420 rtx pat;
24421 int i;
24422 int nargs;
24423 bool comparison_p = false;
24424 bool tf_p = false;
24425 bool last_arg_constant = false;
24426 int num_memory = 0;
24427 struct {
24428 rtx op;
24429 enum machine_mode mode;
24430 } args[4];
24432 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24434 switch (m_type)
24436 case MULTI_ARG_4_DF2_DI_I:
24437 case MULTI_ARG_4_DF2_DI_I1:
24438 case MULTI_ARG_4_SF2_SI_I:
24439 case MULTI_ARG_4_SF2_SI_I1:
24440 nargs = 4;
24441 last_arg_constant = true;
24442 break;
24444 case MULTI_ARG_3_SF:
24445 case MULTI_ARG_3_DF:
24446 case MULTI_ARG_3_SF2:
24447 case MULTI_ARG_3_DF2:
24448 case MULTI_ARG_3_DI:
24449 case MULTI_ARG_3_SI:
24450 case MULTI_ARG_3_SI_DI:
24451 case MULTI_ARG_3_HI:
24452 case MULTI_ARG_3_HI_SI:
24453 case MULTI_ARG_3_QI:
24454 case MULTI_ARG_3_DI2:
24455 case MULTI_ARG_3_SI2:
24456 case MULTI_ARG_3_HI2:
24457 case MULTI_ARG_3_QI2:
24458 nargs = 3;
24459 break;
24461 case MULTI_ARG_2_SF:
24462 case MULTI_ARG_2_DF:
24463 case MULTI_ARG_2_DI:
24464 case MULTI_ARG_2_SI:
24465 case MULTI_ARG_2_HI:
24466 case MULTI_ARG_2_QI:
24467 nargs = 2;
24468 break;
24470 case MULTI_ARG_2_DI_IMM:
24471 case MULTI_ARG_2_SI_IMM:
24472 case MULTI_ARG_2_HI_IMM:
24473 case MULTI_ARG_2_QI_IMM:
24474 nargs = 2;
24475 last_arg_constant = true;
24476 break;
24478 case MULTI_ARG_1_SF:
24479 case MULTI_ARG_1_DF:
24480 case MULTI_ARG_1_SF2:
24481 case MULTI_ARG_1_DF2:
24482 case MULTI_ARG_1_DI:
24483 case MULTI_ARG_1_SI:
24484 case MULTI_ARG_1_HI:
24485 case MULTI_ARG_1_QI:
24486 case MULTI_ARG_1_SI_DI:
24487 case MULTI_ARG_1_HI_DI:
24488 case MULTI_ARG_1_HI_SI:
24489 case MULTI_ARG_1_QI_DI:
24490 case MULTI_ARG_1_QI_SI:
24491 case MULTI_ARG_1_QI_HI:
24492 nargs = 1;
24493 break;
24495 case MULTI_ARG_2_DI_CMP:
24496 case MULTI_ARG_2_SI_CMP:
24497 case MULTI_ARG_2_HI_CMP:
24498 case MULTI_ARG_2_QI_CMP:
24499 nargs = 2;
24500 comparison_p = true;
24501 break;
24503 case MULTI_ARG_2_SF_TF:
24504 case MULTI_ARG_2_DF_TF:
24505 case MULTI_ARG_2_DI_TF:
24506 case MULTI_ARG_2_SI_TF:
24507 case MULTI_ARG_2_HI_TF:
24508 case MULTI_ARG_2_QI_TF:
24509 nargs = 2;
24510 tf_p = true;
24511 break;
24513 default:
24514 gcc_unreachable ();
24517 if (optimize || !target
24518 || GET_MODE (target) != tmode
24519 || !insn_data[icode].operand[0].predicate (target, tmode))
24520 target = gen_reg_rtx (tmode);
24522 gcc_assert (nargs <= 4);
24524 for (i = 0; i < nargs; i++)
24526 tree arg = CALL_EXPR_ARG (exp, i);
24527 rtx op = expand_normal (arg);
24528 int adjust = (comparison_p) ? 1 : 0;
24529 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
24531 if (last_arg_constant && i == nargs-1)
24533 if (!CONST_INT_P (op))
24535 error ("last argument must be an immediate");
24536 return gen_reg_rtx (tmode);
24539 else
24541 if (VECTOR_MODE_P (mode))
24542 op = safe_vector_operand (op, mode);
24544 /* If we aren't optimizing, only allow one memory operand to be
24545 generated. */
24546 if (memory_operand (op, mode))
24547 num_memory++;
24549 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
24551 if (optimize
24552 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
24553 || num_memory > 1)
24554 op = force_reg (mode, op);
24557 args[i].op = op;
24558 args[i].mode = mode;
24561 switch (nargs)
24563 case 1:
24564 pat = GEN_FCN (icode) (target, args[0].op);
24565 break;
24567 case 2:
24568 if (tf_p)
24569 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
24570 GEN_INT ((int)sub_code));
24571 else if (! comparison_p)
24572 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24573 else
24575 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
24576 args[0].op,
24577 args[1].op);
24579 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
24581 break;
24583 case 3:
24584 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24585 break;
24587 case 4:
24588 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
24589 break;
24591 default:
24592 gcc_unreachable ();
24595 if (! pat)
24596 return 0;
24598 emit_insn (pat);
24599 return target;
24602 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
24603 insns with vec_merge. */
24605 static rtx
24606 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
24607 rtx target)
24609 rtx pat;
24610 tree arg0 = CALL_EXPR_ARG (exp, 0);
24611 rtx op1, op0 = expand_normal (arg0);
24612 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24613 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24615 if (optimize || !target
24616 || GET_MODE (target) != tmode
24617 || !insn_data[icode].operand[0].predicate (target, tmode))
24618 target = gen_reg_rtx (tmode);
24620 if (VECTOR_MODE_P (mode0))
24621 op0 = safe_vector_operand (op0, mode0);
24623 if ((optimize && !register_operand (op0, mode0))
24624 || !insn_data[icode].operand[1].predicate (op0, mode0))
24625 op0 = copy_to_mode_reg (mode0, op0);
24627 op1 = op0;
24628 if (!insn_data[icode].operand[2].predicate (op1, mode0))
24629 op1 = copy_to_mode_reg (mode0, op1);
24631 pat = GEN_FCN (icode) (target, op0, op1);
24632 if (! pat)
24633 return 0;
24634 emit_insn (pat);
24635 return target;
24638 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
24640 static rtx
24641 ix86_expand_sse_compare (const struct builtin_description *d,
24642 tree exp, rtx target, bool swap)
24644 rtx pat;
24645 tree arg0 = CALL_EXPR_ARG (exp, 0);
24646 tree arg1 = CALL_EXPR_ARG (exp, 1);
24647 rtx op0 = expand_normal (arg0);
24648 rtx op1 = expand_normal (arg1);
24649 rtx op2;
24650 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
24651 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
24652 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
24653 enum rtx_code comparison = d->comparison;
24655 if (VECTOR_MODE_P (mode0))
24656 op0 = safe_vector_operand (op0, mode0);
24657 if (VECTOR_MODE_P (mode1))
24658 op1 = safe_vector_operand (op1, mode1);
24660 /* Swap operands if we have a comparison that isn't available in
24661 hardware. */
24662 if (swap)
24664 rtx tmp = gen_reg_rtx (mode1);
24665 emit_move_insn (tmp, op1);
24666 op1 = op0;
24667 op0 = tmp;
24670 if (optimize || !target
24671 || GET_MODE (target) != tmode
24672 || !insn_data[d->icode].operand[0].predicate (target, tmode))
24673 target = gen_reg_rtx (tmode);
24675 if ((optimize && !register_operand (op0, mode0))
24676 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
24677 op0 = copy_to_mode_reg (mode0, op0);
24678 if ((optimize && !register_operand (op1, mode1))
24679 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
24680 op1 = copy_to_mode_reg (mode1, op1);
24682 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
24683 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
24684 if (! pat)
24685 return 0;
24686 emit_insn (pat);
24687 return target;
24690 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
24692 static rtx
24693 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
24694 rtx target)
24696 rtx pat;
24697 tree arg0 = CALL_EXPR_ARG (exp, 0);
24698 tree arg1 = CALL_EXPR_ARG (exp, 1);
24699 rtx op0 = expand_normal (arg0);
24700 rtx op1 = expand_normal (arg1);
24701 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24702 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24703 enum rtx_code comparison = d->comparison;
24705 if (VECTOR_MODE_P (mode0))
24706 op0 = safe_vector_operand (op0, mode0);
24707 if (VECTOR_MODE_P (mode1))
24708 op1 = safe_vector_operand (op1, mode1);
24710 /* Swap operands if we have a comparison that isn't available in
24711 hardware. */
24712 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
24714 rtx tmp = op1;
24715 op1 = op0;
24716 op0 = tmp;
24719 target = gen_reg_rtx (SImode);
24720 emit_move_insn (target, const0_rtx);
24721 target = gen_rtx_SUBREG (QImode, target, 0);
24723 if ((optimize && !register_operand (op0, mode0))
24724 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
24725 op0 = copy_to_mode_reg (mode0, op0);
24726 if ((optimize && !register_operand (op1, mode1))
24727 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
24728 op1 = copy_to_mode_reg (mode1, op1);
24730 pat = GEN_FCN (d->icode) (op0, op1);
24731 if (! pat)
24732 return 0;
24733 emit_insn (pat);
24734 emit_insn (gen_rtx_SET (VOIDmode,
24735 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24736 gen_rtx_fmt_ee (comparison, QImode,
24737 SET_DEST (pat),
24738 const0_rtx)));
24740 return SUBREG_REG (target);
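/* Illustrative sketch, not part of this file: the comi builtins back the
   scalar-compare intrinsics, e.g. in xmmintrin.h roughly

     int _mm_comieq_ss (__m128 a, __m128 b)
     { return __builtin_ia32_comieq ((__v4sf) a, (__v4sf) b); }

   The expander above copies the flags-based result into the low byte of an
   SImode pseudo via STRICT_LOW_PART, hence the SUBREG_REG return.  */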
24743 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
24745 static rtx
24746 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24747 rtx target)
24749 rtx pat;
24750 tree arg0 = CALL_EXPR_ARG (exp, 0);
24751 tree arg1 = CALL_EXPR_ARG (exp, 1);
24752 rtx op0 = expand_normal (arg0);
24753 rtx op1 = expand_normal (arg1);
24754 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24755 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24756 enum rtx_code comparison = d->comparison;
24758 if (VECTOR_MODE_P (mode0))
24759 op0 = safe_vector_operand (op0, mode0);
24760 if (VECTOR_MODE_P (mode1))
24761 op1 = safe_vector_operand (op1, mode1);
24763 target = gen_reg_rtx (SImode);
24764 emit_move_insn (target, const0_rtx);
24765 target = gen_rtx_SUBREG (QImode, target, 0);
24767 if ((optimize && !register_operand (op0, mode0))
24768 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
24769 op0 = copy_to_mode_reg (mode0, op0);
24770 if ((optimize && !register_operand (op1, mode1))
24771 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
24772 op1 = copy_to_mode_reg (mode1, op1);
24774 pat = GEN_FCN (d->icode) (op0, op1);
24775 if (! pat)
24776 return 0;
24777 emit_insn (pat);
24778 emit_insn (gen_rtx_SET (VOIDmode,
24779 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24780 gen_rtx_fmt_ee (comparison, QImode,
24781 SET_DEST (pat),
24782 const0_rtx)));
24784 return SUBREG_REG (target);
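/* Illustrative sketch, not part of this file: the ptest builtins back
   intrinsics such as (smmintrin.h, roughly)

     int _mm_testz_si128 (__m128i m, __m128i v)
     { return __builtin_ia32_ptestz128 ((__v2di) m, (__v2di) v); }

   which is why the expander materializes a 0/1 integer result rather than
   returning a vector.  */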
24787 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
24789 static rtx
24790 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24791 tree exp, rtx target)
24793 rtx pat;
24794 tree arg0 = CALL_EXPR_ARG (exp, 0);
24795 tree arg1 = CALL_EXPR_ARG (exp, 1);
24796 tree arg2 = CALL_EXPR_ARG (exp, 2);
24797 tree arg3 = CALL_EXPR_ARG (exp, 3);
24798 tree arg4 = CALL_EXPR_ARG (exp, 4);
24799 rtx scratch0, scratch1;
24800 rtx op0 = expand_normal (arg0);
24801 rtx op1 = expand_normal (arg1);
24802 rtx op2 = expand_normal (arg2);
24803 rtx op3 = expand_normal (arg3);
24804 rtx op4 = expand_normal (arg4);
24805 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24807 tmode0 = insn_data[d->icode].operand[0].mode;
24808 tmode1 = insn_data[d->icode].operand[1].mode;
24809 modev2 = insn_data[d->icode].operand[2].mode;
24810 modei3 = insn_data[d->icode].operand[3].mode;
24811 modev4 = insn_data[d->icode].operand[4].mode;
24812 modei5 = insn_data[d->icode].operand[5].mode;
24813 modeimm = insn_data[d->icode].operand[6].mode;
24815 if (VECTOR_MODE_P (modev2))
24816 op0 = safe_vector_operand (op0, modev2);
24817 if (VECTOR_MODE_P (modev4))
24818 op2 = safe_vector_operand (op2, modev4);
24820 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
24821 op0 = copy_to_mode_reg (modev2, op0);
24822 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
24823 op1 = copy_to_mode_reg (modei3, op1);
24824 if ((optimize && !register_operand (op2, modev4))
24825 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
24826 op2 = copy_to_mode_reg (modev4, op2);
24827 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
24828 op3 = copy_to_mode_reg (modei5, op3);
24830 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
24832 error ("the fifth argument must be a 8-bit immediate");
24833 return const0_rtx;
24836 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24838 if (optimize || !target
24839 || GET_MODE (target) != tmode0
24840 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
24841 target = gen_reg_rtx (tmode0);
24843 scratch1 = gen_reg_rtx (tmode1);
24845 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
24847 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24849 if (optimize || !target
24850 || GET_MODE (target) != tmode1
24851 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
24852 target = gen_reg_rtx (tmode1);
24854 scratch0 = gen_reg_rtx (tmode0);
24856 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
24858 else
24860 gcc_assert (d->flag);
24862 scratch0 = gen_reg_rtx (tmode0);
24863 scratch1 = gen_reg_rtx (tmode1);
24865 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
24868 if (! pat)
24869 return 0;
24871 emit_insn (pat);
24873 if (d->flag)
24875 target = gen_reg_rtx (SImode);
24876 emit_move_insn (target, const0_rtx);
24877 target = gen_rtx_SUBREG (QImode, target, 0);
24879 emit_insn
24880 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24881 gen_rtx_fmt_ee (EQ, QImode,
24882 gen_rtx_REG ((enum machine_mode) d->flag,
24883 FLAGS_REG),
24884 const0_rtx)));
24885 return SUBREG_REG (target);
24887 else
24888 return target;
24892 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
24894 static rtx
24895 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24896 tree exp, rtx target)
24898 rtx pat;
24899 tree arg0 = CALL_EXPR_ARG (exp, 0);
24900 tree arg1 = CALL_EXPR_ARG (exp, 1);
24901 tree arg2 = CALL_EXPR_ARG (exp, 2);
24902 rtx scratch0, scratch1;
24903 rtx op0 = expand_normal (arg0);
24904 rtx op1 = expand_normal (arg1);
24905 rtx op2 = expand_normal (arg2);
24906 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24908 tmode0 = insn_data[d->icode].operand[0].mode;
24909 tmode1 = insn_data[d->icode].operand[1].mode;
24910 modev2 = insn_data[d->icode].operand[2].mode;
24911 modev3 = insn_data[d->icode].operand[3].mode;
24912 modeimm = insn_data[d->icode].operand[4].mode;
24914 if (VECTOR_MODE_P (modev2))
24915 op0 = safe_vector_operand (op0, modev2);
24916 if (VECTOR_MODE_P (modev3))
24917 op1 = safe_vector_operand (op1, modev3);
24919 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
24920 op0 = copy_to_mode_reg (modev2, op0);
24921 if ((optimize && !register_operand (op1, modev3))
24922 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
24923 op1 = copy_to_mode_reg (modev3, op1);
24925 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
24927 error ("the third argument must be a 8-bit immediate");
24928 return const0_rtx;
24931 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24933 if (optimize || !target
24934 || GET_MODE (target) != tmode0
24935 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
24936 target = gen_reg_rtx (tmode0);
24938 scratch1 = gen_reg_rtx (tmode1);
24940 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
24942 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24944 if (optimize || !target
24945 || GET_MODE (target) != tmode1
24946 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
24947 target = gen_reg_rtx (tmode1);
24949 scratch0 = gen_reg_rtx (tmode0);
24951 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
24953 else
24955 gcc_assert (d->flag);
24957 scratch0 = gen_reg_rtx (tmode0);
24958 scratch1 = gen_reg_rtx (tmode1);
24960 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
24963 if (! pat)
24964 return 0;
24966 emit_insn (pat);
24968 if (d->flag)
24970 target = gen_reg_rtx (SImode);
24971 emit_move_insn (target, const0_rtx);
24972 target = gen_rtx_SUBREG (QImode, target, 0);
24974 emit_insn
24975 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24976 gen_rtx_fmt_ee (EQ, QImode,
24977 gen_rtx_REG ((enum machine_mode) d->flag,
24978 FLAGS_REG),
24979 const0_rtx)));
24980 return SUBREG_REG (target);
24982 else
24983 return target;
24986 /* Subroutine of ix86_expand_builtin to take care of insns with
24987 a variable number of operands. */
24989 static rtx
24990 ix86_expand_args_builtin (const struct builtin_description *d,
24991 tree exp, rtx target)
24993 rtx pat, real_target;
24994 unsigned int i, nargs;
24995 unsigned int nargs_constant = 0;
24996 int num_memory = 0;
24997 struct
24999 rtx op;
25000 enum machine_mode mode;
25001 } args[4];
25002 bool last_arg_count = false;
25003 enum insn_code icode = d->icode;
25004 const struct insn_data_d *insn_p = &insn_data[icode];
25005 enum machine_mode tmode = insn_p->operand[0].mode;
25006 enum machine_mode rmode = VOIDmode;
25007 bool swap = false;
25008 enum rtx_code comparison = d->comparison;
25010 switch ((enum ix86_builtin_func_type) d->flag)
25012 case INT_FTYPE_V8SF_V8SF_PTEST:
25013 case INT_FTYPE_V4DI_V4DI_PTEST:
25014 case INT_FTYPE_V4DF_V4DF_PTEST:
25015 case INT_FTYPE_V4SF_V4SF_PTEST:
25016 case INT_FTYPE_V2DI_V2DI_PTEST:
25017 case INT_FTYPE_V2DF_V2DF_PTEST:
25018 return ix86_expand_sse_ptest (d, exp, target);
25019 case FLOAT128_FTYPE_FLOAT128:
25020 case FLOAT_FTYPE_FLOAT:
25021 case INT_FTYPE_INT:
25022 case UINT64_FTYPE_INT:
25023 case UINT16_FTYPE_UINT16:
25024 case INT64_FTYPE_INT64:
25025 case INT64_FTYPE_V4SF:
25026 case INT64_FTYPE_V2DF:
25027 case INT_FTYPE_V16QI:
25028 case INT_FTYPE_V8QI:
25029 case INT_FTYPE_V8SF:
25030 case INT_FTYPE_V4DF:
25031 case INT_FTYPE_V4SF:
25032 case INT_FTYPE_V2DF:
25033 case V16QI_FTYPE_V16QI:
25034 case V8SI_FTYPE_V8SF:
25035 case V8SI_FTYPE_V4SI:
25036 case V8HI_FTYPE_V8HI:
25037 case V8HI_FTYPE_V16QI:
25038 case V8QI_FTYPE_V8QI:
25039 case V8SF_FTYPE_V8SF:
25040 case V8SF_FTYPE_V8SI:
25041 case V8SF_FTYPE_V4SF:
25042 case V8SF_FTYPE_V8HI:
25043 case V4SI_FTYPE_V4SI:
25044 case V4SI_FTYPE_V16QI:
25045 case V4SI_FTYPE_V4SF:
25046 case V4SI_FTYPE_V8SI:
25047 case V4SI_FTYPE_V8HI:
25048 case V4SI_FTYPE_V4DF:
25049 case V4SI_FTYPE_V2DF:
25050 case V4HI_FTYPE_V4HI:
25051 case V4DF_FTYPE_V4DF:
25052 case V4DF_FTYPE_V4SI:
25053 case V4DF_FTYPE_V4SF:
25054 case V4DF_FTYPE_V2DF:
25055 case V4SF_FTYPE_V4SF:
25056 case V4SF_FTYPE_V4SI:
25057 case V4SF_FTYPE_V8SF:
25058 case V4SF_FTYPE_V4DF:
25059 case V4SF_FTYPE_V8HI:
25060 case V4SF_FTYPE_V2DF:
25061 case V2DI_FTYPE_V2DI:
25062 case V2DI_FTYPE_V16QI:
25063 case V2DI_FTYPE_V8HI:
25064 case V2DI_FTYPE_V4SI:
25065 case V2DF_FTYPE_V2DF:
25066 case V2DF_FTYPE_V4SI:
25067 case V2DF_FTYPE_V4DF:
25068 case V2DF_FTYPE_V4SF:
25069 case V2DF_FTYPE_V2SI:
25070 case V2SI_FTYPE_V2SI:
25071 case V2SI_FTYPE_V4SF:
25072 case V2SI_FTYPE_V2SF:
25073 case V2SI_FTYPE_V2DF:
25074 case V2SF_FTYPE_V2SF:
25075 case V2SF_FTYPE_V2SI:
25076 nargs = 1;
25077 break;
25078 case V4SF_FTYPE_V4SF_VEC_MERGE:
25079 case V2DF_FTYPE_V2DF_VEC_MERGE:
25080 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
25081 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
25082 case V16QI_FTYPE_V16QI_V16QI:
25083 case V16QI_FTYPE_V8HI_V8HI:
25084 case V8QI_FTYPE_V8QI_V8QI:
25085 case V8QI_FTYPE_V4HI_V4HI:
25086 case V8HI_FTYPE_V8HI_V8HI:
25087 case V8HI_FTYPE_V16QI_V16QI:
25088 case V8HI_FTYPE_V4SI_V4SI:
25089 case V8SF_FTYPE_V8SF_V8SF:
25090 case V8SF_FTYPE_V8SF_V8SI:
25091 case V4SI_FTYPE_V4SI_V4SI:
25092 case V4SI_FTYPE_V8HI_V8HI:
25093 case V4SI_FTYPE_V4SF_V4SF:
25094 case V4SI_FTYPE_V2DF_V2DF:
25095 case V4HI_FTYPE_V4HI_V4HI:
25096 case V4HI_FTYPE_V8QI_V8QI:
25097 case V4HI_FTYPE_V2SI_V2SI:
25098 case V4DF_FTYPE_V4DF_V4DF:
25099 case V4DF_FTYPE_V4DF_V4DI:
25100 case V4SF_FTYPE_V4SF_V4SF:
25101 case V4SF_FTYPE_V4SF_V4SI:
25102 case V4SF_FTYPE_V4SF_V2SI:
25103 case V4SF_FTYPE_V4SF_V2DF:
25104 case V4SF_FTYPE_V4SF_DI:
25105 case V4SF_FTYPE_V4SF_SI:
25106 case V2DI_FTYPE_V2DI_V2DI:
25107 case V2DI_FTYPE_V16QI_V16QI:
25108 case V2DI_FTYPE_V4SI_V4SI:
25109 case V2DI_FTYPE_V2DI_V16QI:
25110 case V2DI_FTYPE_V2DF_V2DF:
25111 case V2SI_FTYPE_V2SI_V2SI:
25112 case V2SI_FTYPE_V4HI_V4HI:
25113 case V2SI_FTYPE_V2SF_V2SF:
25114 case V2DF_FTYPE_V2DF_V2DF:
25115 case V2DF_FTYPE_V2DF_V4SF:
25116 case V2DF_FTYPE_V2DF_V2DI:
25117 case V2DF_FTYPE_V2DF_DI:
25118 case V2DF_FTYPE_V2DF_SI:
25119 case V2SF_FTYPE_V2SF_V2SF:
25120 case V1DI_FTYPE_V1DI_V1DI:
25121 case V1DI_FTYPE_V8QI_V8QI:
25122 case V1DI_FTYPE_V2SI_V2SI:
25123 if (comparison == UNKNOWN)
25124 return ix86_expand_binop_builtin (icode, exp, target);
25125 nargs = 2;
25126 break;
25127 case V4SF_FTYPE_V4SF_V4SF_SWAP:
25128 case V2DF_FTYPE_V2DF_V2DF_SWAP:
25129 gcc_assert (comparison != UNKNOWN);
25130 nargs = 2;
25131 swap = true;
25132 break;
25133 case V8HI_FTYPE_V8HI_V8HI_COUNT:
25134 case V8HI_FTYPE_V8HI_SI_COUNT:
25135 case V4SI_FTYPE_V4SI_V4SI_COUNT:
25136 case V4SI_FTYPE_V4SI_SI_COUNT:
25137 case V4HI_FTYPE_V4HI_V4HI_COUNT:
25138 case V4HI_FTYPE_V4HI_SI_COUNT:
25139 case V2DI_FTYPE_V2DI_V2DI_COUNT:
25140 case V2DI_FTYPE_V2DI_SI_COUNT:
25141 case V2SI_FTYPE_V2SI_V2SI_COUNT:
25142 case V2SI_FTYPE_V2SI_SI_COUNT:
25143 case V1DI_FTYPE_V1DI_V1DI_COUNT:
25144 case V1DI_FTYPE_V1DI_SI_COUNT:
25145 nargs = 2;
25146 last_arg_count = true;
25147 break;
25148 case UINT64_FTYPE_UINT64_UINT64:
25149 case UINT_FTYPE_UINT_UINT:
25150 case UINT_FTYPE_UINT_USHORT:
25151 case UINT_FTYPE_UINT_UCHAR:
25152 case UINT16_FTYPE_UINT16_INT:
25153 case UINT8_FTYPE_UINT8_INT:
25154 nargs = 2;
25155 break;
25156 case V2DI_FTYPE_V2DI_INT_CONVERT:
25157 nargs = 2;
25158 rmode = V1TImode;
25159 nargs_constant = 1;
25160 break;
25161 case V8HI_FTYPE_V8HI_INT:
25162 case V8HI_FTYPE_V8SF_INT:
25163 case V8HI_FTYPE_V4SF_INT:
25164 case V8SF_FTYPE_V8SF_INT:
25165 case V4SI_FTYPE_V4SI_INT:
25166 case V4SI_FTYPE_V8SI_INT:
25167 case V4HI_FTYPE_V4HI_INT:
25168 case V4DF_FTYPE_V4DF_INT:
25169 case V4SF_FTYPE_V4SF_INT:
25170 case V4SF_FTYPE_V8SF_INT:
25171 case V2DI_FTYPE_V2DI_INT:
25172 case V2DF_FTYPE_V2DF_INT:
25173 case V2DF_FTYPE_V4DF_INT:
25174 nargs = 2;
25175 nargs_constant = 1;
25176 break;
25177 case V16QI_FTYPE_V16QI_V16QI_V16QI:
25178 case V8SF_FTYPE_V8SF_V8SF_V8SF:
25179 case V4DF_FTYPE_V4DF_V4DF_V4DF:
25180 case V4SF_FTYPE_V4SF_V4SF_V4SF:
25181 case V2DF_FTYPE_V2DF_V2DF_V2DF:
25182 nargs = 3;
25183 break;
25184 case V16QI_FTYPE_V16QI_V16QI_INT:
25185 case V8HI_FTYPE_V8HI_V8HI_INT:
25186 case V8SI_FTYPE_V8SI_V8SI_INT:
25187 case V8SI_FTYPE_V8SI_V4SI_INT:
25188 case V8SF_FTYPE_V8SF_V8SF_INT:
25189 case V8SF_FTYPE_V8SF_V4SF_INT:
25190 case V4SI_FTYPE_V4SI_V4SI_INT:
25191 case V4DF_FTYPE_V4DF_V4DF_INT:
25192 case V4DF_FTYPE_V4DF_V2DF_INT:
25193 case V4SF_FTYPE_V4SF_V4SF_INT:
25194 case V2DI_FTYPE_V2DI_V2DI_INT:
25195 case V2DF_FTYPE_V2DF_V2DF_INT:
25196 nargs = 3;
25197 nargs_constant = 1;
25198 break;
25199 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
25200 nargs = 3;
25201 rmode = V2DImode;
25202 nargs_constant = 1;
25203 break;
25204 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
25205 nargs = 3;
25206 rmode = DImode;
25207 nargs_constant = 1;
25208 break;
25209 case V2DI_FTYPE_V2DI_UINT_UINT:
25210 nargs = 3;
25211 nargs_constant = 2;
25212 break;
25213 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
25214 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
25215 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
25216 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
25217 nargs = 4;
25218 nargs_constant = 1;
25219 break;
25220 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
25221 nargs = 4;
25222 nargs_constant = 2;
25223 break;
25224 default:
25225 gcc_unreachable ();
25228 gcc_assert (nargs <= ARRAY_SIZE (args));
25230 if (comparison != UNKNOWN)
25232 gcc_assert (nargs == 2);
25233 return ix86_expand_sse_compare (d, exp, target, swap);
25236 if (rmode == VOIDmode || rmode == tmode)
25238 if (optimize
25239 || target == 0
25240 || GET_MODE (target) != tmode
25241 || !insn_p->operand[0].predicate (target, tmode))
25242 target = gen_reg_rtx (tmode);
25243 real_target = target;
25245 else
25247 target = gen_reg_rtx (rmode);
25248 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
25251 for (i = 0; i < nargs; i++)
25253 tree arg = CALL_EXPR_ARG (exp, i);
25254 rtx op = expand_normal (arg);
25255 enum machine_mode mode = insn_p->operand[i + 1].mode;
25256 bool match = insn_p->operand[i + 1].predicate (op, mode);
25258 if (last_arg_count && (i + 1) == nargs)
25260 /* SIMD shift insns take either an 8-bit immediate or a
25261 register as the count, but the builtin functions take an int.
25262 If the count doesn't match, we put it in a register. */
25263 if (!match)
25265 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
25266 if (!insn_p->operand[i + 1].predicate (op, mode))
25267 op = copy_to_reg (op);
25270 else if ((nargs - i) <= nargs_constant)
25272 if (!match)
25273 switch (icode)
25275 case CODE_FOR_sse4_1_roundpd:
25276 case CODE_FOR_sse4_1_roundps:
25277 case CODE_FOR_sse4_1_roundsd:
25278 case CODE_FOR_sse4_1_roundss:
25279 case CODE_FOR_sse4_1_blendps:
25280 case CODE_FOR_avx_blendpd256:
25281 case CODE_FOR_avx_vpermilv4df:
25282 case CODE_FOR_avx_roundpd256:
25283 case CODE_FOR_avx_roundps256:
25284 error ("the last argument must be a 4-bit immediate");
25285 return const0_rtx;
25287 case CODE_FOR_sse4_1_blendpd:
25288 case CODE_FOR_avx_vpermilv2df:
25289 case CODE_FOR_xop_vpermil2v2df3:
25290 case CODE_FOR_xop_vpermil2v4sf3:
25291 case CODE_FOR_xop_vpermil2v4df3:
25292 case CODE_FOR_xop_vpermil2v8sf3:
25293 error ("the last argument must be a 2-bit immediate");
25294 return const0_rtx;
25296 case CODE_FOR_avx_vextractf128v4df:
25297 case CODE_FOR_avx_vextractf128v8sf:
25298 case CODE_FOR_avx_vextractf128v8si:
25299 case CODE_FOR_avx_vinsertf128v4df:
25300 case CODE_FOR_avx_vinsertf128v8sf:
25301 case CODE_FOR_avx_vinsertf128v8si:
25302 error ("the last argument must be a 1-bit immediate");
25303 return const0_rtx;
25305 case CODE_FOR_avx_cmpsdv2df3:
25306 case CODE_FOR_avx_cmpssv4sf3:
25307 case CODE_FOR_avx_cmppdv2df3:
25308 case CODE_FOR_avx_cmppsv4sf3:
25309 case CODE_FOR_avx_cmppdv4df3:
25310 case CODE_FOR_avx_cmppsv8sf3:
25311 error ("the last argument must be a 5-bit immediate");
25312 return const0_rtx;
25314 default:
25315 switch (nargs_constant)
25317 case 2:
25318 if ((nargs - i) == nargs_constant)
25320 error ("the next to last argument must be an 8-bit immediate");
25321 break;
25323 case 1:
25324 error ("the last argument must be an 8-bit immediate");
25325 break;
25326 default:
25327 gcc_unreachable ();
25329 return const0_rtx;
25332 else
25334 if (VECTOR_MODE_P (mode))
25335 op = safe_vector_operand (op, mode);
25337 /* If we aren't optimizing, only allow one memory operand to
25338 be generated. */
25339 if (memory_operand (op, mode))
25340 num_memory++;
25342 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
25344 if (optimize || !match || num_memory > 1)
25345 op = copy_to_mode_reg (mode, op);
25347 else
25349 op = copy_to_reg (op);
25350 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
25354 args[i].op = op;
25355 args[i].mode = mode;
25358 switch (nargs)
25360 case 1:
25361 pat = GEN_FCN (icode) (real_target, args[0].op);
25362 break;
25363 case 2:
25364 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
25365 break;
25366 case 3:
25367 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25368 args[2].op);
25369 break;
25370 case 4:
25371 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25372 args[2].op, args[3].op);
25373 break;
25374 default:
25375 gcc_unreachable ();
25378 if (! pat)
25379 return 0;
25381 emit_insn (pat);
25382 return target;
25385 /* Subroutine of ix86_expand_builtin to take care of special insns
25386 with a variable number of operands. */
25388 static rtx
25389 ix86_expand_special_args_builtin (const struct builtin_description *d,
25390 tree exp, rtx target)
25392 tree arg;
25393 rtx pat, op;
25394 unsigned int i, nargs, arg_adjust, memory;
25395 struct
25397 rtx op;
25398 enum machine_mode mode;
25399 } args[3];
25400 enum insn_code icode = d->icode;
25401 bool last_arg_constant = false;
25402 const struct insn_data_d *insn_p = &insn_data[icode];
25403 enum machine_mode tmode = insn_p->operand[0].mode;
25404 enum { load, store } klass;
25406 switch ((enum ix86_builtin_func_type) d->flag)
25408 case VOID_FTYPE_VOID:
25409 emit_insn (GEN_FCN (icode) (target));
25410 return 0;
25411 case VOID_FTYPE_UINT64:
25412 case VOID_FTYPE_UNSIGNED:
25413 nargs = 0;
25414 klass = store;
25415 memory = 0;
25416 break;
25418 case UINT64_FTYPE_VOID:
25419 case UNSIGNED_FTYPE_VOID:
25420 case UINT16_FTYPE_VOID:
25421 nargs = 0;
25422 klass = load;
25423 memory = 0;
25424 break;
25425 case UINT64_FTYPE_PUNSIGNED:
25426 case V2DI_FTYPE_PV2DI:
25427 case V32QI_FTYPE_PCCHAR:
25428 case V16QI_FTYPE_PCCHAR:
25429 case V8SF_FTYPE_PCV4SF:
25430 case V8SF_FTYPE_PCFLOAT:
25431 case V4SF_FTYPE_PCFLOAT:
25432 case V4DF_FTYPE_PCV2DF:
25433 case V4DF_FTYPE_PCDOUBLE:
25434 case V2DF_FTYPE_PCDOUBLE:
25435 case VOID_FTYPE_PVOID:
25436 nargs = 1;
25437 klass = load;
25438 memory = 0;
25439 break;
25440 case VOID_FTYPE_PV2SF_V4SF:
25441 case VOID_FTYPE_PV4DI_V4DI:
25442 case VOID_FTYPE_PV2DI_V2DI:
25443 case VOID_FTYPE_PCHAR_V32QI:
25444 case VOID_FTYPE_PCHAR_V16QI:
25445 case VOID_FTYPE_PFLOAT_V8SF:
25446 case VOID_FTYPE_PFLOAT_V4SF:
25447 case VOID_FTYPE_PDOUBLE_V4DF:
25448 case VOID_FTYPE_PDOUBLE_V2DF:
25449 case VOID_FTYPE_PULONGLONG_ULONGLONG:
25450 case VOID_FTYPE_PINT_INT:
25451 nargs = 1;
25452 klass = store;
25453 /* Reserve memory operand for target. */
25454 memory = ARRAY_SIZE (args);
25455 break;
25456 case V4SF_FTYPE_V4SF_PCV2SF:
25457 case V2DF_FTYPE_V2DF_PCDOUBLE:
25458 nargs = 2;
25459 klass = load;
25460 memory = 1;
25461 break;
25462 case V8SF_FTYPE_PCV8SF_V8SF:
25463 case V4DF_FTYPE_PCV4DF_V4DF:
25464 case V4SF_FTYPE_PCV4SF_V4SF:
25465 case V2DF_FTYPE_PCV2DF_V2DF:
25466 nargs = 2;
25467 klass = load;
25468 memory = 0;
25469 break;
25470 case VOID_FTYPE_PV8SF_V8SF_V8SF:
25471 case VOID_FTYPE_PV4DF_V4DF_V4DF:
25472 case VOID_FTYPE_PV4SF_V4SF_V4SF:
25473 case VOID_FTYPE_PV2DF_V2DF_V2DF:
25474 nargs = 2;
25475 klass = store;
25476 /* Reserve memory operand for target. */
25477 memory = ARRAY_SIZE (args);
25478 break;
25479 case VOID_FTYPE_UINT_UINT_UINT:
25480 case VOID_FTYPE_UINT64_UINT_UINT:
25481 case UCHAR_FTYPE_UINT_UINT_UINT:
25482 case UCHAR_FTYPE_UINT64_UINT_UINT:
25483 nargs = 3;
25484 klass = load;
25485 memory = ARRAY_SIZE (args);
25486 last_arg_constant = true;
25487 break;
25488 default:
25489 gcc_unreachable ();
25492 gcc_assert (nargs <= ARRAY_SIZE (args));
25494 if (klass == store)
25496 arg = CALL_EXPR_ARG (exp, 0);
25497 op = expand_normal (arg);
25498 gcc_assert (target == 0);
25499 if (memory)
25500 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
25501 else
25502 target = force_reg (tmode, op);
25503 arg_adjust = 1;
25505 else
25507 arg_adjust = 0;
25508 if (optimize
25509 || target == 0
25510 || GET_MODE (target) != tmode
25511 || !insn_p->operand[0].predicate (target, tmode))
25512 target = gen_reg_rtx (tmode);
25515 for (i = 0; i < nargs; i++)
25517 enum machine_mode mode = insn_p->operand[i + 1].mode;
25518 bool match;
25520 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
25521 op = expand_normal (arg);
25522 match = insn_p->operand[i + 1].predicate (op, mode);
25524 if (last_arg_constant && (i + 1) == nargs)
25526 if (!match)
25528 if (icode == CODE_FOR_lwp_lwpvalsi3
25529 || icode == CODE_FOR_lwp_lwpinssi3
25530 || icode == CODE_FOR_lwp_lwpvaldi3
25531 || icode == CODE_FOR_lwp_lwpinsdi3)
25532 error ("the last argument must be a 32-bit immediate");
25533 else
25534 error ("the last argument must be an 8-bit immediate");
25535 return const0_rtx;
25538 else
25540 if (i == memory)
25542 /* This must be the memory operand. */
25543 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
25544 gcc_assert (GET_MODE (op) == mode
25545 || GET_MODE (op) == VOIDmode);
25547 else
25549 /* This must be a register. */
25550 if (VECTOR_MODE_P (mode))
25551 op = safe_vector_operand (op, mode);
25553 gcc_assert (GET_MODE (op) == mode
25554 || GET_MODE (op) == VOIDmode);
25555 op = copy_to_mode_reg (mode, op);
25559 args[i].op = op;
25560 args[i].mode = mode;
25563 switch (nargs)
25565 case 0:
25566 pat = GEN_FCN (icode) (target);
25567 break;
25568 case 1:
25569 pat = GEN_FCN (icode) (target, args[0].op);
25570 break;
25571 case 2:
25572 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25573 break;
25574 case 3:
25575 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
25576 break;
25577 default:
25578 gcc_unreachable ();
25581 if (! pat)
25582 return 0;
25583 emit_insn (pat);
25584 return klass == store ? 0 : target;
25587 /* Return the integer constant in ARG. Constrain it to be in the range
25588 of the subparts of VEC_TYPE; issue an error if not. */
25590 static int
25591 get_element_number (tree vec_type, tree arg)
25593 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
25595 if (!host_integerp (arg, 1)
25596 || (elt = tree_low_cst (arg, 1), elt > max))
25598 error ("selector must be an integer constant in the range 0..%wi", max);
25599 return 0;
25602 return elt;
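/* Illustrative sketch (hypothetical user code, an assumption): for a V4SF
   argument TYPE_VECTOR_SUBPARTS is 4, so with -msse

     typedef float v4sf __attribute__ ((vector_size (16)));
     float pick (v4sf v) { return __builtin_ia32_vec_ext_v4sf (v, 7); }

   trips this check and reports
   "selector must be an integer constant in the range 0..3".  */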
25605 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25606 ix86_expand_vector_init. We DO have language-level syntax for this, in
25607 the form of (type){ init-list }. Except that since we can't place emms
25608 instructions from inside the compiler, we can't allow the use of MMX
25609 registers unless the user explicitly asks for it. So we do *not* define
25610 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25611 we have builtins invoked by mmintrin.h that give us license to emit
25612 these sorts of instructions. */
25614 static rtx
25615 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25617 enum machine_mode tmode = TYPE_MODE (type);
25618 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25619 int i, n_elt = GET_MODE_NUNITS (tmode);
25620 rtvec v = rtvec_alloc (n_elt);
25622 gcc_assert (VECTOR_MODE_P (tmode));
25623 gcc_assert (call_expr_nargs (exp) == n_elt);
25625 for (i = 0; i < n_elt; ++i)
25627 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25628 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
25631 if (!target || !register_operand (target, tmode))
25632 target = gen_reg_rtx (tmode);
25634 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
25635 return target;
25638 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25639 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25640 had a language-level syntax for referencing vector elements. */
25642 static rtx
25643 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25645 enum machine_mode tmode, mode0;
25646 tree arg0, arg1;
25647 int elt;
25648 rtx op0;
25650 arg0 = CALL_EXPR_ARG (exp, 0);
25651 arg1 = CALL_EXPR_ARG (exp, 1);
25653 op0 = expand_normal (arg0);
25654 elt = get_element_number (TREE_TYPE (arg0), arg1);
25656 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25657 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25658 gcc_assert (VECTOR_MODE_P (mode0));
25660 op0 = force_reg (mode0, op0);
25662 if (optimize || !target || !register_operand (target, tmode))
25663 target = gen_reg_rtx (tmode);
25665 ix86_expand_vector_extract (true, target, op0, elt);
25667 return target;
25670 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25671 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25672 a language-level syntax for referencing vector elements. */
25674 static rtx
25675 ix86_expand_vec_set_builtin (tree exp)
25677 enum machine_mode tmode, mode1;
25678 tree arg0, arg1, arg2;
25679 int elt;
25680 rtx op0, op1, target;
25682 arg0 = CALL_EXPR_ARG (exp, 0);
25683 arg1 = CALL_EXPR_ARG (exp, 1);
25684 arg2 = CALL_EXPR_ARG (exp, 2);
25686 tmode = TYPE_MODE (TREE_TYPE (arg0));
25687 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25688 gcc_assert (VECTOR_MODE_P (tmode));
25690 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25691 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
25692 elt = get_element_number (TREE_TYPE (arg0), arg2);
25694 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25695 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25697 op0 = force_reg (tmode, op0);
25698 op1 = force_reg (mode1, op1);
25700 /* OP0 is the source of these builtin functions and shouldn't be
25701 modified. Create a copy, use it and return it as target. */
25702 target = gen_reg_rtx (tmode);
25703 emit_move_insn (target, op0);
25704 ix86_expand_vector_set (true, target, op1, elt);
25706 return target;
25709 /* Expand an expression EXP that calls a built-in function,
25710 with result going to TARGET if that's convenient
25711 (and in mode MODE if that's convenient).
25712 SUBTARGET may be used as the target for computing one of EXP's operands.
25713 IGNORE is nonzero if the value is to be ignored. */
25715 static rtx
25716 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25717 enum machine_mode mode ATTRIBUTE_UNUSED,
25718 int ignore ATTRIBUTE_UNUSED)
25720 const struct builtin_description *d;
25721 size_t i;
25722 enum insn_code icode;
25723 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25724 tree arg0, arg1, arg2;
25725 rtx op0, op1, op2, pat;
25726 enum machine_mode mode0, mode1, mode2;
25727 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25729 /* Determine whether the builtin function is available under the current ISA.
25730 Originally the builtin was not created if it wasn't applicable to the
25731 current ISA based on the command line switches. With function specific
25732 options, we need to check in the context of the function making the call
25733 whether it is supported. */
25734 if (ix86_builtins_isa[fcode].isa
25735 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
25737 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25738 NULL, NULL, false);
25740 if (!opts)
25741 error ("%qE needs unknown isa option", fndecl);
25742 else
25744 gcc_assert (opts != NULL);
25745 error ("%qE needs isa option %s", fndecl, opts);
25746 free (opts);
25748 return const0_rtx;
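/* Illustrative sketch (the scenario is an assumption, not taken from this
   file): with function-specific options a builtin can exist because one
   function enables its ISA while another does not, e.g. when compiling
   without -maes (and with the usual __m128i/__v2di typedefs)

     __attribute__ ((target ("aes")))
     __m128i enc1 (__m128i a, __m128i b)
     { return (__m128i) __builtin_ia32_aesenc128 ((__v2di) a, (__v2di) b); }

     __m128i enc2 (__m128i a, __m128i b)
     { return (__m128i) __builtin_ia32_aesenc128 ((__v2di) a, (__v2di) b); }

   expanding enc2 reaches the check above and reports something like
   "__builtin_ia32_aesenc128 needs isa option -maes".  */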
25751 switch (fcode)
25753 case IX86_BUILTIN_MASKMOVQ:
25754 case IX86_BUILTIN_MASKMOVDQU:
25755 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25756 ? CODE_FOR_mmx_maskmovq
25757 : CODE_FOR_sse2_maskmovdqu);
25758 /* Note the arg order is different from the operand order. */
25759 arg1 = CALL_EXPR_ARG (exp, 0);
25760 arg2 = CALL_EXPR_ARG (exp, 1);
25761 arg0 = CALL_EXPR_ARG (exp, 2);
25762 op0 = expand_normal (arg0);
25763 op1 = expand_normal (arg1);
25764 op2 = expand_normal (arg2);
25765 mode0 = insn_data[icode].operand[0].mode;
25766 mode1 = insn_data[icode].operand[1].mode;
25767 mode2 = insn_data[icode].operand[2].mode;
25769 op0 = force_reg (Pmode, op0);
25770 op0 = gen_rtx_MEM (mode1, op0);
25772 if (!insn_data[icode].operand[0].predicate (op0, mode0))
25773 op0 = copy_to_mode_reg (mode0, op0);
25774 if (!insn_data[icode].operand[1].predicate (op1, mode1))
25775 op1 = copy_to_mode_reg (mode1, op1);
25776 if (!insn_data[icode].operand[2].predicate (op2, mode2))
25777 op2 = copy_to_mode_reg (mode2, op2);
25778 pat = GEN_FCN (icode) (op0, op1, op2);
25779 if (! pat)
25780 return 0;
25781 emit_insn (pat);
25782 return 0;
25784 case IX86_BUILTIN_LDMXCSR:
25785 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25786 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25787 emit_move_insn (target, op0);
25788 emit_insn (gen_sse_ldmxcsr (target));
25789 return 0;
25791 case IX86_BUILTIN_STMXCSR:
25792 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25793 emit_insn (gen_sse_stmxcsr (target));
25794 return copy_to_mode_reg (SImode, target);
25796 case IX86_BUILTIN_CLFLUSH:
25797 arg0 = CALL_EXPR_ARG (exp, 0);
25798 op0 = expand_normal (arg0);
25799 icode = CODE_FOR_sse2_clflush;
25800 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25801 op0 = copy_to_mode_reg (Pmode, op0);
25803 emit_insn (gen_sse2_clflush (op0));
25804 return 0;
25806 case IX86_BUILTIN_MONITOR:
25807 arg0 = CALL_EXPR_ARG (exp, 0);
25808 arg1 = CALL_EXPR_ARG (exp, 1);
25809 arg2 = CALL_EXPR_ARG (exp, 2);
25810 op0 = expand_normal (arg0);
25811 op1 = expand_normal (arg1);
25812 op2 = expand_normal (arg2);
25813 if (!REG_P (op0))
25814 op0 = copy_to_mode_reg (Pmode, op0);
25815 if (!REG_P (op1))
25816 op1 = copy_to_mode_reg (SImode, op1);
25817 if (!REG_P (op2))
25818 op2 = copy_to_mode_reg (SImode, op2);
25819 emit_insn (ix86_gen_monitor (op0, op1, op2));
25820 return 0;
25822 case IX86_BUILTIN_MWAIT:
25823 arg0 = CALL_EXPR_ARG (exp, 0);
25824 arg1 = CALL_EXPR_ARG (exp, 1);
25825 op0 = expand_normal (arg0);
25826 op1 = expand_normal (arg1);
25827 if (!REG_P (op0))
25828 op0 = copy_to_mode_reg (SImode, op0);
25829 if (!REG_P (op1))
25830 op1 = copy_to_mode_reg (SImode, op1);
25831 emit_insn (gen_sse3_mwait (op0, op1));
25832 return 0;
25834 case IX86_BUILTIN_VEC_INIT_V2SI:
25835 case IX86_BUILTIN_VEC_INIT_V4HI:
25836 case IX86_BUILTIN_VEC_INIT_V8QI:
25837 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25839 case IX86_BUILTIN_VEC_EXT_V2DF:
25840 case IX86_BUILTIN_VEC_EXT_V2DI:
25841 case IX86_BUILTIN_VEC_EXT_V4SF:
25842 case IX86_BUILTIN_VEC_EXT_V4SI:
25843 case IX86_BUILTIN_VEC_EXT_V8HI:
25844 case IX86_BUILTIN_VEC_EXT_V2SI:
25845 case IX86_BUILTIN_VEC_EXT_V4HI:
25846 case IX86_BUILTIN_VEC_EXT_V16QI:
25847 return ix86_expand_vec_ext_builtin (exp, target);
25849 case IX86_BUILTIN_VEC_SET_V2DI:
25850 case IX86_BUILTIN_VEC_SET_V4SF:
25851 case IX86_BUILTIN_VEC_SET_V4SI:
25852 case IX86_BUILTIN_VEC_SET_V8HI:
25853 case IX86_BUILTIN_VEC_SET_V4HI:
25854 case IX86_BUILTIN_VEC_SET_V16QI:
25855 return ix86_expand_vec_set_builtin (exp);
25857 case IX86_BUILTIN_VEC_PERM_V2DF:
25858 case IX86_BUILTIN_VEC_PERM_V4SF:
25859 case IX86_BUILTIN_VEC_PERM_V2DI:
25860 case IX86_BUILTIN_VEC_PERM_V4SI:
25861 case IX86_BUILTIN_VEC_PERM_V8HI:
25862 case IX86_BUILTIN_VEC_PERM_V16QI:
25863 case IX86_BUILTIN_VEC_PERM_V2DI_U:
25864 case IX86_BUILTIN_VEC_PERM_V4SI_U:
25865 case IX86_BUILTIN_VEC_PERM_V8HI_U:
25866 case IX86_BUILTIN_VEC_PERM_V16QI_U:
25867 case IX86_BUILTIN_VEC_PERM_V4DF:
25868 case IX86_BUILTIN_VEC_PERM_V8SF:
25869 return ix86_expand_vec_perm_builtin (exp);
25871 case IX86_BUILTIN_INFQ:
25872 case IX86_BUILTIN_HUGE_VALQ:
25874 REAL_VALUE_TYPE inf;
25875 rtx tmp;
25877 real_inf (&inf);
25878 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25880 tmp = validize_mem (force_const_mem (mode, tmp));
25882 if (target == 0)
25883 target = gen_reg_rtx (mode);
25885 emit_move_insn (target, tmp);
25886 return target;
25889 case IX86_BUILTIN_LLWPCB:
25890 arg0 = CALL_EXPR_ARG (exp, 0);
25891 op0 = expand_normal (arg0);
25892 icode = CODE_FOR_lwp_llwpcb;
25893 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25894 op0 = copy_to_mode_reg (Pmode, op0);
25895 emit_insn (gen_lwp_llwpcb (op0));
25896 return 0;
25898 case IX86_BUILTIN_SLWPCB:
25899 icode = CODE_FOR_lwp_slwpcb;
25900 if (!target
25901 || !insn_data[icode].operand[0].predicate (target, Pmode))
25902 target = gen_reg_rtx (Pmode);
25903 emit_insn (gen_lwp_slwpcb (target));
25904 return target;
25906 default:
25907 break;
25910 for (i = 0, d = bdesc_special_args;
25911 i < ARRAY_SIZE (bdesc_special_args);
25912 i++, d++)
25913 if (d->code == fcode)
25914 return ix86_expand_special_args_builtin (d, exp, target);
25916 for (i = 0, d = bdesc_args;
25917 i < ARRAY_SIZE (bdesc_args);
25918 i++, d++)
25919 if (d->code == fcode)
25920 switch (fcode)
25922 case IX86_BUILTIN_FABSQ:
25923 case IX86_BUILTIN_COPYSIGNQ:
25924 if (!TARGET_SSE2)
25925 /* Emit a normal call if SSE2 isn't available. */
25926 return expand_call (exp, target, ignore);
25927 default:
25928 return ix86_expand_args_builtin (d, exp, target);
25931 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25932 if (d->code == fcode)
25933 return ix86_expand_sse_comi (d, exp, target);
25935 for (i = 0, d = bdesc_pcmpestr;
25936 i < ARRAY_SIZE (bdesc_pcmpestr);
25937 i++, d++)
25938 if (d->code == fcode)
25939 return ix86_expand_sse_pcmpestr (d, exp, target);
25941 for (i = 0, d = bdesc_pcmpistr;
25942 i < ARRAY_SIZE (bdesc_pcmpistr);
25943 i++, d++)
25944 if (d->code == fcode)
25945 return ix86_expand_sse_pcmpistr (d, exp, target);
25947 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25948 if (d->code == fcode)
25949 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25950 (enum ix86_builtin_func_type)
25951 d->flag, d->comparison);
25953 gcc_unreachable ();
25956 /* Returns a function decl for a vectorized version of the builtin function
25957 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25958 if it is not available. */
25960 static tree
25961 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
25962 tree type_in)
25964 enum machine_mode in_mode, out_mode;
25965 int in_n, out_n;
25966 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
25968 if (TREE_CODE (type_out) != VECTOR_TYPE
25969 || TREE_CODE (type_in) != VECTOR_TYPE
25970 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
25971 return NULL_TREE;
25973 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25974 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25975 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25976 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25978 switch (fn)
25980 case BUILT_IN_SQRT:
25981 if (out_mode == DFmode && out_n == 2
25982 && in_mode == DFmode && in_n == 2)
25983 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25984 break;
25986 case BUILT_IN_SQRTF:
25987 if (out_mode == SFmode && out_n == 4
25988 && in_mode == SFmode && in_n == 4)
25989 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25990 break;
25992 case BUILT_IN_LRINT:
25993 if (out_mode == SImode && out_n == 4
25994 && in_mode == DFmode && in_n == 2)
25995 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25996 break;
25998 case BUILT_IN_LRINTF:
25999 if (out_mode == SImode && out_n == 4
26000 && in_mode == SFmode && in_n == 4)
26001 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
26002 break;
26004 case BUILT_IN_COPYSIGN:
26005 if (out_mode == DFmode && out_n == 2
26006 && in_mode == DFmode && in_n == 2)
26007 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
26008 break;
26010 case BUILT_IN_COPYSIGNF:
26011 if (out_mode == SFmode && out_n == 4
26012 && in_mode == SFmode && in_n == 4)
26013 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
26014 break;
26016 default:
26020 /* Dispatch to a handler for a vectorization library. */
26021 if (ix86_veclib_handler)
26022 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
26023 type_in);
26025 return NULL_TREE;
26028 /* Handler for an SVML-style interface to
26029 a library with vectorized intrinsics. */
26031 static tree
26032 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
26034 char name[20];
26035 tree fntype, new_fndecl, args;
26036 unsigned arity;
26037 const char *bname;
26038 enum machine_mode el_mode, in_mode;
26039 int n, in_n;
26041 /* The SVML is suitable for unsafe math only. */
26042 if (!flag_unsafe_math_optimizations)
26043 return NULL_TREE;
26045 el_mode = TYPE_MODE (TREE_TYPE (type_out));
26046 n = TYPE_VECTOR_SUBPARTS (type_out);
26047 in_mode = TYPE_MODE (TREE_TYPE (type_in));
26048 in_n = TYPE_VECTOR_SUBPARTS (type_in);
26049 if (el_mode != in_mode
26050 || n != in_n)
26051 return NULL_TREE;
26053 switch (fn)
26055 case BUILT_IN_EXP:
26056 case BUILT_IN_LOG:
26057 case BUILT_IN_LOG10:
26058 case BUILT_IN_POW:
26059 case BUILT_IN_TANH:
26060 case BUILT_IN_TAN:
26061 case BUILT_IN_ATAN:
26062 case BUILT_IN_ATAN2:
26063 case BUILT_IN_ATANH:
26064 case BUILT_IN_CBRT:
26065 case BUILT_IN_SINH:
26066 case BUILT_IN_SIN:
26067 case BUILT_IN_ASINH:
26068 case BUILT_IN_ASIN:
26069 case BUILT_IN_COSH:
26070 case BUILT_IN_COS:
26071 case BUILT_IN_ACOSH:
26072 case BUILT_IN_ACOS:
26073 if (el_mode != DFmode || n != 2)
26074 return NULL_TREE;
26075 break;
26077 case BUILT_IN_EXPF:
26078 case BUILT_IN_LOGF:
26079 case BUILT_IN_LOG10F:
26080 case BUILT_IN_POWF:
26081 case BUILT_IN_TANHF:
26082 case BUILT_IN_TANF:
26083 case BUILT_IN_ATANF:
26084 case BUILT_IN_ATAN2F:
26085 case BUILT_IN_ATANHF:
26086 case BUILT_IN_CBRTF:
26087 case BUILT_IN_SINHF:
26088 case BUILT_IN_SINF:
26089 case BUILT_IN_ASINHF:
26090 case BUILT_IN_ASINF:
26091 case BUILT_IN_COSHF:
26092 case BUILT_IN_COSF:
26093 case BUILT_IN_ACOSHF:
26094 case BUILT_IN_ACOSF:
26095 if (el_mode != SFmode || n != 4)
26096 return NULL_TREE;
26097 break;
26099 default:
26100 return NULL_TREE;
26103 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
26105 if (fn == BUILT_IN_LOGF)
26106 strcpy (name, "vmlsLn4");
26107 else if (fn == BUILT_IN_LOG)
26108 strcpy (name, "vmldLn2");
26109 else if (n == 4)
26111 sprintf (name, "vmls%s", bname+10);
26112 name[strlen (name)-1] = '4';
26114 else
26115 sprintf (name, "vmld%s2", bname+10);
26117 /* Convert to uppercase. */
26118 name[4] &= ~0x20;
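/* For example, BUILT_IN_SINF becomes "vmlsSin4" and BUILT_IN_SIN becomes
   "vmldSin2" (bname+10 above strips the "__builtin_" prefix from the
   builtin's DECL_NAME).  The log functions are special-cased above because
   the SVML entry points are named "Ln" rather than "Log".  */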
26120 arity = 0;
26121 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
26122 args = TREE_CHAIN (args))
26123 arity++;
26125 if (arity == 1)
26126 fntype = build_function_type_list (type_out, type_in, NULL);
26127 else
26128 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
26130 /* Build a function declaration for the vectorized function. */
26131 new_fndecl = build_decl (BUILTINS_LOCATION,
26132 FUNCTION_DECL, get_identifier (name), fntype);
26133 TREE_PUBLIC (new_fndecl) = 1;
26134 DECL_EXTERNAL (new_fndecl) = 1;
26135 DECL_IS_NOVOPS (new_fndecl) = 1;
26136 TREE_READONLY (new_fndecl) = 1;
26138 return new_fndecl;
26141 /* Handler for an ACML-style interface to
26142 a library with vectorized intrinsics. */
26144 static tree
26145 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
26147 char name[20] = "__vr.._";
26148 tree fntype, new_fndecl, args;
26149 unsigned arity;
26150 const char *bname;
26151 enum machine_mode el_mode, in_mode;
26152 int n, in_n;
26154 /* ACML is 64-bit only and suitable for unsafe math only, as it does
26155 not correctly support parts of IEEE arithmetic with the required
26156 precision, such as denormals. */
26157 if (!TARGET_64BIT
26158 || !flag_unsafe_math_optimizations)
26159 return NULL_TREE;
26161 el_mode = TYPE_MODE (TREE_TYPE (type_out));
26162 n = TYPE_VECTOR_SUBPARTS (type_out);
26163 in_mode = TYPE_MODE (TREE_TYPE (type_in));
26164 in_n = TYPE_VECTOR_SUBPARTS (type_in);
26165 if (el_mode != in_mode
26166 || n != in_n)
26167 return NULL_TREE;
26169 switch (fn)
26171 case BUILT_IN_SIN:
26172 case BUILT_IN_COS:
26173 case BUILT_IN_EXP:
26174 case BUILT_IN_LOG:
26175 case BUILT_IN_LOG2:
26176 case BUILT_IN_LOG10:
26177 name[4] = 'd';
26178 name[5] = '2';
26179 if (el_mode != DFmode
26180 || n != 2)
26181 return NULL_TREE;
26182 break;
26184 case BUILT_IN_SINF:
26185 case BUILT_IN_COSF:
26186 case BUILT_IN_EXPF:
26187 case BUILT_IN_POWF:
26188 case BUILT_IN_LOGF:
26189 case BUILT_IN_LOG2F:
26190 case BUILT_IN_LOG10F:
26191 name[4] = 's';
26192 name[5] = '4';
26193 if (el_mode != SFmode
26194 || n != 4)
26195 return NULL_TREE;
26196 break;
26198 default:
26199 return NULL_TREE;
26202 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
26203 sprintf (name + 7, "%s", bname+10);
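/* For example, BUILT_IN_SIN becomes "__vrd2_sin" and BUILT_IN_SINF becomes
   "__vrs4_sinf" (bname+10 strips the "__builtin_" prefix).  */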
26205 arity = 0;
26206 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
26207 args = TREE_CHAIN (args))
26208 arity++;
26210 if (arity == 1)
26211 fntype = build_function_type_list (type_out, type_in, NULL);
26212 else
26213 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
26215 /* Build a function declaration for the vectorized function. */
26216 new_fndecl = build_decl (BUILTINS_LOCATION,
26217 FUNCTION_DECL, get_identifier (name), fntype);
26218 TREE_PUBLIC (new_fndecl) = 1;
26219 DECL_EXTERNAL (new_fndecl) = 1;
26220 DECL_IS_NOVOPS (new_fndecl) = 1;
26221 TREE_READONLY (new_fndecl) = 1;
26223 return new_fndecl;
26227 /* Returns a decl of a function that implements conversion of an integer vector
26228 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
26229 are the types involved when converting according to CODE.
26230 Return NULL_TREE if it is not available. */
26232 static tree
26233 ix86_vectorize_builtin_conversion (unsigned int code,
26234 tree dest_type, tree src_type)
26236 if (! TARGET_SSE2)
26237 return NULL_TREE;
26239 switch (code)
26241 case FLOAT_EXPR:
26242 switch (TYPE_MODE (src_type))
26244 case V4SImode:
26245 switch (TYPE_MODE (dest_type))
26247 case V4SFmode:
26248 return (TYPE_UNSIGNED (src_type)
26249 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
26250 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
26251 case V4DFmode:
26252 return (TYPE_UNSIGNED (src_type)
26253 ? NULL_TREE
26254 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
26255 default:
26256 return NULL_TREE;
26258 break;
26259 case V8SImode:
26260 switch (TYPE_MODE (dest_type))
26262 case V8SFmode:
26263 return (TYPE_UNSIGNED (src_type)
26264 ? NULL_TREE
26265 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
26266 default:
26267 return NULL_TREE;
26269 break;
26270 default:
26271 return NULL_TREE;
26274 case FIX_TRUNC_EXPR:
26275 switch (TYPE_MODE (dest_type))
26277 case V4SImode:
26278 switch (TYPE_MODE (src_type))
26280 case V4SFmode:
26281 return (TYPE_UNSIGNED (dest_type)
26282 ? NULL_TREE
26283 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
26284 case V4DFmode:
26285 return (TYPE_UNSIGNED (dest_type)
26286 ? NULL_TREE
26287 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
26288 default:
26289 return NULL_TREE;
26291 break;
26293 case V8SImode:
26294 switch (TYPE_MODE (src_type))
26296 case V8SFmode:
26297 return (TYPE_UNSIGNED (dest_type)
26298 ? NULL_TREE
26299 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
26300 default:
26301 return NULL_TREE;
26303 break;
26305 default:
26306 return NULL_TREE;
26309 default:
26310 return NULL_TREE;
26313 return NULL_TREE;
26316 /* Returns the decl of a target-specific builtin that implements the
26317 reciprocal of the function, or NULL_TREE if it is not available. */
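/* The reciprocal forms are based on RSQRTSS/RSQRTPS plus a Newton-Raphson
   refinement step, so e.g. 1.0f / sqrtf (x) can avoid both SQRTSS and the
   divide; this approximation is only valid under the fast-math guards
   checked below.  */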
26319 static tree
26320 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
26321 bool sqrt ATTRIBUTE_UNUSED)
26323 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
26324 && flag_finite_math_only && !flag_trapping_math
26325 && flag_unsafe_math_optimizations))
26326 return NULL_TREE;
26328 if (md_fn)
26329 /* Machine dependent builtins. */
26330 switch (fn)
26332 /* Vectorized version of sqrt to rsqrt conversion. */
26333 case IX86_BUILTIN_SQRTPS_NR:
26334 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
26336 default:
26337 return NULL_TREE;
26339 else
26340 /* Normal builtins. */
26341 switch (fn)
26343 /* Sqrt to rsqrt conversion. */
26344 case BUILT_IN_SQRTF:
26345 return ix86_builtins[IX86_BUILTIN_RSQRTF];
26347 default:
26348 return NULL_TREE;
26352 /* Helper for avx_vpermilps256_operand et al. This is also used by
26353 the expansion functions to turn the parallel back into a mask.
26354 The return value is 0 for no match and the imm8+1 for a match. */
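/* For example, for V4SFmode the selector (1 0 3 2) packs as
   1 | (0 << 2) | (3 << 4) | (2 << 6) == 0xb1, so the function
   returns 0xb1 + 1.  */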
26357 int avx_vpermilp_parallel (rtx par, enum machine_mode mode)
26359 unsigned i, nelt = GET_MODE_NUNITS (mode);
26360 unsigned mask = 0;
26361 unsigned char ipar[8];
26363 if (XVECLEN (par, 0) != (int) nelt)
26364 return 0;
26366 /* Validate that all of the elements are constants, and not totally
26367 out of range. Copy the data into an integral array to make the
26368 subsequent checks easier. */
26369 for (i = 0; i < nelt; ++i)
26371 rtx er = XVECEXP (par, 0, i);
26372 unsigned HOST_WIDE_INT ei;
26374 if (!CONST_INT_P (er))
26375 return 0;
26376 ei = INTVAL (er);
26377 if (ei >= nelt)
26378 return 0;
26379 ipar[i] = ei;
26382 switch (mode)
26384 case V4DFmode:
26385 /* In the 256-bit DFmode case, we can only move elements within
26386 a 128-bit lane. */
26387 for (i = 0; i < 2; ++i)
26389 if (ipar[i] >= 2)
26390 return 0;
26391 mask |= ipar[i] << i;
26393 for (i = 2; i < 4; ++i)
26395 if (ipar[i] < 2)
26396 return 0;
26397 mask |= (ipar[i] - 2) << i;
26399 break;
26401 case V8SFmode:
26402 /* In the 256-bit SFmode case, we have full freedom of movement
26403 within the low 128-bit lane, but the high 128-bit lane must
26404 mirror the exact same pattern. */
26405 for (i = 0; i < 4; ++i)
26406 if (ipar[i] + 4 != ipar[i + 4])
26407 return 0;
26408 nelt = 4;
26409 /* FALLTHRU */
26411 case V2DFmode:
26412 case V4SFmode:
26413 /* In the 128-bit case, we've full freedom in the placement of
26414 the elements from the source operand. */
26415 for (i = 0; i < nelt; ++i)
26416 mask |= ipar[i] << (i * (nelt / 2));
26417 break;
26419 default:
26420 gcc_unreachable ();
26423 /* Make sure success has a non-zero value by adding one. */
26424 return mask + 1;
26427 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
26428 the expansion functions to turn the parallel back into a mask.
26429 The return value is 0 for no match and the imm8+1 for a match. */
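/* For example, for V8SFmode the selector (4 5 6 7 0 1 2 3) picks lane 1
   (elements 4-7) for the low half of the result and lane 0 for the high
   half: mask = 1 | (0 << 4) = 0x01, so the function returns 0x02.  */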
26432 int avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
26434 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
26435 unsigned mask = 0;
26436 unsigned char ipar[8];
26438 if (XVECLEN (par, 0) != (int) nelt)
26439 return 0;
26441 /* Validate that all of the elements are constants, and not totally
26442 out of range. Copy the data into an integral array to make the
26443 subsequent checks easier. */
26444 for (i = 0; i < nelt; ++i)
26446 rtx er = XVECEXP (par, 0, i);
26447 unsigned HOST_WIDE_INT ei;
26449 if (!CONST_INT_P (er))
26450 return 0;
26451 ei = INTVAL (er);
26452 if (ei >= 2 * nelt)
26453 return 0;
26454 ipar[i] = ei;
26457 /* Validate that each half of the permute selects consecutive elements. */
26458 for (i = 0; i < nelt2 - 1; ++i)
26459 if (ipar[i] + 1 != ipar[i + 1])
26460 return 0;
26461 for (i = nelt2; i < nelt - 1; ++i)
26462 if (ipar[i] + 1 != ipar[i + 1])
26463 return 0;
26465 /* Reconstruct the mask. */
26466 for (i = 0; i < 2; ++i)
26468 unsigned e = ipar[i * nelt2];
26469 if (e % nelt2)
26470 return 0;
26471 e /= nelt2;
26472 mask |= e << (i * 4);
26475 /* Make sure success has a non-zero value by adding one. */
26476 return mask + 1;
26480 /* Store OPERAND to the memory after reload is completed. This means
26481 that we can't easily use assign_stack_local. */
26483 rtx ix86_force_to_memory (enum machine_mode mode, rtx operand)
26485 rtx result;
26487 gcc_assert (reload_completed);
26488 if (ix86_using_red_zone ())
26490 result = gen_rtx_MEM (mode,
26491 gen_rtx_PLUS (Pmode,
26492 stack_pointer_rtx,
26493 GEN_INT (-RED_ZONE_SIZE)));
26494 emit_move_insn (result, operand);
26496 else if (TARGET_64BIT)
26498 switch (mode)
26500 case HImode:
26501 case SImode:
26502 operand = gen_lowpart (DImode, operand);
26503 /* FALLTHRU */
26504 case DImode:
26505 emit_insn (
26506 gen_rtx_SET (VOIDmode,
26507 gen_rtx_MEM (DImode,
26508 gen_rtx_PRE_DEC (DImode,
26509 stack_pointer_rtx)),
26510 operand));
26511 break;
26512 default:
26513 gcc_unreachable ();
26515 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26517 else
26519 switch (mode)
26521 case DImode:
26523 rtx operands[2];
26524 split_di (&operand, 1, operands, operands + 1);
26525 emit_insn (
26526 gen_rtx_SET (VOIDmode,
26527 gen_rtx_MEM (SImode,
26528 gen_rtx_PRE_DEC (Pmode,
26529 stack_pointer_rtx)),
26530 operands[1]));
26531 emit_insn (
26532 gen_rtx_SET (VOIDmode,
26533 gen_rtx_MEM (SImode,
26534 gen_rtx_PRE_DEC (Pmode,
26535 stack_pointer_rtx)),
26536 operands[0]));
26538 break;
26539 case HImode:
26540 /* Store HImodes as SImodes. */
26541 operand = gen_lowpart (SImode, operand);
26542 /* FALLTHRU */
26543 case SImode:
26544 emit_insn (
26545 gen_rtx_SET (VOIDmode,
26546 gen_rtx_MEM (GET_MODE (operand),
26547 gen_rtx_PRE_DEC (SImode,
26548 stack_pointer_rtx)),
26549 operand));
26550 break;
26551 default:
26552 gcc_unreachable ();
26554 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26556 return result;
26559 /* Free operand from the memory. */
26560 void
26561 ix86_free_from_memory (enum machine_mode mode)
26563 if (!ix86_using_red_zone ())
26565 int size;
26567 if (mode == DImode || TARGET_64BIT)
26568 size = 8;
26569 else
26570 size = 4;
26571 /* Use LEA to deallocate stack space. In peephole2 it will be converted
26572 to a pop or add instruction if registers are available. */
26573 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26574 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
26575 GEN_INT (size))));
26579 /* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
26580 SSE_REGS to FLOAT_REGS if their costs for a pseudo are the
26581 same. */
26582 static const reg_class_t *
26583 i386_ira_cover_classes (void)
26585 static const reg_class_t sse_fpmath_classes[] = {
26586 GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
26588 static const reg_class_t no_sse_fpmath_classes[] = {
26589 GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
26592 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
26595 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
26596 QImode must go into class Q_REGS.
26597 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
26598 movdf to do mem-to-mem moves through integer regs. */
26599 enum reg_class
26600 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
26602 enum machine_mode mode = GET_MODE (x);
26604 /* We're only allowed to return a subclass of CLASS. Many of the
26605 following checks fail for NO_REGS, so eliminate that early. */
26606 if (regclass == NO_REGS)
26607 return NO_REGS;
26609 /* All classes can load zeros. */
26610 if (x == CONST0_RTX (mode))
26611 return regclass;
26613 /* Force constants into memory if we are loading a (nonzero) constant into
26614 an MMX or SSE register. This is because there are no MMX/SSE instructions
26615 to load from a constant. */
26616 if (CONSTANT_P (x)
26617 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
26618 return NO_REGS;
26620 /* Prefer SSE regs only, if we can use them for math. */
26621 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
26622 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
26624 /* Floating-point constants need more complex checks. */
26625 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
26627 /* General regs can load everything. */
26628 if (reg_class_subset_p (regclass, GENERAL_REGS))
26629 return regclass;
26631 /* Floats can load 0 and 1 plus some others. Note that we eliminated
26632 zero above. We only want to wind up preferring 80387 registers if
26633 we plan on doing computation with them. */
26634 if (TARGET_80387
26635 && standard_80387_constant_p (x))
26637 /* Limit class to non-sse. */
26638 if (regclass == FLOAT_SSE_REGS)
26639 return FLOAT_REGS;
26640 if (regclass == FP_TOP_SSE_REGS)
26641 return FP_TOP_REG;
26642 if (regclass == FP_SECOND_SSE_REGS)
26643 return FP_SECOND_REG;
26644 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
26645 return regclass;
26648 return NO_REGS;
26651 /* Generally when we see PLUS here, it's the function invariant
26652 (plus soft-fp const_int). Which can only be computed into general
26653 regs. */
26654 if (GET_CODE (x) == PLUS)
26655 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
26657 /* QImode constants are easy to load, but non-constant QImode data
26658 must go into Q_REGS. */
26659 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
26661 if (reg_class_subset_p (regclass, Q_REGS))
26662 return regclass;
26663 if (reg_class_subset_p (Q_REGS, regclass))
26664 return Q_REGS;
26665 return NO_REGS;
26668 return regclass;
26671 /* Discourage putting floating-point values in SSE registers unless
26672 SSE math is being used, and likewise for the 387 registers. */
26673 enum reg_class
26674 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
26676 enum machine_mode mode = GET_MODE (x);
26678 /* Restrict the output reload class to the register bank that we are doing
26679 math on. If we would like not to return a subset of CLASS, reject this
26680 alternative: if reload cannot do this, it will still use its choice. */
26681 mode = GET_MODE (x);
26682 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
26683 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
26685 if (X87_FLOAT_MODE_P (mode))
26687 if (regclass == FP_TOP_SSE_REGS)
26688 return FP_TOP_REG;
26689 else if (regclass == FP_SECOND_SSE_REGS)
26690 return FP_SECOND_REG;
26691 else
26692 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
26695 return regclass;
26698 static reg_class_t
26699 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
26700 enum machine_mode mode,
26701 secondary_reload_info *sri ATTRIBUTE_UNUSED)
26703 /* QImode spills from non-QI registers require an
26704 intermediate register on 32-bit targets. */
26705 if (!in_p && mode == QImode && !TARGET_64BIT
26706 && (rclass == GENERAL_REGS
26707 || rclass == LEGACY_REGS
26708 || rclass == INDEX_REGS))
26710 int regno;
26712 if (REG_P (x))
26713 regno = REGNO (x);
26714 else
26715 regno = -1;
26717 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
26718 regno = true_regnum (x);
26720 /* Return Q_REGS if the operand is in memory. */
26721 if (regno == -1)
26722 return Q_REGS;
26725 return NO_REGS;
26728 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
26730 static bool
26731 ix86_class_likely_spilled_p (reg_class_t rclass)
26733 switch (rclass)
26735 case AREG:
26736 case DREG:
26737 case CREG:
26738 case BREG:
26739 case AD_REGS:
26740 case SIREG:
26741 case DIREG:
26742 case SSE_FIRST_REG:
26743 case FP_TOP_REG:
26744 case FP_SECOND_REG:
26745 return true;
26747 default:
26748 break;
26751 return false;
26754 /* If we are copying between general and FP registers, we need a memory
26755 location. The same is true for SSE and MMX registers.
26757 To optimize register_move_cost performance, an inline variant is provided.
26759 The macro can't work reliably when one of the CLASSES is a class containing
26760 registers from multiple units (SSE, MMX, integer). We avoid this by never
26761 combining those units in a single alternative in the machine description.
26762 Ensure that this constraint holds to avoid unexpected surprises.
26764 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
26765 enforce these sanity checks. */
26767 static inline bool
26768 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26769 enum machine_mode mode, int strict)
26771 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
26772 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
26773 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
26774 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
26775 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
26776 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
26778 gcc_assert (!strict);
26779 return true;
26782 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
26783 return true;
26785 /* ??? This is a lie. We do have moves between mmx/general, and for
26786 mmx/sse2. But by saying we need secondary memory we discourage the
26787 register allocator from using the mmx registers unless needed. */
26788 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
26789 return true;
26791 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26793 /* SSE1 doesn't have any direct moves from other classes. */
26794 if (!TARGET_SSE2)
26795 return true;
26797 /* If the target says that inter-unit moves are more expensive
26798 than moving through memory, then don't generate them. */
26799 if (!TARGET_INTER_UNIT_MOVES)
26800 return true;
26802 /* Between SSE and general, we have moves no larger than word size. */
26803 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
26804 return true;
26807 return false;
26810 bool
26811 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26812 enum machine_mode mode, int strict)
26814 return inline_secondary_memory_needed (class1, class2, mode, strict);
26817 /* Return true if the registers in CLASS cannot represent the change from
26818 modes FROM to TO. */
26820 bool
26821 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26822 enum reg_class regclass)
26824 if (from == to)
26825 return false;
26827 /* x87 registers can't do subreg at all, as all values are reformatted
26828 to extended precision. */
26829 if (MAYBE_FLOAT_CLASS_P (regclass))
26830 return true;
26832 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26834 /* Vector registers do not support QI or HImode loads. If we don't
26835 disallow a change to these modes, reload will assume it's ok to
26836 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26837 the vec_dupv4hi pattern. */
26838 if (GET_MODE_SIZE (from) < 4)
26839 return true;
26841 /* Vector registers do not support subreg with nonzero offsets, which
26842 are otherwise valid for integer registers. Since we can't see
26843 whether we have a nonzero offset from here, prohibit all
26844 nonparadoxical subregs changing size. */
26845 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26846 return true;
26849 return false;
26852 /* Return the cost of moving data of mode M between a
26853 register and memory. A value of 2 is the default; this cost is
26854 relative to those in `REGISTER_MOVE_COST'.
26856 This function is used extensively by register_move_cost that is used to
26857 build tables at startup. Make it inline in this case.
26858 When IN is 2, return maximum of in and out move cost.
26860 If moving between registers and memory is more expensive than
26861 between two registers, you should define this macro to express the
26862 relative cost.
26864 Also model the increased cost of moving QImode registers in non-Q_REGS
26865 classes. */
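/* IN is 0 for a store, 1 for a load, and 2 to request the maximum of the
   load and store costs; see ix86_memory_move_cost and
   ix86_register_move_cost below.  */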
26867 static inline int
26868 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
26869 int in)
26871 int cost;
26872 if (FLOAT_CLASS_P (regclass))
26874 int index;
26875 switch (mode)
26877 case SFmode:
26878 index = 0;
26879 break;
26880 case DFmode:
26881 index = 1;
26882 break;
26883 case XFmode:
26884 index = 2;
26885 break;
26886 default:
26887 return 100;
26889 if (in == 2)
26890 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26891 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
26893 if (SSE_CLASS_P (regclass))
26895 int index;
26896 switch (GET_MODE_SIZE (mode))
26898 case 4:
26899 index = 0;
26900 break;
26901 case 8:
26902 index = 1;
26903 break;
26904 case 16:
26905 index = 2;
26906 break;
26907 default:
26908 return 100;
26910 if (in == 2)
26911 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26912 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
26914 if (MMX_CLASS_P (regclass))
26916 int index;
26917 switch (GET_MODE_SIZE (mode))
26919 case 4:
26920 index = 0;
26921 break;
26922 case 8:
26923 index = 1;
26924 break;
26925 default:
26926 return 100;
26928 if (in == 2)
26929 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26930 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
26932 switch (GET_MODE_SIZE (mode))
26934 case 1:
26935 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26937 if (!in)
26938 return ix86_cost->int_store[0];
26939 if (TARGET_PARTIAL_REG_DEPENDENCY
26940 && optimize_function_for_speed_p (cfun))
26941 cost = ix86_cost->movzbl_load;
26942 else
26943 cost = ix86_cost->int_load[0];
26944 if (in == 2)
26945 return MAX (cost, ix86_cost->int_store[0]);
26946 return cost;
26948 else
26950 if (in == 2)
26951 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26952 if (in)
26953 return ix86_cost->movzbl_load;
26954 else
26955 return ix86_cost->int_store[0] + 4;
26957 break;
26958 case 2:
26959 if (in == 2)
26960 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26961 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26962 default:
26963 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
26964 if (mode == TFmode)
26965 mode = XFmode;
26966 if (in == 2)
26967 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26968 else if (in)
26969 cost = ix86_cost->int_load[2];
26970 else
26971 cost = ix86_cost->int_store[2];
26972 return (cost * (((int) GET_MODE_SIZE (mode)
26973 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
26977 static int
26978 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
26979 bool in)
26981 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
26985 /* Return the cost of moving data from a register in class CLASS1 to
26986 one in class CLASS2.
26988 It is not required that the cost always equal 2 when FROM is the same as TO;
26989 on some machines it is expensive to move between registers if they are not
26990 general registers. */
26992 static int
26993 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
26994 reg_class_t class2_i)
26996 enum reg_class class1 = (enum reg_class) class1_i;
26997 enum reg_class class2 = (enum reg_class) class2_i;
26999 /* In case we require secondary memory, compute cost of the store followed
27000 by load. In order to avoid bad register allocation choices, we need
27001 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
27003 if (inline_secondary_memory_needed (class1, class2, mode, 0))
27005 int cost = 1;
27007 cost += inline_memory_move_cost (mode, class1, 2);
27008 cost += inline_memory_move_cost (mode, class2, 2);
27010 /* In the case of copying from a general purpose register we may emit multiple
27011 stores followed by a single load, causing a memory size mismatch stall.
27012 Count this as an arbitrarily high cost of 20. */
27013 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
27014 cost += 20;
27016 /* In the case of FP/MMX moves, the registers actually overlap, and we
27017 have to switch modes in order to treat them differently. */
27018 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
27019 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
27020 cost += 20;
27022 return cost;
27025 /* Moves between SSE/MMX and integer unit are expensive. */
27026 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
27027 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
27029 /* ??? By keeping the returned value relatively high, we limit the number
27030 of moves between integer and MMX/SSE registers for all targets.
27031 Additionally, the high value prevents problems with x86_modes_tieable_p(),
27032 where integer modes in MMX/SSE registers are not tieable
27033 because of missing QImode and HImode moves to, from or between
27034 MMX/SSE registers. */
27035 return MAX (8, ix86_cost->mmxsse_to_integer);
27037 if (MAYBE_FLOAT_CLASS_P (class1))
27038 return ix86_cost->fp_move;
27039 if (MAYBE_SSE_CLASS_P (class1))
27040 return ix86_cost->sse_move;
27041 if (MAYBE_MMX_CLASS_P (class1))
27042 return ix86_cost->mmx_move;
27043 return 2;
27046 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
27048 bool
27049 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
27051 /* Flags and only flags can only hold CCmode values. */
27052 if (CC_REGNO_P (regno))
27053 return GET_MODE_CLASS (mode) == MODE_CC;
27054 if (GET_MODE_CLASS (mode) == MODE_CC
27055 || GET_MODE_CLASS (mode) == MODE_RANDOM
27056 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
27057 return 0;
27058 if (FP_REGNO_P (regno))
27059 return VALID_FP_MODE_P (mode);
27060 if (SSE_REGNO_P (regno))
27062 /* We implement the move patterns for all vector modes into and
27063 out of SSE registers, even when no operation instructions
27064 are available. OImode move is available only when AVX is
27065 enabled. */
27066 return ((TARGET_AVX && mode == OImode)
27067 || VALID_AVX256_REG_MODE (mode)
27068 || VALID_SSE_REG_MODE (mode)
27069 || VALID_SSE2_REG_MODE (mode)
27070 || VALID_MMX_REG_MODE (mode)
27071 || VALID_MMX_REG_MODE_3DNOW (mode));
27073 if (MMX_REGNO_P (regno))
27075 /* We implement the move patterns for 3DNOW modes even in MMX mode,
27076 so if the register is available at all, then we can move data of
27077 the given mode into or out of it. */
27078 return (VALID_MMX_REG_MODE (mode)
27079 || VALID_MMX_REG_MODE_3DNOW (mode));
27082 if (mode == QImode)
27084 /* Take care for QImode values - they can be in non-QI regs,
27085 but then they do cause partial register stalls. */
27086 if (regno <= BX_REG || TARGET_64BIT)
27087 return 1;
27088 if (!TARGET_PARTIAL_REG_STALL)
27089 return 1;
27090 return reload_in_progress || reload_completed;
27092 /* We handle both integers and floats in the general purpose registers. */
27093 else if (VALID_INT_MODE_P (mode))
27094 return 1;
27095 else if (VALID_FP_MODE_P (mode))
27096 return 1;
27097 else if (VALID_DFP_MODE_P (mode))
27098 return 1;
27099 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
27100 on to use that value in smaller contexts, this can easily force a
27101 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
27102 supporting DImode, allow it. */
27103 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
27104 return 1;
27106 return 0;
27109 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
27110 tieable integer mode. */
27112 static bool
27113 ix86_tieable_integer_mode_p (enum machine_mode mode)
27115 switch (mode)
27117 case HImode:
27118 case SImode:
27119 return true;
27121 case QImode:
27122 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
27124 case DImode:
27125 return TARGET_64BIT;
27127 default:
27128 return false;
27132 /* Return true if MODE1 is accessible in a register that can hold MODE2
27133 without copying. That is, all register classes that can hold MODE2
27134 can also hold MODE1. */
27136 bool
27137 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
27139 if (mode1 == mode2)
27140 return true;
27142 if (ix86_tieable_integer_mode_p (mode1)
27143 && ix86_tieable_integer_mode_p (mode2))
27144 return true;
27146 /* MODE2 being XFmode implies fp stack or general regs, which means we
27147 can tie any smaller floating point modes to it. Note that we do not
27148 tie this with TFmode. */
27149 if (mode2 == XFmode)
27150 return mode1 == SFmode || mode1 == DFmode;
27152 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
27153 that we can tie it with SFmode. */
27154 if (mode2 == DFmode)
27155 return mode1 == SFmode;
27157 /* If MODE2 is only appropriate for an SSE register, then tie with
27158 any other mode acceptable to SSE registers. */
27159 if (GET_MODE_SIZE (mode2) == 16
27160 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
27161 return (GET_MODE_SIZE (mode1) == 16
27162 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
27164 /* If MODE2 is appropriate for an MMX register, then tie
27165 with any other mode acceptable to MMX registers. */
27166 if (GET_MODE_SIZE (mode2) == 8
27167 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
27168 return (GET_MODE_SIZE (mode1) == 8
27169 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
27171 return false;
27174 /* Compute a (partial) cost for rtx X. Return true if the complete
27175 cost has been computed, and false if subexpressions should be
27176 scanned. In either case, *TOTAL contains the cost result. */
27178 static bool
27179 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
27181 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
27182 enum machine_mode mode = GET_MODE (x);
27183 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
27185 switch (code)
27187 case CONST_INT:
27188 case CONST:
27189 case LABEL_REF:
27190 case SYMBOL_REF:
27191 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
27192 *total = 3;
27193 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
27194 *total = 2;
27195 else if (flag_pic && SYMBOLIC_CONST (x)
27196 && (!TARGET_64BIT
27197 || (GET_CODE (x) != LABEL_REF
27198 && (GET_CODE (x) != SYMBOL_REF
27199 || !SYMBOL_REF_LOCAL_P (x)))))
27200 *total = 1;
27201 else
27202 *total = 0;
27203 return true;
27205 case CONST_DOUBLE:
27206 if (mode == VOIDmode)
27207 *total = 0;
27208 else
27209 switch (standard_80387_constant_p (x))
27211 case 1: /* 0.0 */
27212 *total = 1;
27213 break;
27214 default: /* Other constants */
27215 *total = 2;
27216 break;
27217 case 0:
27218 case -1:
27219 /* Start with (MEM (SYMBOL_REF)), since that's where
27220 it'll probably end up. Add a penalty for size. */
27221 *total = (COSTS_N_INSNS (1)
27222 + (flag_pic != 0 && !TARGET_64BIT)
27223 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
27224 break;
27226 return true;
27228 case ZERO_EXTEND:
27229 /* The zero extension is often completely free on x86_64, so make
27230 it as cheap as possible. */
27231 if (TARGET_64BIT && mode == DImode
27232 && GET_MODE (XEXP (x, 0)) == SImode)
27233 *total = 1;
27234 else if (TARGET_ZERO_EXTEND_WITH_AND)
27235 *total = cost->add;
27236 else
27237 *total = cost->movzx;
27238 return false;
27240 case SIGN_EXTEND:
27241 *total = cost->movsx;
27242 return false;
27244 case ASHIFT:
27245 if (CONST_INT_P (XEXP (x, 1))
27246 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
27248 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
27249 if (value == 1)
27251 *total = cost->add;
27252 return false;
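/* A shift left by 2 or 3 can also be implemented with LEA using a scale
   of 4 or 8, so prefer LEA when it is no more expensive than the shift.  */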
27254 if ((value == 2 || value == 3)
27255 && cost->lea <= cost->shift_const)
27257 *total = cost->lea;
27258 return false;
27261 /* FALLTHRU */
27263 case ROTATE:
27264 case ASHIFTRT:
27265 case LSHIFTRT:
27266 case ROTATERT:
27267 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
27269 if (CONST_INT_P (XEXP (x, 1)))
27271 if (INTVAL (XEXP (x, 1)) > 32)
27272 *total = cost->shift_const + COSTS_N_INSNS (2);
27273 else
27274 *total = cost->shift_const * 2;
27276 else
27278 if (GET_CODE (XEXP (x, 1)) == AND)
27279 *total = cost->shift_var * 2;
27280 else
27281 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
27284 else
27286 if (CONST_INT_P (XEXP (x, 1)))
27287 *total = cost->shift_const;
27288 else
27289 *total = cost->shift_var;
27291 return false;
27293 case MULT:
27294 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27296 /* ??? SSE scalar cost should be used here. */
27297 *total = cost->fmul;
27298 return false;
27300 else if (X87_FLOAT_MODE_P (mode))
27302 *total = cost->fmul;
27303 return false;
27305 else if (FLOAT_MODE_P (mode))
27307 /* ??? SSE vector cost should be used here. */
27308 *total = cost->fmul;
27309 return false;
27311 else
27313 rtx op0 = XEXP (x, 0);
27314 rtx op1 = XEXP (x, 1);
27315 int nbits;
27316 if (CONST_INT_P (XEXP (x, 1)))
27318 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
27319 for (nbits = 0; value != 0; value &= value - 1)
27320 nbits++;
27322 else
27323 /* This is arbitrary. */
27324 nbits = 7;
27326 /* Compute costs correctly for widening multiplication. */
27327 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
27328 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
27329 == GET_MODE_SIZE (mode))
27331 int is_mulwiden = 0;
27332 enum machine_mode inner_mode = GET_MODE (op0);
27334 if (GET_CODE (op0) == GET_CODE (op1))
27335 is_mulwiden = 1, op1 = XEXP (op1, 0);
27336 else if (CONST_INT_P (op1))
27338 if (GET_CODE (op0) == SIGN_EXTEND)
27339 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
27340 == INTVAL (op1);
27341 else
27342 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
27345 if (is_mulwiden)
27346 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
27349 *total = (cost->mult_init[MODE_INDEX (mode)]
27350 + nbits * cost->mult_bit
27351 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
27353 return true;
27356 case DIV:
27357 case UDIV:
27358 case MOD:
27359 case UMOD:
27360 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27361 /* ??? SSE cost should be used here. */
27362 *total = cost->fdiv;
27363 else if (X87_FLOAT_MODE_P (mode))
27364 *total = cost->fdiv;
27365 else if (FLOAT_MODE_P (mode))
27366 /* ??? SSE vector cost should be used here. */
27367 *total = cost->fdiv;
27368 else
27369 *total = cost->divide[MODE_INDEX (mode)];
27370 return false;
27372 case PLUS:
27373 if (GET_MODE_CLASS (mode) == MODE_INT
27374 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
27376 if (GET_CODE (XEXP (x, 0)) == PLUS
27377 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
27378 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
27379 && CONSTANT_P (XEXP (x, 1)))
27381 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
27382 if (val == 2 || val == 4 || val == 8)
27384 *total = cost->lea;
27385 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
27386 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
27387 outer_code, speed);
27388 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
27389 return true;
27392 else if (GET_CODE (XEXP (x, 0)) == MULT
27393 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
27395 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
27396 if (val == 2 || val == 4 || val == 8)
27398 *total = cost->lea;
27399 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
27400 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
27401 return true;
27404 else if (GET_CODE (XEXP (x, 0)) == PLUS)
27406 *total = cost->lea;
27407 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
27408 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
27409 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
27410 return true;
27413 /* FALLTHRU */
27415 case MINUS:
27416 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27418 /* ??? SSE cost should be used here. */
27419 *total = cost->fadd;
27420 return false;
27422 else if (X87_FLOAT_MODE_P (mode))
27424 *total = cost->fadd;
27425 return false;
27427 else if (FLOAT_MODE_P (mode))
27429 /* ??? SSE vector cost should be used here. */
27430 *total = cost->fadd;
27431 return false;
27433 /* FALLTHRU */
27435 case AND:
27436 case IOR:
27437 case XOR:
27438 if (!TARGET_64BIT && mode == DImode)
27440 *total = (cost->add * 2
27441 + (rtx_cost (XEXP (x, 0), outer_code, speed)
27442 << (GET_MODE (XEXP (x, 0)) != DImode))
27443 + (rtx_cost (XEXP (x, 1), outer_code, speed)
27444 << (GET_MODE (XEXP (x, 1)) != DImode)));
27445 return true;
27447 /* FALLTHRU */
27449 case NEG:
27450 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27452 /* ??? SSE cost should be used here. */
27453 *total = cost->fchs;
27454 return false;
27456 else if (X87_FLOAT_MODE_P (mode))
27458 *total = cost->fchs;
27459 return false;
27461 else if (FLOAT_MODE_P (mode))
27463 /* ??? SSE vector cost should be used here. */
27464 *total = cost->fchs;
27465 return false;
27467 /* FALLTHRU */
27469 case NOT:
27470 if (!TARGET_64BIT && mode == DImode)
27471 *total = cost->add * 2;
27472 else
27473 *total = cost->add;
27474 return false;
27476 case COMPARE:
27477 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
27478 && XEXP (XEXP (x, 0), 1) == const1_rtx
27479 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
27480 && XEXP (x, 1) == const0_rtx)
27482 /* This kind of construct is implemented using test[bwl].
27483 Treat it as if we had an AND. */
27484 *total = (cost->add
27485 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
27486 + rtx_cost (const1_rtx, outer_code, speed));
27487 return true;
27489 return false;
27491 case FLOAT_EXTEND:
27492 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
27493 *total = 0;
27494 return false;
27496 case ABS:
27497 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27498 /* ??? SSE cost should be used here. */
27499 *total = cost->fabs;
27500 else if (X87_FLOAT_MODE_P (mode))
27501 *total = cost->fabs;
27502 else if (FLOAT_MODE_P (mode))
27503 /* ??? SSE vector cost should be used here. */
27504 *total = cost->fabs;
27505 return false;
27507 case SQRT:
27508 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27509 /* ??? SSE cost should be used here. */
27510 *total = cost->fsqrt;
27511 else if (X87_FLOAT_MODE_P (mode))
27512 *total = cost->fsqrt;
27513 else if (FLOAT_MODE_P (mode))
27514 /* ??? SSE vector cost should be used here. */
27515 *total = cost->fsqrt;
27516 return false;
27518 case UNSPEC:
27519 if (XINT (x, 1) == UNSPEC_TP)
27520 *total = 0;
27521 return false;
27523 case VEC_SELECT:
27524 case VEC_CONCAT:
27525 case VEC_MERGE:
27526 case VEC_DUPLICATE:
27527 /* ??? Assume all of these vector manipulation patterns are
27528 recognizable. In which case they all pretty much have the
27529 same cost. */
27530 *total = COSTS_N_INSNS (1);
27531 return true;
27533 default:
27534 return false;
27538 #if TARGET_MACHO
27540 static int current_machopic_label_num;
27542 /* Given a symbol name and its associated stub, write out the
27543 definition of the stub. */
27545 void
27546 machopic_output_stub (FILE *file, const char *symb, const char *stub)
27548 unsigned int length;
27549 char *binder_name, *symbol_name, lazy_ptr_name[32];
27550 int label = ++current_machopic_label_num;
27552 /* For 64-bit we shouldn't get here. */
27553 gcc_assert (!TARGET_64BIT);
27555 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
27556 symb = targetm.strip_name_encoding (symb);
27558 length = strlen (stub);
27559 binder_name = XALLOCAVEC (char, length + 32);
27560 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
27562 length = strlen (symb);
27563 symbol_name = XALLOCAVEC (char, length + 32);
27564 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
27566 sprintf (lazy_ptr_name, "L%d$lz", label);
27568 if (MACHOPIC_PURE)
27569 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
27570 else
27571 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
27573 fprintf (file, "%s:\n", stub);
27574 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27576 if (MACHOPIC_PURE)
27578 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
27579 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
27580 fprintf (file, "\tjmp\t*%%edx\n");
27582 else
27583 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
27585 fprintf (file, "%s:\n", binder_name);
27587 if (MACHOPIC_PURE)
27589 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
27590 fputs ("\tpushl\t%eax\n", file);
27592 else
27593 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
27595 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
27597 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
27598 fprintf (file, "%s:\n", lazy_ptr_name);
27599 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27600 fprintf (file, ASM_LONG "%s\n", binder_name);
27602 #endif /* TARGET_MACHO */
27604 /* Order the registers for register allocator. */
27606 void
27607 x86_order_regs_for_local_alloc (void)
27609 int pos = 0;
27610 int i;
27612 /* First allocate the local general purpose registers. */
27613 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27614 if (GENERAL_REGNO_P (i) && call_used_regs[i])
27615 reg_alloc_order [pos++] = i;
27617 /* Global general purpose registers. */
27618 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27619 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
27620 reg_alloc_order [pos++] = i;
27622 /* x87 registers come first in case we are doing FP math
27623 using them. */
27624 if (!TARGET_SSE_MATH)
27625 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27626 reg_alloc_order [pos++] = i;
27628 /* SSE registers. */
27629 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
27630 reg_alloc_order [pos++] = i;
27631 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
27632 reg_alloc_order [pos++] = i;
27634 /* x87 registers. */
27635 if (TARGET_SSE_MATH)
27636 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27637 reg_alloc_order [pos++] = i;
27639 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
27640 reg_alloc_order [pos++] = i;
27642 /* Initialize the rest of the array, as we do not allocate some registers
27643 at all. */
27644 while (pos < FIRST_PSEUDO_REGISTER)
27645 reg_alloc_order [pos++] = 0;
27648 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
27649 struct attribute_spec.handler. */
27650 static tree
27651 ix86_handle_abi_attribute (tree *node, tree name,
27652 tree args ATTRIBUTE_UNUSED,
27653 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27655 if (TREE_CODE (*node) != FUNCTION_TYPE
27656 && TREE_CODE (*node) != METHOD_TYPE
27657 && TREE_CODE (*node) != FIELD_DECL
27658 && TREE_CODE (*node) != TYPE_DECL)
27660 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27661 name);
27662 *no_add_attrs = true;
27663 return NULL_TREE;
27665 if (!TARGET_64BIT)
27667 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
27668 name);
27669 *no_add_attrs = true;
27670 return NULL_TREE;
27673 /* Can combine regparm with all attributes but fastcall. */
27674 if (is_attribute_p ("ms_abi", name))
27676 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
27678 error ("ms_abi and sysv_abi attributes are not compatible");
27681 return NULL_TREE;
27683 else if (is_attribute_p ("sysv_abi", name))
27685 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
27687 error ("ms_abi and sysv_abi attributes are not compatible");
27690 return NULL_TREE;
27693 return NULL_TREE;
27696 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
27697 struct attribute_spec.handler. */
27698 static tree
27699 ix86_handle_struct_attribute (tree *node, tree name,
27700 tree args ATTRIBUTE_UNUSED,
27701 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27703 tree *type = NULL;
27704 if (DECL_P (*node))
27706 if (TREE_CODE (*node) == TYPE_DECL)
27707 type = &TREE_TYPE (*node);
27709 else
27710 type = node;
27712 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
27713 || TREE_CODE (*type) == UNION_TYPE)))
27715 warning (OPT_Wattributes, "%qE attribute ignored",
27716 name);
27717 *no_add_attrs = true;
27720 else if ((is_attribute_p ("ms_struct", name)
27721 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
27722 || ((is_attribute_p ("gcc_struct", name)
27723 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
27725 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
27726 name);
27727 *no_add_attrs = true;
27730 return NULL_TREE;
27733 static tree
27734 ix86_handle_fndecl_attribute (tree *node, tree name,
27735 tree args ATTRIBUTE_UNUSED,
27736 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27738 if (TREE_CODE (*node) != FUNCTION_DECL)
27740 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27741 name);
27742 *no_add_attrs = true;
27744 return NULL_TREE;
27747 static bool
27748 ix86_ms_bitfield_layout_p (const_tree record_type)
27750 return ((TARGET_MS_BITFIELD_LAYOUT
27751 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27752 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
27755 /* Returns an expression indicating where the this parameter is
27756 located on entry to the FUNCTION. */
27758 static rtx
27759 x86_this_parameter (tree function)
27761 tree type = TREE_TYPE (function);
27762 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
27763 int nregs;
27765 if (TARGET_64BIT)
27767 const int *parm_regs;
27769 if (ix86_function_type_abi (type) == MS_ABI)
27770 parm_regs = x86_64_ms_abi_int_parameter_registers;
27771 else
27772 parm_regs = x86_64_int_parameter_registers;
27773 return gen_rtx_REG (DImode, parm_regs[aggr]);
27776 nregs = ix86_function_regparm (type, function);
27778 if (nregs > 0 && !stdarg_p (type))
27780 int regno;
27782 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
27783 regno = aggr ? DX_REG : CX_REG;
27784 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
27786 regno = CX_REG;
27787 if (aggr)
27788 return gen_rtx_MEM (SImode,
27789 plus_constant (stack_pointer_rtx, 4));
27791 else
27793 regno = AX_REG;
27794 if (aggr)
27796 regno = DX_REG;
27797 if (nregs == 1)
27798 return gen_rtx_MEM (SImode,
27799 plus_constant (stack_pointer_rtx, 4));
27802 return gen_rtx_REG (SImode, regno);
27805 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
27808 /* Determine whether x86_output_mi_thunk can succeed. */
27810 static bool
27811 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
27812 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
27813 HOST_WIDE_INT vcall_offset, const_tree function)
27815 /* 64-bit can handle anything. */
27816 if (TARGET_64BIT)
27817 return true;
27819 /* For 32-bit, everything's fine if we have one free register. */
27820 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
27821 return true;
27823 /* Need a free register for vcall_offset. */
27824 if (vcall_offset)
27825 return false;
27827 /* Need a free register for GOT references. */
27828 if (flag_pic && !targetm.binds_local_p (function))
27829 return false;
27831 /* Otherwise ok. */
27832 return true;
27835 /* Output the assembler code for a thunk function. THUNK_DECL is the
27836 declaration for the thunk function itself, FUNCTION is the decl for
27837 the target function. DELTA is an immediate constant offset to be
27838 added to THIS. If VCALL_OFFSET is nonzero, the word at
27839 *(*this + vcall_offset) should be added to THIS. */
27841 static void
27842 x86_output_mi_thunk (FILE *file,
27843 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27844 HOST_WIDE_INT vcall_offset, tree function)
27846 rtx xops[3];
27847 rtx this_param = x86_this_parameter (function);
27848 rtx this_reg, tmp;
27850 /* Make sure unwind info is emitted for the thunk if needed. */
27851 final_start_function (emit_barrier (), file, 1);
27853 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27854 pull it in now and let DELTA benefit. */
27855 if (REG_P (this_param))
27856 this_reg = this_param;
27857 else if (vcall_offset)
27859 /* Put the this parameter into %eax. */
27860 xops[0] = this_param;
27861 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27862 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27864 else
27865 this_reg = NULL_RTX;
27867 /* Adjust the this parameter by a fixed constant. */
27868 if (delta)
27870 xops[0] = GEN_INT (delta);
27871 xops[1] = this_reg ? this_reg : this_param;
27872 if (TARGET_64BIT)
27874 if (!x86_64_general_operand (xops[0], DImode))
27876 tmp = gen_rtx_REG (DImode, R10_REG);
27877 xops[1] = tmp;
27878 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27879 xops[0] = tmp;
27880 xops[1] = this_param;
27882 if (x86_maybe_negate_const_int (&xops[0], DImode))
27883 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
27884 else
27885 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27887 else if (x86_maybe_negate_const_int (&xops[0], SImode))
27888 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
27889 else
27890 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27893 /* Adjust the this parameter by a value stored in the vtable. */
27894 if (vcall_offset)
27896 if (TARGET_64BIT)
27897 tmp = gen_rtx_REG (DImode, R10_REG);
27898 else
27900 int tmp_regno = CX_REG;
27901 if (lookup_attribute ("fastcall",
27902 TYPE_ATTRIBUTES (TREE_TYPE (function)))
27903 || lookup_attribute ("thiscall",
27904 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27905 tmp_regno = AX_REG;
27906 tmp = gen_rtx_REG (SImode, tmp_regno);
27909 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27910 xops[1] = tmp;
27911 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27913 /* Adjust the this parameter. */
27914 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27915 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27917 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27918 xops[0] = GEN_INT (vcall_offset);
27919 xops[1] = tmp2;
27920 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27921 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27923 xops[1] = this_reg;
27924 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27927 /* If necessary, drop THIS back to its stack slot. */
27928 if (this_reg && this_reg != this_param)
27930 xops[0] = this_reg;
27931 xops[1] = this_param;
27932 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27935 xops[0] = XEXP (DECL_RTL (function), 0);
27936 if (TARGET_64BIT)
27938 if (!flag_pic || targetm.binds_local_p (function))
27939 output_asm_insn ("jmp\t%P0", xops);
27940 /* All thunks should be in the same object as their target,
27941 and thus binds_local_p should be true. */
27942 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27943 gcc_unreachable ();
27944 else
27946 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27947 tmp = gen_rtx_CONST (Pmode, tmp);
27948 tmp = gen_rtx_MEM (QImode, tmp);
27949 xops[0] = tmp;
27950 output_asm_insn ("jmp\t%A0", xops);
27953 else
27955 if (!flag_pic || targetm.binds_local_p (function))
27956 output_asm_insn ("jmp\t%P0", xops);
27957 else
27958 #if TARGET_MACHO
27959 if (TARGET_MACHO)
27961 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27962 if (TARGET_MACHO_BRANCH_ISLANDS)
27963 sym_ref = (gen_rtx_SYMBOL_REF
27964 (Pmode,
27965 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27966 tmp = gen_rtx_MEM (QImode, sym_ref);
27967 xops[0] = tmp;
27968 output_asm_insn ("jmp\t%0", xops);
27970 else
27971 #endif /* TARGET_MACHO */
27973 tmp = gen_rtx_REG (SImode, CX_REG);
27974 output_set_got (tmp, NULL_RTX);
27976 xops[1] = tmp;
27977 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27978 output_asm_insn ("jmp\t{*}%1", xops);
27981 final_end_function ();
27984 static void
27985 x86_file_start (void)
27987 default_file_start ();
27988 #if TARGET_MACHO
27989 darwin_file_start ();
27990 #endif
27991 if (X86_FILE_START_VERSION_DIRECTIVE)
27992 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27993 if (X86_FILE_START_FLTUSED)
27994 fputs ("\t.global\t__fltused\n", asm_out_file);
27995 if (ix86_asm_dialect == ASM_INTEL)
27996 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
28000 x86_field_alignment (tree field, int computed)
28002 enum machine_mode mode;
28003 tree type = TREE_TYPE (field);
28005 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
28006 return computed;
28007 mode = TYPE_MODE (strip_array_types (type));
28008 if (mode == DFmode || mode == DCmode
28009 || GET_MODE_CLASS (mode) == MODE_INT
28010 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
28011 return MIN (32, computed);
28012 return computed;
28015 /* Output assembler code to FILE to increment profiler label # LABELNO
28016 for profiling a function entry. */
28017 void
28018 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
28020 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
28021 : MCOUNT_NAME);
28023 if (TARGET_64BIT)
28025 #ifndef NO_PROFILE_COUNTERS
28026 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
28027 #endif
28029 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
28030 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
28031 else
28032 fprintf (file, "\tcall\t%s\n", mcount_name);
28034 else if (flag_pic)
28036 #ifndef NO_PROFILE_COUNTERS
28037 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
28038 LPREFIX, labelno);
28039 #endif
28040 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
28042 else
28044 #ifndef NO_PROFILE_COUNTERS
28045 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
28046 LPREFIX, labelno);
28047 #endif
28048 fprintf (file, "\tcall\t%s\n", mcount_name);
28052 /* We don't have exact information about the insn sizes, but we may assume
28053 quite safely that we are informed about all 1 byte insns and memory
28054 address sizes. This is enough to eliminate unnecessary padding in
28055 99% of cases. */
28057 static int
28058 min_insn_size (rtx insn)
28060 int l = 0, len;
28062 if (!INSN_P (insn) || !active_insn_p (insn))
28063 return 0;
28065 /* Discard alignments we've emitted and jump instructions. */
28066 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
28067 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
28068 return 0;
28069 if (JUMP_TABLE_DATA_P (insn))
28070 return 0;
28072 /* Important case - calls are always 5 bytes.
28073 It is common to have many calls in a row. */
28074 if (CALL_P (insn)
28075 && symbolic_reference_mentioned_p (PATTERN (insn))
28076 && !SIBLING_CALL_P (insn))
28077 return 5;
28078 len = get_attr_length (insn);
28079 if (len <= 1)
28080 return 1;
28082 /* For normal instructions we rely on get_attr_length being exact,
28083 with a few exceptions. */
28084 if (!JUMP_P (insn))
28086 enum attr_type type = get_attr_type (insn);
28088 switch (type)
28090 case TYPE_MULTI:
28091 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
28092 || asm_noperands (PATTERN (insn)) >= 0)
28093 return 0;
28094 break;
28095 case TYPE_OTHER:
28096 case TYPE_FCMP:
28097 break;
28098 default:
28099 /* Otherwise trust get_attr_length. */
28100 return len;
28103 l = get_attr_length_address (insn);
28104 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
28105 l = 4;
28107 if (l)
28108 return 1+l;
28109 else
28110 return 2;
28113 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
28115 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte
28116 window. */
28118 static void
28119 ix86_avoid_jump_mispredicts (void)
28121 rtx insn, start = get_insns ();
28122 int nbytes = 0, njumps = 0;
28123 int isjump = 0;
28125 /* Look for all minimal intervals of instructions containing 4 jumps.
28126 The intervals are bounded by START and INSN. NBYTES is the total
28127 size of instructions in the interval including INSN and not including
28128 START. When NBYTES is smaller than 16 bytes, it is possible
28129 that the end of START and INSN end up in the same 16-byte page.
28131 The smallest offset in the page at which INSN can start is the case where START
28132 ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
28133 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
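/* Illustrative worked example (added commentary, not from the original
   source): with NBYTES = 12 and sizeof (INSN) = 2, the padding requested
   below is 15 - 12 + 2 = 5 bytes, which is enough to keep the fourth
   jump from starting in the same 16-byte window as START.  */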
28135 for (insn = start; insn; insn = NEXT_INSN (insn))
28137 int min_size;
28139 if (LABEL_P (insn))
28141 int align = label_to_alignment (insn);
28142 int max_skip = label_to_max_skip (insn);
28144 if (max_skip > 15)
28145 max_skip = 15;
28146 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
28147 already in the current 16 byte page, because otherwise
28148 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
28149 bytes to reach 16 byte boundary. */
28150 if (align <= 0
28151 || (align <= 3 && max_skip != (1 << align) - 1))
28152 max_skip = 0;
28153 if (dump_file)
28154 fprintf (dump_file, "Label %i with max_skip %i\n",
28155 INSN_UID (insn), max_skip);
28156 if (max_skip)
28158 while (nbytes + max_skip >= 16)
28160 start = NEXT_INSN (start);
28161 if ((JUMP_P (start)
28162 && GET_CODE (PATTERN (start)) != ADDR_VEC
28163 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
28164 || CALL_P (start))
28165 njumps--, isjump = 1;
28166 else
28167 isjump = 0;
28168 nbytes -= min_insn_size (start);
28171 continue;
28174 min_size = min_insn_size (insn);
28175 nbytes += min_size;
28176 if (dump_file)
28177 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
28178 INSN_UID (insn), min_size);
28179 if ((JUMP_P (insn)
28180 && GET_CODE (PATTERN (insn)) != ADDR_VEC
28181 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
28182 || CALL_P (insn))
28183 njumps++;
28184 else
28185 continue;
28187 while (njumps > 3)
28189 start = NEXT_INSN (start);
28190 if ((JUMP_P (start)
28191 && GET_CODE (PATTERN (start)) != ADDR_VEC
28192 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
28193 || CALL_P (start))
28194 njumps--, isjump = 1;
28195 else
28196 isjump = 0;
28197 nbytes -= min_insn_size (start);
28199 gcc_assert (njumps >= 0);
28200 if (dump_file)
28201 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
28202 INSN_UID (start), INSN_UID (insn), nbytes);
28204 if (njumps == 3 && isjump && nbytes < 16)
28206 int padsize = 15 - nbytes + min_insn_size (insn);
28208 if (dump_file)
28209 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
28210 INSN_UID (insn), padsize);
28211 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
28215 #endif
28217 /* AMD Athlon works faster
28218 when RET is not the destination of a conditional jump or directly preceded
28219 by another jump instruction. We avoid the penalty by inserting a NOP just
28220 before the RET instruction in such cases. */
28221 static void
28222 ix86_pad_returns (void)
28224 edge e;
28225 edge_iterator ei;
28227 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
28229 basic_block bb = e->src;
28230 rtx ret = BB_END (bb);
28231 rtx prev;
28232 bool replace = false;
28234 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
28235 || optimize_bb_for_size_p (bb))
28236 continue;
28237 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
28238 if (active_insn_p (prev) || LABEL_P (prev))
28239 break;
28240 if (prev && LABEL_P (prev))
28242 edge e;
28243 edge_iterator ei;
28245 FOR_EACH_EDGE (e, ei, bb->preds)
28246 if (EDGE_FREQUENCY (e) && e->src->index >= 0
28247 && !(e->flags & EDGE_FALLTHRU))
28248 replace = true;
28250 if (!replace)
28252 prev = prev_active_insn (ret);
28253 if (prev
28254 && ((JUMP_P (prev) && any_condjump_p (prev))
28255 || CALL_P (prev)))
28256 replace = true;
28257 /* Empty functions get a branch mispredict even when the jump destination
28258 is not visible to us. */
28259 if (!prev && !optimize_function_for_size_p (cfun))
28260 replace = true;
28262 if (replace)
28264 emit_jump_insn_before (gen_return_internal_long (), ret);
28265 delete_insn (ret);
28270 /* Count the minimum number of instructions in BB. Return 4 if the
28271 number of instructions >= 4. */
28273 static int
28274 ix86_count_insn_bb (basic_block bb)
28276 rtx insn;
28277 int insn_count = 0;
28279 /* Count number of instructions in this block. Return 4 if the number
28280 of instructions >= 4. */
28281 FOR_BB_INSNS (bb, insn)
28283 /* This only happens in exit blocks. */
28284 if (JUMP_P (insn)
28285 && GET_CODE (PATTERN (insn)) == RETURN)
28286 break;
28288 if (NONDEBUG_INSN_P (insn)
28289 && GET_CODE (PATTERN (insn)) != USE
28290 && GET_CODE (PATTERN (insn)) != CLOBBER)
28292 insn_count++;
28293 if (insn_count >= 4)
28294 return insn_count;
28298 return insn_count;
28302 /* Count the minimum number of instructions in code path in BB.
28303 Return 4 if the number of instructions >= 4. */
28305 static int
28306 ix86_count_insn (basic_block bb)
28308 edge e;
28309 edge_iterator ei;
28310 int min_prev_count;
28312 /* Only bother counting instructions along paths with no
28313 more than 2 basic blocks between entry and exit. Given
28314 that BB has an edge to exit, determine if a predecessor
28315 of BB has an edge from entry. If so, compute the number
28316 of instructions in the predecessor block. If there
28317 happen to be multiple such blocks, compute the minimum. */
28318 min_prev_count = 4;
28319 FOR_EACH_EDGE (e, ei, bb->preds)
28321 edge prev_e;
28322 edge_iterator prev_ei;
28324 if (e->src == ENTRY_BLOCK_PTR)
28326 min_prev_count = 0;
28327 break;
28329 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
28331 if (prev_e->src == ENTRY_BLOCK_PTR)
28333 int count = ix86_count_insn_bb (e->src);
28334 if (count < min_prev_count)
28335 min_prev_count = count;
28336 break;
28341 if (min_prev_count < 4)
28342 min_prev_count += ix86_count_insn_bb (bb);
28344 return min_prev_count;
28347 /* Pad short function to 4 instructions. */
28349 static void
28350 ix86_pad_short_function (void)
28352 edge e;
28353 edge_iterator ei;
28355 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
28357 rtx ret = BB_END (e->src);
28358 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
28360 int insn_count = ix86_count_insn (e->src);
28362 /* Pad short function. */
28363 if (insn_count < 4)
28365 rtx insn = ret;
28367 /* Find epilogue. */
28368 while (insn
28369 && (!NOTE_P (insn)
28370 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
28371 insn = PREV_INSN (insn);
28373 if (!insn)
28374 insn = ret;
28376 /* Two NOPs are counted as one instruction. */
28377 insn_count = 2 * (4 - insn_count);
28378 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
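/* Illustrative worked example (added commentary, not from the original
   source): a path with only 2 counted instructions is 4 - 2 = 2
   instructions short, so gen_nops is asked for 2 * 2 = 4 single-byte
   NOPs, which the comment above counts as 2 instructions.  */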
28384 /* Implement machine specific optimizations. We implement padding of returns
28385 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
28386 static void
28387 ix86_reorg (void)
28389 if (optimize && optimize_function_for_speed_p (cfun))
28391 if (TARGET_PAD_SHORT_FUNCTION)
28392 ix86_pad_short_function ();
28393 else if (TARGET_PAD_RETURNS)
28394 ix86_pad_returns ();
28395 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
28396 if (TARGET_FOUR_JUMP_LIMIT)
28397 ix86_avoid_jump_mispredicts ();
28398 #endif
28402 /* Return nonzero when a QImode register that must be represented via a REX prefix
28403 is used. */
28404 bool
28405 x86_extended_QIreg_mentioned_p (rtx insn)
28407 int i;
28408 extract_insn_cached (insn);
28409 for (i = 0; i < recog_data.n_operands; i++)
28410 if (REG_P (recog_data.operand[i])
28411 && REGNO (recog_data.operand[i]) > BX_REG)
28412 return true;
28413 return false;
28416 /* Return nonzero when P points to a register encoded via a REX prefix.
28417 Called via for_each_rtx. */
28418 static int
28419 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
28421 unsigned int regno;
28422 if (!REG_P (*p))
28423 return 0;
28424 regno = REGNO (*p);
28425 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
28428 /* Return true when INSN mentions a register that must be encoded using a REX
28429 prefix. */
28430 bool
28431 x86_extended_reg_mentioned_p (rtx insn)
28433 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
28434 extended_reg_mentioned_1, NULL);
28437 /* If profitable, negate (without causing overflow) integer constant
28438 of mode MODE at location LOC. Return true in this case. */
28439 bool
28440 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
28442 HOST_WIDE_INT val;
28444 if (!CONST_INT_P (*loc))
28445 return false;
28447 switch (mode)
28449 case DImode:
28450 /* DImode x86_64 constants must fit in 32 bits. */
28451 gcc_assert (x86_64_immediate_operand (*loc, mode));
28453 mode = SImode;
28454 break;
28456 case SImode:
28457 case HImode:
28458 case QImode:
28459 break;
28461 default:
28462 gcc_unreachable ();
28465 /* Avoid overflows. */
28466 if (mode_signbit_p (mode, *loc))
28467 return false;
28469 val = INTVAL (*loc);
28471 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
28472 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
28473 if ((val < 0 && val != -128)
28474 || val == 128)
28476 *loc = GEN_INT (-val);
28477 return true;
28480 return false;
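/* Illustrative examples (added commentary, not from the original source)
   of the rewrite this enables in the callers: "addl $-4, %eax" becomes
   "subl $4, %eax", and "addl $128, %eax" becomes "subl $-128, %eax",
   because -128 still fits in a sign-extended 8-bit immediate while +128
   does not.  */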
28483 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
28484 optabs would emit if we didn't have TFmode patterns. */
28486 void
28487 x86_emit_floatuns (rtx operands[2])
28489 rtx neglab, donelab, i0, i1, f0, in, out;
28490 enum machine_mode mode, inmode;
28492 inmode = GET_MODE (operands[1]);
28493 gcc_assert (inmode == SImode || inmode == DImode);
28495 out = operands[0];
28496 in = force_reg (inmode, operands[1]);
28497 mode = GET_MODE (out);
28498 neglab = gen_label_rtx ();
28499 donelab = gen_label_rtx ();
28500 f0 = gen_reg_rtx (mode);
28502 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
28504 expand_float (out, in, 0);
28506 emit_jump_insn (gen_jump (donelab));
28507 emit_barrier ();
28509 emit_label (neglab);
28511 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
28512 1, OPTAB_DIRECT);
28513 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
28514 1, OPTAB_DIRECT);
28515 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
28517 expand_float (f0, i0, 0);
28519 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
28521 emit_label (donelab);
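/* Illustrative scalar sketch of the sequence emitted above (an assumption
   for exposition only, not part of this file): a value whose sign bit is
   set, viewed as signed, is halved with its low bit folded back in so the
   final doubling rounds the same way a direct conversion would.  */
#if 0
static double
emulate_floatuns_di (unsigned long long in)
{
  unsigned long long i0;
  double f0;

  if ((long long) in >= 0)
    return (double) (long long) in;	/* a signed conversion suffices */

  /* i0 = (in >> 1) | (in & 1); convert, then double.  */
  i0 = (in >> 1) | (in & 1);
  f0 = (double) (long long) i0;
  return f0 + f0;
}
#endif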
28524 /* AVX does not support 32-byte integer vector operations,
28525 thus the longest vector we are faced with is V16QImode. */
28526 #define MAX_VECT_LEN 16
28528 struct expand_vec_perm_d
28530 rtx target, op0, op1;
28531 unsigned char perm[MAX_VECT_LEN];
28532 enum machine_mode vmode;
28533 unsigned char nelt;
28534 bool testing_p;
28537 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
28538 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
28540 /* Get a vector mode of the same size as the original but with elements
28541 twice as wide. This is only guaranteed to apply to integral vectors. */
28543 static inline enum machine_mode
28544 get_mode_wider_vector (enum machine_mode o)
28546 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
28547 enum machine_mode n = GET_MODE_WIDER_MODE (o);
28548 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
28549 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
28550 return n;
28553 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28554 with all elements equal to VAR. Return true if successful. */
28556 static bool
28557 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
28558 rtx target, rtx val)
28560 bool ok;
28562 switch (mode)
28564 case V2SImode:
28565 case V2SFmode:
28566 if (!mmx_ok)
28567 return false;
28568 /* FALLTHRU */
28570 case V4DFmode:
28571 case V4DImode:
28572 case V8SFmode:
28573 case V8SImode:
28574 case V2DFmode:
28575 case V2DImode:
28576 case V4SFmode:
28577 case V4SImode:
28579 rtx insn, dup;
28581 /* First attempt to recognize VAL as-is. */
28582 dup = gen_rtx_VEC_DUPLICATE (mode, val);
28583 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
28584 if (recog_memoized (insn) < 0)
28586 rtx seq;
28587 /* If that fails, force VAL into a register. */
28589 start_sequence ();
28590 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
28591 seq = get_insns ();
28592 end_sequence ();
28593 if (seq)
28594 emit_insn_before (seq, insn);
28596 ok = recog_memoized (insn) >= 0;
28597 gcc_assert (ok);
28600 return true;
28602 case V4HImode:
28603 if (!mmx_ok)
28604 return false;
28605 if (TARGET_SSE || TARGET_3DNOW_A)
28607 rtx x;
28609 val = gen_lowpart (SImode, val);
28610 x = gen_rtx_TRUNCATE (HImode, val);
28611 x = gen_rtx_VEC_DUPLICATE (mode, x);
28612 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28613 return true;
28615 goto widen;
28617 case V8QImode:
28618 if (!mmx_ok)
28619 return false;
28620 goto widen;
28622 case V8HImode:
28623 if (TARGET_SSE2)
28625 struct expand_vec_perm_d dperm;
28626 rtx tmp1, tmp2;
28628 permute:
28629 memset (&dperm, 0, sizeof (dperm));
28630 dperm.target = target;
28631 dperm.vmode = mode;
28632 dperm.nelt = GET_MODE_NUNITS (mode);
28633 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
28635 /* Extend to SImode using a paradoxical SUBREG. */
28636 tmp1 = gen_reg_rtx (SImode);
28637 emit_move_insn (tmp1, gen_lowpart (SImode, val));
28639 /* Insert the SImode value as low element of a V4SImode vector. */
28640 tmp2 = gen_lowpart (V4SImode, dperm.op0);
28641 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
28643 ok = (expand_vec_perm_1 (&dperm)
28644 || expand_vec_perm_broadcast_1 (&dperm));
28645 gcc_assert (ok);
28646 return ok;
28648 goto widen;
28650 case V16QImode:
28651 if (TARGET_SSE2)
28652 goto permute;
28653 goto widen;
28655 widen:
28656 /* Replicate the value once into the next wider mode and recurse. */
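/* Illustrative example (added commentary, not from the original source):
   a QImode value 0xab is widened to the HImode value 0xabab via
   (val << 8) | val, and the recursion then broadcasts that value in the
   next wider vector mode, e.g. V8HImode when the request was V16QImode.  */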
28658 enum machine_mode smode, wsmode, wvmode;
28659 rtx x;
28661 smode = GET_MODE_INNER (mode);
28662 wvmode = get_mode_wider_vector (mode);
28663 wsmode = GET_MODE_INNER (wvmode);
28665 val = convert_modes (wsmode, smode, val, true);
28666 x = expand_simple_binop (wsmode, ASHIFT, val,
28667 GEN_INT (GET_MODE_BITSIZE (smode)),
28668 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28669 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
28671 x = gen_lowpart (wvmode, target);
28672 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
28673 gcc_assert (ok);
28674 return ok;
28677 case V16HImode:
28678 case V32QImode:
28680 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
28681 rtx x = gen_reg_rtx (hvmode);
28683 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
28684 gcc_assert (ok);
28686 x = gen_rtx_VEC_CONCAT (mode, x, x);
28687 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28689 return true;
28691 default:
28692 return false;
28696 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28697 whose ONE_VAR element is VAR, and other elements are zero. Return true
28698 if successful. */
28700 static bool
28701 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
28702 rtx target, rtx var, int one_var)
28704 enum machine_mode vsimode;
28705 rtx new_target;
28706 rtx x, tmp;
28707 bool use_vector_set = false;
28709 switch (mode)
28711 case V2DImode:
28712 /* For SSE4.1, we normally use vector set. But if the second
28713 element is zero and inter-unit moves are OK, we use movq
28714 instead. */
28715 use_vector_set = (TARGET_64BIT
28716 && TARGET_SSE4_1
28717 && !(TARGET_INTER_UNIT_MOVES
28718 && one_var == 0));
28719 break;
28720 case V16QImode:
28721 case V4SImode:
28722 case V4SFmode:
28723 use_vector_set = TARGET_SSE4_1;
28724 break;
28725 case V8HImode:
28726 use_vector_set = TARGET_SSE2;
28727 break;
28728 case V4HImode:
28729 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
28730 break;
28731 case V32QImode:
28732 case V16HImode:
28733 case V8SImode:
28734 case V8SFmode:
28735 case V4DFmode:
28736 use_vector_set = TARGET_AVX;
28737 break;
28738 case V4DImode:
28739 /* Use ix86_expand_vector_set in 64bit mode only. */
28740 use_vector_set = TARGET_AVX && TARGET_64BIT;
28741 break;
28742 default:
28743 break;
28746 if (use_vector_set)
28748 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
28749 var = force_reg (GET_MODE_INNER (mode), var);
28750 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28751 return true;
28754 switch (mode)
28756 case V2SFmode:
28757 case V2SImode:
28758 if (!mmx_ok)
28759 return false;
28760 /* FALLTHRU */
28762 case V2DFmode:
28763 case V2DImode:
28764 if (one_var != 0)
28765 return false;
28766 var = force_reg (GET_MODE_INNER (mode), var);
28767 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
28768 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28769 return true;
28771 case V4SFmode:
28772 case V4SImode:
28773 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
28774 new_target = gen_reg_rtx (mode);
28775 else
28776 new_target = target;
28777 var = force_reg (GET_MODE_INNER (mode), var);
28778 x = gen_rtx_VEC_DUPLICATE (mode, var);
28779 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
28780 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
28781 if (one_var != 0)
28783 /* We need to shuffle the value to the correct position, so
28784 create a new pseudo to store the intermediate result. */
28786 /* With SSE2, we can use the integer shuffle insns. */
28787 if (mode != V4SFmode && TARGET_SSE2)
28789 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
28790 const1_rtx,
28791 GEN_INT (one_var == 1 ? 0 : 1),
28792 GEN_INT (one_var == 2 ? 0 : 1),
28793 GEN_INT (one_var == 3 ? 0 : 1)));
28794 if (target != new_target)
28795 emit_move_insn (target, new_target);
28796 return true;
28799 /* Otherwise convert the intermediate result to V4SFmode and
28800 use the SSE1 shuffle instructions. */
28801 if (mode != V4SFmode)
28803 tmp = gen_reg_rtx (V4SFmode);
28804 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
28806 else
28807 tmp = new_target;
28809 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
28810 const1_rtx,
28811 GEN_INT (one_var == 1 ? 0 : 1),
28812 GEN_INT (one_var == 2 ? 0+4 : 1+4),
28813 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
28815 if (mode != V4SFmode)
28816 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
28817 else if (tmp != target)
28818 emit_move_insn (target, tmp);
28820 else if (target != new_target)
28821 emit_move_insn (target, new_target);
28822 return true;
28824 case V8HImode:
28825 case V16QImode:
28826 vsimode = V4SImode;
28827 goto widen;
28828 case V4HImode:
28829 case V8QImode:
28830 if (!mmx_ok)
28831 return false;
28832 vsimode = V2SImode;
28833 goto widen;
28834 widen:
28835 if (one_var != 0)
28836 return false;
28838 /* Zero extend the variable element to SImode and recurse. */
28839 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
28841 x = gen_reg_rtx (vsimode);
28842 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
28843 var, one_var))
28844 gcc_unreachable ();
28846 emit_move_insn (target, gen_lowpart (mode, x));
28847 return true;
28849 default:
28850 return false;
28854 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28855 consisting of the values in VALS. It is known that all elements
28856 except ONE_VAR are constants. Return true if successful. */
28858 static bool
28859 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
28860 rtx target, rtx vals, int one_var)
28862 rtx var = XVECEXP (vals, 0, one_var);
28863 enum machine_mode wmode;
28864 rtx const_vec, x;
28866 const_vec = copy_rtx (vals);
28867 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
28868 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
28870 switch (mode)
28872 case V2DFmode:
28873 case V2DImode:
28874 case V2SFmode:
28875 case V2SImode:
28876 /* For the two element vectors, it's just as easy to use
28877 the general case. */
28878 return false;
28880 case V4DImode:
28881 /* Use ix86_expand_vector_set in 64bit mode only. */
28882 if (!TARGET_64BIT)
28883 return false;
28884 case V4DFmode:
28885 case V8SFmode:
28886 case V8SImode:
28887 case V16HImode:
28888 case V32QImode:
28889 case V4SFmode:
28890 case V4SImode:
28891 case V8HImode:
28892 case V4HImode:
28893 break;
28895 case V16QImode:
28896 if (TARGET_SSE4_1)
28897 break;
28898 wmode = V8HImode;
28899 goto widen;
28900 case V8QImode:
28901 wmode = V4HImode;
28902 goto widen;
28903 widen:
28904 /* There's no way to set one QImode entry easily. Combine
28905 the variable value with its adjacent constant value, and
28906 promote to an HImode set. */
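/* Illustrative example (added commentary, not from the original source):
   to set element 5 of a V8QImode vector, the variable byte is paired
   with the constant in element 4; since 5 is odd the variable ends up in
   the high half, var = (var << 8) | (c4 & 0xff), and the combined HImode
   value is inserted as element 5 >> 1 == 2 of the V4HImode view.  */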
28907 x = XVECEXP (vals, 0, one_var ^ 1);
28908 if (one_var & 1)
28910 var = convert_modes (HImode, QImode, var, true);
28911 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
28912 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28913 x = GEN_INT (INTVAL (x) & 0xff);
28915 else
28917 var = convert_modes (HImode, QImode, var, true);
28918 x = gen_int_mode (INTVAL (x) << 8, HImode);
28920 if (x != const0_rtx)
28921 var = expand_simple_binop (HImode, IOR, var, x, var,
28922 1, OPTAB_LIB_WIDEN);
28924 x = gen_reg_rtx (wmode);
28925 emit_move_insn (x, gen_lowpart (wmode, const_vec));
28926 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
28928 emit_move_insn (target, gen_lowpart (mode, x));
28929 return true;
28931 default:
28932 return false;
28935 emit_move_insn (target, const_vec);
28936 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28937 return true;
28940 /* A subroutine of ix86_expand_vector_init_general. Use vector
28941 concatenate to handle the most general case: all values variable,
28942 and none identical. */
28944 static void
28945 ix86_expand_vector_init_concat (enum machine_mode mode,
28946 rtx target, rtx *ops, int n)
28948 enum machine_mode cmode, hmode = VOIDmode;
28949 rtx first[8], second[4];
28950 rtvec v;
28951 int i, j;
28953 switch (n)
28955 case 2:
28956 switch (mode)
28958 case V8SImode:
28959 cmode = V4SImode;
28960 break;
28961 case V8SFmode:
28962 cmode = V4SFmode;
28963 break;
28964 case V4DImode:
28965 cmode = V2DImode;
28966 break;
28967 case V4DFmode:
28968 cmode = V2DFmode;
28969 break;
28970 case V4SImode:
28971 cmode = V2SImode;
28972 break;
28973 case V4SFmode:
28974 cmode = V2SFmode;
28975 break;
28976 case V2DImode:
28977 cmode = DImode;
28978 break;
28979 case V2SImode:
28980 cmode = SImode;
28981 break;
28982 case V2DFmode:
28983 cmode = DFmode;
28984 break;
28985 case V2SFmode:
28986 cmode = SFmode;
28987 break;
28988 default:
28989 gcc_unreachable ();
28992 if (!register_operand (ops[1], cmode))
28993 ops[1] = force_reg (cmode, ops[1]);
28994 if (!register_operand (ops[0], cmode))
28995 ops[0] = force_reg (cmode, ops[0]);
28996 emit_insn (gen_rtx_SET (VOIDmode, target,
28997 gen_rtx_VEC_CONCAT (mode, ops[0],
28998 ops[1])));
28999 break;
29001 case 4:
29002 switch (mode)
29004 case V4DImode:
29005 cmode = V2DImode;
29006 break;
29007 case V4DFmode:
29008 cmode = V2DFmode;
29009 break;
29010 case V4SImode:
29011 cmode = V2SImode;
29012 break;
29013 case V4SFmode:
29014 cmode = V2SFmode;
29015 break;
29016 default:
29017 gcc_unreachable ();
29019 goto half;
29021 case 8:
29022 switch (mode)
29024 case V8SImode:
29025 cmode = V2SImode;
29026 hmode = V4SImode;
29027 break;
29028 case V8SFmode:
29029 cmode = V2SFmode;
29030 hmode = V4SFmode;
29031 break;
29032 default:
29033 gcc_unreachable ();
29035 goto half;
29037 half:
29038 /* FIXME: We process inputs backward to help RA. PR 36222. */
29039 i = n - 1;
29040 j = (n >> 1) - 1;
29041 for (; i > 0; i -= 2, j--)
29043 first[j] = gen_reg_rtx (cmode);
29044 v = gen_rtvec (2, ops[i - 1], ops[i]);
29045 ix86_expand_vector_init (false, first[j],
29046 gen_rtx_PARALLEL (cmode, v));
29049 n >>= 1;
29050 if (n > 2)
29052 gcc_assert (hmode != VOIDmode);
29053 for (i = j = 0; i < n; i += 2, j++)
29055 second[j] = gen_reg_rtx (hmode);
29056 ix86_expand_vector_init_concat (hmode, second [j],
29057 &first [i], 2);
29059 n >>= 1;
29060 ix86_expand_vector_init_concat (mode, target, second, n);
29062 else
29063 ix86_expand_vector_init_concat (mode, target, first, n);
29064 break;
29066 default:
29067 gcc_unreachable ();
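/* Illustrative example (added commentary, not from the original source):
   building a V8SFmode vector from eight variable elements first forms
   four V2SFmode pairs (first[0..3]), then recurses to concatenate them
   into two V4SFmode halves (second[0..1]), and finally concatenates
   those two halves into the V8SFmode target.  */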
29071 /* A subroutine of ix86_expand_vector_init_general. Use vector
29072 interleave to handle the most general case: all values variable,
29073 and none identical. */
29075 static void
29076 ix86_expand_vector_init_interleave (enum machine_mode mode,
29077 rtx target, rtx *ops, int n)
29079 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
29080 int i, j;
29081 rtx op0, op1;
29082 rtx (*gen_load_even) (rtx, rtx, rtx);
29083 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
29084 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
29086 switch (mode)
29088 case V8HImode:
29089 gen_load_even = gen_vec_setv8hi;
29090 gen_interleave_first_low = gen_vec_interleave_lowv4si;
29091 gen_interleave_second_low = gen_vec_interleave_lowv2di;
29092 inner_mode = HImode;
29093 first_imode = V4SImode;
29094 second_imode = V2DImode;
29095 third_imode = VOIDmode;
29096 break;
29097 case V16QImode:
29098 gen_load_even = gen_vec_setv16qi;
29099 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
29100 gen_interleave_second_low = gen_vec_interleave_lowv4si;
29101 inner_mode = QImode;
29102 first_imode = V8HImode;
29103 second_imode = V4SImode;
29104 third_imode = V2DImode;
29105 break;
29106 default:
29107 gcc_unreachable ();
29110 for (i = 0; i < n; i++)
29112 /* Extend the odd element to SImode using a paradoxical SUBREG. */
29113 op0 = gen_reg_rtx (SImode);
29114 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
29116 /* Insert the SImode value as low element of V4SImode vector. */
29117 op1 = gen_reg_rtx (V4SImode);
29118 op0 = gen_rtx_VEC_MERGE (V4SImode,
29119 gen_rtx_VEC_DUPLICATE (V4SImode,
29120 op0),
29121 CONST0_RTX (V4SImode),
29122 const1_rtx);
29123 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
29125 /* Cast the V4SImode vector back to a vector in the original mode. */
29126 op0 = gen_reg_rtx (mode);
29127 emit_move_insn (op0, gen_lowpart (mode, op1));
29129 /* Load even elements into the second position. */
29130 emit_insn (gen_load_even (op0,
29131 force_reg (inner_mode,
29132 ops [i + i + 1]),
29133 const1_rtx));
29135 /* Cast vector to FIRST_IMODE vector. */
29136 ops[i] = gen_reg_rtx (first_imode);
29137 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
29140 /* Interleave low FIRST_IMODE vectors. */
29141 for (i = j = 0; i < n; i += 2, j++)
29143 op0 = gen_reg_rtx (first_imode);
29144 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
29146 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
29147 ops[j] = gen_reg_rtx (second_imode);
29148 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
29151 /* Interleave low SECOND_IMODE vectors. */
29152 switch (second_imode)
29154 case V4SImode:
29155 for (i = j = 0; i < n / 2; i += 2, j++)
29157 op0 = gen_reg_rtx (second_imode);
29158 emit_insn (gen_interleave_second_low (op0, ops[i],
29159 ops[i + 1]));
29161 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
29162 vector. */
29163 ops[j] = gen_reg_rtx (third_imode);
29164 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
29166 second_imode = V2DImode;
29167 gen_interleave_second_low = gen_vec_interleave_lowv2di;
29168 /* FALLTHRU */
29170 case V2DImode:
29171 op0 = gen_reg_rtx (second_imode);
29172 emit_insn (gen_interleave_second_low (op0, ops[0],
29173 ops[1]));
29175 /* Cast the SECOND_IMODE vector back to a vector in the original
29176 mode. */
29177 emit_insn (gen_rtx_SET (VOIDmode, target,
29178 gen_lowpart (mode, op0)));
29179 break;
29181 default:
29182 gcc_unreachable ();
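/* Illustrative walk-through (added commentary, not from the original
   source) for V16QImode: the loop above packs the 8 odd/even pairs of
   QImode values into the low HImode element of 8 vectors; interleaving
   the low halves as V8HImode yields 4 vectors of 4 elements, interleaving
   those as V4SImode yields 2 vectors of 8 elements, and the final
   V2DImode interleave merges them into the 16-element result.  */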
29186 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
29187 all values variable, and none identical. */
29189 static void
29190 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
29191 rtx target, rtx vals)
29193 rtx ops[32], op0, op1;
29194 enum machine_mode half_mode = VOIDmode;
29195 int n, i;
29197 switch (mode)
29199 case V2SFmode:
29200 case V2SImode:
29201 if (!mmx_ok && !TARGET_SSE)
29202 break;
29203 /* FALLTHRU */
29205 case V8SFmode:
29206 case V8SImode:
29207 case V4DFmode:
29208 case V4DImode:
29209 case V4SFmode:
29210 case V4SImode:
29211 case V2DFmode:
29212 case V2DImode:
29213 n = GET_MODE_NUNITS (mode);
29214 for (i = 0; i < n; i++)
29215 ops[i] = XVECEXP (vals, 0, i);
29216 ix86_expand_vector_init_concat (mode, target, ops, n);
29217 return;
29219 case V32QImode:
29220 half_mode = V16QImode;
29221 goto half;
29223 case V16HImode:
29224 half_mode = V8HImode;
29225 goto half;
29227 half:
29228 n = GET_MODE_NUNITS (mode);
29229 for (i = 0; i < n; i++)
29230 ops[i] = XVECEXP (vals, 0, i);
29231 op0 = gen_reg_rtx (half_mode);
29232 op1 = gen_reg_rtx (half_mode);
29233 ix86_expand_vector_init_interleave (half_mode, op0, ops,
29234 n >> 2);
29235 ix86_expand_vector_init_interleave (half_mode, op1,
29236 &ops [n >> 1], n >> 2);
29237 emit_insn (gen_rtx_SET (VOIDmode, target,
29238 gen_rtx_VEC_CONCAT (mode, op0, op1)));
29239 return;
29241 case V16QImode:
29242 if (!TARGET_SSE4_1)
29243 break;
29244 /* FALLTHRU */
29246 case V8HImode:
29247 if (!TARGET_SSE2)
29248 break;
29250 /* Don't use ix86_expand_vector_init_interleave if we can't
29251 move from GPR to SSE register directly. */
29252 if (!TARGET_INTER_UNIT_MOVES)
29253 break;
29255 n = GET_MODE_NUNITS (mode);
29256 for (i = 0; i < n; i++)
29257 ops[i] = XVECEXP (vals, 0, i);
29258 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
29259 return;
29261 case V4HImode:
29262 case V8QImode:
29263 break;
29265 default:
29266 gcc_unreachable ();
29270 int i, j, n_elts, n_words, n_elt_per_word;
29271 enum machine_mode inner_mode;
29272 rtx words[4], shift;
29274 inner_mode = GET_MODE_INNER (mode);
29275 n_elts = GET_MODE_NUNITS (mode);
29276 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
29277 n_elt_per_word = n_elts / n_words;
29278 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
29280 for (i = 0; i < n_words; ++i)
29282 rtx word = NULL_RTX;
29284 for (j = 0; j < n_elt_per_word; ++j)
29286 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
29287 elt = convert_modes (word_mode, inner_mode, elt, true);
29289 if (j == 0)
29290 word = elt;
29291 else
29293 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
29294 word, 1, OPTAB_LIB_WIDEN);
29295 word = expand_simple_binop (word_mode, IOR, word, elt,
29296 word, 1, OPTAB_LIB_WIDEN);
29300 words[i] = word;
29303 if (n_words == 1)
29304 emit_move_insn (target, gen_lowpart (mode, words[0]));
29305 else if (n_words == 2)
29307 rtx tmp = gen_reg_rtx (mode);
29308 emit_clobber (tmp);
29309 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
29310 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
29311 emit_move_insn (target, tmp);
29313 else if (n_words == 4)
29315 rtx tmp = gen_reg_rtx (V4SImode);
29316 gcc_assert (word_mode == SImode);
29317 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
29318 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
29319 emit_move_insn (target, gen_lowpart (mode, tmp));
29321 else
29322 gcc_unreachable ();
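/* Illustrative scalar sketch of the shift/IOR packing loop above (an
   assumption for exposition only, not part of this file): four QImode
   elements are packed into one little-endian SImode word, so element 0
   lands in the low byte.  */
#if 0
static unsigned int
pack_qi_word (unsigned char e0, unsigned char e1,
	      unsigned char e2, unsigned char e3)
{
  unsigned int word = e3;	/* highest-numbered element first */
  word = (word << 8) | e2;
  word = (word << 8) | e1;
  word = (word << 8) | e0;	/* element 0 ends up in bits 0..7 */
  return word;
}
#endif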
29326 /* Initialize vector TARGET via VALS. Suppress the use of MMX
29327 instructions unless MMX_OK is true. */
29329 void
29330 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
29332 enum machine_mode mode = GET_MODE (target);
29333 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29334 int n_elts = GET_MODE_NUNITS (mode);
29335 int n_var = 0, one_var = -1;
29336 bool all_same = true, all_const_zero = true;
29337 int i;
29338 rtx x;
29340 for (i = 0; i < n_elts; ++i)
29342 x = XVECEXP (vals, 0, i);
29343 if (!(CONST_INT_P (x)
29344 || GET_CODE (x) == CONST_DOUBLE
29345 || GET_CODE (x) == CONST_FIXED))
29346 n_var++, one_var = i;
29347 else if (x != CONST0_RTX (inner_mode))
29348 all_const_zero = false;
29349 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
29350 all_same = false;
29353 /* Constants are best loaded from the constant pool. */
29354 if (n_var == 0)
29356 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
29357 return;
29360 /* If all values are identical, broadcast the value. */
29361 if (all_same
29362 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
29363 XVECEXP (vals, 0, 0)))
29364 return;
29366 /* Values where only one field is non-constant are best loaded from
29367 the pool and overwritten via move later. */
29368 if (n_var == 1)
29370 if (all_const_zero
29371 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
29372 XVECEXP (vals, 0, one_var),
29373 one_var))
29374 return;
29376 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
29377 return;
29380 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
29383 void
29384 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
29386 enum machine_mode mode = GET_MODE (target);
29387 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29388 enum machine_mode half_mode;
29389 bool use_vec_merge = false;
29390 rtx tmp;
29391 static rtx (*gen_extract[6][2]) (rtx, rtx)
29393 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
29394 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
29395 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
29396 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
29397 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
29398 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
29400 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
29402 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
29403 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
29404 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
29405 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
29406 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
29407 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
29409 int i, j, n;
29411 switch (mode)
29413 case V2SFmode:
29414 case V2SImode:
29415 if (mmx_ok)
29417 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
29418 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
29419 if (elt == 0)
29420 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
29421 else
29422 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
29423 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29424 return;
29426 break;
29428 case V2DImode:
29429 use_vec_merge = TARGET_SSE4_1;
29430 if (use_vec_merge)
29431 break;
29433 case V2DFmode:
29435 rtx op0, op1;
29437 /* For the two element vectors, we implement a VEC_CONCAT with
29438 the extraction of the other element. */
29440 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
29441 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
29443 if (elt == 0)
29444 op0 = val, op1 = tmp;
29445 else
29446 op0 = tmp, op1 = val;
29448 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
29449 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29451 return;
29453 case V4SFmode:
29454 use_vec_merge = TARGET_SSE4_1;
29455 if (use_vec_merge)
29456 break;
29458 switch (elt)
29460 case 0:
29461 use_vec_merge = true;
29462 break;
29464 case 1:
29465 /* tmp = target = A B C D */
29466 tmp = copy_to_reg (target);
29467 /* target = A A B B */
29468 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
29469 /* target = X A B B */
29470 ix86_expand_vector_set (false, target, val, 0);
29471 /* target = A X C D */
29472 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29473 const1_rtx, const0_rtx,
29474 GEN_INT (2+4), GEN_INT (3+4)));
29475 return;
29477 case 2:
29478 /* tmp = target = A B C D */
29479 tmp = copy_to_reg (target);
29480 /* tmp = X B C D */
29481 ix86_expand_vector_set (false, tmp, val, 0);
29482 /* target = A B X D */
29483 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29484 const0_rtx, const1_rtx,
29485 GEN_INT (0+4), GEN_INT (3+4)));
29486 return;
29488 case 3:
29489 /* tmp = target = A B C D */
29490 tmp = copy_to_reg (target);
29491 /* tmp = X B C D */
29492 ix86_expand_vector_set (false, tmp, val, 0);
29493 /* target = A B C X */
29494 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29495 const0_rtx, const1_rtx,
29496 GEN_INT (2+4), GEN_INT (0+4)));
29497 return;
29499 default:
29500 gcc_unreachable ();
29502 break;
29504 case V4SImode:
29505 use_vec_merge = TARGET_SSE4_1;
29506 if (use_vec_merge)
29507 break;
29509 /* Element 0 handled by vec_merge below. */
29510 if (elt == 0)
29512 use_vec_merge = true;
29513 break;
29516 if (TARGET_SSE2)
29518 /* With SSE2, use integer shuffles to swap element 0 and ELT,
29519 store into element 0, then shuffle them back. */
29521 rtx order[4];
29523 order[0] = GEN_INT (elt);
29524 order[1] = const1_rtx;
29525 order[2] = const2_rtx;
29526 order[3] = GEN_INT (3);
29527 order[elt] = const0_rtx;
29529 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
29530 order[1], order[2], order[3]));
29532 ix86_expand_vector_set (false, target, val, 0);
29534 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
29535 order[1], order[2], order[3]));
29537 else
29539 /* For SSE1, we have to reuse the V4SF code. */
29540 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
29541 gen_lowpart (SFmode, val), elt);
29543 return;
29545 case V8HImode:
29546 use_vec_merge = TARGET_SSE2;
29547 break;
29548 case V4HImode:
29549 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
29550 break;
29552 case V16QImode:
29553 use_vec_merge = TARGET_SSE4_1;
29554 break;
29556 case V8QImode:
29557 break;
29559 case V32QImode:
29560 half_mode = V16QImode;
29561 j = 0;
29562 n = 16;
29563 goto half;
29565 case V16HImode:
29566 half_mode = V8HImode;
29567 j = 1;
29568 n = 8;
29569 goto half;
29571 case V8SImode:
29572 half_mode = V4SImode;
29573 j = 2;
29574 n = 4;
29575 goto half;
29577 case V4DImode:
29578 half_mode = V2DImode;
29579 j = 3;
29580 n = 2;
29581 goto half;
29583 case V8SFmode:
29584 half_mode = V4SFmode;
29585 j = 4;
29586 n = 4;
29587 goto half;
29589 case V4DFmode:
29590 half_mode = V2DFmode;
29591 j = 5;
29592 n = 2;
29593 goto half;
29595 half:
29596 /* Compute offset. */
29597 i = elt / n;
29598 elt %= n;
29600 gcc_assert (i <= 1);
29602 /* Extract the half. */
29603 tmp = gen_reg_rtx (half_mode);
29604 emit_insn (gen_extract[j][i] (tmp, target));
29606 /* Put val in tmp at elt. */
29607 ix86_expand_vector_set (false, tmp, val, elt);
29609 /* Put it back. */
29610 emit_insn (gen_insert[j][i] (target, target, tmp));
29611 return;
29613 default:
29614 break;
29617 if (use_vec_merge)
29619 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
29620 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
29621 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29623 else
29625 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29627 emit_move_insn (mem, target);
29629 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29630 emit_move_insn (tmp, val);
29632 emit_move_insn (target, mem);
29636 void
29637 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
29639 enum machine_mode mode = GET_MODE (vec);
29640 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29641 bool use_vec_extr = false;
29642 rtx tmp;
29644 switch (mode)
29646 case V2SImode:
29647 case V2SFmode:
29648 if (!mmx_ok)
29649 break;
29650 /* FALLTHRU */
29652 case V2DFmode:
29653 case V2DImode:
29654 use_vec_extr = true;
29655 break;
29657 case V4SFmode:
29658 use_vec_extr = TARGET_SSE4_1;
29659 if (use_vec_extr)
29660 break;
29662 switch (elt)
29664 case 0:
29665 tmp = vec;
29666 break;
29668 case 1:
29669 case 3:
29670 tmp = gen_reg_rtx (mode);
29671 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
29672 GEN_INT (elt), GEN_INT (elt),
29673 GEN_INT (elt+4), GEN_INT (elt+4)));
29674 break;
29676 case 2:
29677 tmp = gen_reg_rtx (mode);
29678 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
29679 break;
29681 default:
29682 gcc_unreachable ();
29684 vec = tmp;
29685 use_vec_extr = true;
29686 elt = 0;
29687 break;
29689 case V4SImode:
29690 use_vec_extr = TARGET_SSE4_1;
29691 if (use_vec_extr)
29692 break;
29694 if (TARGET_SSE2)
29696 switch (elt)
29698 case 0:
29699 tmp = vec;
29700 break;
29702 case 1:
29703 case 3:
29704 tmp = gen_reg_rtx (mode);
29705 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
29706 GEN_INT (elt), GEN_INT (elt),
29707 GEN_INT (elt), GEN_INT (elt)));
29708 break;
29710 case 2:
29711 tmp = gen_reg_rtx (mode);
29712 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
29713 break;
29715 default:
29716 gcc_unreachable ();
29718 vec = tmp;
29719 use_vec_extr = true;
29720 elt = 0;
29722 else
29724 /* For SSE1, we have to reuse the V4SF code. */
29725 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
29726 gen_lowpart (V4SFmode, vec), elt);
29727 return;
29729 break;
29731 case V8HImode:
29732 use_vec_extr = TARGET_SSE2;
29733 break;
29734 case V4HImode:
29735 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
29736 break;
29738 case V16QImode:
29739 use_vec_extr = TARGET_SSE4_1;
29740 break;
29742 case V8QImode:
29743 /* ??? Could extract the appropriate HImode element and shift. */
29744 default:
29745 break;
29748 if (use_vec_extr)
29750 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
29751 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
29753 /* Let the rtl optimizers know about the zero extension performed. */
29754 if (inner_mode == QImode || inner_mode == HImode)
29756 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
29757 target = gen_lowpart (SImode, target);
29760 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29762 else
29764 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29766 emit_move_insn (mem, vec);
29768 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29769 emit_move_insn (target, tmp);
29773 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
29774 pattern to reduce; DEST is the destination; IN is the input vector. */
29776 void
29777 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
29779 rtx tmp1, tmp2, tmp3;
29781 tmp1 = gen_reg_rtx (V4SFmode);
29782 tmp2 = gen_reg_rtx (V4SFmode);
29783 tmp3 = gen_reg_rtx (V4SFmode);
29785 emit_insn (gen_sse_movhlps (tmp1, in, in));
29786 emit_insn (fn (tmp2, tmp1, in));
29788 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
29789 const1_rtx, const1_rtx,
29790 GEN_INT (1+4), GEN_INT (1+4)));
29791 emit_insn (fn (dest, tmp2, tmp3));
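/* Illustrative trace (added commentary, not from the original source)
   for IN = {a,b,c,d} and FN = addition: movhlps gives tmp1 = {c,d,c,d},
   tmp2 = tmp1 + IN = {a+c, b+d, ...}, the shufps broadcasts element 1 so
   tmp3 = {b+d, ...}, and DEST element 0 = (a+c) + (b+d) holds the full
   reduction.  */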
29794 /* Target hook for scalar_mode_supported_p. */
29795 static bool
29796 ix86_scalar_mode_supported_p (enum machine_mode mode)
29798 if (DECIMAL_FLOAT_MODE_P (mode))
29799 return default_decimal_float_supported_p ();
29800 else if (mode == TFmode)
29801 return true;
29802 else
29803 return default_scalar_mode_supported_p (mode);
29806 /* Implements target hook vector_mode_supported_p. */
29807 static bool
29808 ix86_vector_mode_supported_p (enum machine_mode mode)
29810 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
29811 return true;
29812 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
29813 return true;
29814 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
29815 return true;
29816 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
29817 return true;
29818 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
29819 return true;
29820 return false;
29823 /* Target hook for c_mode_for_suffix. */
29824 static enum machine_mode
29825 ix86_c_mode_for_suffix (char suffix)
29827 if (suffix == 'q')
29828 return TFmode;
29829 if (suffix == 'w')
29830 return XFmode;
29832 return VOIDmode;
29835 /* Worker function for TARGET_MD_ASM_CLOBBERS.
29837 We do this in the new i386 backend to maintain source compatibility
29838 with the old cc0-based compiler. */
29840 static tree
29841 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
29842 tree inputs ATTRIBUTE_UNUSED,
29843 tree clobbers)
29845 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
29846 clobbers);
29847 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
29848 clobbers);
29849 return clobbers;
29852 /* Implements the target hook targetm.asm.encode_section_info. This
29853 is not used by NetWare. */
29855 static void ATTRIBUTE_UNUSED
29856 ix86_encode_section_info (tree decl, rtx rtl, int first)
29858 default_encode_section_info (decl, rtl, first);
29860 if (TREE_CODE (decl) == VAR_DECL
29861 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
29862 && ix86_in_large_data_p (decl))
29863 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
29866 /* Worker function for REVERSE_CONDITION. */
29868 enum rtx_code
29869 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
29871 return (mode != CCFPmode && mode != CCFPUmode
29872 ? reverse_condition (code)
29873 : reverse_condition_maybe_unordered (code));
29876 /* Output code to perform an x87 FP register move, from OPERANDS[1]
29877 to OPERANDS[0]. */
29879 const char *
29880 output_387_reg_move (rtx insn, rtx *operands)
29882 if (REG_P (operands[0]))
29884 if (REG_P (operands[1])
29885 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29887 if (REGNO (operands[0]) == FIRST_STACK_REG)
29888 return output_387_ffreep (operands, 0);
29889 return "fstp\t%y0";
29891 if (STACK_TOP_P (operands[0]))
29892 return "fld%Z1\t%y1";
29893 return "fst\t%y0";
29895 else if (MEM_P (operands[0]))
29897 gcc_assert (REG_P (operands[1]));
29898 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29899 return "fstp%Z0\t%y0";
29900 else
29902 /* There is no non-popping store to memory for XFmode.
29903 So if we need one, follow the store with a load. */
29904 if (GET_MODE (operands[0]) == XFmode)
29905 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
29906 else
29907 return "fst%Z0\t%y0";
29910 else
29911 gcc_unreachable();
29914 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
29915 the FP status register is set. */
29917 void
29918 ix86_emit_fp_unordered_jump (rtx label)
29920 rtx reg = gen_reg_rtx (HImode);
29921 rtx temp;
29923 emit_insn (gen_x86_fnstsw_1 (reg));
29925 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
29927 emit_insn (gen_x86_sahf_1 (reg));
29929 temp = gen_rtx_REG (CCmode, FLAGS_REG);
29930 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
29932 else
29934 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
29936 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
29937 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
29940 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
29941 gen_rtx_LABEL_REF (VOIDmode, label),
29942 pc_rtx);
29943 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
29945 emit_jump_insn (temp);
29946 predict_jump (REG_BR_PROB_BASE * 10 / 100);
29949 /* Output code to perform a log1p XFmode calculation. */
29951 void ix86_emit_i387_log1p (rtx op0, rtx op1)
29953 rtx label1 = gen_label_rtx ();
29954 rtx label2 = gen_label_rtx ();
29956 rtx tmp = gen_reg_rtx (XFmode);
29957 rtx tmp2 = gen_reg_rtx (XFmode);
29958 rtx test;
29960 emit_insn (gen_absxf2 (tmp, op1));
29961 test = gen_rtx_GE (VOIDmode, tmp,
29962 CONST_DOUBLE_FROM_REAL_VALUE (
29963 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
29964 XFmode));
29965 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
29967 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29968 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
29969 emit_jump (label2);
29971 emit_label (label1);
29972 emit_move_insn (tmp, CONST1_RTX (XFmode));
29973 emit_insn (gen_addxf3 (tmp, op1, tmp));
29974 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29975 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
29977 emit_label (label2);
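/* Note on the constant above (added commentary, not from the original
   source): 0.29289... is 1 - sqrt(2)/2, the bound the x87 manuals give
   for the argument of fyl2xp1.  Below it, fyl2xp1 computes
   ln(2) * log2 (x + 1) without ever forming 1 + x, avoiding the
   cancellation the explicit addxf3 path above would suffer for tiny x.  */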
29980 /* Output code to perform a Newton-Raphson approximation of a single precision
29981 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
29983 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
29985 rtx x0, x1, e0, e1, two;
29987 x0 = gen_reg_rtx (mode);
29988 e0 = gen_reg_rtx (mode);
29989 e1 = gen_reg_rtx (mode);
29990 x1 = gen_reg_rtx (mode);
29992 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
29994 if (VECTOR_MODE_P (mode))
29995 two = ix86_build_const_vector (SFmode, true, two);
29997 two = force_reg (mode, two);
29999 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
30001 /* x0 = rcp(b) estimate */
30002 emit_insn (gen_rtx_SET (VOIDmode, x0,
30003 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
30004 UNSPEC_RCP)));
30005 /* e0 = x0 * a */
30006 emit_insn (gen_rtx_SET (VOIDmode, e0,
30007 gen_rtx_MULT (mode, x0, a)));
30008 /* e1 = x0 * b */
30009 emit_insn (gen_rtx_SET (VOIDmode, e1,
30010 gen_rtx_MULT (mode, x0, b)));
30011 /* x1 = 2. - e1 */
30012 emit_insn (gen_rtx_SET (VOIDmode, x1,
30013 gen_rtx_MINUS (mode, two, e1)));
30014 /* res = e0 * x1 */
30015 emit_insn (gen_rtx_SET (VOIDmode, res,
30016 gen_rtx_MULT (mode, e0, x1)));
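/* Illustrative scalar sketch of the sequence emitted above (an assumption
   for exposition only, not part of this file): one Newton-Raphson
   refinement of the hardware reciprocal estimate,
   a / b ~= a * rcp (b) * (2 - b * rcp (b)).  */
#if 0
static float
emulate_swdiv (float a, float b, float rcp_b /* rcpps/rcpss estimate */)
{
  float x0 = rcp_b;		/* x0 = rcp(b) estimate */
  float e0 = x0 * a;		/* e0 = a * x0 */
  float e1 = x0 * b;		/* e1 = b * x0, close to 1.0 */
  float x1 = 2.0f - e1;		/* x1 = 2 - e1 */
  return e0 * x1;		/* res ~= a / b */
}
#endif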
30019 /* Output code to perform a Newton-Raphson approximation of a
30020 single precision floating point [reciprocal] square root. */
30022 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
30023 bool recip)
30025 rtx x0, e0, e1, e2, e3, mthree, mhalf;
30026 REAL_VALUE_TYPE r;
30028 x0 = gen_reg_rtx (mode);
30029 e0 = gen_reg_rtx (mode);
30030 e1 = gen_reg_rtx (mode);
30031 e2 = gen_reg_rtx (mode);
30032 e3 = gen_reg_rtx (mode);
30034 real_from_integer (&r, VOIDmode, -3, -1, 0);
30035 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
30037 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
30038 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
30040 if (VECTOR_MODE_P (mode))
30042 mthree = ix86_build_const_vector (SFmode, true, mthree);
30043 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
30046 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
30047 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
30049 /* x0 = rsqrt(a) estimate */
30050 emit_insn (gen_rtx_SET (VOIDmode, x0,
30051 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
30052 UNSPEC_RSQRT)));
30054 /* If a == 0.0, filter out the infinite estimate to prevent a NaN for sqrt (0.0). */
30055 if (!recip)
30057 rtx zero, mask;
30059 zero = gen_reg_rtx (mode);
30060 mask = gen_reg_rtx (mode);
30062 zero = force_reg (mode, CONST0_RTX(mode));
30063 emit_insn (gen_rtx_SET (VOIDmode, mask,
30064 gen_rtx_NE (mode, zero, a)));
30066 emit_insn (gen_rtx_SET (VOIDmode, x0,
30067 gen_rtx_AND (mode, x0, mask)));
30070 /* e0 = x0 * a */
30071 emit_insn (gen_rtx_SET (VOIDmode, e0,
30072 gen_rtx_MULT (mode, x0, a)));
30073 /* e1 = e0 * x0 */
30074 emit_insn (gen_rtx_SET (VOIDmode, e1,
30075 gen_rtx_MULT (mode, e0, x0)));
30077 /* e2 = e1 - 3. */
30078 mthree = force_reg (mode, mthree);
30079 emit_insn (gen_rtx_SET (VOIDmode, e2,
30080 gen_rtx_PLUS (mode, e1, mthree)));
30082 mhalf = force_reg (mode, mhalf);
30083 if (recip)
30084 /* e3 = -.5 * x0 */
30085 emit_insn (gen_rtx_SET (VOIDmode, e3,
30086 gen_rtx_MULT (mode, x0, mhalf)));
30087 else
30088 /* e3 = -.5 * e0 */
30089 emit_insn (gen_rtx_SET (VOIDmode, e3,
30090 gen_rtx_MULT (mode, e0, mhalf)));
30091 /* ret = e2 * e3 */
30092 emit_insn (gen_rtx_SET (VOIDmode, res,
30093 gen_rtx_MULT (mode, e2, e3)));
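/* Illustrative scalar sketch of the sequence emitted above (an assumption
   for exposition only, not part of this file), starting from the
   rsqrtps/rsqrtss estimate x0 of 1/sqrt(a):
     sqrt (a)  ~= -0.5 * (a * x0) * (a * x0 * x0 - 3)
     rsqrt (a) ~= -0.5 * x0       * (a * x0 * x0 - 3)  */
#if 0
static float
emulate_swsqrt (float a, float x0, int recip)
{
  float e0 = x0 * a;			/* e0 = a * x0 */
  float e1 = e0 * x0;			/* e1 = a * x0 * x0 */
  float e2 = e1 - 3.0f;			/* e2 = e1 - 3 */
  float e3 = (recip ? x0 : e0) * -0.5f;	/* e3 = -.5 * x0 or -.5 * e0 */
  return e2 * e3;
}
#endif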
30096 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
30098 static void ATTRIBUTE_UNUSED
30099 i386_solaris_elf_named_section (const char *name, unsigned int flags,
30100 tree decl)
30102 /* With Binutils 2.15, the "@unwind" marker must be specified on
30103 every occurrence of the ".eh_frame" section, not just the first
30104 one. */
30105 if (TARGET_64BIT
30106 && strcmp (name, ".eh_frame") == 0)
30108 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
30109 flags & SECTION_WRITE ? "aw" : "a");
30110 return;
30112 default_elf_asm_named_section (name, flags, decl);
30115 /* Return the mangling of TYPE if it is an extended fundamental type. */
30117 static const char *
30118 ix86_mangle_type (const_tree type)
30120 type = TYPE_MAIN_VARIANT (type);
30122 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
30123 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
30124 return NULL;
30126 switch (TYPE_MODE (type))
30128 case TFmode:
30129 /* __float128 is "g". */
30130 return "g";
30131 case XFmode:
30132 /* "long double" or __float80 is "e". */
30133 return "e";
30134 default:
30135 return NULL;
30139 /* For 32-bit code we can save PIC register setup by using
30140 __stack_chk_fail_local hidden function instead of calling
30141 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
30142 register, so it is better to call __stack_chk_fail directly. */
30144 static tree
30145 ix86_stack_protect_fail (void)
30147 return TARGET_64BIT
30148 ? default_external_stack_protect_fail ()
30149 : default_hidden_stack_protect_fail ();
30152 /* Select a format to encode pointers in exception handling data. CODE
30153 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
30154 true if the symbol may be affected by dynamic relocations.
30156 ??? All x86 object file formats are capable of representing this.
30157 After all, the relocation needed is the same as for the call insn.
30158 Whether or not a particular assembler allows us to enter such, I
30159 guess we'll have to see. */
30160 int
30161 asm_preferred_eh_data_format (int code, int global)
30163 if (flag_pic)
30165 int type = DW_EH_PE_sdata8;
30166 if (!TARGET_64BIT
30167 || ix86_cmodel == CM_SMALL_PIC
30168 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
30169 type = DW_EH_PE_sdata4;
30170 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
30172 if (ix86_cmodel == CM_SMALL
30173 || (ix86_cmodel == CM_MEDIUM && code))
30174 return DW_EH_PE_udata4;
30175 return DW_EH_PE_absptr;
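/* Illustration of the logic above: 32-bit PIC code ends up with
   DW_EH_PE_pcrel | DW_EH_PE_sdata4, plus DW_EH_PE_indirect for global
   symbols, while 32-bit non-PIC code (code model CM_32) falls through
   to DW_EH_PE_absptr.  */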
30178 /* Expand copysign from SIGN to the positive value ABS_VALUE
30179 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
30180 the sign-bit. */
30181 static void
30182 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
30184 enum machine_mode mode = GET_MODE (sign);
30185 rtx sgn = gen_reg_rtx (mode);
30186 if (mask == NULL_RTX)
30188 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
30189 if (!VECTOR_MODE_P (mode))
30191 /* We need to generate a scalar mode mask in this case. */
30192 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
30193 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
30194 mask = gen_reg_rtx (mode);
30195 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
30198 else
30199 mask = gen_rtx_NOT (mode, mask);
30200 emit_insn (gen_rtx_SET (VOIDmode, sgn,
30201 gen_rtx_AND (mode, mask, sign)));
30202 emit_insn (gen_rtx_SET (VOIDmode, result,
30203 gen_rtx_IOR (mode, abs_value, sgn)));
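/* In scalar terms the sequence above amounts to the following sketch,
   where bits_of () and double_of () are hypothetical bit-cast helpers
   used only for illustration:

     uint64_t m = (uint64_t) 1 << 63;              sign-bit mask
     uint64_t s = bits_of (sign) & m;              isolate sign of SIGN
     result = double_of (bits_of (abs_value) | s);

   i.e. result = copysign (abs_value, sign), given that ABS_VALUE is
   already known to be non-negative.  */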
30206 /* Expand fabs (OP0) and return a new rtx that holds the result. The
30207 mask for masking out the sign-bit is stored in *SMASK, if that is
30208 non-null. */
30209 static rtx
30210 ix86_expand_sse_fabs (rtx op0, rtx *smask)
30212 enum machine_mode mode = GET_MODE (op0);
30213 rtx xa, mask;
30215 xa = gen_reg_rtx (mode);
30216 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
30217 if (!VECTOR_MODE_P (mode))
30219 /* We need to generate a scalar mode mask in this case. */
30220 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
30221 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
30222 mask = gen_reg_rtx (mode);
30223 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
30225 emit_insn (gen_rtx_SET (VOIDmode, xa,
30226 gen_rtx_AND (mode, op0, mask)));
30228 if (smask)
30229 *smask = mask;
30231 return xa;
30234 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
30235 swapping the operands if SWAP_OPERANDS is true. The expanded
30236 code is a forward jump to a newly created label in case the
30237 comparison is true. The generated label rtx is returned. */
30238 static rtx
30239 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
30240 bool swap_operands)
30242 rtx label, tmp;
30244 if (swap_operands)
30246 tmp = op0;
30247 op0 = op1;
30248 op1 = tmp;
30251 label = gen_label_rtx ();
30252 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
30253 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30254 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
30255 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
30256 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
30257 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
30258 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
30259 JUMP_LABEL (tmp) = label;
30261 return label;
30264 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
30265 using comparison code CODE. Operands are swapped for the comparison if
30266 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
30267 static rtx
30268 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
30269 bool swap_operands)
30271 enum machine_mode mode = GET_MODE (op0);
30272 rtx mask = gen_reg_rtx (mode);
30274 if (swap_operands)
30276 rtx tmp = op0;
30277 op0 = op1;
30278 op1 = tmp;
30281 if (mode == DFmode)
30282 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
30283 gen_rtx_fmt_ee (code, mode, op0, op1)));
30284 else
30285 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
30286 gen_rtx_fmt_ee (code, mode, op0, op1)));
30288 return mask;
30291 /* Generate and return a rtx of mode MODE for 2**n where n is the number
30292 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
30293 static rtx
30294 ix86_gen_TWO52 (enum machine_mode mode)
30296 REAL_VALUE_TYPE TWO52r;
30297 rtx TWO52;
30299 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
30300 TWO52 = const_double_from_real_value (TWO52r, mode);
30301 TWO52 = force_reg (mode, TWO52);
30303 return TWO52;
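/* Concretely, the constant produced here is 2**52 == 4503599627370496.0
   for DFmode (52 fraction bits) and 2**23 == 8388608.0 for SFmode
   (23 fraction bits); at or above that magnitude every representable
   value is already an integer, which is what the rounding expanders
   below rely on.  */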
30306 /* Expand SSE sequence for computing lround from OP1 storing
30307 into OP0. */
30308 void
30309 ix86_expand_lround (rtx op0, rtx op1)
30311 /* C code for the stuff we're doing below:
30312 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
30313 return (long)tmp;
30315 enum machine_mode mode = GET_MODE (op1);
30316 const struct real_format *fmt;
30317 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
30318 rtx adj;
30320 /* load nextafter (0.5, 0.0) */
30321 fmt = REAL_MODE_FORMAT (mode);
30322 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
30323 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
30325 /* adj = copysign (0.5, op1) */
30326 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
30327 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
30329 /* adj = op1 + adj */
30330 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
30332 /* op0 = (imode)adj */
30333 expand_fix (op0, adj, 0);
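/* The whole expansion corresponds to roughly this scalar C, where
   PRED_HALF denotes the nextafter (0.5, 0.0) constant computed above
   (a sketch, not additional emitted code):

     double adj = copysign (PRED_HALF, op1);
     return (long) (op1 + adj);

   Using the predecessor of 0.5 rather than 0.5 itself keeps the
   intermediate addition from rounding up to the next integer for
   inputs just below an exact .5 boundary.  */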
30336 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
30337 into OPERAND0. */
30338 void
30339 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
30341 /* C code for the stuff we're doing below (for do_floor):
30342 xi = (long)op1;
30343 xi -= (double)xi > op1 ? 1 : 0;
30344 return xi;
30346 enum machine_mode fmode = GET_MODE (op1);
30347 enum machine_mode imode = GET_MODE (op0);
30348 rtx ireg, freg, label, tmp;
30350 /* reg = (long)op1 */
30351 ireg = gen_reg_rtx (imode);
30352 expand_fix (ireg, op1, 0);
30354 /* freg = (double)reg */
30355 freg = gen_reg_rtx (fmode);
30356 expand_float (freg, ireg, 0);
30358 /* ireg = (freg > op1) ? ireg - 1 : ireg */
30359 label = ix86_expand_sse_compare_and_jump (UNLE,
30360 freg, op1, !do_floor);
30361 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
30362 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
30363 emit_move_insn (ireg, tmp);
30365 emit_label (label);
30366 LABEL_NUSES (label) = 1;
30368 emit_move_insn (op0, ireg);
30371 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
30372 result in OPERAND0. */
30373 void
30374 ix86_expand_rint (rtx operand0, rtx operand1)
30376 /* C code for the stuff we're doing below:
30377 xa = fabs (operand1);
30378 if (!isless (xa, 2**52))
30379 return operand1;
30380 xa = xa + 2**52 - 2**52;
30381 return copysign (xa, operand1);
30383 enum machine_mode mode = GET_MODE (operand0);
30384 rtx res, xa, label, TWO52, mask;
30386 res = gen_reg_rtx (mode);
30387 emit_move_insn (res, operand1);
30389 /* xa = abs (operand1) */
30390 xa = ix86_expand_sse_fabs (res, &mask);
30392 /* if (!isless (xa, TWO52)) goto label; */
30393 TWO52 = ix86_gen_TWO52 (mode);
30394 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30396 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30397 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
30399 ix86_sse_copysign_to_positive (res, xa, res, mask);
30401 emit_label (label);
30402 LABEL_NUSES (label) = 1;
30404 emit_move_insn (operand0, res);
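/* The "xa + 2**52 - 2**52" trick works because once a non-negative
   double reaches magnitude 2**52 its unit in the last place is 1.0,
   so the addition discards the fraction bits according to the current
   rounding mode and the subtraction recovers the rounded integer.
   E.g. with round-to-nearest:

     xa = 3.7;  xa + 2**52 == 4503599627370500.0;  - 2**52 == 4.0

   (a sketch of the underlying idea, not extra emitted code).  */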
30407 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
30408 into OPERAND0. */
30409 void
30410 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
30412 /* C code for the stuff we expand below.
30413 double xa = fabs (x), x2;
30414 if (!isless (xa, TWO52))
30415 return x;
30416 xa = xa + TWO52 - TWO52;
30417 x2 = copysign (xa, x);
30418 Compensate. Floor:
30419 if (x2 > x)
30420 x2 -= 1;
30421 Compensate. Ceil:
30422 if (x2 < x)
30423 x2 -= -1;
30424 return x2;
30426 enum machine_mode mode = GET_MODE (operand0);
30427 rtx xa, TWO52, tmp, label, one, res, mask;
30429 TWO52 = ix86_gen_TWO52 (mode);
30431 /* Temporary for holding the result, initialized to the input
30432 operand to ease control flow. */
30433 res = gen_reg_rtx (mode);
30434 emit_move_insn (res, operand1);
30436 /* xa = abs (operand1) */
30437 xa = ix86_expand_sse_fabs (res, &mask);
30439 /* if (!isless (xa, TWO52)) goto label; */
30440 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30442 /* xa = xa + TWO52 - TWO52; */
30443 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30444 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
30446 /* xa = copysign (xa, operand1) */
30447 ix86_sse_copysign_to_positive (xa, xa, res, mask);
30449 /* generate 1.0 or -1.0 */
30450 one = force_reg (mode,
30451 const_double_from_real_value (do_floor
30452 ? dconst1 : dconstm1, mode));
30454 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
30455 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
30456 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30457 gen_rtx_AND (mode, one, tmp)));
30458 /* We always need to subtract here to preserve signed zero. */
30459 tmp = expand_simple_binop (mode, MINUS,
30460 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30461 emit_move_insn (res, tmp);
30463 emit_label (label);
30464 LABEL_NUSES (label) = 1;
30466 emit_move_insn (operand0, res);
30469 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
30470 into OPERAND0. */
30471 void
30472 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
30474 /* C code for the stuff we expand below.
30475 double xa = fabs (x), x2;
30476 if (!isless (xa, TWO52))
30477 return x;
30478 x2 = (double)(long)x;
30479 Compensate. Floor:
30480 if (x2 > x)
30481 x2 -= 1;
30482 Compensate. Ceil:
30483 if (x2 < x)
30484 x2 += 1;
30485 if (HONOR_SIGNED_ZEROS (mode))
30486 return copysign (x2, x);
30487 return x2;
30489 enum machine_mode mode = GET_MODE (operand0);
30490 rtx xa, xi, TWO52, tmp, label, one, res, mask;
30492 TWO52 = ix86_gen_TWO52 (mode);
30494 /* Temporary for holding the result, initialized to the input
30495 operand to ease control flow. */
30496 res = gen_reg_rtx (mode);
30497 emit_move_insn (res, operand1);
30499 /* xa = abs (operand1) */
30500 xa = ix86_expand_sse_fabs (res, &mask);
30502 /* if (!isless (xa, TWO52)) goto label; */
30503 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30505 /* xa = (double)(long)x */
30506 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30507 expand_fix (xi, res, 0);
30508 expand_float (xa, xi, 0);
30510 /* generate 1.0 */
30511 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
30513 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
30514 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
30515 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30516 gen_rtx_AND (mode, one, tmp)));
30517 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
30518 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30519 emit_move_insn (res, tmp);
30521 if (HONOR_SIGNED_ZEROS (mode))
30522 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
30524 emit_label (label);
30525 LABEL_NUSES (label) = 1;
30527 emit_move_insn (operand0, res);
30530 /* Expand SSE sequence for computing round from OPERAND1 storing
30531 into OPERAND0. This sequence works without relying on DImode truncation
30532 via cvttsd2siq, which is only available on 64-bit targets. */
30533 void
30534 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
30536 /* C code for the stuff we expand below.
30537 double xa = fabs (x), xa2, x2;
30538 if (!isless (xa, TWO52))
30539 return x;
30540 Using the absolute value and copying the sign back makes
30541 -0.0 -> -0.0 come out correctly.
30542 xa2 = xa + TWO52 - TWO52;
30543 Compensate.
30544 dxa = xa2 - xa;
30545 if (dxa <= -0.5)
30546 xa2 += 1;
30547 else if (dxa > 0.5)
30548 xa2 -= 1;
30549 x2 = copysign (xa2, x);
30550 return x2;
30552 enum machine_mode mode = GET_MODE (operand0);
30553 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
30555 TWO52 = ix86_gen_TWO52 (mode);
30557 /* Temporary for holding the result, initialized to the input
30558 operand to ease control flow. */
30559 res = gen_reg_rtx (mode);
30560 emit_move_insn (res, operand1);
30562 /* xa = abs (operand1) */
30563 xa = ix86_expand_sse_fabs (res, &mask);
30565 /* if (!isless (xa, TWO52)) goto label; */
30566 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30568 /* xa2 = xa + TWO52 - TWO52; */
30569 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30570 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
30572 /* dxa = xa2 - xa; */
30573 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
30575 /* generate 0.5, 1.0 and -0.5 */
30576 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
30577 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
30578 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
30579 0, OPTAB_DIRECT);
30581 /* Compensate. */
30582 tmp = gen_reg_rtx (mode);
30583 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
30584 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
30585 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30586 gen_rtx_AND (mode, one, tmp)));
30587 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30588 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
30589 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
30590 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30591 gen_rtx_AND (mode, one, tmp)));
30592 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30594 /* res = copysign (xa2, operand1) */
30595 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
30597 emit_label (label);
30598 LABEL_NUSES (label) = 1;
30600 emit_move_insn (operand0, res);
30603 /* Expand SSE sequence for computing trunc from OPERAND1 storing
30604 into OPERAND0. */
30605 void
30606 ix86_expand_trunc (rtx operand0, rtx operand1)
30608 /* C code for SSE variant we expand below.
30609 double xa = fabs (x), x2;
30610 if (!isless (xa, TWO52))
30611 return x;
30612 x2 = (double)(long)x;
30613 if (HONOR_SIGNED_ZEROS (mode))
30614 return copysign (x2, x);
30615 return x2;
30617 enum machine_mode mode = GET_MODE (operand0);
30618 rtx xa, xi, TWO52, label, res, mask;
30620 TWO52 = ix86_gen_TWO52 (mode);
30622 /* Temporary for holding the result, initialized to the input
30623 operand to ease control flow. */
30624 res = gen_reg_rtx (mode);
30625 emit_move_insn (res, operand1);
30627 /* xa = abs (operand1) */
30628 xa = ix86_expand_sse_fabs (res, &mask);
30630 /* if (!isless (xa, TWO52)) goto label; */
30631 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30633 /* x = (double)(long)x */
30634 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30635 expand_fix (xi, res, 0);
30636 expand_float (res, xi, 0);
30638 if (HONOR_SIGNED_ZEROS (mode))
30639 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
30641 emit_label (label);
30642 LABEL_NUSES (label) = 1;
30644 emit_move_insn (operand0, res);
30647 /* Expand SSE sequence for computing trunc from OPERAND1 storing
30648 into OPERAND0. */
30649 void
30650 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
30652 enum machine_mode mode = GET_MODE (operand0);
30653 rtx xa, mask, TWO52, label, one, res, smask, tmp;
30655 /* C code for SSE variant we expand below.
30656 double xa = fabs (x), x2;
30657 if (!isless (xa, TWO52))
30658 return x;
30659 xa2 = xa + TWO52 - TWO52;
30660 Compensate:
30661 if (xa2 > xa)
30662 xa2 -= 1.0;
30663 x2 = copysign (xa2, x);
30664 return x2;
30667 TWO52 = ix86_gen_TWO52 (mode);
30669 /* Temporary for holding the result, initialized to the input
30670 operand to ease control flow. */
30671 res = gen_reg_rtx (mode);
30672 emit_move_insn (res, operand1);
30674 /* xa = abs (operand1) */
30675 xa = ix86_expand_sse_fabs (res, &smask);
30677 /* if (!isless (xa, TWO52)) goto label; */
30678 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30680 /* res = xa + TWO52 - TWO52; */
30681 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30682 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
30683 emit_move_insn (res, tmp);
30685 /* generate 1.0 */
30686 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
30688 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
30689 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
30690 emit_insn (gen_rtx_SET (VOIDmode, mask,
30691 gen_rtx_AND (mode, mask, one)));
30692 tmp = expand_simple_binop (mode, MINUS,
30693 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
30694 emit_move_insn (res, tmp);
30696 /* res = copysign (res, operand1) */
30697 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
30699 emit_label (label);
30700 LABEL_NUSES (label) = 1;
30702 emit_move_insn (operand0, res);
30705 /* Expand SSE sequence for computing round from OPERAND1 storing
30706 into OPERAND0. */
30707 void
30708 ix86_expand_round (rtx operand0, rtx operand1)
30710 /* C code for the stuff we're doing below:
30711 double xa = fabs (x);
30712 if (!isless (xa, TWO52))
30713 return x;
30714 xa = (double)(long)(xa + nextafter (0.5, 0.0));
30715 return copysign (xa, x);
30717 enum machine_mode mode = GET_MODE (operand0);
30718 rtx res, TWO52, xa, label, xi, half, mask;
30719 const struct real_format *fmt;
30720 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
30722 /* Temporary for holding the result, initialized to the input
30723 operand to ease control flow. */
30724 res = gen_reg_rtx (mode);
30725 emit_move_insn (res, operand1);
30727 TWO52 = ix86_gen_TWO52 (mode);
30728 xa = ix86_expand_sse_fabs (res, &mask);
30729 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30731 /* load nextafter (0.5, 0.0) */
30732 fmt = REAL_MODE_FORMAT (mode);
30733 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
30734 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
30736 /* xa = xa + 0.5 */
30737 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
30738 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
30740 /* xa = (double)(int64_t)xa */
30741 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30742 expand_fix (xi, xa, 0);
30743 expand_float (xa, xi, 0);
30745 /* res = copysign (xa, operand1) */
30746 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
30748 emit_label (label);
30749 LABEL_NUSES (label) = 1;
30751 emit_move_insn (operand0, res);
30755 /* Table of valid machine attributes. */
30756 static const struct attribute_spec ix86_attribute_table[] =
30758 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
30759 /* Stdcall attribute says callee is responsible for popping arguments
30760 if they are not variable. */
30761 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30762 /* Fastcall attribute says callee is responsible for popping arguments
30763 if they are not variable. */
30764 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30765 /* Thiscall attribute says callee is responsible for popping arguments
30766 if they are not variable. */
30767 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30768 /* Cdecl attribute says the callee is a normal C declaration */
30769 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30770 /* Regparm attribute specifies how many integer arguments are to be
30771 passed in registers. */
30772 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
30773 /* Sseregparm attribute says we are using x86_64 calling conventions
30774 for FP arguments. */
30775 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30776 /* force_align_arg_pointer says this function realigns the stack at entry. */
30777 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
30778 false, true, true, ix86_handle_cconv_attribute },
30779 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30780 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
30781 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
30782 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
30783 #endif
30784 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30785 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30786 #ifdef SUBTARGET_ATTRIBUTE_TABLE
30787 SUBTARGET_ATTRIBUTE_TABLE,
30788 #endif
30789 /* ms_abi and sysv_abi calling convention function attributes. */
30790 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30791 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30792 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
30793 /* End element. */
30794 { NULL, 0, 0, false, false, false, NULL }
30797 /* Implement targetm.vectorize.builtin_vectorization_cost. */
30798 static int
30799 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
30800 tree vectype ATTRIBUTE_UNUSED,
30801 int misalign ATTRIBUTE_UNUSED)
30803 switch (type_of_cost)
30805 case scalar_stmt:
30806 return ix86_cost->scalar_stmt_cost;
30808 case scalar_load:
30809 return ix86_cost->scalar_load_cost;
30811 case scalar_store:
30812 return ix86_cost->scalar_store_cost;
30814 case vector_stmt:
30815 return ix86_cost->vec_stmt_cost;
30817 case vector_load:
30818 return ix86_cost->vec_align_load_cost;
30820 case vector_store:
30821 return ix86_cost->vec_store_cost;
30823 case vec_to_scalar:
30824 return ix86_cost->vec_to_scalar_cost;
30826 case scalar_to_vec:
30827 return ix86_cost->scalar_to_vec_cost;
30829 case unaligned_load:
30830 case unaligned_store:
30831 return ix86_cost->vec_unalign_load_cost;
30833 case cond_branch_taken:
30834 return ix86_cost->cond_taken_branch_cost;
30836 case cond_branch_not_taken:
30837 return ix86_cost->cond_not_taken_branch_cost;
30839 case vec_perm:
30840 return 1;
30842 default:
30843 gcc_unreachable ();
30848 /* Implement targetm.vectorize.builtin_vec_perm. */
30850 static tree
30851 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
30853 tree itype = TREE_TYPE (vec_type);
30854 bool u = TYPE_UNSIGNED (itype);
30855 enum machine_mode vmode = TYPE_MODE (vec_type);
30856 enum ix86_builtins fcode;
30857 bool ok = TARGET_SSE2;
30859 switch (vmode)
30861 case V4DFmode:
30862 ok = TARGET_AVX;
30863 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
30864 goto get_di;
30865 case V2DFmode:
30866 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
30867 get_di:
30868 itype = ix86_get_builtin_type (IX86_BT_DI);
30869 break;
30871 case V8SFmode:
30872 ok = TARGET_AVX;
30873 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
30874 goto get_si;
30875 case V4SFmode:
30876 ok = TARGET_SSE;
30877 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
30878 get_si:
30879 itype = ix86_get_builtin_type (IX86_BT_SI);
30880 break;
30882 case V2DImode:
30883 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
30884 break;
30885 case V4SImode:
30886 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
30887 break;
30888 case V8HImode:
30889 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
30890 break;
30891 case V16QImode:
30892 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
30893 break;
30894 default:
30895 ok = false;
30896 break;
30899 if (!ok)
30900 return NULL_TREE;
30902 *mask_type = itype;
30903 return ix86_builtins[(int) fcode];
30906 /* Return a vector mode with twice as many elements as VMODE. */
30907 /* ??? Consider moving this to a table generated by genmodes.c. */
30909 static enum machine_mode
30910 doublesize_vector_mode (enum machine_mode vmode)
30912 switch (vmode)
30914 case V2SFmode: return V4SFmode;
30915 case V1DImode: return V2DImode;
30916 case V2SImode: return V4SImode;
30917 case V4HImode: return V8HImode;
30918 case V8QImode: return V16QImode;
30920 case V2DFmode: return V4DFmode;
30921 case V4SFmode: return V8SFmode;
30922 case V2DImode: return V4DImode;
30923 case V4SImode: return V8SImode;
30924 case V8HImode: return V16HImode;
30925 case V16QImode: return V32QImode;
30927 case V4DFmode: return V8DFmode;
30928 case V8SFmode: return V16SFmode;
30929 case V4DImode: return V8DImode;
30930 case V8SImode: return V16SImode;
30931 case V16HImode: return V32HImode;
30932 case V32QImode: return V64QImode;
30934 default:
30935 gcc_unreachable ();
30939 /* Construct (set target (vec_select op0 (parallel perm))) and
30940 return true if that's a valid instruction in the active ISA. */
30942 static bool
30943 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
30945 rtx rperm[MAX_VECT_LEN], x;
30946 unsigned i;
30948 for (i = 0; i < nelt; ++i)
30949 rperm[i] = GEN_INT (perm[i]);
30951 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
30952 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
30953 x = gen_rtx_SET (VOIDmode, target, x);
30955 x = emit_insn (x);
30956 if (recog_memoized (x) < 0)
30958 remove_insn (x);
30959 return false;
30961 return true;
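/* For example, a V4SF target with perm = { 0, 2, 1, 3 } produces

     (set (reg:V4SF target)
          (vec_select:V4SF (reg:V4SF op0)
                           (parallel [(const_int 0) (const_int 2)
                                      (const_int 1) (const_int 3)])))

   which survives only if recog finds a matching pattern in sse.md for
   the active ISA; otherwise the insn is removed again and false is
   returned (an illustrative sketch of the rtx built above).  */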
30964 /* Similar, but generate a vec_concat from op0 and op1 as well. */
30966 static bool
30967 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
30968 const unsigned char *perm, unsigned nelt)
30970 enum machine_mode v2mode;
30971 rtx x;
30973 v2mode = doublesize_vector_mode (GET_MODE (op0));
30974 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
30975 return expand_vselect (target, x, perm, nelt);
30978 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30979 in terms of blendp[sd] / pblendw / pblendvb. */
30981 static bool
30982 expand_vec_perm_blend (struct expand_vec_perm_d *d)
30984 enum machine_mode vmode = d->vmode;
30985 unsigned i, mask, nelt = d->nelt;
30986 rtx target, op0, op1, x;
30988 if (!TARGET_SSE4_1 || d->op0 == d->op1)
30989 return false;
30990 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
30991 return false;
30993 /* This is a blend, not a permute. Elements must stay in their
30994 respective lanes. */
30995 for (i = 0; i < nelt; ++i)
30997 unsigned e = d->perm[i];
30998 if (!(e == i || e == i + nelt))
30999 return false;
31002 if (d->testing_p)
31003 return true;
31005 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
31006 decision should be extracted elsewhere, so that we only try that
31007 sequence once all budget==3 options have been tried. */
31009 /* For bytes, see if bytes move in pairs so we can use pblendw with
31010 an immediate argument, rather than pblendvb with a vector argument. */
31011 if (vmode == V16QImode)
31013 bool pblendw_ok = true;
31014 for (i = 0; i < 16 && pblendw_ok; i += 2)
31015 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
31017 if (!pblendw_ok)
31019 rtx rperm[16], vperm;
31021 for (i = 0; i < nelt; ++i)
31022 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
31024 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
31025 vperm = force_reg (V16QImode, vperm);
31027 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
31028 return true;
31032 target = d->target;
31033 op0 = d->op0;
31034 op1 = d->op1;
31035 mask = 0;
31037 switch (vmode)
31039 case V4DFmode:
31040 case V8SFmode:
31041 case V2DFmode:
31042 case V4SFmode:
31043 case V8HImode:
31044 for (i = 0; i < nelt; ++i)
31045 mask |= (d->perm[i] >= nelt) << i;
31046 break;
31048 case V2DImode:
31049 for (i = 0; i < 2; ++i)
31050 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
31051 goto do_subreg;
31053 case V4SImode:
31054 for (i = 0; i < 4; ++i)
31055 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
31056 goto do_subreg;
31058 case V16QImode:
31059 for (i = 0; i < 8; ++i)
31060 mask |= (d->perm[i * 2] >= 16) << i;
31062 do_subreg:
31063 vmode = V8HImode;
31064 target = gen_lowpart (vmode, target);
31065 op0 = gen_lowpart (vmode, op0);
31066 op1 = gen_lowpart (vmode, op1);
31067 break;
31069 default:
31070 gcc_unreachable ();
31073 /* This matches five different patterns with the different modes. */
31074 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
31075 x = gen_rtx_SET (VOIDmode, target, x);
31076 emit_insn (x);
31078 return true;
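/* Worked example of the mask computation above: for V8HImode with
   perm = { 0, 9, 2, 11, 4, 13, 6, 15 }, elements 1, 3, 5 and 7 come
   from op1, so mask == 0xaa and the vec_merge emitted here corresponds
   to a pblendw with immediate 0xaa (a sketch derived from the loop
   above, not additional code).  */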
31081 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31082 in terms of the variable form of vpermilps.
31084 Note that we will have already failed the immediate input vpermilps,
31085 which requires that the high and low part shuffle be identical; the
31086 variable form doesn't require that. */
31088 static bool
31089 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
31091 rtx rperm[8], vperm;
31092 unsigned i;
31094 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
31095 return false;
31097 /* We can only permute within the 128-bit lane. */
31098 for (i = 0; i < 8; ++i)
31100 unsigned e = d->perm[i];
31101 if (i < 4 ? e >= 4 : e < 4)
31102 return false;
31105 if (d->testing_p)
31106 return true;
31108 for (i = 0; i < 8; ++i)
31110 unsigned e = d->perm[i];
31112 /* Within each 128-bit lane, the elements of op0 are numbered
31113 from 0 and the elements of op1 are numbered from 4. */
31114 if (e >= 8 + 4)
31115 e -= 8;
31116 else if (e >= 4)
31117 e -= 4;
31119 rperm[i] = GEN_INT (e);
31122 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
31123 vperm = force_reg (V8SImode, vperm);
31124 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
31126 return true;
31129 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31130 in terms of pshufb or vpperm. */
31132 static bool
31133 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
31135 unsigned i, nelt, eltsz;
31136 rtx rperm[16], vperm, target, op0, op1;
31138 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
31139 return false;
31140 if (GET_MODE_SIZE (d->vmode) != 16)
31141 return false;
31143 if (d->testing_p)
31144 return true;
31146 nelt = d->nelt;
31147 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
31149 for (i = 0; i < nelt; ++i)
31151 unsigned j, e = d->perm[i];
31152 for (j = 0; j < eltsz; ++j)
31153 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
31156 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
31157 vperm = force_reg (V16QImode, vperm);
31159 target = gen_lowpart (V16QImode, d->target);
31160 op0 = gen_lowpart (V16QImode, d->op0);
31161 if (d->op0 == d->op1)
31162 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
31163 else
31165 op1 = gen_lowpart (V16QImode, d->op1);
31166 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
31169 return true;
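/* Worked example of the control vector built above: a V4SImode
   permutation { 2, 1, 0, 3 } (eltsz == 4) expands to the V16QImode
   byte selector

     { 8,9,10,11, 4,5,6,7, 0,1,2,3, 12,13,14,15 }

   i.e. each element index e becomes the byte indices e*eltsz ..
   e*eltsz + eltsz-1 that pshufb/vpperm consume (illustrative only).  */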
31172 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
31173 in a single instruction. */
31175 static bool
31176 expand_vec_perm_1 (struct expand_vec_perm_d *d)
31178 unsigned i, nelt = d->nelt;
31179 unsigned char perm2[MAX_VECT_LEN];
31181 /* Check plain VEC_SELECT first, because AVX has instructions that could
31182 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
31183 input where SEL+CONCAT may not. */
31184 if (d->op0 == d->op1)
31186 int mask = nelt - 1;
31188 for (i = 0; i < nelt; i++)
31189 perm2[i] = d->perm[i] & mask;
31191 if (expand_vselect (d->target, d->op0, perm2, nelt))
31192 return true;
31194 /* There are plenty of patterns in sse.md that are written for
31195 SEL+CONCAT and are not replicated for a single op. Perhaps
31196 that should be changed, to avoid the nastiness here. */
31198 /* Recognize interleave style patterns, which means incrementing
31199 every other permutation operand. */
31200 for (i = 0; i < nelt; i += 2)
31202 perm2[i] = d->perm[i] & mask;
31203 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
31205 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
31206 return true;
31208 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
31209 if (nelt >= 4)
31211 for (i = 0; i < nelt; i += 4)
31213 perm2[i + 0] = d->perm[i + 0] & mask;
31214 perm2[i + 1] = d->perm[i + 1] & mask;
31215 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
31216 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
31219 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
31220 return true;
31224 /* Finally, try the fully general two operand permute. */
31225 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
31226 return true;
31228 /* Recognize interleave style patterns with reversed operands. */
31229 if (d->op0 != d->op1)
31231 for (i = 0; i < nelt; ++i)
31233 unsigned e = d->perm[i];
31234 if (e >= nelt)
31235 e -= nelt;
31236 else
31237 e += nelt;
31238 perm2[i] = e;
31241 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
31242 return true;
31245 /* Try the SSE4.1 blend variable merge instructions. */
31246 if (expand_vec_perm_blend (d))
31247 return true;
31249 /* Try one of the AVX vpermil variable permutations. */
31250 if (expand_vec_perm_vpermil (d))
31251 return true;
31253 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
31254 if (expand_vec_perm_pshufb (d))
31255 return true;
31257 return false;
31260 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31261 in terms of a pair of pshuflw + pshufhw instructions. */
31263 static bool
31264 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
31266 unsigned char perm2[MAX_VECT_LEN];
31267 unsigned i;
31268 bool ok;
31270 if (d->vmode != V8HImode || d->op0 != d->op1)
31271 return false;
31273 /* The two permutations only operate in 64-bit lanes. */
31274 for (i = 0; i < 4; ++i)
31275 if (d->perm[i] >= 4)
31276 return false;
31277 for (i = 4; i < 8; ++i)
31278 if (d->perm[i] < 4)
31279 return false;
31281 if (d->testing_p)
31282 return true;
31284 /* Emit the pshuflw. */
31285 memcpy (perm2, d->perm, 4);
31286 for (i = 4; i < 8; ++i)
31287 perm2[i] = i;
31288 ok = expand_vselect (d->target, d->op0, perm2, 8);
31289 gcc_assert (ok);
31291 /* Emit the pshufhw. */
31292 memcpy (perm2 + 4, d->perm + 4, 4);
31293 for (i = 0; i < 4; ++i)
31294 perm2[i] = i;
31295 ok = expand_vselect (d->target, d->target, perm2, 8);
31296 gcc_assert (ok);
31298 return true;
31301 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
31302 the permutation using the SSSE3 palignr instruction. This succeeds
31303 when all of the elements in PERM fit within one vector and we merely
31304 need to shift them down so that a single vector permutation has a
31305 chance to succeed. */
31307 static bool
31308 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
31310 unsigned i, nelt = d->nelt;
31311 unsigned min, max;
31312 bool in_order, ok;
31313 rtx shift;
31315 /* Even with AVX, palignr only operates on 128-bit vectors. */
31316 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
31317 return false;
31319 min = nelt, max = 0;
31320 for (i = 0; i < nelt; ++i)
31322 unsigned e = d->perm[i];
31323 if (e < min)
31324 min = e;
31325 if (e > max)
31326 max = e;
31328 if (min == 0 || max - min >= nelt)
31329 return false;
31331 /* Given that we have SSSE3, we know we'll be able to implement the
31332 single operand permutation after the palignr with pshufb. */
31333 if (d->testing_p)
31334 return true;
31336 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
31337 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
31338 gen_lowpart (TImode, d->op1),
31339 gen_lowpart (TImode, d->op0), shift));
31341 d->op0 = d->op1 = d->target;
31343 in_order = true;
31344 for (i = 0; i < nelt; ++i)
31346 unsigned e = d->perm[i] - min;
31347 if (e != i)
31348 in_order = false;
31349 d->perm[i] = e;
31352 /* Test for the degenerate case where the alignment by itself
31353 produces the desired permutation. */
31354 if (in_order)
31355 return true;
31357 ok = expand_vec_perm_1 (d);
31358 gcc_assert (ok);
31360 return ok;
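/* Worked example: for V4SImode with perm = { 3, 4, 5, 6 } we get
   min == 3, so the palignr above shifts the concatenated op1:op0 pair
   down by 3 elements and the residual permutation becomes
   { 0, 1, 2, 3 }, the in_order case that needs no further shuffle
   (a sketch derived from the code above).  */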
31363 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
31364 a two vector permutation into a single vector permutation by using
31365 an interleave operation to merge the vectors. */
31367 static bool
31368 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
31370 struct expand_vec_perm_d dremap, dfinal;
31371 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
31372 unsigned contents, h1, h2, h3, h4;
31373 unsigned char remap[2 * MAX_VECT_LEN];
31374 rtx seq;
31375 bool ok;
31377 if (d->op0 == d->op1)
31378 return false;
31380 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
31381 lanes. We can use similar techniques with the vperm2f128 instruction,
31382 but it requires slightly different logic. */
31383 if (GET_MODE_SIZE (d->vmode) != 16)
31384 return false;
31386 /* Examine from whence the elements come. */
31387 contents = 0;
31388 for (i = 0; i < nelt; ++i)
31389 contents |= 1u << d->perm[i];
31391 /* Split the two input vectors into 4 halves. */
31392 h1 = (1u << nelt2) - 1;
31393 h2 = h1 << nelt2;
31394 h3 = h2 << nelt2;
31395 h4 = h3 << nelt2;
31397 memset (remap, 0xff, sizeof (remap));
31398 dremap = *d;
31400 /* If the elements all come from the low halves, use interleave low;
31401 similarly, use interleave high for the high halves. If the elements
31402 come from mis-matched halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
31403 if ((contents & (h1 | h3)) == contents)
31405 for (i = 0; i < nelt2; ++i)
31407 remap[i] = i * 2;
31408 remap[i + nelt] = i * 2 + 1;
31409 dremap.perm[i * 2] = i;
31410 dremap.perm[i * 2 + 1] = i + nelt;
31413 else if ((contents & (h2 | h4)) == contents)
31415 for (i = 0; i < nelt2; ++i)
31417 remap[i + nelt2] = i * 2;
31418 remap[i + nelt + nelt2] = i * 2 + 1;
31419 dremap.perm[i * 2] = i + nelt2;
31420 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
31423 else if ((contents & (h1 | h4)) == contents)
31425 for (i = 0; i < nelt2; ++i)
31427 remap[i] = i;
31428 remap[i + nelt + nelt2] = i + nelt2;
31429 dremap.perm[i] = i;
31430 dremap.perm[i + nelt2] = i + nelt + nelt2;
31432 if (nelt != 4)
31434 dremap.vmode = V2DImode;
31435 dremap.nelt = 2;
31436 dremap.perm[0] = 0;
31437 dremap.perm[1] = 3;
31440 else if ((contents & (h2 | h3)) == contents)
31442 for (i = 0; i < nelt2; ++i)
31444 remap[i + nelt2] = i;
31445 remap[i + nelt] = i + nelt2;
31446 dremap.perm[i] = i + nelt2;
31447 dremap.perm[i + nelt2] = i + nelt;
31449 if (nelt != 4)
31451 dremap.vmode = V2DImode;
31452 dremap.nelt = 2;
31453 dremap.perm[0] = 1;
31454 dremap.perm[1] = 2;
31457 else
31458 return false;
31460 /* Use the remapping array set up above to move the elements from their
31461 swizzled locations into their final destinations. */
31462 dfinal = *d;
31463 for (i = 0; i < nelt; ++i)
31465 unsigned e = remap[d->perm[i]];
31466 gcc_assert (e < nelt);
31467 dfinal.perm[i] = e;
31469 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
31470 dfinal.op1 = dfinal.op0;
31471 dremap.target = dfinal.op0;
31473 /* Test if the final remap can be done with a single insn. For V4SFmode or
31474 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
31475 start_sequence ();
31476 ok = expand_vec_perm_1 (&dfinal);
31477 seq = get_insns ();
31478 end_sequence ();
31480 if (!ok)
31481 return false;
31483 if (dremap.vmode != dfinal.vmode)
31485 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
31486 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
31487 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
31490 ok = expand_vec_perm_1 (&dremap);
31491 gcc_assert (ok);
31493 emit_insn (seq);
31494 return true;
31497 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
31498 permutation with two pshufb insns and an ior. We should have already
31499 failed all two instruction sequences. */
31501 static bool
31502 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
31504 rtx rperm[2][16], vperm, l, h, op, m128;
31505 unsigned int i, nelt, eltsz;
31507 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
31508 return false;
31509 gcc_assert (d->op0 != d->op1);
31511 nelt = d->nelt;
31512 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
31514 /* Generate two permutation masks. If the required element is within
31515 the given vector it is shuffled into the proper lane. If the required
31516 element is in the other vector, force a zero into the lane by setting
31517 bit 7 in the permutation mask. */
31518 m128 = GEN_INT (-128);
31519 for (i = 0; i < nelt; ++i)
31521 unsigned j, e = d->perm[i];
31522 unsigned which = (e >= nelt);
31523 if (e >= nelt)
31524 e -= nelt;
31526 for (j = 0; j < eltsz; ++j)
31528 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
31529 rperm[1-which][i*eltsz + j] = m128;
31533 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
31534 vperm = force_reg (V16QImode, vperm);
31536 l = gen_reg_rtx (V16QImode);
31537 op = gen_lowpart (V16QImode, d->op0);
31538 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
31540 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
31541 vperm = force_reg (V16QImode, vperm);
31543 h = gen_reg_rtx (V16QImode);
31544 op = gen_lowpart (V16QImode, d->op1);
31545 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
31547 op = gen_lowpart (V16QImode, d->target);
31548 emit_insn (gen_iorv16qi3 (op, l, h));
31550 return true;
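/* Worked example of the two masks: for V8HImode (eltsz == 2) with
   perm = { 0, 8, 1, 9, 2, 10, 3, 11 }, the selector applied to op0
   starts { 0,1, -128,-128, 2,3, -128,-128, ... } and the one applied
   to op1 starts { -128,-128, 0,1, -128,-128, 2,3, ... }; each pshufb
   therefore writes zeros wherever the other operand supplies the
   element, and the final ior merges the two halves (illustrative,
   derived from the loops above).  */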
31553 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
31554 and extract-odd permutations. */
31556 static bool
31557 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
31559 rtx t1, t2, t3, t4;
31561 switch (d->vmode)
31563 case V4DFmode:
31564 t1 = gen_reg_rtx (V4DFmode);
31565 t2 = gen_reg_rtx (V4DFmode);
31567 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
31568 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
31569 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
31571 /* Now an unpck[lh]pd will produce the result required. */
31572 if (odd)
31573 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
31574 else
31575 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
31576 emit_insn (t3);
31577 break;
31579 case V8SFmode:
31581 static const unsigned char perm1[8] = { 0, 2, 1, 3, 4, 6, 5, 7 };
31582 static const unsigned char perme[8] = { 0, 1, 8, 9, 4, 5, 12, 13 };
31583 static const unsigned char permo[8] = { 2, 3, 10, 11, 6, 7, 14, 15 };
31585 t1 = gen_reg_rtx (V8SFmode);
31586 t2 = gen_reg_rtx (V8SFmode);
31587 t3 = gen_reg_rtx (V8SFmode);
31588 t4 = gen_reg_rtx (V8SFmode);
31590 /* Shuffle within the 128-bit lanes to produce:
31591 { 0 2 1 3 4 6 5 7 } and { 8 a 9 b c e d f }. */
31592 expand_vselect (t1, d->op0, perm1, 8);
31593 expand_vselect (t2, d->op1, perm1, 8);
31595 /* Shuffle the lanes around to produce:
31596 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
31597 emit_insn (gen_avx_vperm2f128v8sf3 (t3, t1, t2, GEN_INT (0x20)));
31598 emit_insn (gen_avx_vperm2f128v8sf3 (t4, t1, t2, GEN_INT (0x31)));
31600 /* Now a vpermil2p will produce the result required. */
31601 /* ??? The vpermil2p requires a vector constant. Another option
31602 is a unpck[lh]ps to merge the two vectors to produce
31603 { 0 4 2 6 8 c a e } or { 1 5 3 7 9 d b f }. Then use another
31604 vpermilps to get the elements into the final order. */
31605 d->op0 = t3;
31606 d->op1 = t4;
31607 memcpy (d->perm, odd ? permo : perme, 8);
31608 expand_vec_perm_vpermil (d);
31610 break;
31612 case V2DFmode:
31613 case V4SFmode:
31614 case V2DImode:
31615 case V4SImode:
31616 /* These are always directly implementable by expand_vec_perm_1. */
31617 gcc_unreachable ();
31619 case V8HImode:
31620 if (TARGET_SSSE3)
31621 return expand_vec_perm_pshufb2 (d);
31622 else
31624 /* We need 2*log2(N)-1 operations to achieve odd/even
31625 with interleave. */
31626 t1 = gen_reg_rtx (V8HImode);
31627 t2 = gen_reg_rtx (V8HImode);
31628 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
31629 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
31630 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
31631 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
31632 if (odd)
31633 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
31634 else
31635 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
31636 emit_insn (t3);
31638 break;
31640 case V16QImode:
31641 if (TARGET_SSSE3)
31642 return expand_vec_perm_pshufb2 (d);
31643 else
31645 t1 = gen_reg_rtx (V16QImode);
31646 t2 = gen_reg_rtx (V16QImode);
31647 t3 = gen_reg_rtx (V16QImode);
31648 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
31649 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
31650 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
31651 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
31652 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
31653 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
31654 if (odd)
31655 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
31656 else
31657 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
31658 emit_insn (t3);
31660 break;
31662 default:
31663 gcc_unreachable ();
31666 return true;
31669 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
31670 extract-even and extract-odd permutations. */
31672 static bool
31673 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
31675 unsigned i, odd, nelt = d->nelt;
31677 odd = d->perm[0];
31678 if (odd != 0 && odd != 1)
31679 return false;
31681 for (i = 1; i < nelt; ++i)
31682 if (d->perm[i] != 2 * i + odd)
31683 return false;
31685 return expand_vec_perm_even_odd_1 (d, odd);
31688 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
31689 permutations. We assume that expand_vec_perm_1 has already failed. */
31691 static bool
31692 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
31694 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
31695 enum machine_mode vmode = d->vmode;
31696 unsigned char perm2[4];
31697 rtx op0 = d->op0;
31698 bool ok;
31700 switch (vmode)
31702 case V4DFmode:
31703 case V8SFmode:
31704 /* These are special-cased in sse.md so that we can optionally
31705 use the vbroadcast instruction. They expand to two insns
31706 if the input happens to be in a register. */
31707 gcc_unreachable ();
31709 case V2DFmode:
31710 case V2DImode:
31711 case V4SFmode:
31712 case V4SImode:
31713 /* These are always implementable using standard shuffle patterns. */
31714 gcc_unreachable ();
31716 case V8HImode:
31717 case V16QImode:
31718 /* These can be implemented via interleave. We save one insn by
31719 stopping once we have promoted to V4SImode and then use pshufd. */
31722 optab otab = vec_interleave_low_optab;
31724 if (elt >= nelt2)
31726 otab = vec_interleave_high_optab;
31727 elt -= nelt2;
31729 nelt2 /= 2;
31731 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
31732 vmode = get_mode_wider_vector (vmode);
31733 op0 = gen_lowpart (vmode, op0);
31735 while (vmode != V4SImode);
31737 memset (perm2, elt, 4);
31738 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
31739 gcc_assert (ok);
31740 return true;
31742 default:
31743 gcc_unreachable ();
31747 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
31748 broadcast permutations. */
31750 static bool
31751 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
31753 unsigned i, elt, nelt = d->nelt;
31755 if (d->op0 != d->op1)
31756 return false;
31758 elt = d->perm[0];
31759 for (i = 1; i < nelt; ++i)
31760 if (d->perm[i] != elt)
31761 return false;
31763 return expand_vec_perm_broadcast_1 (d);
31766 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
31767 With all of the interface bits taken care of, perform the expansion
31768 in D and return true on success. */
31770 static bool
31771 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
31773 /* Try a single instruction expansion. */
31774 if (expand_vec_perm_1 (d))
31775 return true;
31777 /* Try sequences of two instructions. */
31779 if (expand_vec_perm_pshuflw_pshufhw (d))
31780 return true;
31782 if (expand_vec_perm_palignr (d))
31783 return true;
31785 if (expand_vec_perm_interleave2 (d))
31786 return true;
31788 if (expand_vec_perm_broadcast (d))
31789 return true;
31791 /* Try sequences of three instructions. */
31793 if (expand_vec_perm_pshufb2 (d))
31794 return true;
31796 /* ??? Look for narrow permutations whose element orderings would
31797 allow the promotion to a wider mode. */
31799 /* ??? Look for sequences of interleave or a wider permute that place
31800 the data into the correct lanes for a half-vector shuffle like
31801 pshuf[lh]w or vpermilps. */
31803 /* ??? Look for sequences of interleave that produce the desired results.
31804 The combinatorics of punpck[lh] get pretty ugly... */
31806 if (expand_vec_perm_even_odd (d))
31807 return true;
31809 return false;
31812 /* Extract the values from the vector CST into the permutation array in D.
31813 Return 0 on error, 1 if all values from the permutation come from the
31814 first vector, 2 if all values from the second vector, and 3 otherwise. */
31816 static int
31817 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
31819 tree list = TREE_VECTOR_CST_ELTS (cst);
31820 unsigned i, nelt = d->nelt;
31821 int ret = 0;
31823 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
31825 unsigned HOST_WIDE_INT e;
31827 if (!host_integerp (TREE_VALUE (list), 1))
31828 return 0;
31829 e = tree_low_cst (TREE_VALUE (list), 1);
31830 if (e >= 2 * nelt)
31831 return 0;
31833 ret |= (e < nelt ? 1 : 2);
31834 d->perm[i] = e;
31836 gcc_assert (list == NULL);
31838 /* For all elements from second vector, fold the elements to first. */
31839 if (ret == 2)
31840 for (i = 0; i < nelt; ++i)
31841 d->perm[i] -= nelt;
31843 return ret;
31846 static rtx
31847 ix86_expand_vec_perm_builtin (tree exp)
31849 struct expand_vec_perm_d d;
31850 tree arg0, arg1, arg2;
31852 arg0 = CALL_EXPR_ARG (exp, 0);
31853 arg1 = CALL_EXPR_ARG (exp, 1);
31854 arg2 = CALL_EXPR_ARG (exp, 2);
31856 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
31857 d.nelt = GET_MODE_NUNITS (d.vmode);
31858 d.testing_p = false;
31859 gcc_assert (VECTOR_MODE_P (d.vmode));
31861 if (TREE_CODE (arg2) != VECTOR_CST)
31863 error_at (EXPR_LOCATION (exp),
31864 "vector permutation requires vector constant");
31865 goto exit_error;
31868 switch (extract_vec_perm_cst (&d, arg2))
31870 default:
31871 gcc_unreachable();
31873 case 0:
31874 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
31875 goto exit_error;
31877 case 3:
31878 if (!operand_equal_p (arg0, arg1, 0))
31880 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
31881 d.op0 = force_reg (d.vmode, d.op0);
31882 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
31883 d.op1 = force_reg (d.vmode, d.op1);
31884 break;
31887 /* The elements of PERM do not suggest that only the first operand
31888 is used, but both operands are identical. Allow easier matching
31889 of the permutation by folding the permutation into the single
31890 input vector. */
31892 unsigned i, nelt = d.nelt;
31893 for (i = 0; i < nelt; ++i)
31894 if (d.perm[i] >= nelt)
31895 d.perm[i] -= nelt;
31897 /* FALLTHRU */
31899 case 1:
31900 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
31901 d.op0 = force_reg (d.vmode, d.op0);
31902 d.op1 = d.op0;
31903 break;
31905 case 2:
31906 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
31907 d.op0 = force_reg (d.vmode, d.op0);
31908 d.op1 = d.op0;
31909 break;
31912 d.target = gen_reg_rtx (d.vmode);
31913 if (ix86_expand_vec_perm_builtin_1 (&d))
31914 return d.target;
31916 /* For compiler-generated permutations, we should never get here, because
31917 the compiler should also be checking the ok hook. But since this is a
31918 builtin the user has access to, don't abort. */
31919 switch (d.nelt)
31921 case 2:
31922 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
31923 break;
31924 case 4:
31925 sorry ("vector permutation (%d %d %d %d)",
31926 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
31927 break;
31928 case 8:
31929 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
31930 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
31931 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
31932 break;
31933 case 16:
31934 sorry ("vector permutation "
31935 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
31936 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
31937 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
31938 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
31939 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
31940 break;
31941 default:
31942 gcc_unreachable ();
31944 exit_error:
31945 return CONST0_RTX (d.vmode);
31948 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
31950 static bool
31951 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
31953 struct expand_vec_perm_d d;
31954 int vec_mask;
31955 bool ret, one_vec;
31957 d.vmode = TYPE_MODE (vec_type);
31958 d.nelt = GET_MODE_NUNITS (d.vmode);
31959 d.testing_p = true;
31961 /* Given sufficient ISA support we can just return true here
31962 for selected vector modes. */
31963 if (GET_MODE_SIZE (d.vmode) == 16)
31965 /* All implementable with a single vpperm insn. */
31966 if (TARGET_XOP)
31967 return true;
31968 /* All implementable with 2 pshufb + 1 ior. */
31969 if (TARGET_SSSE3)
31970 return true;
31971 /* All implementable with shufpd or unpck[lh]pd. */
31972 if (d.nelt == 2)
31973 return true;
31976 vec_mask = extract_vec_perm_cst (&d, mask);
31978 /* This hook cannot be called in response to something that the
31979 user does (unlike the builtin expander), so we shouldn't ever see
31980 an error generated from the extract. */
31981 gcc_assert (vec_mask > 0 && vec_mask <= 3);
31982 one_vec = (vec_mask != 3);
31984 /* Implementable with shufps or pshufd. */
31985 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
31986 return true;
31988 /* Otherwise we have to go through the motions and see if we can
31989 figure out how to generate the requested permutation. */
31990 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31991 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31992 if (!one_vec)
31993 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31995 start_sequence ();
31996 ret = ix86_expand_vec_perm_builtin_1 (&d);
31997 end_sequence ();
31999 return ret;
32002 void
32003 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
32005 struct expand_vec_perm_d d;
32006 unsigned i, nelt;
32008 d.target = targ;
32009 d.op0 = op0;
32010 d.op1 = op1;
32011 d.vmode = GET_MODE (targ);
32012 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
32013 d.testing_p = false;
32015 for (i = 0; i < nelt; ++i)
32016 d.perm[i] = i * 2 + odd;
32018 /* We'll either be able to implement the permutation directly... */
32019 if (expand_vec_perm_1 (&d))
32020 return;
32022 /* ... or we use the special-case patterns. */
32023 expand_vec_perm_even_odd_1 (&d, odd);
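/* For example, extracting the even elements of two V4SImode vectors
   { a0 a1 a2 a3 } and { b0 b1 b2 b3 } uses the permutation
   { 0, 2, 4, 6 } and yields { a0 a2 b0 b2 }; the odd variant uses
   { 1, 3, 5, 7 } and yields { a1 a3 b1 b3 } (an illustration of the
   perm array filled in above).  */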
32026 /* This function returns the calling-ABI-specific va_list type node,
32027 i.e. the va_list type appropriate for FNDECL. */
32029 static tree
32030 ix86_fn_abi_va_list (tree fndecl)
32032 if (!TARGET_64BIT)
32033 return va_list_type_node;
32034 gcc_assert (fndecl != NULL_TREE);
32036 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
32037 return ms_va_list_type_node;
32038 else
32039 return sysv_va_list_type_node;
32042 /* Returns the canonical va_list type specified by TYPE. If there
32043 is no valid TYPE provided, it returns NULL_TREE. */
32045 static tree
32046 ix86_canonical_va_list_type (tree type)
32048 tree wtype, htype;
32050 /* Resolve references and pointers to va_list type. */
32051 if (TREE_CODE (type) == MEM_REF)
32052 type = TREE_TYPE (type);
32053 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
32054 type = TREE_TYPE (type);
32055 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
32056 type = TREE_TYPE (type);
32058 if (TARGET_64BIT)
32060 wtype = va_list_type_node;
32061 gcc_assert (wtype != NULL_TREE);
32062 htype = type;
32063 if (TREE_CODE (wtype) == ARRAY_TYPE)
32065 /* If va_list is an array type, the argument may have decayed
32066 to a pointer type, e.g. by being passed to another function.
32067 In that case, unwrap both types so that we can compare the
32068 underlying records. */
32069 if (TREE_CODE (htype) == ARRAY_TYPE
32070 || POINTER_TYPE_P (htype))
32072 wtype = TREE_TYPE (wtype);
32073 htype = TREE_TYPE (htype);
32076 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
32077 return va_list_type_node;
32078 wtype = sysv_va_list_type_node;
32079 gcc_assert (wtype != NULL_TREE);
32080 htype = type;
32081 if (TREE_CODE (wtype) == ARRAY_TYPE)
32083 /* If va_list is an array type, the argument may have decayed
32084 to a pointer type, e.g. by being passed to another function.
32085 In that case, unwrap both types so that we can compare the
32086 underlying records. */
32087 if (TREE_CODE (htype) == ARRAY_TYPE
32088 || POINTER_TYPE_P (htype))
32090 wtype = TREE_TYPE (wtype);
32091 htype = TREE_TYPE (htype);
32094 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
32095 return sysv_va_list_type_node;
32096 wtype = ms_va_list_type_node;
32097 gcc_assert (wtype != NULL_TREE);
32098 htype = type;
32099 if (TREE_CODE (wtype) == ARRAY_TYPE)
32101 /* If va_list is an array type, the argument may have decayed
32102 to a pointer type, e.g. by being passed to another function.
32103 In that case, unwrap both types so that we can compare the
32104 underlying records. */
32105 if (TREE_CODE (htype) == ARRAY_TYPE
32106 || POINTER_TYPE_P (htype))
32108 wtype = TREE_TYPE (wtype);
32109 htype = TREE_TYPE (htype);
32112 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
32113 return ms_va_list_type_node;
32114 return NULL_TREE;
32116 return std_canonical_va_list_type (type);
32119 /* Iterate through the target-specific builtin types for va_list.
32120 IDX denotes the iterator, *PTREE is set to the result type of
32121 the va_list builtin, and *PNAME to its internal type.
32122 Returns zero if there is no element for this index, otherwise
32123 IDX should be increased upon the next call.
32124 Note, do not iterate a base builtin's name like __builtin_va_list.
32125 Used from c_common_nodes_and_builtins. */
32127 static int
32128 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
32130 if (TARGET_64BIT)
32132 switch (idx)
32134 default:
32135 break;
32137 case 0:
32138 *ptree = ms_va_list_type_node;
32139 *pname = "__builtin_ms_va_list";
32140 return 1;
32142 case 1:
32143 *ptree = sysv_va_list_type_node;
32144 *pname = "__builtin_sysv_va_list";
32145 return 1;
32149 return 0;
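/* Illustrative sketch (assumed caller behaviour, per the comment above):
   c_common_nodes_and_builtins drives the hook installed as
   TARGET_ENUM_VA_LIST_P roughly as

     for (idx = 0; ix86_enum_va_list (idx, &pname, &ptree); ++idx)
       ... register the type PTREE under the builtin name PNAME ...

   so on 64-bit targets both __builtin_ms_va_list and
   __builtin_sysv_va_list become visible to the front end.  */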
32152 #undef TARGET_SCHED_DISPATCH
32153 #define TARGET_SCHED_DISPATCH has_dispatch
32154 #undef TARGET_SCHED_DISPATCH_DO
32155 #define TARGET_SCHED_DISPATCH_DO do_dispatch
32157 /* The size of the dispatch window is the total number of bytes of
32158 object code allowed in a window. */
32159 #define DISPATCH_WINDOW_SIZE 16
32161 /* Number of dispatch windows considered for scheduling. */
32162 #define MAX_DISPATCH_WINDOWS 3
32164 /* Maximum number of instructions in a window. */
32165 #define MAX_INSN 4
32167 /* Maximum number of immediate operands in a window. */
32168 #define MAX_IMM 4
32170 /* Maximum number of immediate bits allowed in a window. */
32171 #define MAX_IMM_SIZE 128
32173 /* Maximum number of 32 bit immediates allowed in a window. */
32174 #define MAX_IMM_32 4
32176 /* Maximum number of 64 bit immediates allowed in a window. */
32177 #define MAX_IMM_64 2
32179 /* Maximum total of loads or prefetches allowed in a window. */
32180 #define MAX_LOAD 2
32182 /* Maximum total of stores allowed in a window. */
32183 #define MAX_STORE 1
32185 #undef BIG
32186 #define BIG 100
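/* BIG serves two purposes below: it is the per-window allowance for
   groups that never limit a window (disp_cmp and disp_jcc in
   num_allowable_groups), and it is the value count_num_restricted
   returns when an insn would overflow the current window's limits.  */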
32189 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
32190 enum dispatch_group {
32191 disp_no_group = 0,
32192 disp_load,
32193 disp_store,
32194 disp_load_store,
32195 disp_prefetch,
32196 disp_imm,
32197 disp_imm_32,
32198 disp_imm_64,
32199 disp_branch,
32200 disp_cmp,
32201 disp_jcc,
32202 disp_last
32205 /* Number of allowable groups in a dispatch window. It is an array
32206 indexed by dispatch_group enum. 100 is used as a big number,
32207 because the number of these kinds of operations does not have any
32208 effect in the dispatch window, but we need them for other reasons in
32209 the table. */
32210 static unsigned int num_allowable_groups[disp_last] = {
32211 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
32214 char group_name[disp_last + 1][16] = {
32215 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
32216 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
32217 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
32220 /* Instruction path. */
32221 enum insn_path {
32222 no_path = 0,
32223 path_single, /* Single micro op. */
32224 path_double, /* Double micro op. */
32225 path_multi, /* Instructions with more than 2 micro ops. */
32226 last_path
32229 /* sched_insn_info describes one entry of the window of instructions
32230 scheduled in the basic block: the scheduled insn together with its
32231 dispatch group, path, length and immediate bytes.
32233 Windows are allocated for each basic block and are linked
32234 together. */
32235 typedef struct sched_insn_info_s {
32236 rtx insn;
32237 enum dispatch_group group;
32238 enum insn_path path;
32239 int byte_len;
32240 int imm_bytes;
32241 } sched_insn_info;
32243 /* Linked list of dispatch windows. This is a two-way list of
32244 dispatch windows of a basic block. It contains information about
32245 the number of uops in the window and the total number of
32246 instructions and of bytes in the object code for this dispatch
32247 window. */
32248 typedef struct dispatch_windows_s {
32249 int num_insn; /* Number of insns in the window. */
32250 int num_uops; /* Number of uops in the window. */
32251 int window_size; /* Number of bytes in the window. */
32252 int window_num; /* Window number, 0 or 1. */
32253 int num_imm; /* Number of immediates in the window. */
32254 int num_imm_32; /* Number of 32 bit immediates in the window. */
32255 int num_imm_64; /* Number of 64 bit immediates in the window. */
32256 int imm_size; /* Total size of immediates in the window. */
32257 int num_loads; /* Total memory loads in the window. */
32258 int num_stores; /* Total memory stores in the window. */
32259 int violation; /* Violation exists in window. */
32260 sched_insn_info *window; /* Pointer to the window. */
32261 struct dispatch_windows_s *next;
32262 struct dispatch_windows_s *prev;
32263 } dispatch_windows;
32265 /* Immediate values used in an insn. */
32266 typedef struct imm_info_s
32268 int imm;
32269 int imm32;
32270 int imm64;
32271 } imm_info;
32273 static dispatch_windows *dispatch_window_list;
32274 static dispatch_windows *dispatch_window_list1;
32276 /* Get dispatch group of insn. */
32278 static enum dispatch_group
32279 get_mem_group (rtx insn)
32281 enum attr_memory memory;
32283 if (INSN_CODE (insn) < 0)
32284 return disp_no_group;
32285 memory = get_attr_memory (insn);
32286 if (memory == MEMORY_STORE)
32287 return disp_store;
32289 if (memory == MEMORY_LOAD)
32290 return disp_load;
32292 if (memory == MEMORY_BOTH)
32293 return disp_load_store;
32295 return disp_no_group;
32298 /* Return true if insn is a compare instruction. */
32300 static bool
32301 is_cmp (rtx insn)
32303 enum attr_type type;
32305 type = get_attr_type (insn);
32306 return (type == TYPE_TEST
32307 || type == TYPE_ICMP
32308 || type == TYPE_FCMP
32309 || GET_CODE (PATTERN (insn)) == COMPARE);
32312 /* Return true if a dispatch violation was encountered. */
32314 static bool
32315 dispatch_violation (void)
32317 if (dispatch_window_list->next)
32318 return dispatch_window_list->next->violation;
32319 return dispatch_window_list->violation;
32322 /* Return true if insn is a branch instruction. */
32324 static bool
32325 is_branch (rtx insn)
32327 return (CALL_P (insn) || JUMP_P (insn));
32330 /* Return true if insn is a prefetch instruction. */
32332 static bool
32333 is_prefetch (rtx insn)
32335 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
32338 /* This function initializes a dispatch window and the list container holding a
32339 pointer to the window. */
32341 static void
32342 init_window (int window_num)
32344 int i;
32345 dispatch_windows *new_list;
32347 if (window_num == 0)
32348 new_list = dispatch_window_list;
32349 else
32350 new_list = dispatch_window_list1;
32352 new_list->num_insn = 0;
32353 new_list->num_uops = 0;
32354 new_list->window_size = 0;
32355 new_list->next = NULL;
32356 new_list->prev = NULL;
32357 new_list->window_num = window_num;
32358 new_list->num_imm = 0;
32359 new_list->num_imm_32 = 0;
32360 new_list->num_imm_64 = 0;
32361 new_list->imm_size = 0;
32362 new_list->num_loads = 0;
32363 new_list->num_stores = 0;
32364 new_list->violation = false;
32366 for (i = 0; i < MAX_INSN; i++)
32368 new_list->window[i].insn = NULL;
32369 new_list->window[i].group = disp_no_group;
32370 new_list->window[i].path = no_path;
32371 new_list->window[i].byte_len = 0;
32372 new_list->window[i].imm_bytes = 0;
32374 return;
32377 /* This function allocates and initializes a dispatch window and the
32378 list container holding a pointer to the window. */
32380 static dispatch_windows *
32381 allocate_window (void)
32383 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
32384 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
32386 return new_list;
32389 /* This routine initializes the dispatch scheduling information. It
32390 initiates building dispatch scheduler tables and constructs the
32391 first dispatch window. */
32393 static void
32394 init_dispatch_sched (void)
32396 /* Allocate a dispatch list and a window. */
32397 dispatch_window_list = allocate_window ();
32398 dispatch_window_list1 = allocate_window ();
32399 init_window (0);
32400 init_window (1);
32403 /* This function returns true if a branch is detected. End of a basic block
32404 does not have to be a branch, but here we assume only branches end a
32405 window. */
32407 static bool
32408 is_end_basic_block (enum dispatch_group group)
32410 return group == disp_branch;
32413 /* This function is called when the end of window processing is reached. */
32415 static void
32416 process_end_window (void)
32418 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
32419 if (dispatch_window_list->next)
32421 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
32422 gcc_assert (dispatch_window_list->window_size
32423 + dispatch_window_list1->window_size <= 48);
32424 init_window (1);
32426 init_window (0);
32429 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
32430 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
32431 for 48 bytes of instructions. Note that these windows are not dispatch
32432 windows whose sizes are DISPATCH_WINDOW_SIZE. */
32434 static dispatch_windows *
32435 allocate_next_window (int window_num)
32437 if (window_num == 0)
32439 if (dispatch_window_list->next)
32440 init_window (1);
32441 init_window (0);
32442 return dispatch_window_list;
32445 dispatch_window_list->next = dispatch_window_list1;
32446 dispatch_window_list1->prev = dispatch_window_list;
32448 return dispatch_window_list1;
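/* Note: windows 0 and 1 together are capped at 48 bytes of code
   (MAX_DISPATCH_WINDOWS * DISPATCH_WINDOW_SIZE); process_end_window
   asserts this, and fits_dispatch_window/add_to_dispatch_window use
   the 32- and 48-byte thresholds when deciding to start a new
   window.  */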
32451 /* Increment the number of immediate operands of an instruction. */
32453 static int
32454 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
32456 if (*in_rtx == 0)
32457 return 0;
32459 switch (GET_CODE (*in_rtx))
32461 case CONST:
32462 case SYMBOL_REF:
32463 case CONST_INT:
32464 (imm_values->imm)++;
32465 if (x86_64_immediate_operand (*in_rtx, SImode))
32466 (imm_values->imm32)++;
32467 else
32468 (imm_values->imm64)++;
32469 break;
32471 case CONST_DOUBLE:
32472 (imm_values->imm)++;
32473 (imm_values->imm64)++;
32474 break;
32476 case CODE_LABEL:
32477 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
32479 (imm_values->imm)++;
32480 (imm_values->imm32)++;
32482 break;
32484 default:
32485 break;
32488 return 0;
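/* find_constant_1 is used as a for_each_rtx callback (see find_constant
   below); returning 0 tells for_each_rtx to keep walking the pattern.  */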
32491 /* Compute number of immediate operands of an instruction. */
32493 static void
32494 find_constant (rtx in_rtx, imm_info *imm_values)
32496 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
32497 (rtx_function) find_constant_1, (void *) imm_values);
32500 /* Return total size of immediate operands of an instruction along with number
32501 of corresponding immediate operands. It initializes its parameters to zero
32502 before calling FIND_CONSTANT.
32503 INSN is the input instruction. IMM is the total of immediates.
32504 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
32505 bit immediates. */
32507 static int
32508 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
32510 imm_info imm_values = {0, 0, 0};
32512 find_constant (insn, &imm_values);
32513 *imm = imm_values.imm;
32514 *imm32 = imm_values.imm32;
32515 *imm64 = imm_values.imm64;
32516 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
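/* For example (illustrative only): for "addl $42, %eax" this counts
   one 32-bit immediate and returns 4, while for "movabsq $0x123456789,
   %rax" it counts one 64-bit immediate and returns 8.  */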
32519 /* This function indicates whether an instruction has an immediate
32520 operand. */
32522 static bool
32523 has_immediate (rtx insn)
32525 int num_imm_operand;
32526 int num_imm32_operand;
32527 int num_imm64_operand;
32529 if (insn)
32530 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32531 &num_imm64_operand);
32532 return false;
32535 /* Return single or double path for instructions. */
32537 static enum insn_path
32538 get_insn_path (rtx insn)
32540 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
32542 if ((int)path == 0)
32543 return path_single;
32545 if ((int)path == 1)
32546 return path_double;
32548 return path_multi;
32551 /* Return insn dispatch group. */
32553 static enum dispatch_group
32554 get_insn_group (rtx insn)
32556 enum dispatch_group group = get_mem_group (insn);
32557 if (group)
32558 return group;
32560 if (is_branch (insn))
32561 return disp_branch;
32563 if (is_cmp (insn))
32564 return disp_cmp;
32566 if (has_immediate (insn))
32567 return disp_imm;
32569 if (is_prefetch (insn))
32570 return disp_prefetch;
32572 return disp_no_group;
32575 /* Count the number of GROUP-restricted instructions in a dispatch
32576 window WINDOW_LIST. */
32578 static int
32579 count_num_restricted (rtx insn, dispatch_windows *window_list)
32581 enum dispatch_group group = get_insn_group (insn);
32582 int imm_size;
32583 int num_imm_operand;
32584 int num_imm32_operand;
32585 int num_imm64_operand;
32587 if (group == disp_no_group)
32588 return 0;
32590 if (group == disp_imm)
32592 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32593 &num_imm64_operand);
32594 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
32595 || num_imm_operand + window_list->num_imm > MAX_IMM
32596 || (num_imm32_operand > 0
32597 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
32598 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
32599 || (num_imm64_operand > 0
32600 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
32601 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
32602 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
32603 && num_imm64_operand > 0
32604 && ((window_list->num_imm_64 > 0
32605 && window_list->num_insn >= 2)
32606 || window_list->num_insn >= 3)))
32607 return BIG;
32609 return 1;
32612 if ((group == disp_load_store
32613 && (window_list->num_loads >= MAX_LOAD
32614 || window_list->num_stores >= MAX_STORE))
32615 || ((group == disp_load
32616 || group == disp_prefetch)
32617 && window_list->num_loads >= MAX_LOAD)
32618 || (group == disp_store
32619 && window_list->num_stores >= MAX_STORE))
32620 return BIG;
32622 return 1;
32625 /* This function returns true if insn satisfies dispatch rules on the
32626 last window scheduled. */
32628 static bool
32629 fits_dispatch_window (rtx insn)
32631 dispatch_windows *window_list = dispatch_window_list;
32632 dispatch_windows *window_list_next = dispatch_window_list->next;
32633 unsigned int num_restrict;
32634 enum dispatch_group group = get_insn_group (insn);
32635 enum insn_path path = get_insn_path (insn);
32636 int sum;
32638 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
32639 instructions should be given the lowest priority in the
32640 scheduling process in the Haifa scheduler to make sure they will be
32641 scheduled in the same dispatch window as the reference to them. */
32642 if (group == disp_jcc || group == disp_cmp)
32643 return false;
32645 /* Check nonrestricted. */
32646 if (group == disp_no_group || group == disp_branch)
32647 return true;
32649 /* Get last dispatch window. */
32650 if (window_list_next)
32651 window_list = window_list_next;
32653 if (window_list->window_num == 1)
32655 sum = window_list->prev->window_size + window_list->window_size;
32657 if (sum == 32
32658 || (min_insn_size (insn) + sum) >= 48)
32659 /* Window 1 is full. Go for next window. */
32660 return true;
32663 num_restrict = count_num_restricted (insn, window_list);
32665 if (num_restrict > num_allowable_groups[group])
32666 return false;
32668 /* See if it fits in the first window. */
32669 if (window_list->window_num == 0)
32671 /* The first window should have only single and double path
32672 uops. */
32673 if (path == path_double
32674 && (window_list->num_uops + 2) > MAX_INSN)
32675 return false;
32676 else if (path != path_single)
32677 return false;
32679 return true;
32682 /* Add an instruction INSN with NUM_UOPS micro-operations to the
32683 dispatch window WINDOW_LIST. */
32685 static void
32686 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
32688 int byte_len = min_insn_size (insn);
32689 int num_insn = window_list->num_insn;
32690 int imm_size;
32691 sched_insn_info *window = window_list->window;
32692 enum dispatch_group group = get_insn_group (insn);
32693 enum insn_path path = get_insn_path (insn);
32694 int num_imm_operand;
32695 int num_imm32_operand;
32696 int num_imm64_operand;
32698 if (!window_list->violation && group != disp_cmp
32699 && !fits_dispatch_window (insn))
32700 window_list->violation = true;
32702 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32703 &num_imm64_operand);
32705 /* Initialize window with new instruction. */
32706 window[num_insn].insn = insn;
32707 window[num_insn].byte_len = byte_len;
32708 window[num_insn].group = group;
32709 window[num_insn].path = path;
32710 window[num_insn].imm_bytes = imm_size;
32712 window_list->window_size += byte_len;
32713 window_list->num_insn = num_insn + 1;
32714 window_list->num_uops = window_list->num_uops + num_uops;
32715 window_list->imm_size += imm_size;
32716 window_list->num_imm += num_imm_operand;
32717 window_list->num_imm_32 += num_imm32_operand;
32718 window_list->num_imm_64 += num_imm64_operand;
32720 if (group == disp_store)
32721 window_list->num_stores += 1;
32722 else if (group == disp_load
32723 || group == disp_prefetch)
32724 window_list->num_loads += 1;
32725 else if (group == disp_load_store)
32727 window_list->num_stores += 1;
32728 window_list->num_loads += 1;
32732 /* Adds a scheduled instruction, INSN, to the current dispatch window.
32733 If the total bytes of instructions or the number of instructions in
32734 the window exceeds the allowable limit, it allocates a new window. */
32736 static void
32737 add_to_dispatch_window (rtx insn)
32739 int byte_len;
32740 dispatch_windows *window_list;
32741 dispatch_windows *next_list;
32742 dispatch_windows *window0_list;
32743 enum insn_path path;
32744 enum dispatch_group insn_group;
32745 bool insn_fits;
32746 int num_insn;
32747 int num_uops;
32748 int window_num;
32749 int insn_num_uops;
32750 int sum;
32752 if (INSN_CODE (insn) < 0)
32753 return;
32755 byte_len = min_insn_size (insn);
32756 window_list = dispatch_window_list;
32757 next_list = window_list->next;
32758 path = get_insn_path (insn);
32759 insn_group = get_insn_group (insn);
32761 /* Get the last dispatch window. */
32762 if (next_list)
32763 window_list = dispatch_window_list->next;
32765 if (path == path_single)
32766 insn_num_uops = 1;
32767 else if (path == path_double)
32768 insn_num_uops = 2;
32769 else
32770 insn_num_uops = (int) path;
32772 /* If the current window is full, get a new window.
32773 Window number zero is full if MAX_INSN uops are scheduled in it.
32774 Window number one is full if window zero's bytes plus window
32775 one's bytes is 32, or if the bytes of the new instruction added
32776 to the total make it greater than 48, or if it already has MAX_INSN
32777 instructions in it. */
32778 num_insn = window_list->num_insn;
32779 num_uops = window_list->num_uops;
32780 window_num = window_list->window_num;
32781 insn_fits = fits_dispatch_window (insn);
32783 if (num_insn >= MAX_INSN
32784 || num_uops + insn_num_uops > MAX_INSN
32785 || !(insn_fits))
32787 window_num = ~window_num & 1;
32788 window_list = allocate_next_window (window_num);
32791 if (window_num == 0)
32793 add_insn_window (insn, window_list, insn_num_uops);
32794 if (window_list->num_insn >= MAX_INSN
32795 && insn_group == disp_branch)
32797 process_end_window ();
32798 return;
32801 else if (window_num == 1)
32803 window0_list = window_list->prev;
32804 sum = window0_list->window_size + window_list->window_size;
32805 if (sum == 32
32806 || (byte_len + sum) >= 48)
32808 process_end_window ();
32809 window_list = dispatch_window_list;
32812 add_insn_window (insn, window_list, insn_num_uops);
32814 else
32815 gcc_unreachable ();
32817 if (is_end_basic_block (insn_group))
32819 /* End of basic block is reached; do end-basic-block processing. */
32820 process_end_window ();
32821 return;
32825 /* Print the dispatch window, WINDOW_NUM, to FILE. */
32827 DEBUG_FUNCTION static void
32828 debug_dispatch_window_file (FILE *file, int window_num)
32830 dispatch_windows *list;
32831 int i;
32833 if (window_num == 0)
32834 list = dispatch_window_list;
32835 else
32836 list = dispatch_window_list1;
32838 fprintf (file, "Window #%d:\n", list->window_num);
32839 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
32840 list->num_insn, list->num_uops, list->window_size);
32841 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
32842 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
32844 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
32845 list->num_stores);
32846 fprintf (file, " insn info:\n");
32848 for (i = 0; i < MAX_INSN; i++)
32850 if (!list->window[i].insn)
32851 break;
32852 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
32853 i, group_name[list->window[i].group],
32854 i, (void *)list->window[i].insn,
32855 i, list->window[i].path,
32856 i, list->window[i].byte_len,
32857 i, list->window[i].imm_bytes);
32861 /* Print to stdout a dispatch window. */
32863 DEBUG_FUNCTION void
32864 debug_dispatch_window (int window_num)
32866 debug_dispatch_window_file (stdout, window_num);
32869 /* Print INSN dispatch information to FILE. */
32871 DEBUG_FUNCTION static void
32872 debug_insn_dispatch_info_file (FILE *file, rtx insn)
32874 int byte_len;
32875 enum insn_path path;
32876 enum dispatch_group group;
32877 int imm_size;
32878 int num_imm_operand;
32879 int num_imm32_operand;
32880 int num_imm64_operand;
32882 if (INSN_CODE (insn) < 0)
32883 return;
32885 byte_len = min_insn_size (insn);
32886 path = get_insn_path (insn);
32887 group = get_insn_group (insn);
32888 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32889 &num_imm64_operand);
32891 fprintf (file, " insn info:\n");
32892 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
32893 group_name[group], path, byte_len);
32894 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
32895 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
32898 /* Print to STDOUT the status of the ready list with respect to
32899 dispatch windows. */
32901 DEBUG_FUNCTION void
32902 debug_ready_dispatch (void)
32904 int i;
32905 int no_ready = number_in_ready ();
32907 fprintf (stdout, "Number of ready: %d\n", no_ready);
32909 for (i = 0; i < no_ready; i++)
32910 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
32913 /* This routine is the driver of the dispatch scheduler. */
32915 static void
32916 do_dispatch (rtx insn, int mode)
32918 if (mode == DISPATCH_INIT)
32919 init_dispatch_sched ();
32920 else if (mode == ADD_TO_DISPATCH_WINDOW)
32921 add_to_dispatch_window (insn);
32924 /* Return TRUE if Dispatch Scheduling is supported. */
32926 static bool
32927 has_dispatch (rtx insn, int action)
32929 if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
32930 switch (action)
32932 default:
32933 return false;
32935 case IS_DISPATCH_ON:
32936 return true;
32937 break;
32939 case IS_CMP:
32940 return is_cmp (insn);
32942 case DISPATCH_VIOLATION:
32943 return dispatch_violation ();
32945 case FITS_DISPATCH_WINDOW:
32946 return fits_dispatch_window (insn);
32949 return false;
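/* Dispatch scheduling is therefore only active when tuning for
   PROCESSOR_BDVER1 with flag_dispatch_scheduler set; in every other
   configuration has_dispatch answers false and the dispatch window
   machinery is effectively disabled.  */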
32952 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
32953 place emms and femms instructions. */
32955 static unsigned int
32956 ix86_units_per_simd_word (enum machine_mode mode)
32958 /* Disable double precision vectorizer if needed. */
32959 if (mode == DFmode && !TARGET_VECTORIZE_DOUBLE)
32960 return UNITS_PER_WORD;
32962 #if 0
32963 /* FIXME: AVX has 32-byte floating point vector operations and 16-byte
32964 integer vector operations. But the vectorizer doesn't support
32965 different sizes for integer and floating point vectors. We limit
32966 vector size to 16 bytes. */
32967 if (TARGET_AVX)
32968 return (mode == DFmode || mode == SFmode) ? 32 : 16;
32969 else
32970 #endif
32971 return TARGET_SSE ? 16 : UNITS_PER_WORD;
32974 /* Initialize the GCC target structure. */
32975 #undef TARGET_RETURN_IN_MEMORY
32976 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
32978 #undef TARGET_LEGITIMIZE_ADDRESS
32979 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
32981 #undef TARGET_ATTRIBUTE_TABLE
32982 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
32983 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
32984 # undef TARGET_MERGE_DECL_ATTRIBUTES
32985 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
32986 #endif
32988 #undef TARGET_COMP_TYPE_ATTRIBUTES
32989 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
32991 #undef TARGET_INIT_BUILTINS
32992 #define TARGET_INIT_BUILTINS ix86_init_builtins
32993 #undef TARGET_BUILTIN_DECL
32994 #define TARGET_BUILTIN_DECL ix86_builtin_decl
32995 #undef TARGET_EXPAND_BUILTIN
32996 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
32998 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
32999 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
33000 ix86_builtin_vectorized_function
33002 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
33003 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
33005 #undef TARGET_BUILTIN_RECIPROCAL
33006 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
33008 #undef TARGET_ASM_FUNCTION_EPILOGUE
33009 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
33011 #undef TARGET_ENCODE_SECTION_INFO
33012 #ifndef SUBTARGET_ENCODE_SECTION_INFO
33013 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
33014 #else
33015 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
33016 #endif
33018 #undef TARGET_ASM_OPEN_PAREN
33019 #define TARGET_ASM_OPEN_PAREN ""
33020 #undef TARGET_ASM_CLOSE_PAREN
33021 #define TARGET_ASM_CLOSE_PAREN ""
33023 #undef TARGET_ASM_BYTE_OP
33024 #define TARGET_ASM_BYTE_OP ASM_BYTE
33026 #undef TARGET_ASM_ALIGNED_HI_OP
33027 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
33028 #undef TARGET_ASM_ALIGNED_SI_OP
33029 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
33030 #ifdef ASM_QUAD
33031 #undef TARGET_ASM_ALIGNED_DI_OP
33032 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
33033 #endif
33035 #undef TARGET_PROFILE_BEFORE_PROLOGUE
33036 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
33038 #undef TARGET_ASM_UNALIGNED_HI_OP
33039 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
33040 #undef TARGET_ASM_UNALIGNED_SI_OP
33041 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
33042 #undef TARGET_ASM_UNALIGNED_DI_OP
33043 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
33045 #undef TARGET_PRINT_OPERAND
33046 #define TARGET_PRINT_OPERAND ix86_print_operand
33047 #undef TARGET_PRINT_OPERAND_ADDRESS
33048 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
33049 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
33050 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
33051 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
33052 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
33054 #undef TARGET_SCHED_ADJUST_COST
33055 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
33056 #undef TARGET_SCHED_ISSUE_RATE
33057 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
33058 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
33059 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
33060 ia32_multipass_dfa_lookahead
33062 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
33063 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
33065 #ifdef HAVE_AS_TLS
33066 #undef TARGET_HAVE_TLS
33067 #define TARGET_HAVE_TLS true
33068 #endif
33069 #undef TARGET_CANNOT_FORCE_CONST_MEM
33070 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
33071 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
33072 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
33074 #undef TARGET_DELEGITIMIZE_ADDRESS
33075 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
33077 #undef TARGET_MS_BITFIELD_LAYOUT_P
33078 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
33080 #if TARGET_MACHO
33081 #undef TARGET_BINDS_LOCAL_P
33082 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
33083 #endif
33084 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
33085 #undef TARGET_BINDS_LOCAL_P
33086 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
33087 #endif
33089 #undef TARGET_ASM_OUTPUT_MI_THUNK
33090 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
33091 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
33092 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
33094 #undef TARGET_ASM_FILE_START
33095 #define TARGET_ASM_FILE_START x86_file_start
33097 #undef TARGET_DEFAULT_TARGET_FLAGS
33098 #define TARGET_DEFAULT_TARGET_FLAGS \
33099 (TARGET_DEFAULT \
33100 | TARGET_SUBTARGET_DEFAULT \
33101 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT \
33102 | MASK_FUSED_MADD)
33104 #undef TARGET_HANDLE_OPTION
33105 #define TARGET_HANDLE_OPTION ix86_handle_option
33107 #undef TARGET_OPTION_OVERRIDE
33108 #define TARGET_OPTION_OVERRIDE ix86_option_override
33109 #undef TARGET_OPTION_OPTIMIZATION
33110 #define TARGET_OPTION_OPTIMIZATION ix86_option_optimization
33112 #undef TARGET_REGISTER_MOVE_COST
33113 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
33114 #undef TARGET_MEMORY_MOVE_COST
33115 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
33116 #undef TARGET_RTX_COSTS
33117 #define TARGET_RTX_COSTS ix86_rtx_costs
33118 #undef TARGET_ADDRESS_COST
33119 #define TARGET_ADDRESS_COST ix86_address_cost
33121 #undef TARGET_FIXED_CONDITION_CODE_REGS
33122 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
33123 #undef TARGET_CC_MODES_COMPATIBLE
33124 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
33126 #undef TARGET_MACHINE_DEPENDENT_REORG
33127 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
33129 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
33130 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
33132 #undef TARGET_BUILD_BUILTIN_VA_LIST
33133 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
33135 #undef TARGET_ENUM_VA_LIST_P
33136 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
33138 #undef TARGET_FN_ABI_VA_LIST
33139 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
33141 #undef TARGET_CANONICAL_VA_LIST_TYPE
33142 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
33144 #undef TARGET_EXPAND_BUILTIN_VA_START
33145 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
33147 #undef TARGET_MD_ASM_CLOBBERS
33148 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
33150 #undef TARGET_PROMOTE_PROTOTYPES
33151 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
33152 #undef TARGET_STRUCT_VALUE_RTX
33153 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
33154 #undef TARGET_SETUP_INCOMING_VARARGS
33155 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
33156 #undef TARGET_MUST_PASS_IN_STACK
33157 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
33158 #undef TARGET_FUNCTION_ARG_ADVANCE
33159 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
33160 #undef TARGET_FUNCTION_ARG
33161 #define TARGET_FUNCTION_ARG ix86_function_arg
33162 #undef TARGET_PASS_BY_REFERENCE
33163 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
33164 #undef TARGET_INTERNAL_ARG_POINTER
33165 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
33166 #undef TARGET_UPDATE_STACK_BOUNDARY
33167 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
33168 #undef TARGET_GET_DRAP_RTX
33169 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
33170 #undef TARGET_STRICT_ARGUMENT_NAMING
33171 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
33172 #undef TARGET_STATIC_CHAIN
33173 #define TARGET_STATIC_CHAIN ix86_static_chain
33174 #undef TARGET_TRAMPOLINE_INIT
33175 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
33176 #undef TARGET_RETURN_POPS_ARGS
33177 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
33179 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
33180 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
33182 #undef TARGET_SCALAR_MODE_SUPPORTED_P
33183 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
33185 #undef TARGET_VECTOR_MODE_SUPPORTED_P
33186 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
33188 #undef TARGET_C_MODE_FOR_SUFFIX
33189 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
33191 #ifdef HAVE_AS_TLS
33192 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
33193 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
33194 #endif
33196 #ifdef SUBTARGET_INSERT_ATTRIBUTES
33197 #undef TARGET_INSERT_ATTRIBUTES
33198 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
33199 #endif
33201 #undef TARGET_MANGLE_TYPE
33202 #define TARGET_MANGLE_TYPE ix86_mangle_type
33204 #undef TARGET_STACK_PROTECT_FAIL
33205 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
33207 #undef TARGET_SUPPORTS_SPLIT_STACK
33208 #define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack
33210 #undef TARGET_FUNCTION_VALUE
33211 #define TARGET_FUNCTION_VALUE ix86_function_value
33213 #undef TARGET_FUNCTION_VALUE_REGNO_P
33214 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
33216 #undef TARGET_SECONDARY_RELOAD
33217 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
33219 #undef TARGET_CLASS_LIKELY_SPILLED_P
33220 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
33222 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
33223 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
33224 ix86_builtin_vectorization_cost
33225 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
33226 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
33227 ix86_vectorize_builtin_vec_perm
33228 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
33229 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
33230 ix86_vectorize_builtin_vec_perm_ok
33231 #undef TARGET_VECTORIZE_UNITS_PER_SIMD_WORD
33232 #define TARGET_VECTORIZE_UNITS_PER_SIMD_WORD \
33233 ix86_units_per_simd_word
33235 #undef TARGET_SET_CURRENT_FUNCTION
33236 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
33238 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
33239 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
33241 #undef TARGET_OPTION_SAVE
33242 #define TARGET_OPTION_SAVE ix86_function_specific_save
33244 #undef TARGET_OPTION_RESTORE
33245 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
33247 #undef TARGET_OPTION_PRINT
33248 #define TARGET_OPTION_PRINT ix86_function_specific_print
33250 #undef TARGET_CAN_INLINE_P
33251 #define TARGET_CAN_INLINE_P ix86_can_inline_p
33253 #undef TARGET_EXPAND_TO_RTL_HOOK
33254 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
33256 #undef TARGET_LEGITIMATE_ADDRESS_P
33257 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
33259 #undef TARGET_IRA_COVER_CLASSES
33260 #define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes
33262 #undef TARGET_FRAME_POINTER_REQUIRED
33263 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
33265 #undef TARGET_CAN_ELIMINATE
33266 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
33268 #undef TARGET_EXTRA_LIVE_ON_ENTRY
33269 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
33271 #undef TARGET_ASM_CODE_END
33272 #define TARGET_ASM_CODE_END ix86_code_end
33274 struct gcc_target targetm = TARGET_INITIALIZER;
33276 #include "gt-i386.h"