gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "function.h"
42 #include "recog.h"
43 #include "expr.h"
44 #include "optabs.h"
45 #include "diagnostic-core.h"
46 #include "toplev.h"
47 #include "basic-block.h"
48 #include "ggc.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "reload.h"
54 #include "cgraph.h"
55 #include "pointer-set.h"
56 #include "hash-table.h"
57 #include "vec.h"
58 #include "basic-block.h"
59 #include "tree-ssa-alias.h"
60 #include "internal-fn.h"
61 #include "gimple-fold.h"
62 #include "tree-eh.h"
63 #include "gimple-expr.h"
64 #include "is-a.h"
65 #include "gimple.h"
66 #include "gimplify.h"
67 #include "cfgloop.h"
68 #include "dwarf2.h"
69 #include "df.h"
70 #include "tm-constrs.h"
71 #include "params.h"
72 #include "cselib.h"
73 #include "debug.h"
74 #include "sched-int.h"
75 #include "sbitmap.h"
76 #include "fibheap.h"
77 #include "opts.h"
78 #include "diagnostic.h"
79 #include "dumpfile.h"
80 #include "tree-pass.h"
81 #include "context.h"
82 #include "pass_manager.h"
83 #include "target-globals.h"
85 static rtx legitimize_dllimport_symbol (rtx, bool);
86 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
87 static rtx legitimize_pe_coff_symbol (rtx, bool);
89 #ifndef CHECK_STACK_LIMIT
90 #define CHECK_STACK_LIMIT (-1)
91 #endif
93 /* Return index of given mode in mult and division cost tables. */
94 #define MODE_INDEX(mode) \
95 ((mode) == QImode ? 0 \
96 : (mode) == HImode ? 1 \
97 : (mode) == SImode ? 2 \
98 : (mode) == DImode ? 3 \
99 : 4)
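/* Illustrative lookup (a sketch only; `ix86_cost', `mult_init' and `divide'
   are the cost-table names assumed from the processor_costs definitions
   below):

     int mult_si_cost = ix86_cost->mult_init[MODE_INDEX (SImode)];  /* index 2 */
     int div_di_cost  = ix86_cost->divide[MODE_INDEX (DImode)];     /* index 3 */

   Any mode other than QImode/HImode/SImode/DImode falls through to the
   trailing "other" entry at index 4.  */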
101 /* Processor costs (relative to an add) */
102 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
103 #define COSTS_N_BYTES(N) ((N) * 2)
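/* Worked example of the scale above (assuming COSTS_N_INSNS (N) is (N) * 4,
   as stated): a plain 2-byte add costs COSTS_N_BYTES (2) == 4
   == COSTS_N_INSNS (1), so the size-tuned byte costs below sit on the same
   scale as the speed-tuned insn costs used by the other tables.  */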
105 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
107 static stringop_algs ix86_size_memcpy[2] = {
108 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
109 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
110 static stringop_algs ix86_size_memset[2] = {
111 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
112 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
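/* How to read these stringop tables (a sketch; the field meanings are
   assumed from the stringop_algs layout used elsewhere in the port): each
   initializer is {alg_for_unknown_size, {{max, alg, noalign}, ...}}, where
   MAX is the largest block size handled by ALG (-1 meaning "any size") and
   NOALIGN says whether the alignment prologue may be skipped.  The two
   array elements are presumed to cover 32-bit and 64-bit code, e.g.
   (illustrative only):

     const struct stringop_algs *algs
       = &ix86_cost->memcpy[TARGET_64BIT != 0];

   So when optimizing for size, every block size uses a one-byte
   rep-prefixed copy/set.  */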
114 const
115 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
116 COSTS_N_BYTES (2), /* cost of an add instruction */
117 COSTS_N_BYTES (3), /* cost of a lea instruction */
118 COSTS_N_BYTES (2), /* variable shift costs */
119 COSTS_N_BYTES (3), /* constant shift costs */
120 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
121 COSTS_N_BYTES (3), /* HI */
122 COSTS_N_BYTES (3), /* SI */
123 COSTS_N_BYTES (3), /* DI */
124 COSTS_N_BYTES (5)}, /* other */
125 0, /* cost of multiply per each bit set */
126 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
127 COSTS_N_BYTES (3), /* HI */
128 COSTS_N_BYTES (3), /* SI */
129 COSTS_N_BYTES (3), /* DI */
130 COSTS_N_BYTES (5)}, /* other */
131 COSTS_N_BYTES (3), /* cost of movsx */
132 COSTS_N_BYTES (3), /* cost of movzx */
133 0, /* "large" insn */
134 2, /* MOVE_RATIO */
135 2, /* cost for loading QImode using movzbl */
136 {2, 2, 2}, /* cost of loading integer registers
137 in QImode, HImode and SImode.
138 Relative to reg-reg move (2). */
139 {2, 2, 2}, /* cost of storing integer registers */
140 2, /* cost of reg,reg fld/fst */
141 {2, 2, 2}, /* cost of loading fp registers
142 in SFmode, DFmode and XFmode */
143 {2, 2, 2}, /* cost of storing fp registers
144 in SFmode, DFmode and XFmode */
145 3, /* cost of moving MMX register */
146 {3, 3}, /* cost of loading MMX registers
147 in SImode and DImode */
148 {3, 3}, /* cost of storing MMX registers
149 in SImode and DImode */
150 3, /* cost of moving SSE register */
151 {3, 3, 3}, /* cost of loading SSE registers
152 in SImode, DImode and TImode */
153 {3, 3, 3}, /* cost of storing SSE registers
154 in SImode, DImode and TImode */
155 3, /* MMX or SSE register to integer */
156 0, /* size of l1 cache */
157 0, /* size of l2 cache */
158 0, /* size of prefetch block */
159 0, /* number of parallel prefetches */
160 2, /* Branch cost */
161 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
162 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
163 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
164 COSTS_N_BYTES (2), /* cost of FABS instruction. */
165 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
166 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
167 ix86_size_memcpy,
168 ix86_size_memset,
169 1, /* scalar_stmt_cost. */
170 1, /* scalar load_cost. */
171 1, /* scalar_store_cost. */
172 1, /* vec_stmt_cost. */
173 1, /* vec_to_scalar_cost. */
174 1, /* scalar_to_vec_cost. */
175 1, /* vec_align_load_cost. */
176 1, /* vec_unalign_load_cost. */
177 1, /* vec_store_cost. */
178 1, /* cond_taken_branch_cost. */
179 1, /* cond_not_taken_branch_cost. */
};
182 /* Processor costs (relative to an add) */
183 static stringop_algs i386_memcpy[2] = {
184 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
185 DUMMY_STRINGOP_ALGS};
186 static stringop_algs i386_memset[2] = {
187 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
188 DUMMY_STRINGOP_ALGS};
190 static const
191 struct processor_costs i386_cost = { /* 386 specific costs */
192 COSTS_N_INSNS (1), /* cost of an add instruction */
193 COSTS_N_INSNS (1), /* cost of a lea instruction */
194 COSTS_N_INSNS (3), /* variable shift costs */
195 COSTS_N_INSNS (2), /* constant shift costs */
196 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
197 COSTS_N_INSNS (6), /* HI */
198 COSTS_N_INSNS (6), /* SI */
199 COSTS_N_INSNS (6), /* DI */
200 COSTS_N_INSNS (6)}, /* other */
201 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
202 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
203 COSTS_N_INSNS (23), /* HI */
204 COSTS_N_INSNS (23), /* SI */
205 COSTS_N_INSNS (23), /* DI */
206 COSTS_N_INSNS (23)}, /* other */
207 COSTS_N_INSNS (3), /* cost of movsx */
208 COSTS_N_INSNS (2), /* cost of movzx */
209 15, /* "large" insn */
210 3, /* MOVE_RATIO */
211 4, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {8, 8, 8}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {8, 8, 8}, /* cost of storing fp registers
220 in SFmode, DFmode and XFmode */
221 2, /* cost of moving MMX register */
222 {4, 8}, /* cost of loading MMX registers
223 in SImode and DImode */
224 {4, 8}, /* cost of storing MMX registers
225 in SImode and DImode */
226 2, /* cost of moving SSE register */
227 {4, 8, 16}, /* cost of loading SSE registers
228 in SImode, DImode and TImode */
229 {4, 8, 16}, /* cost of storing SSE registers
230 in SImode, DImode and TImode */
231 3, /* MMX or SSE register to integer */
232 0, /* size of l1 cache */
233 0, /* size of l2 cache */
234 0, /* size of prefetch block */
235 0, /* number of parallel prefetches */
236 1, /* Branch cost */
237 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
238 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
239 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
240 COSTS_N_INSNS (22), /* cost of FABS instruction. */
241 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
242 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
243 i386_memcpy,
244 i386_memset,
245 1, /* scalar_stmt_cost. */
246 1, /* scalar load_cost. */
247 1, /* scalar_store_cost. */
248 1, /* vec_stmt_cost. */
249 1, /* vec_to_scalar_cost. */
250 1, /* scalar_to_vec_cost. */
251 1, /* vec_align_load_cost. */
252 2, /* vec_unalign_load_cost. */
253 1, /* vec_store_cost. */
254 3, /* cond_taken_branch_cost. */
255 1, /* cond_not_taken_branch_cost. */
};
258 static stringop_algs i486_memcpy[2] = {
259 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
260 DUMMY_STRINGOP_ALGS};
261 static stringop_algs i486_memset[2] = {
262 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
263 DUMMY_STRINGOP_ALGS};
265 static const
266 struct processor_costs i486_cost = { /* 486 specific costs */
267 COSTS_N_INSNS (1), /* cost of an add instruction */
268 COSTS_N_INSNS (1), /* cost of a lea instruction */
269 COSTS_N_INSNS (3), /* variable shift costs */
270 COSTS_N_INSNS (2), /* constant shift costs */
271 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
272 COSTS_N_INSNS (12), /* HI */
273 COSTS_N_INSNS (12), /* SI */
274 COSTS_N_INSNS (12), /* DI */
275 COSTS_N_INSNS (12)}, /* other */
276 1, /* cost of multiply per each bit set */
277 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
278 COSTS_N_INSNS (40), /* HI */
279 COSTS_N_INSNS (40), /* SI */
280 COSTS_N_INSNS (40), /* DI */
281 COSTS_N_INSNS (40)}, /* other */
282 COSTS_N_INSNS (3), /* cost of movsx */
283 COSTS_N_INSNS (2), /* cost of movzx */
284 15, /* "large" insn */
285 3, /* MOVE_RATIO */
286 4, /* cost for loading QImode using movzbl */
287 {2, 4, 2}, /* cost of loading integer registers
288 in QImode, HImode and SImode.
289 Relative to reg-reg move (2). */
290 {2, 4, 2}, /* cost of storing integer registers */
291 2, /* cost of reg,reg fld/fst */
292 {8, 8, 8}, /* cost of loading fp registers
293 in SFmode, DFmode and XFmode */
294 {8, 8, 8}, /* cost of storing fp registers
295 in SFmode, DFmode and XFmode */
296 2, /* cost of moving MMX register */
297 {4, 8}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {4, 8}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {4, 8, 16}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {4, 8, 16}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 3, /* MMX or SSE register to integer */
307 4, /* size of l1 cache. 486 has 8kB cache
308 shared for code and data, so 4kB is
309 not really precise. */
310 4, /* size of l2 cache */
311 0, /* size of prefetch block */
312 0, /* number of parallel prefetches */
313 1, /* Branch cost */
314 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
315 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
316 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
317 COSTS_N_INSNS (3), /* cost of FABS instruction. */
318 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
319 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
320 i486_memcpy,
321 i486_memset,
322 1, /* scalar_stmt_cost. */
323 1, /* scalar load_cost. */
324 1, /* scalar_store_cost. */
325 1, /* vec_stmt_cost. */
326 1, /* vec_to_scalar_cost. */
327 1, /* scalar_to_vec_cost. */
328 1, /* vec_align_load_cost. */
329 2, /* vec_unalign_load_cost. */
330 1, /* vec_store_cost. */
331 3, /* cond_taken_branch_cost. */
332 1, /* cond_not_taken_branch_cost. */
};
335 static stringop_algs pentium_memcpy[2] = {
336 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
337 DUMMY_STRINGOP_ALGS};
338 static stringop_algs pentium_memset[2] = {
339 {libcall, {{-1, rep_prefix_4_byte, false}}},
340 DUMMY_STRINGOP_ALGS};
342 static const
343 struct processor_costs pentium_cost = {
344 COSTS_N_INSNS (1), /* cost of an add instruction */
345 COSTS_N_INSNS (1), /* cost of a lea instruction */
346 COSTS_N_INSNS (4), /* variable shift costs */
347 COSTS_N_INSNS (1), /* constant shift costs */
348 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
349 COSTS_N_INSNS (11), /* HI */
350 COSTS_N_INSNS (11), /* SI */
351 COSTS_N_INSNS (11), /* DI */
352 COSTS_N_INSNS (11)}, /* other */
353 0, /* cost of multiply per each bit set */
354 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
355 COSTS_N_INSNS (25), /* HI */
356 COSTS_N_INSNS (25), /* SI */
357 COSTS_N_INSNS (25), /* DI */
358 COSTS_N_INSNS (25)}, /* other */
359 COSTS_N_INSNS (3), /* cost of movsx */
360 COSTS_N_INSNS (2), /* cost of movzx */
361 8, /* "large" insn */
362 6, /* MOVE_RATIO */
363 6, /* cost for loading QImode using movzbl */
364 {2, 4, 2}, /* cost of loading integer registers
365 in QImode, HImode and SImode.
366 Relative to reg-reg move (2). */
367 {2, 4, 2}, /* cost of storing integer registers */
368 2, /* cost of reg,reg fld/fst */
369 {2, 2, 6}, /* cost of loading fp registers
370 in SFmode, DFmode and XFmode */
371 {4, 4, 6}, /* cost of storing fp registers
372 in SFmode, DFmode and XFmode */
373 8, /* cost of moving MMX register */
374 {8, 8}, /* cost of loading MMX registers
375 in SImode and DImode */
376 {8, 8}, /* cost of storing MMX registers
377 in SImode and DImode */
378 2, /* cost of moving SSE register */
379 {4, 8, 16}, /* cost of loading SSE registers
380 in SImode, DImode and TImode */
381 {4, 8, 16}, /* cost of storing SSE registers
382 in SImode, DImode and TImode */
383 3, /* MMX or SSE register to integer */
384 8, /* size of l1 cache. */
385 8, /* size of l2 cache */
386 0, /* size of prefetch block */
387 0, /* number of parallel prefetches */
388 2, /* Branch cost */
389 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
390 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
391 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
392 COSTS_N_INSNS (1), /* cost of FABS instruction. */
393 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
394 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
395 pentium_memcpy,
396 pentium_memset,
397 1, /* scalar_stmt_cost. */
398 1, /* scalar load_cost. */
399 1, /* scalar_store_cost. */
400 1, /* vec_stmt_cost. */
401 1, /* vec_to_scalar_cost. */
402 1, /* scalar_to_vec_cost. */
403 1, /* vec_align_load_cost. */
404 2, /* vec_unalign_load_cost. */
405 1, /* vec_store_cost. */
406 3, /* cond_taken_branch_cost. */
407 1, /* cond_not_taken_branch_cost. */
};
410 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
411 (we ensure the alignment). For small blocks an inline loop is still a
412 noticeable win; for bigger blocks either rep movsl or rep movsb is the
413 way to go. Rep movsb apparently has a more expensive startup time in the
414 CPU, but after 4K the difference is down in the noise. */
415 static stringop_algs pentiumpro_memcpy[2] = {
416 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
417 {8192, rep_prefix_4_byte, false},
418 {-1, rep_prefix_1_byte, false}}},
419 DUMMY_STRINGOP_ALGS};
420 static stringop_algs pentiumpro_memset[2] = {
421 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
422 {8192, rep_prefix_4_byte, false},
423 {-1, libcall, false}}},
424 DUMMY_STRINGOP_ALGS};
425 static const
426 struct processor_costs pentiumpro_cost = {
427 COSTS_N_INSNS (1), /* cost of an add instruction */
428 COSTS_N_INSNS (1), /* cost of a lea instruction */
429 COSTS_N_INSNS (1), /* variable shift costs */
430 COSTS_N_INSNS (1), /* constant shift costs */
431 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
432 COSTS_N_INSNS (4), /* HI */
433 COSTS_N_INSNS (4), /* SI */
434 COSTS_N_INSNS (4), /* DI */
435 COSTS_N_INSNS (4)}, /* other */
436 0, /* cost of multiply per each bit set */
437 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
438 COSTS_N_INSNS (17), /* HI */
439 COSTS_N_INSNS (17), /* SI */
440 COSTS_N_INSNS (17), /* DI */
441 COSTS_N_INSNS (17)}, /* other */
442 COSTS_N_INSNS (1), /* cost of movsx */
443 COSTS_N_INSNS (1), /* cost of movzx */
444 8, /* "large" insn */
445 6, /* MOVE_RATIO */
446 2, /* cost for loading QImode using movzbl */
447 {4, 4, 4}, /* cost of loading integer registers
448 in QImode, HImode and SImode.
449 Relative to reg-reg move (2). */
450 {2, 2, 2}, /* cost of storing integer registers */
451 2, /* cost of reg,reg fld/fst */
452 {2, 2, 6}, /* cost of loading fp registers
453 in SFmode, DFmode and XFmode */
454 {4, 4, 6}, /* cost of storing fp registers
455 in SFmode, DFmode and XFmode */
456 2, /* cost of moving MMX register */
457 {2, 2}, /* cost of loading MMX registers
458 in SImode and DImode */
459 {2, 2}, /* cost of storing MMX registers
460 in SImode and DImode */
461 2, /* cost of moving SSE register */
462 {2, 2, 8}, /* cost of loading SSE registers
463 in SImode, DImode and TImode */
464 {2, 2, 8}, /* cost of storing SSE registers
465 in SImode, DImode and TImode */
466 3, /* MMX or SSE register to integer */
467 8, /* size of l1 cache. */
468 256, /* size of l2 cache */
469 32, /* size of prefetch block */
470 6, /* number of parallel prefetches */
471 2, /* Branch cost */
472 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
473 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
474 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
475 COSTS_N_INSNS (2), /* cost of FABS instruction. */
476 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
477 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
478 pentiumpro_memcpy,
479 pentiumpro_memset,
480 1, /* scalar_stmt_cost. */
481 1, /* scalar load_cost. */
482 1, /* scalar_store_cost. */
483 1, /* vec_stmt_cost. */
484 1, /* vec_to_scalar_cost. */
485 1, /* scalar_to_vec_cost. */
486 1, /* vec_align_load_cost. */
487 2, /* vec_unalign_load_cost. */
488 1, /* vec_store_cost. */
489 3, /* cond_taken_branch_cost. */
490 1, /* cond_not_taken_branch_cost. */
};
493 static stringop_algs geode_memcpy[2] = {
494 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
495 DUMMY_STRINGOP_ALGS};
496 static stringop_algs geode_memset[2] = {
497 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
498 DUMMY_STRINGOP_ALGS};
499 static const
500 struct processor_costs geode_cost = {
501 COSTS_N_INSNS (1), /* cost of an add instruction */
502 COSTS_N_INSNS (1), /* cost of a lea instruction */
503 COSTS_N_INSNS (2), /* variable shift costs */
504 COSTS_N_INSNS (1), /* constant shift costs */
505 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
506 COSTS_N_INSNS (4), /* HI */
507 COSTS_N_INSNS (7), /* SI */
508 COSTS_N_INSNS (7), /* DI */
509 COSTS_N_INSNS (7)}, /* other */
510 0, /* cost of multiply per each bit set */
511 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
512 COSTS_N_INSNS (23), /* HI */
513 COSTS_N_INSNS (39), /* SI */
514 COSTS_N_INSNS (39), /* DI */
515 COSTS_N_INSNS (39)}, /* other */
516 COSTS_N_INSNS (1), /* cost of movsx */
517 COSTS_N_INSNS (1), /* cost of movzx */
518 8, /* "large" insn */
519 4, /* MOVE_RATIO */
520 1, /* cost for loading QImode using movzbl */
521 {1, 1, 1}, /* cost of loading integer registers
522 in QImode, HImode and SImode.
523 Relative to reg-reg move (2). */
524 {1, 1, 1}, /* cost of storing integer registers */
525 1, /* cost of reg,reg fld/fst */
526 {1, 1, 1}, /* cost of loading fp registers
527 in SFmode, DFmode and XFmode */
528 {4, 6, 6}, /* cost of storing fp registers
529 in SFmode, DFmode and XFmode */
531 1, /* cost of moving MMX register */
532 {1, 1}, /* cost of loading MMX registers
533 in SImode and DImode */
534 {1, 1}, /* cost of storing MMX registers
535 in SImode and DImode */
536 1, /* cost of moving SSE register */
537 {1, 1, 1}, /* cost of loading SSE registers
538 in SImode, DImode and TImode */
539 {1, 1, 1}, /* cost of storing SSE registers
540 in SImode, DImode and TImode */
541 1, /* MMX or SSE register to integer */
542 64, /* size of l1 cache. */
543 128, /* size of l2 cache. */
544 32, /* size of prefetch block */
545 1, /* number of parallel prefetches */
546 1, /* Branch cost */
547 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
548 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
549 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
550 COSTS_N_INSNS (1), /* cost of FABS instruction. */
551 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
552 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
553 geode_memcpy,
554 geode_memset,
555 1, /* scalar_stmt_cost. */
556 1, /* scalar load_cost. */
557 1, /* scalar_store_cost. */
558 1, /* vec_stmt_cost. */
559 1, /* vec_to_scalar_cost. */
560 1, /* scalar_to_vec_cost. */
561 1, /* vec_align_load_cost. */
562 2, /* vec_unalign_load_cost. */
563 1, /* vec_store_cost. */
564 3, /* cond_taken_branch_cost. */
565 1, /* cond_not_taken_branch_cost. */
};
568 static stringop_algs k6_memcpy[2] = {
569 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
570 DUMMY_STRINGOP_ALGS};
571 static stringop_algs k6_memset[2] = {
572 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
573 DUMMY_STRINGOP_ALGS};
574 static const
575 struct processor_costs k6_cost = {
576 COSTS_N_INSNS (1), /* cost of an add instruction */
577 COSTS_N_INSNS (2), /* cost of a lea instruction */
578 COSTS_N_INSNS (1), /* variable shift costs */
579 COSTS_N_INSNS (1), /* constant shift costs */
580 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
581 COSTS_N_INSNS (3), /* HI */
582 COSTS_N_INSNS (3), /* SI */
583 COSTS_N_INSNS (3), /* DI */
584 COSTS_N_INSNS (3)}, /* other */
585 0, /* cost of multiply per each bit set */
586 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
587 COSTS_N_INSNS (18), /* HI */
588 COSTS_N_INSNS (18), /* SI */
589 COSTS_N_INSNS (18), /* DI */
590 COSTS_N_INSNS (18)}, /* other */
591 COSTS_N_INSNS (2), /* cost of movsx */
592 COSTS_N_INSNS (2), /* cost of movzx */
593 8, /* "large" insn */
594 4, /* MOVE_RATIO */
595 3, /* cost for loading QImode using movzbl */
596 {4, 5, 4}, /* cost of loading integer registers
597 in QImode, HImode and SImode.
598 Relative to reg-reg move (2). */
599 {2, 3, 2}, /* cost of storing integer registers */
600 4, /* cost of reg,reg fld/fst */
601 {6, 6, 6}, /* cost of loading fp registers
602 in SFmode, DFmode and XFmode */
603 {4, 4, 4}, /* cost of storing fp registers
604 in SFmode, DFmode and XFmode */
605 2, /* cost of moving MMX register */
606 {2, 2}, /* cost of loading MMX registers
607 in SImode and DImode */
608 {2, 2}, /* cost of storing MMX registers
609 in SImode and DImode */
610 2, /* cost of moving SSE register */
611 {2, 2, 8}, /* cost of loading SSE registers
612 in SImode, DImode and TImode */
613 {2, 2, 8}, /* cost of storing SSE registers
614 in SImode, DImode and TImode */
615 6, /* MMX or SSE register to integer */
616 32, /* size of l1 cache. */
617 32, /* size of l2 cache. Some models
618 have integrated l2 cache, but
619 optimizing for k6 is not important
620 enough to worry about that. */
621 32, /* size of prefetch block */
622 1, /* number of parallel prefetches */
623 1, /* Branch cost */
624 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
625 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
626 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
627 COSTS_N_INSNS (2), /* cost of FABS instruction. */
628 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
629 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
630 k6_memcpy,
631 k6_memset,
632 1, /* scalar_stmt_cost. */
633 1, /* scalar load_cost. */
634 1, /* scalar_store_cost. */
635 1, /* vec_stmt_cost. */
636 1, /* vec_to_scalar_cost. */
637 1, /* scalar_to_vec_cost. */
638 1, /* vec_align_load_cost. */
639 2, /* vec_unalign_load_cost. */
640 1, /* vec_store_cost. */
641 3, /* cond_taken_branch_cost. */
642 1, /* cond_not_taken_branch_cost. */
};
645 /* For some reason, Athlon deals better with REP prefix (relative to loops)
646 compared to K8. Alignment becomes important after 8 bytes for memcpy and
647 128 bytes for memset. */
648 static stringop_algs athlon_memcpy[2] = {
649 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
650 DUMMY_STRINGOP_ALGS};
651 static stringop_algs athlon_memset[2] = {
652 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
653 DUMMY_STRINGOP_ALGS};
654 static const
655 struct processor_costs athlon_cost = {
656 COSTS_N_INSNS (1), /* cost of an add instruction */
657 COSTS_N_INSNS (2), /* cost of a lea instruction */
658 COSTS_N_INSNS (1), /* variable shift costs */
659 COSTS_N_INSNS (1), /* constant shift costs */
660 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
661 COSTS_N_INSNS (5), /* HI */
662 COSTS_N_INSNS (5), /* SI */
663 COSTS_N_INSNS (5), /* DI */
664 COSTS_N_INSNS (5)}, /* other */
665 0, /* cost of multiply per each bit set */
666 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
667 COSTS_N_INSNS (26), /* HI */
668 COSTS_N_INSNS (42), /* SI */
669 COSTS_N_INSNS (74), /* DI */
670 COSTS_N_INSNS (74)}, /* other */
671 COSTS_N_INSNS (1), /* cost of movsx */
672 COSTS_N_INSNS (1), /* cost of movzx */
673 8, /* "large" insn */
674 9, /* MOVE_RATIO */
675 4, /* cost for loading QImode using movzbl */
676 {3, 4, 3}, /* cost of loading integer registers
677 in QImode, HImode and SImode.
678 Relative to reg-reg move (2). */
679 {3, 4, 3}, /* cost of storing integer registers */
680 4, /* cost of reg,reg fld/fst */
681 {4, 4, 12}, /* cost of loading fp registers
682 in SFmode, DFmode and XFmode */
683 {6, 6, 8}, /* cost of storing fp registers
684 in SFmode, DFmode and XFmode */
685 2, /* cost of moving MMX register */
686 {4, 4}, /* cost of loading MMX registers
687 in SImode and DImode */
688 {4, 4}, /* cost of storing MMX registers
689 in SImode and DImode */
690 2, /* cost of moving SSE register */
691 {4, 4, 6}, /* cost of loading SSE registers
692 in SImode, DImode and TImode */
693 {4, 4, 5}, /* cost of storing SSE registers
694 in SImode, DImode and TImode */
695 5, /* MMX or SSE register to integer */
696 64, /* size of l1 cache. */
697 256, /* size of l2 cache. */
698 64, /* size of prefetch block */
699 6, /* number of parallel prefetches */
700 5, /* Branch cost */
701 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
702 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
703 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
704 COSTS_N_INSNS (2), /* cost of FABS instruction. */
705 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
706 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
707 athlon_memcpy,
708 athlon_memset,
709 1, /* scalar_stmt_cost. */
710 1, /* scalar load_cost. */
711 1, /* scalar_store_cost. */
712 1, /* vec_stmt_cost. */
713 1, /* vec_to_scalar_cost. */
714 1, /* scalar_to_vec_cost. */
715 1, /* vec_align_load_cost. */
716 2, /* vec_unalign_load_cost. */
717 1, /* vec_store_cost. */
718 3, /* cond_taken_branch_cost. */
719 1, /* cond_not_taken_branch_cost. */
};
722 /* K8 has optimized REP instruction for medium sized blocks, but for very
723 small blocks it is better to use loop. For large blocks, libcall can
724 do nontemporal accesses and beat inline considerably. */
725 static stringop_algs k8_memcpy[2] = {
726 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
727 {-1, rep_prefix_4_byte, false}}},
728 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
729 {-1, libcall, false}}}};
730 static stringop_algs k8_memset[2] = {
731 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
732 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
733 {libcall, {{48, unrolled_loop, false},
734 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
735 static const
736 struct processor_costs k8_cost = {
737 COSTS_N_INSNS (1), /* cost of an add instruction */
738 COSTS_N_INSNS (2), /* cost of a lea instruction */
739 COSTS_N_INSNS (1), /* variable shift costs */
740 COSTS_N_INSNS (1), /* constant shift costs */
741 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
742 COSTS_N_INSNS (4), /* HI */
743 COSTS_N_INSNS (3), /* SI */
744 COSTS_N_INSNS (4), /* DI */
745 COSTS_N_INSNS (5)}, /* other */
746 0, /* cost of multiply per each bit set */
747 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
748 COSTS_N_INSNS (26), /* HI */
749 COSTS_N_INSNS (42), /* SI */
750 COSTS_N_INSNS (74), /* DI */
751 COSTS_N_INSNS (74)}, /* other */
752 COSTS_N_INSNS (1), /* cost of movsx */
753 COSTS_N_INSNS (1), /* cost of movzx */
754 8, /* "large" insn */
755 9, /* MOVE_RATIO */
756 4, /* cost for loading QImode using movzbl */
757 {3, 4, 3}, /* cost of loading integer registers
758 in QImode, HImode and SImode.
759 Relative to reg-reg move (2). */
760 {3, 4, 3}, /* cost of storing integer registers */
761 4, /* cost of reg,reg fld/fst */
762 {4, 4, 12}, /* cost of loading fp registers
763 in SFmode, DFmode and XFmode */
764 {6, 6, 8}, /* cost of storing fp registers
765 in SFmode, DFmode and XFmode */
766 2, /* cost of moving MMX register */
767 {3, 3}, /* cost of loading MMX registers
768 in SImode and DImode */
769 {4, 4}, /* cost of storing MMX registers
770 in SImode and DImode */
771 2, /* cost of moving SSE register */
772 {4, 3, 6}, /* cost of loading SSE registers
773 in SImode, DImode and TImode */
774 {4, 4, 5}, /* cost of storing SSE registers
775 in SImode, DImode and TImode */
776 5, /* MMX or SSE register to integer */
777 64, /* size of l1 cache. */
778 512, /* size of l2 cache. */
779 64, /* size of prefetch block */
780 /* New AMD processors never drop prefetches; if they cannot be performed
781 immediately, they are queued. We set number of simultaneous prefetches
782 to a large constant to reflect this (it probably is not a good idea not
783 to limit number of prefetches at all, as their execution also takes some
784 time). */
785 100, /* number of parallel prefetches */
786 3, /* Branch cost */
787 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
788 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
789 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
790 COSTS_N_INSNS (2), /* cost of FABS instruction. */
791 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
792 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
794 k8_memcpy,
795 k8_memset,
796 4, /* scalar_stmt_cost. */
797 2, /* scalar load_cost. */
798 2, /* scalar_store_cost. */
799 5, /* vec_stmt_cost. */
800 0, /* vec_to_scalar_cost. */
801 2, /* scalar_to_vec_cost. */
802 2, /* vec_align_load_cost. */
803 3, /* vec_unalign_load_cost. */
804 3, /* vec_store_cost. */
805 3, /* cond_taken_branch_cost. */
806 2, /* cond_not_taken_branch_cost. */
};
809 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
810 very small blocks it is better to use loop. For large blocks, libcall can
811 do nontemporal accesses and beat inline considerably. */
812 static stringop_algs amdfam10_memcpy[2] = {
813 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
814 {-1, rep_prefix_4_byte, false}}},
815 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
816 {-1, libcall, false}}}};
817 static stringop_algs amdfam10_memset[2] = {
818 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
819 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
820 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
821 {-1, libcall, false}}}};
822 struct processor_costs amdfam10_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (2), /* cost of a lea instruction */
825 COSTS_N_INSNS (1), /* variable shift costs */
826 COSTS_N_INSNS (1), /* constant shift costs */
827 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (4), /* HI */
829 COSTS_N_INSNS (3), /* SI */
830 COSTS_N_INSNS (4), /* DI */
831 COSTS_N_INSNS (5)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (35), /* HI */
835 COSTS_N_INSNS (51), /* SI */
836 COSTS_N_INSNS (83), /* DI */
837 COSTS_N_INSNS (83)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 8, /* "large" insn */
841 9, /* MOVE_RATIO */
842 4, /* cost for loading QImode using movzbl */
843 {3, 4, 3}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {3, 4, 3}, /* cost of storing integer registers */
847 4, /* cost of reg,reg fld/fst */
848 {4, 4, 12}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {6, 6, 8}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {3, 3}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {4, 4}, /* cost of storing MMX registers
856 in SImode and DImode */
857 2, /* cost of moving SSE register */
858 {4, 4, 3}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {4, 4, 5}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 3, /* MMX or SSE register to integer */
863 /* On K8:
864 MOVD reg64, xmmreg Double FSTORE 4
865 MOVD reg32, xmmreg Double FSTORE 4
866 On AMDFAM10:
867 MOVD reg64, xmmreg Double FADD 3
868 1/1 1/1
869 MOVD reg32, xmmreg Double FADD 3
870 1/1 1/1 */
871 64, /* size of l1 cache. */
872 512, /* size of l2 cache. */
873 64, /* size of prefetch block */
874 /* New AMD processors never drop prefetches; if they cannot be performed
875 immediately, they are queued. We set number of simultaneous prefetches
876 to a large constant to reflect this (it probably is not a good idea not
877 to limit number of prefetches at all, as their execution also takes some
878 time). */
879 100, /* number of parallel prefetches */
880 2, /* Branch cost */
881 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
882 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
883 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
884 COSTS_N_INSNS (2), /* cost of FABS instruction. */
885 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
886 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
888 amdfam10_memcpy,
889 amdfam10_memset,
890 4, /* scalar_stmt_cost. */
891 2, /* scalar load_cost. */
892 2, /* scalar_store_cost. */
893 6, /* vec_stmt_cost. */
894 0, /* vec_to_scalar_cost. */
895 2, /* scalar_to_vec_cost. */
896 2, /* vec_align_load_cost. */
897 2, /* vec_unalign_load_cost. */
898 2, /* vec_store_cost. */
899 2, /* cond_taken_branch_cost. */
900 1, /* cond_not_taken_branch_cost. */
};
903 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
904 very small blocks it is better to use loop. For large blocks, libcall
905 can do nontemporal accesses and beat inline considerably. */
906 static stringop_algs bdver1_memcpy[2] = {
907 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
908 {-1, rep_prefix_4_byte, false}}},
909 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
910 {-1, libcall, false}}}};
911 static stringop_algs bdver1_memset[2] = {
912 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
913 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
914 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
915 {-1, libcall, false}}}};
917 const struct processor_costs bdver1_cost = {
918 COSTS_N_INSNS (1), /* cost of an add instruction */
919 COSTS_N_INSNS (1), /* cost of a lea instruction */
920 COSTS_N_INSNS (1), /* variable shift costs */
921 COSTS_N_INSNS (1), /* constant shift costs */
922 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
923 COSTS_N_INSNS (4), /* HI */
924 COSTS_N_INSNS (4), /* SI */
925 COSTS_N_INSNS (6), /* DI */
926 COSTS_N_INSNS (6)}, /* other */
927 0, /* cost of multiply per each bit set */
928 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
929 COSTS_N_INSNS (35), /* HI */
930 COSTS_N_INSNS (51), /* SI */
931 COSTS_N_INSNS (83), /* DI */
932 COSTS_N_INSNS (83)}, /* other */
933 COSTS_N_INSNS (1), /* cost of movsx */
934 COSTS_N_INSNS (1), /* cost of movzx */
935 8, /* "large" insn */
936 9, /* MOVE_RATIO */
937 4, /* cost for loading QImode using movzbl */
938 {5, 5, 4}, /* cost of loading integer registers
939 in QImode, HImode and SImode.
940 Relative to reg-reg move (2). */
941 {4, 4, 4}, /* cost of storing integer registers */
942 2, /* cost of reg,reg fld/fst */
943 {5, 5, 12}, /* cost of loading fp registers
944 in SFmode, DFmode and XFmode */
945 {4, 4, 8}, /* cost of storing fp registers
946 in SFmode, DFmode and XFmode */
947 2, /* cost of moving MMX register */
948 {4, 4}, /* cost of loading MMX registers
949 in SImode and DImode */
950 {4, 4}, /* cost of storing MMX registers
951 in SImode and DImode */
952 2, /* cost of moving SSE register */
953 {4, 4, 4}, /* cost of loading SSE registers
954 in SImode, DImode and TImode */
955 {4, 4, 4}, /* cost of storing SSE registers
956 in SImode, DImode and TImode */
957 2, /* MMX or SSE register to integer */
958 /* On K8:
959 MOVD reg64, xmmreg Double FSTORE 4
960 MOVD reg32, xmmreg Double FSTORE 4
961 On AMDFAM10:
962 MOVD reg64, xmmreg Double FADD 3
963 1/1 1/1
964 MOVD reg32, xmmreg Double FADD 3
965 1/1 1/1 */
966 16, /* size of l1 cache. */
967 2048, /* size of l2 cache. */
968 64, /* size of prefetch block */
969 /* New AMD processors never drop prefetches; if they cannot be performed
970 immediately, they are queued. We set number of simultaneous prefetches
971 to a large constant to reflect this (it probably is not a good idea not
972 to limit number of prefetches at all, as their execution also takes some
973 time). */
974 100, /* number of parallel prefetches */
975 2, /* Branch cost */
976 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
977 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
978 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
979 COSTS_N_INSNS (2), /* cost of FABS instruction. */
980 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
981 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
983 bdver1_memcpy,
984 bdver1_memset,
985 6, /* scalar_stmt_cost. */
986 4, /* scalar load_cost. */
987 4, /* scalar_store_cost. */
988 6, /* vec_stmt_cost. */
989 0, /* vec_to_scalar_cost. */
990 2, /* scalar_to_vec_cost. */
991 4, /* vec_align_load_cost. */
992 4, /* vec_unalign_load_cost. */
993 4, /* vec_store_cost. */
994 2, /* cond_taken_branch_cost. */
995 1, /* cond_not_taken_branch_cost. */
};
998 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
999 very small blocks it is better to use loop. For large blocks, libcall
1000 can do nontemporal accesses and beat inline considerably. */
1002 static stringop_algs bdver2_memcpy[2] = {
1003 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1004 {-1, rep_prefix_4_byte, false}}},
1005 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1006 {-1, libcall, false}}}};
1007 static stringop_algs bdver2_memset[2] = {
1008 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1009 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1010 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1011 {-1, libcall, false}}}};
1013 const struct processor_costs bdver2_cost = {
1014 COSTS_N_INSNS (1), /* cost of an add instruction */
1015 COSTS_N_INSNS (1), /* cost of a lea instruction */
1016 COSTS_N_INSNS (1), /* variable shift costs */
1017 COSTS_N_INSNS (1), /* constant shift costs */
1018 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1019 COSTS_N_INSNS (4), /* HI */
1020 COSTS_N_INSNS (4), /* SI */
1021 COSTS_N_INSNS (6), /* DI */
1022 COSTS_N_INSNS (6)}, /* other */
1023 0, /* cost of multiply per each bit set */
1024 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1025 COSTS_N_INSNS (35), /* HI */
1026 COSTS_N_INSNS (51), /* SI */
1027 COSTS_N_INSNS (83), /* DI */
1028 COSTS_N_INSNS (83)}, /* other */
1029 COSTS_N_INSNS (1), /* cost of movsx */
1030 COSTS_N_INSNS (1), /* cost of movzx */
1031 8, /* "large" insn */
1032 9, /* MOVE_RATIO */
1033 4, /* cost for loading QImode using movzbl */
1034 {5, 5, 4}, /* cost of loading integer registers
1035 in QImode, HImode and SImode.
1036 Relative to reg-reg move (2). */
1037 {4, 4, 4}, /* cost of storing integer registers */
1038 2, /* cost of reg,reg fld/fst */
1039 {5, 5, 12}, /* cost of loading fp registers
1040 in SFmode, DFmode and XFmode */
1041 {4, 4, 8}, /* cost of storing fp registers
1042 in SFmode, DFmode and XFmode */
1043 2, /* cost of moving MMX register */
1044 {4, 4}, /* cost of loading MMX registers
1045 in SImode and DImode */
1046 {4, 4}, /* cost of storing MMX registers
1047 in SImode and DImode */
1048 2, /* cost of moving SSE register */
1049 {4, 4, 4}, /* cost of loading SSE registers
1050 in SImode, DImode and TImode */
1051 {4, 4, 4}, /* cost of storing SSE registers
1052 in SImode, DImode and TImode */
1053 2, /* MMX or SSE register to integer */
1054 /* On K8:
1055 MOVD reg64, xmmreg Double FSTORE 4
1056 MOVD reg32, xmmreg Double FSTORE 4
1057 On AMDFAM10:
1058 MOVD reg64, xmmreg Double FADD 3
1059 1/1 1/1
1060 MOVD reg32, xmmreg Double FADD 3
1061 1/1 1/1 */
1062 16, /* size of l1 cache. */
1063 2048, /* size of l2 cache. */
1064 64, /* size of prefetch block */
1065 /* New AMD processors never drop prefetches; if they cannot be performed
1066 immediately, they are queued. We set number of simultaneous prefetches
1067 to a large constant to reflect this (it probably is not a good idea not
1068 to limit number of prefetches at all, as their execution also takes some
1069 time). */
1070 100, /* number of parallel prefetches */
1071 2, /* Branch cost */
1072 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1073 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1074 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1075 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1076 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1077 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1079 bdver2_memcpy,
1080 bdver2_memset,
1081 6, /* scalar_stmt_cost. */
1082 4, /* scalar load_cost. */
1083 4, /* scalar_store_cost. */
1084 6, /* vec_stmt_cost. */
1085 0, /* vec_to_scalar_cost. */
1086 2, /* scalar_to_vec_cost. */
1087 4, /* vec_align_load_cost. */
1088 4, /* vec_unalign_load_cost. */
1089 4, /* vec_store_cost. */
1090 2, /* cond_taken_branch_cost. */
1091 1, /* cond_not_taken_branch_cost. */
};
1095 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1096 very small blocks it is better to use loop. For large blocks, libcall
1097 can do nontemporal accesses and beat inline considerably. */
1098 static stringop_algs bdver3_memcpy[2] = {
1099 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1100 {-1, rep_prefix_4_byte, false}}},
1101 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1102 {-1, libcall, false}}}};
1103 static stringop_algs bdver3_memset[2] = {
1104 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1105 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1106 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1107 {-1, libcall, false}}}};
1108 struct processor_costs bdver3_cost = {
1109 COSTS_N_INSNS (1), /* cost of an add instruction */
1110 COSTS_N_INSNS (1), /* cost of a lea instruction */
1111 COSTS_N_INSNS (1), /* variable shift costs */
1112 COSTS_N_INSNS (1), /* constant shift costs */
1113 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1114 COSTS_N_INSNS (4), /* HI */
1115 COSTS_N_INSNS (4), /* SI */
1116 COSTS_N_INSNS (6), /* DI */
1117 COSTS_N_INSNS (6)}, /* other */
1118 0, /* cost of multiply per each bit set */
1119 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1120 COSTS_N_INSNS (35), /* HI */
1121 COSTS_N_INSNS (51), /* SI */
1122 COSTS_N_INSNS (83), /* DI */
1123 COSTS_N_INSNS (83)}, /* other */
1124 COSTS_N_INSNS (1), /* cost of movsx */
1125 COSTS_N_INSNS (1), /* cost of movzx */
1126 8, /* "large" insn */
1127 9, /* MOVE_RATIO */
1128 4, /* cost for loading QImode using movzbl */
1129 {5, 5, 4}, /* cost of loading integer registers
1130 in QImode, HImode and SImode.
1131 Relative to reg-reg move (2). */
1132 {4, 4, 4}, /* cost of storing integer registers */
1133 2, /* cost of reg,reg fld/fst */
1134 {5, 5, 12}, /* cost of loading fp registers
1135 in SFmode, DFmode and XFmode */
1136 {4, 4, 8}, /* cost of storing fp registers
1137 in SFmode, DFmode and XFmode */
1138 2, /* cost of moving MMX register */
1139 {4, 4}, /* cost of loading MMX registers
1140 in SImode and DImode */
1141 {4, 4}, /* cost of storing MMX registers
1142 in SImode and DImode */
1143 2, /* cost of moving SSE register */
1144 {4, 4, 4}, /* cost of loading SSE registers
1145 in SImode, DImode and TImode */
1146 {4, 4, 4}, /* cost of storing SSE registers
1147 in SImode, DImode and TImode */
1148 2, /* MMX or SSE register to integer */
1149 16, /* size of l1 cache. */
1150 2048, /* size of l2 cache. */
1151 64, /* size of prefetch block */
1152 /* New AMD processors never drop prefetches; if they cannot be performed
1153 immediately, they are queued. We set number of simultaneous prefetches
1154 to a large constant to reflect this (it probably is not a good idea not
1155 to limit number of prefetches at all, as their execution also takes some
1156 time). */
1157 100, /* number of parallel prefetches */
1158 2, /* Branch cost */
1159 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1160 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1161 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1162 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1163 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1164 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1166 bdver3_memcpy,
1167 bdver3_memset,
1168 6, /* scalar_stmt_cost. */
1169 4, /* scalar load_cost. */
1170 4, /* scalar_store_cost. */
1171 6, /* vec_stmt_cost. */
1172 0, /* vec_to_scalar_cost. */
1173 2, /* scalar_to_vec_cost. */
1174 4, /* vec_align_load_cost. */
1175 4, /* vec_unalign_load_cost. */
1176 4, /* vec_store_cost. */
1177 2, /* cond_taken_branch_cost. */
1178 1, /* cond_not_taken_branch_cost. */
};
1181 /* BDVER4 has optimized REP instruction for medium sized blocks, but for
1182 very small blocks it is better to use loop. For large blocks, libcall
1183 can do nontemporal accesses and beat inline considerably. */
1184 static stringop_algs bdver4_memcpy[2] = {
1185 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1186 {-1, rep_prefix_4_byte, false}}},
1187 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1188 {-1, libcall, false}}}};
1189 static stringop_algs bdver4_memset[2] = {
1190 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1191 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1192 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1193 {-1, libcall, false}}}};
1194 struct processor_costs bdver4_cost = {
1195 COSTS_N_INSNS (1), /* cost of an add instruction */
1196 COSTS_N_INSNS (1), /* cost of a lea instruction */
1197 COSTS_N_INSNS (1), /* variable shift costs */
1198 COSTS_N_INSNS (1), /* constant shift costs */
1199 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1200 COSTS_N_INSNS (4), /* HI */
1201 COSTS_N_INSNS (4), /* SI */
1202 COSTS_N_INSNS (6), /* DI */
1203 COSTS_N_INSNS (6)}, /* other */
1204 0, /* cost of multiply per each bit set */
1205 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1206 COSTS_N_INSNS (35), /* HI */
1207 COSTS_N_INSNS (51), /* SI */
1208 COSTS_N_INSNS (83), /* DI */
1209 COSTS_N_INSNS (83)}, /* other */
1210 COSTS_N_INSNS (1), /* cost of movsx */
1211 COSTS_N_INSNS (1), /* cost of movzx */
1212 8, /* "large" insn */
1213 9, /* MOVE_RATIO */
1214 4, /* cost for loading QImode using movzbl */
1215 {5, 5, 4}, /* cost of loading integer registers
1216 in QImode, HImode and SImode.
1217 Relative to reg-reg move (2). */
1218 {4, 4, 4}, /* cost of storing integer registers */
1219 2, /* cost of reg,reg fld/fst */
1220 {5, 5, 12}, /* cost of loading fp registers
1221 in SFmode, DFmode and XFmode */
1222 {4, 4, 8}, /* cost of storing fp registers
1223 in SFmode, DFmode and XFmode */
1224 2, /* cost of moving MMX register */
1225 {4, 4}, /* cost of loading MMX registers
1226 in SImode and DImode */
1227 {4, 4}, /* cost of storing MMX registers
1228 in SImode and DImode */
1229 2, /* cost of moving SSE register */
1230 {4, 4, 4}, /* cost of loading SSE registers
1231 in SImode, DImode and TImode */
1232 {4, 4, 4}, /* cost of storing SSE registers
1233 in SImode, DImode and TImode */
1234 2, /* MMX or SSE register to integer */
1235 16, /* size of l1 cache. */
1236 2048, /* size of l2 cache. */
1237 64, /* size of prefetch block */
1238 /* New AMD processors never drop prefetches; if they cannot be performed
1239 immediately, they are queued. We set number of simultaneous prefetches
1240 to a large constant to reflect this (it probably is not a good idea not
1241 to limit number of prefetches at all, as their execution also takes some
1242 time). */
1243 100, /* number of parallel prefetches */
1244 2, /* Branch cost */
1245 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1246 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1247 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1248 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1249 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1250 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1252 bdver4_memcpy,
1253 bdver4_memset,
1254 6, /* scalar_stmt_cost. */
1255 4, /* scalar load_cost. */
1256 4, /* scalar_store_cost. */
1257 6, /* vec_stmt_cost. */
1258 0, /* vec_to_scalar_cost. */
1259 2, /* scalar_to_vec_cost. */
1260 4, /* vec_align_load_cost. */
1261 4, /* vec_unalign_load_cost. */
1262 4, /* vec_store_cost. */
1263 2, /* cond_taken_branch_cost. */
1264 1, /* cond_not_taken_branch_cost. */
};
1267 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1268 very small blocks it is better to use loop. For large blocks, libcall can
1269 do nontemporal accesses and beat inline considerably. */
1270 static stringop_algs btver1_memcpy[2] = {
1271 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1272 {-1, rep_prefix_4_byte, false}}},
1273 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1274 {-1, libcall, false}}}};
1275 static stringop_algs btver1_memset[2] = {
1276 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1277 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1278 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1279 {-1, libcall, false}}}};
1280 const struct processor_costs btver1_cost = {
1281 COSTS_N_INSNS (1), /* cost of an add instruction */
1282 COSTS_N_INSNS (2), /* cost of a lea instruction */
1283 COSTS_N_INSNS (1), /* variable shift costs */
1284 COSTS_N_INSNS (1), /* constant shift costs */
1285 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1286 COSTS_N_INSNS (4), /* HI */
1287 COSTS_N_INSNS (3), /* SI */
1288 COSTS_N_INSNS (4), /* DI */
1289 COSTS_N_INSNS (5)}, /* other */
1290 0, /* cost of multiply per each bit set */
1291 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1292 COSTS_N_INSNS (35), /* HI */
1293 COSTS_N_INSNS (51), /* SI */
1294 COSTS_N_INSNS (83), /* DI */
1295 COSTS_N_INSNS (83)}, /* other */
1296 COSTS_N_INSNS (1), /* cost of movsx */
1297 COSTS_N_INSNS (1), /* cost of movzx */
1298 8, /* "large" insn */
1299 9, /* MOVE_RATIO */
1300 4, /* cost for loading QImode using movzbl */
1301 {3, 4, 3}, /* cost of loading integer registers
1302 in QImode, HImode and SImode.
1303 Relative to reg-reg move (2). */
1304 {3, 4, 3}, /* cost of storing integer registers */
1305 4, /* cost of reg,reg fld/fst */
1306 {4, 4, 12}, /* cost of loading fp registers
1307 in SFmode, DFmode and XFmode */
1308 {6, 6, 8}, /* cost of storing fp registers
1309 in SFmode, DFmode and XFmode */
1310 2, /* cost of moving MMX register */
1311 {3, 3}, /* cost of loading MMX registers
1312 in SImode and DImode */
1313 {4, 4}, /* cost of storing MMX registers
1314 in SImode and DImode */
1315 2, /* cost of moving SSE register */
1316 {4, 4, 3}, /* cost of loading SSE registers
1317 in SImode, DImode and TImode */
1318 {4, 4, 5}, /* cost of storing SSE registers
1319 in SImode, DImode and TImode */
1320 3, /* MMX or SSE register to integer */
1321 /* On K8:
1322 MOVD reg64, xmmreg Double FSTORE 4
1323 MOVD reg32, xmmreg Double FSTORE 4
1324 On AMDFAM10:
1325 MOVD reg64, xmmreg Double FADD 3
1326 1/1 1/1
1327 MOVD reg32, xmmreg Double FADD 3
1328 1/1 1/1 */
1329 32, /* size of l1 cache. */
1330 512, /* size of l2 cache. */
1331 64, /* size of prefetch block */
1332 100, /* number of parallel prefetches */
1333 2, /* Branch cost */
1334 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1335 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1336 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1337 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1338 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1339 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1341 btver1_memcpy,
1342 btver1_memset,
1343 4, /* scalar_stmt_cost. */
1344 2, /* scalar load_cost. */
1345 2, /* scalar_store_cost. */
1346 6, /* vec_stmt_cost. */
1347 0, /* vec_to_scalar_cost. */
1348 2, /* scalar_to_vec_cost. */
1349 2, /* vec_align_load_cost. */
1350 2, /* vec_unalign_load_cost. */
1351 2, /* vec_store_cost. */
1352 2, /* cond_taken_branch_cost. */
1353 1, /* cond_not_taken_branch_cost. */
};
1356 static stringop_algs btver2_memcpy[2] = {
1357 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1358 {-1, rep_prefix_4_byte, false}}},
1359 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1360 {-1, libcall, false}}}};
1361 static stringop_algs btver2_memset[2] = {
1362 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1363 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1364 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1365 {-1, libcall, false}}}};
1366 const struct processor_costs btver2_cost = {
1367 COSTS_N_INSNS (1), /* cost of an add instruction */
1368 COSTS_N_INSNS (2), /* cost of a lea instruction */
1369 COSTS_N_INSNS (1), /* variable shift costs */
1370 COSTS_N_INSNS (1), /* constant shift costs */
1371 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1372 COSTS_N_INSNS (4), /* HI */
1373 COSTS_N_INSNS (3), /* SI */
1374 COSTS_N_INSNS (4), /* DI */
1375 COSTS_N_INSNS (5)}, /* other */
1376 0, /* cost of multiply per each bit set */
1377 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1378 COSTS_N_INSNS (35), /* HI */
1379 COSTS_N_INSNS (51), /* SI */
1380 COSTS_N_INSNS (83), /* DI */
1381 COSTS_N_INSNS (83)}, /* other */
1382 COSTS_N_INSNS (1), /* cost of movsx */
1383 COSTS_N_INSNS (1), /* cost of movzx */
1384 8, /* "large" insn */
1385 9, /* MOVE_RATIO */
1386 4, /* cost for loading QImode using movzbl */
1387 {3, 4, 3}, /* cost of loading integer registers
1388 in QImode, HImode and SImode.
1389 Relative to reg-reg move (2). */
1390 {3, 4, 3}, /* cost of storing integer registers */
1391 4, /* cost of reg,reg fld/fst */
1392 {4, 4, 12}, /* cost of loading fp registers
1393 in SFmode, DFmode and XFmode */
1394 {6, 6, 8}, /* cost of storing fp registers
1395 in SFmode, DFmode and XFmode */
1396 2, /* cost of moving MMX register */
1397 {3, 3}, /* cost of loading MMX registers
1398 in SImode and DImode */
1399 {4, 4}, /* cost of storing MMX registers
1400 in SImode and DImode */
1401 2, /* cost of moving SSE register */
1402 {4, 4, 3}, /* cost of loading SSE registers
1403 in SImode, DImode and TImode */
1404 {4, 4, 5}, /* cost of storing SSE registers
1405 in SImode, DImode and TImode */
1406 3, /* MMX or SSE register to integer */
1407 /* On K8:
1408 MOVD reg64, xmmreg Double FSTORE 4
1409 MOVD reg32, xmmreg Double FSTORE 4
1410 On AMDFAM10:
1411 MOVD reg64, xmmreg Double FADD 3
1412 1/1 1/1
1413 MOVD reg32, xmmreg Double FADD 3
1414 1/1 1/1 */
1415 32, /* size of l1 cache. */
1416 2048, /* size of l2 cache. */
1417 64, /* size of prefetch block */
1418 100, /* number of parallel prefetches */
1419 2, /* Branch cost */
1420 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1421 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1422 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1423 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1424 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1425 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1426 btver2_memcpy,
1427 btver2_memset,
1428 4, /* scalar_stmt_cost. */
1429 2, /* scalar load_cost. */
1430 2, /* scalar_store_cost. */
1431 6, /* vec_stmt_cost. */
1432 0, /* vec_to_scalar_cost. */
1433 2, /* scalar_to_vec_cost. */
1434 2, /* vec_align_load_cost. */
1435 2, /* vec_unalign_load_cost. */
1436 2, /* vec_store_cost. */
1437 2, /* cond_taken_branch_cost. */
1438 1, /* cond_not_taken_branch_cost. */
1441 static stringop_algs pentium4_memcpy[2] = {
1442 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1443 DUMMY_STRINGOP_ALGS};
1444 static stringop_algs pentium4_memset[2] = {
1445 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1446 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1447 DUMMY_STRINGOP_ALGS};
1449 static const
1450 struct processor_costs pentium4_cost = {
1451 COSTS_N_INSNS (1), /* cost of an add instruction */
1452 COSTS_N_INSNS (3), /* cost of a lea instruction */
1453 COSTS_N_INSNS (4), /* variable shift costs */
1454 COSTS_N_INSNS (4), /* constant shift costs */
1455 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1456 COSTS_N_INSNS (15), /* HI */
1457 COSTS_N_INSNS (15), /* SI */
1458 COSTS_N_INSNS (15), /* DI */
1459 COSTS_N_INSNS (15)}, /* other */
1460 0, /* cost of multiply per each bit set */
1461 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1462 COSTS_N_INSNS (56), /* HI */
1463 COSTS_N_INSNS (56), /* SI */
1464 COSTS_N_INSNS (56), /* DI */
1465 COSTS_N_INSNS (56)}, /* other */
1466 COSTS_N_INSNS (1), /* cost of movsx */
1467 COSTS_N_INSNS (1), /* cost of movzx */
1468 16, /* "large" insn */
1469 6, /* MOVE_RATIO */
1470 2, /* cost for loading QImode using movzbl */
1471 {4, 5, 4}, /* cost of loading integer registers
1472 in QImode, HImode and SImode.
1473 Relative to reg-reg move (2). */
1474 {2, 3, 2}, /* cost of storing integer registers */
1475 2, /* cost of reg,reg fld/fst */
1476 {2, 2, 6}, /* cost of loading fp registers
1477 in SFmode, DFmode and XFmode */
1478 {4, 4, 6}, /* cost of storing fp registers
1479 in SFmode, DFmode and XFmode */
1480 2, /* cost of moving MMX register */
1481 {2, 2}, /* cost of loading MMX registers
1482 in SImode and DImode */
1483 {2, 2}, /* cost of storing MMX registers
1484 in SImode and DImode */
1485 12, /* cost of moving SSE register */
1486 {12, 12, 12}, /* cost of loading SSE registers
1487 in SImode, DImode and TImode */
1488 {2, 2, 8}, /* cost of storing SSE registers
1489 in SImode, DImode and TImode */
1490 10, /* MMX or SSE register to integer */
1491 8, /* size of l1 cache. */
1492 256, /* size of l2 cache. */
1493 64, /* size of prefetch block */
1494 6, /* number of parallel prefetches */
1495 2, /* Branch cost */
1496 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1497 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1498 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1499 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1500 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1501 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1502 pentium4_memcpy,
1503 pentium4_memset,
1504 1, /* scalar_stmt_cost. */
1505 1, /* scalar load_cost. */
1506 1, /* scalar_store_cost. */
1507 1, /* vec_stmt_cost. */
1508 1, /* vec_to_scalar_cost. */
1509 1, /* scalar_to_vec_cost. */
1510 1, /* vec_align_load_cost. */
1511 2, /* vec_unalign_load_cost. */
1512 1, /* vec_store_cost. */
1513 3, /* cond_taken_branch_cost. */
1514 1, /* cond_not_taken_branch_cost. */
1517 static stringop_algs nocona_memcpy[2] = {
1518 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1519 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1520 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1522 static stringop_algs nocona_memset[2] = {
1523 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1524 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1525 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1526 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1528 static const
1529 struct processor_costs nocona_cost = {
1530 COSTS_N_INSNS (1), /* cost of an add instruction */
1531 COSTS_N_INSNS (1), /* cost of a lea instruction */
1532 COSTS_N_INSNS (1), /* variable shift costs */
1533 COSTS_N_INSNS (1), /* constant shift costs */
1534 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1535 COSTS_N_INSNS (10), /* HI */
1536 COSTS_N_INSNS (10), /* SI */
1537 COSTS_N_INSNS (10), /* DI */
1538 COSTS_N_INSNS (10)}, /* other */
1539 0, /* cost of multiply per each bit set */
1540 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1541 COSTS_N_INSNS (66), /* HI */
1542 COSTS_N_INSNS (66), /* SI */
1543 COSTS_N_INSNS (66), /* DI */
1544 COSTS_N_INSNS (66)}, /* other */
1545 COSTS_N_INSNS (1), /* cost of movsx */
1546 COSTS_N_INSNS (1), /* cost of movzx */
1547 16, /* "large" insn */
1548 17, /* MOVE_RATIO */
1549 4, /* cost for loading QImode using movzbl */
1550 {4, 4, 4}, /* cost of loading integer registers
1551 in QImode, HImode and SImode.
1552 Relative to reg-reg move (2). */
1553 {4, 4, 4}, /* cost of storing integer registers */
1554 3, /* cost of reg,reg fld/fst */
1555 {12, 12, 12}, /* cost of loading fp registers
1556 in SFmode, DFmode and XFmode */
1557 {4, 4, 4}, /* cost of storing fp registers
1558 in SFmode, DFmode and XFmode */
1559 6, /* cost of moving MMX register */
1560 {12, 12}, /* cost of loading MMX registers
1561 in SImode and DImode */
1562 {12, 12}, /* cost of storing MMX registers
1563 in SImode and DImode */
1564 6, /* cost of moving SSE register */
1565 {12, 12, 12}, /* cost of loading SSE registers
1566 in SImode, DImode and TImode */
1567 {12, 12, 12}, /* cost of storing SSE registers
1568 in SImode, DImode and TImode */
1569 8, /* MMX or SSE register to integer */
1570 8, /* size of l1 cache. */
1571 1024, /* size of l2 cache. */
1572 64, /* size of prefetch block */
1573 8, /* number of parallel prefetches */
1574 1, /* Branch cost */
1575 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1576 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1577 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1578 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1579 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1580 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1581 nocona_memcpy,
1582 nocona_memset,
1583 1, /* scalar_stmt_cost. */
1584 1, /* scalar load_cost. */
1585 1, /* scalar_store_cost. */
1586 1, /* vec_stmt_cost. */
1587 1, /* vec_to_scalar_cost. */
1588 1, /* scalar_to_vec_cost. */
1589 1, /* vec_align_load_cost. */
1590 2, /* vec_unalign_load_cost. */
1591 1, /* vec_store_cost. */
1592 3, /* cond_taken_branch_cost. */
1593 1, /* cond_not_taken_branch_cost. */
1596 static stringop_algs atom_memcpy[2] = {
1597 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1598 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1599 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1600 static stringop_algs atom_memset[2] = {
1601 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1602 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1603 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1604 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1605 static const
1606 struct processor_costs atom_cost = {
1607 COSTS_N_INSNS (1), /* cost of an add instruction */
1608 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1609 COSTS_N_INSNS (1), /* variable shift costs */
1610 COSTS_N_INSNS (1), /* constant shift costs */
1611 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1612 COSTS_N_INSNS (4), /* HI */
1613 COSTS_N_INSNS (3), /* SI */
1614 COSTS_N_INSNS (4), /* DI */
1615 COSTS_N_INSNS (2)}, /* other */
1616 0, /* cost of multiply per each bit set */
1617 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1618 COSTS_N_INSNS (26), /* HI */
1619 COSTS_N_INSNS (42), /* SI */
1620 COSTS_N_INSNS (74), /* DI */
1621 COSTS_N_INSNS (74)}, /* other */
1622 COSTS_N_INSNS (1), /* cost of movsx */
1623 COSTS_N_INSNS (1), /* cost of movzx */
1624 8, /* "large" insn */
1625 17, /* MOVE_RATIO */
1626 4, /* cost for loading QImode using movzbl */
1627 {4, 4, 4}, /* cost of loading integer registers
1628 in QImode, HImode and SImode.
1629 Relative to reg-reg move (2). */
1630 {4, 4, 4}, /* cost of storing integer registers */
1631 4, /* cost of reg,reg fld/fst */
1632 {12, 12, 12}, /* cost of loading fp registers
1633 in SFmode, DFmode and XFmode */
1634 {6, 6, 8}, /* cost of storing fp registers
1635 in SFmode, DFmode and XFmode */
1636 2, /* cost of moving MMX register */
1637 {8, 8}, /* cost of loading MMX registers
1638 in SImode and DImode */
1639 {8, 8}, /* cost of storing MMX registers
1640 in SImode and DImode */
1641 2, /* cost of moving SSE register */
1642 {8, 8, 8}, /* cost of loading SSE registers
1643 in SImode, DImode and TImode */
1644 {8, 8, 8}, /* cost of storing SSE registers
1645 in SImode, DImode and TImode */
1646 5, /* MMX or SSE register to integer */
1647 32, /* size of l1 cache. */
1648 256, /* size of l2 cache. */
1649 64, /* size of prefetch block */
1650 6, /* number of parallel prefetches */
1651 3, /* Branch cost */
1652 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1653 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1654 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1655 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1656 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1657 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1658 atom_memcpy,
1659 atom_memset,
1660 1, /* scalar_stmt_cost. */
1661 1, /* scalar load_cost. */
1662 1, /* scalar_store_cost. */
1663 1, /* vec_stmt_cost. */
1664 1, /* vec_to_scalar_cost. */
1665 1, /* scalar_to_vec_cost. */
1666 1, /* vec_align_load_cost. */
1667 2, /* vec_unalign_load_cost. */
1668 1, /* vec_store_cost. */
1669 3, /* cond_taken_branch_cost. */
1670 1, /* cond_not_taken_branch_cost. */
1673 static stringop_algs slm_memcpy[2] = {
1674 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1675 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1676 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1677 static stringop_algs slm_memset[2] = {
1678 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1679 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1680 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1681 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1682 static const
1683 struct processor_costs slm_cost = {
1684 COSTS_N_INSNS (1), /* cost of an add instruction */
1685 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1686 COSTS_N_INSNS (1), /* variable shift costs */
1687 COSTS_N_INSNS (1), /* constant shift costs */
1688 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1689 COSTS_N_INSNS (3), /* HI */
1690 COSTS_N_INSNS (3), /* SI */
1691 COSTS_N_INSNS (4), /* DI */
1692 COSTS_N_INSNS (2)}, /* other */
1693 0, /* cost of multiply per each bit set */
1694 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1695 COSTS_N_INSNS (26), /* HI */
1696 COSTS_N_INSNS (42), /* SI */
1697 COSTS_N_INSNS (74), /* DI */
1698 COSTS_N_INSNS (74)}, /* other */
1699 COSTS_N_INSNS (1), /* cost of movsx */
1700 COSTS_N_INSNS (1), /* cost of movzx */
1701 8, /* "large" insn */
1702 17, /* MOVE_RATIO */
1703 4, /* cost for loading QImode using movzbl */
1704 {4, 4, 4}, /* cost of loading integer registers
1705 in QImode, HImode and SImode.
1706 Relative to reg-reg move (2). */
1707 {4, 4, 4}, /* cost of storing integer registers */
1708 4, /* cost of reg,reg fld/fst */
1709 {12, 12, 12}, /* cost of loading fp registers
1710 in SFmode, DFmode and XFmode */
1711 {6, 6, 8}, /* cost of storing fp registers
1712 in SFmode, DFmode and XFmode */
1713 2, /* cost of moving MMX register */
1714 {8, 8}, /* cost of loading MMX registers
1715 in SImode and DImode */
1716 {8, 8}, /* cost of storing MMX registers
1717 in SImode and DImode */
1718 2, /* cost of moving SSE register */
1719 {8, 8, 8}, /* cost of loading SSE registers
1720 in SImode, DImode and TImode */
1721 {8, 8, 8}, /* cost of storing SSE registers
1722 in SImode, DImode and TImode */
1723 5, /* MMX or SSE register to integer */
1724 32, /* size of l1 cache. */
1725 256, /* size of l2 cache. */
1726 64, /* size of prefetch block */
1727 6, /* number of parallel prefetches */
1728 3, /* Branch cost */
1729 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1730 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1731 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1732 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1733 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1734 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1735 slm_memcpy,
1736 slm_memset,
1737 1, /* scalar_stmt_cost. */
1738 1, /* scalar load_cost. */
1739 1, /* scalar_store_cost. */
1740 1, /* vec_stmt_cost. */
1741 1, /* vec_to_scalar_cost. */
1742 1, /* scalar_to_vec_cost. */
1743 1, /* vec_align_load_cost. */
1744 2, /* vec_unalign_load_cost. */
1745 1, /* vec_store_cost. */
1746 3, /* cond_taken_branch_cost. */
1747 1, /* cond_not_taken_branch_cost. */
1750 static stringop_algs intel_memcpy[2] = {
1751 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1752 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1753 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1754 static stringop_algs intel_memset[2] = {
1755 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1756 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1757 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1758 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1759 static const
1760 struct processor_costs intel_cost = {
1761 COSTS_N_INSNS (1), /* cost of an add instruction */
1762 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1763 COSTS_N_INSNS (1), /* variable shift costs */
1764 COSTS_N_INSNS (1), /* constant shift costs */
1765 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1766 COSTS_N_INSNS (3), /* HI */
1767 COSTS_N_INSNS (3), /* SI */
1768 COSTS_N_INSNS (4), /* DI */
1769 COSTS_N_INSNS (2)}, /* other */
1770 0, /* cost of multiply per each bit set */
1771 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1772 COSTS_N_INSNS (26), /* HI */
1773 COSTS_N_INSNS (42), /* SI */
1774 COSTS_N_INSNS (74), /* DI */
1775 COSTS_N_INSNS (74)}, /* other */
1776 COSTS_N_INSNS (1), /* cost of movsx */
1777 COSTS_N_INSNS (1), /* cost of movzx */
1778 8, /* "large" insn */
1779 17, /* MOVE_RATIO */
1780 4, /* cost for loading QImode using movzbl */
1781 {4, 4, 4}, /* cost of loading integer registers
1782 in QImode, HImode and SImode.
1783 Relative to reg-reg move (2). */
1784 {4, 4, 4}, /* cost of storing integer registers */
1785 4, /* cost of reg,reg fld/fst */
1786 {12, 12, 12}, /* cost of loading fp registers
1787 in SFmode, DFmode and XFmode */
1788 {6, 6, 8}, /* cost of storing fp registers
1789 in SFmode, DFmode and XFmode */
1790 2, /* cost of moving MMX register */
1791 {8, 8}, /* cost of loading MMX registers
1792 in SImode and DImode */
1793 {8, 8}, /* cost of storing MMX registers
1794 in SImode and DImode */
1795 2, /* cost of moving SSE register */
1796 {8, 8, 8}, /* cost of loading SSE registers
1797 in SImode, DImode and TImode */
1798 {8, 8, 8}, /* cost of storing SSE registers
1799 in SImode, DImode and TImode */
1800 5, /* MMX or SSE register to integer */
1801 32, /* size of l1 cache. */
1802 256, /* size of l2 cache. */
1803 64, /* size of prefetch block */
1804 6, /* number of parallel prefetches */
1805 3, /* Branch cost */
1806 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1807 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1808 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1809 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1810 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1811 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1812 intel_memcpy,
1813 intel_memset,
1814 1, /* scalar_stmt_cost. */
1815 1, /* scalar load_cost. */
1816 1, /* scalar_store_cost. */
1817 1, /* vec_stmt_cost. */
1818 1, /* vec_to_scalar_cost. */
1819 1, /* scalar_to_vec_cost. */
1820 1, /* vec_align_load_cost. */
1821 2, /* vec_unalign_load_cost. */
1822 1, /* vec_store_cost. */
1823 3, /* cond_taken_branch_cost. */
1824 1, /* cond_not_taken_branch_cost. */
1827 /* Generic should produce code tuned for Core-i7 (and newer chips)
1828 and btver1 (and newer chips). */
1830 static stringop_algs generic_memcpy[2] = {
1831 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1832 {-1, libcall, false}}},
1833 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1834 {-1, libcall, false}}}};
1835 static stringop_algs generic_memset[2] = {
1836 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1837 {-1, libcall, false}}},
1838 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1839 {-1, libcall, false}}}};
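/* A rough reading aid, based on the -mmemcpy-strategy= parsing code further
   below: each stringop_algs entry is a list of {max_size, algorithm, noalign}
   ranges.  A range covers sizes up to max_size, the next range starts at
   max_size + 1, and the final range uses max_size == -1 ("everything
   larger").  Table [0] is used for 32-bit code and table [1] for 64-bit
   code (see the TARGET_64BIT index in ix86_parse_stringop_strategy_string).
   Read this way, generic_memcpy above says: in 64-bit code, use a loop for
   blocks up to 32 bytes, rep-prefixed 8-byte moves up to 8192 bytes, and a
   library call for anything larger; the last field is the noalign flag. */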
1840 static const
1841 struct processor_costs generic_cost = {
1842 COSTS_N_INSNS (1), /* cost of an add instruction */
1843 /* On all chips taken into consideration, lea is 2 cycles or more.  With
1844 this cost, however, our current implementation of synth_mult results in
1845 the use of unnecessary temporary registers, causing regressions on several
1846 SPECfp benchmarks. */
1847 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1848 COSTS_N_INSNS (1), /* variable shift costs */
1849 COSTS_N_INSNS (1), /* constant shift costs */
1850 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1851 COSTS_N_INSNS (4), /* HI */
1852 COSTS_N_INSNS (3), /* SI */
1853 COSTS_N_INSNS (4), /* DI */
1854 COSTS_N_INSNS (2)}, /* other */
1855 0, /* cost of multiply per each bit set */
1856 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1857 COSTS_N_INSNS (26), /* HI */
1858 COSTS_N_INSNS (42), /* SI */
1859 COSTS_N_INSNS (74), /* DI */
1860 COSTS_N_INSNS (74)}, /* other */
1861 COSTS_N_INSNS (1), /* cost of movsx */
1862 COSTS_N_INSNS (1), /* cost of movzx */
1863 8, /* "large" insn */
1864 17, /* MOVE_RATIO */
1865 4, /* cost for loading QImode using movzbl */
1866 {4, 4, 4}, /* cost of loading integer registers
1867 in QImode, HImode and SImode.
1868 Relative to reg-reg move (2). */
1869 {4, 4, 4}, /* cost of storing integer registers */
1870 4, /* cost of reg,reg fld/fst */
1871 {12, 12, 12}, /* cost of loading fp registers
1872 in SFmode, DFmode and XFmode */
1873 {6, 6, 8}, /* cost of storing fp registers
1874 in SFmode, DFmode and XFmode */
1875 2, /* cost of moving MMX register */
1876 {8, 8}, /* cost of loading MMX registers
1877 in SImode and DImode */
1878 {8, 8}, /* cost of storing MMX registers
1879 in SImode and DImode */
1880 2, /* cost of moving SSE register */
1881 {8, 8, 8}, /* cost of loading SSE registers
1882 in SImode, DImode and TImode */
1883 {8, 8, 8}, /* cost of storing SSE registers
1884 in SImode, DImode and TImode */
1885 5, /* MMX or SSE register to integer */
1886 32, /* size of l1 cache. */
1887 512, /* size of l2 cache. */
1888 64, /* size of prefetch block */
1889 6, /* number of parallel prefetches */
1890 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1891 value is increased to the perhaps more appropriate value of 5. */
1892 3, /* Branch cost */
1893 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1894 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1895 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1896 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1897 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1898 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1899 generic_memcpy,
1900 generic_memset,
1901 1, /* scalar_stmt_cost. */
1902 1, /* scalar load_cost. */
1903 1, /* scalar_store_cost. */
1904 1, /* vec_stmt_cost. */
1905 1, /* vec_to_scalar_cost. */
1906 1, /* scalar_to_vec_cost. */
1907 1, /* vec_align_load_cost. */
1908 2, /* vec_unalign_load_cost. */
1909 1, /* vec_store_cost. */
1910 3, /* cond_taken_branch_cost. */
1911 1, /* cond_not_taken_branch_cost. */
1914 /* core_cost should produce code tuned for the Core family of CPUs. */
1915 static stringop_algs core_memcpy[2] = {
1916 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1917 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1918 {-1, libcall, false}}}};
1919 static stringop_algs core_memset[2] = {
1920 {libcall, {{6, loop_1_byte, true},
1921 {24, loop, true},
1922 {8192, rep_prefix_4_byte, true},
1923 {-1, libcall, false}}},
1924 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1925 {-1, libcall, false}}}};
1927 static const
1928 struct processor_costs core_cost = {
1929 COSTS_N_INSNS (1), /* cost of an add instruction */
1930 /* On all chips taken into consideration, lea is 2 cycles or more.  With
1931 this cost, however, our current implementation of synth_mult results in
1932 the use of unnecessary temporary registers, causing regressions on several
1933 SPECfp benchmarks. */
1934 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1935 COSTS_N_INSNS (1), /* variable shift costs */
1936 COSTS_N_INSNS (1), /* constant shift costs */
1937 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1938 COSTS_N_INSNS (4), /* HI */
1939 COSTS_N_INSNS (3), /* SI */
1940 COSTS_N_INSNS (4), /* DI */
1941 COSTS_N_INSNS (2)}, /* other */
1942 0, /* cost of multiply per each bit set */
1943 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1944 COSTS_N_INSNS (26), /* HI */
1945 COSTS_N_INSNS (42), /* SI */
1946 COSTS_N_INSNS (74), /* DI */
1947 COSTS_N_INSNS (74)}, /* other */
1948 COSTS_N_INSNS (1), /* cost of movsx */
1949 COSTS_N_INSNS (1), /* cost of movzx */
1950 8, /* "large" insn */
1951 17, /* MOVE_RATIO */
1952 4, /* cost for loading QImode using movzbl */
1953 {4, 4, 4}, /* cost of loading integer registers
1954 in QImode, HImode and SImode.
1955 Relative to reg-reg move (2). */
1956 {4, 4, 4}, /* cost of storing integer registers */
1957 4, /* cost of reg,reg fld/fst */
1958 {12, 12, 12}, /* cost of loading fp registers
1959 in SFmode, DFmode and XFmode */
1960 {6, 6, 8}, /* cost of storing fp registers
1961 in SFmode, DFmode and XFmode */
1962 2, /* cost of moving MMX register */
1963 {8, 8}, /* cost of loading MMX registers
1964 in SImode and DImode */
1965 {8, 8}, /* cost of storing MMX registers
1966 in SImode and DImode */
1967 2, /* cost of moving SSE register */
1968 {8, 8, 8}, /* cost of loading SSE registers
1969 in SImode, DImode and TImode */
1970 {8, 8, 8}, /* cost of storing SSE registers
1971 in SImode, DImode and TImode */
1972 5, /* MMX or SSE register to integer */
1973 64, /* size of l1 cache. */
1974 512, /* size of l2 cache. */
1975 64, /* size of prefetch block */
1976 6, /* number of parallel prefetches */
1977 /* FIXME: perhaps a more appropriate value is 5. */
1978 3, /* Branch cost */
1979 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1980 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1981 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1982 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1983 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1984 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1985 core_memcpy,
1986 core_memset,
1987 1, /* scalar_stmt_cost. */
1988 1, /* scalar load_cost. */
1989 1, /* scalar_store_cost. */
1990 1, /* vec_stmt_cost. */
1991 1, /* vec_to_scalar_cost. */
1992 1, /* scalar_to_vec_cost. */
1993 1, /* vec_align_load_cost. */
1994 2, /* vec_unalign_load_cost. */
1995 1, /* vec_store_cost. */
1996 3, /* cond_taken_branch_cost. */
1997 1, /* cond_not_taken_branch_cost. */
2001 /* Set by -mtune. */
2002 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2004 /* Set by -mtune or -Os. */
2005 const struct processor_costs *ix86_cost = &pentium_cost;
2007 /* Processor feature/optimization bitmasks. */
2008 #define m_386 (1<<PROCESSOR_I386)
2009 #define m_486 (1<<PROCESSOR_I486)
2010 #define m_PENT (1<<PROCESSOR_PENTIUM)
2011 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2012 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2013 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2014 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2015 #define m_CORE2 (1<<PROCESSOR_CORE2)
2016 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2017 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2018 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2019 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2020 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2021 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2022 #define m_INTEL (1<<PROCESSOR_INTEL)
2024 #define m_GEODE (1<<PROCESSOR_GEODE)
2025 #define m_K6 (1<<PROCESSOR_K6)
2026 #define m_K6_GEODE (m_K6 | m_GEODE)
2027 #define m_K8 (1<<PROCESSOR_K8)
2028 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2029 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2030 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2031 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2032 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2033 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2034 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2035 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2036 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2037 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2038 #define m_BTVER (m_BTVER1 | m_BTVER2)
2039 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2041 #define m_GENERIC (1<<PROCESSOR_GENERIC)
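/* As a rough illustration (the real selectors live in x86-tune.def and are
   not reproduced here): the m_* masks above are what the "selector" field of
   each DEF_TUNE entry expands to, so a tuning meant for, say, all Core chips
   plus generic tuning would use a selector along the lines of
   (m_CORE_ALL | m_GENERIC).  set_ix86_tune_features below then tests each
   selector against 1u << ix86_tune to fill in ix86_tune_features. */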
2043 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2044 #undef DEF_TUNE
2045 #define DEF_TUNE(tune, name, selector) name,
2046 #include "x86-tune.def"
2047 #undef DEF_TUNE
2050 /* Feature tests against the various tunings. */
2051 unsigned char ix86_tune_features[X86_TUNE_LAST];
2053 /* Feature tests against the various tunings used to create ix86_tune_features
2054 based on the processor mask. */
2055 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2056 #undef DEF_TUNE
2057 #define DEF_TUNE(tune, name, selector) selector,
2058 #include "x86-tune.def"
2059 #undef DEF_TUNE
2062 /* Feature tests against the various architecture variations. */
2063 unsigned char ix86_arch_features[X86_ARCH_LAST];
2065 /* Feature tests against the various architecture variations, used to create
2066 ix86_arch_features based on the processor mask. */
2067 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2068 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2069 ~(m_386 | m_486 | m_PENT | m_K6),
2071 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2072 ~m_386,
2074 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2075 ~(m_386 | m_486),
2077 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2078 ~m_386,
2080 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2081 ~m_386,
2084 /* In case the average insn count for single function invocation is
2085 lower than this constant, emit fast (but longer) prologue and
2086 epilogue code. */
2087 #define FAST_PROLOGUE_INSN_COUNT 20
2089 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2090 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2091 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2092 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2094 /* Array of the smallest class containing reg number REGNO, indexed by
2095 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2097 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2099 /* ax, dx, cx, bx */
2100 AREG, DREG, CREG, BREG,
2101 /* si, di, bp, sp */
2102 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2103 /* FP registers */
2104 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2105 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2106 /* arg pointer */
2107 NON_Q_REGS,
2108 /* flags, fpsr, fpcr, frame */
2109 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2110 /* SSE registers */
2111 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2112 SSE_REGS, SSE_REGS,
2113 /* MMX registers */
2114 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2115 MMX_REGS, MMX_REGS,
2116 /* REX registers */
2117 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2118 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2119 /* SSE REX registers */
2120 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2121 SSE_REGS, SSE_REGS,
2122 /* AVX-512 SSE registers */
2123 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2124 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2125 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2126 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2127 /* Mask registers. */
2128 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2129 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2132 /* The "default" register map used in 32bit mode. */
2134 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2136 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2137 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2138 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2139 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2140 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2141 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2142 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2143 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2144 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2145 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2148 /* The "default" register map used in 64bit mode. */
2150 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2152 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2153 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2154 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2155 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2156 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2157 8,9,10,11,12,13,14,15, /* extended integer registers */
2158 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2159 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2160 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2161 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2164 /* Define the register numbers to be used in Dwarf debugging information.
2165 The SVR4 reference port C compiler uses the following register numbers
2166 in its Dwarf output code:
2167 0 for %eax (gcc regno = 0)
2168 1 for %ecx (gcc regno = 2)
2169 2 for %edx (gcc regno = 1)
2170 3 for %ebx (gcc regno = 3)
2171 4 for %esp (gcc regno = 7)
2172 5 for %ebp (gcc regno = 6)
2173 6 for %esi (gcc regno = 4)
2174 7 for %edi (gcc regno = 5)
2175 The following three DWARF register numbers are never generated by
2176 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2177 believes these numbers have these meanings.
2178 8 for %eip (no gcc equivalent)
2179 9 for %eflags (gcc regno = 17)
2180 10 for %trapno (no gcc equivalent)
2181 It is not at all clear how we should number the FP stack registers
2182 for the x86 architecture. If the version of SDB on x86/svr4 were
2183 a bit less brain dead with respect to floating-point then we would
2184 have a precedent to follow with respect to DWARF register numbers
2185 for x86 FP registers, but the SDB on x86/svr4 is so completely
2186 broken with respect to FP registers that it is hardly worth thinking
2187 of it as something to strive for compatibility with.
2188 The version of x86/svr4 SDB I have at the moment does (partially)
2189 seem to believe that DWARF register number 11 is associated with
2190 the x86 register %st(0), but that's about all. Higher DWARF
2191 register numbers don't seem to be associated with anything in
2192 particular, and even for DWARF regno 11, SDB only seems to under-
2193 stand that it should say that a variable lives in %st(0) (when
2194 asked via an `=' command) if we said it was in DWARF regno 11,
2195 but SDB still prints garbage when asked for the value of the
2196 variable in question (via a `/' command).
2197 (Also note that the labels SDB prints for various FP stack regs
2198 when doing an `x' command are all wrong.)
2199 Note that these problems generally don't affect the native SVR4
2200 C compiler because it doesn't allow the use of -O with -g and
2201 because when it is *not* optimizing, it allocates a memory
2202 location for each floating-point variable, and the memory
2203 location is what gets described in the DWARF AT_location
2204 attribute for the variable in question.
2205 Regardless of the severe mental illness of the x86/svr4 SDB, we
2206 do something sensible here and we use the following DWARF
2207 register numbers. Note that these are all stack-top-relative
2208 numbers.
2209 11 for %st(0) (gcc regno = 8)
2210 12 for %st(1) (gcc regno = 9)
2211 13 for %st(2) (gcc regno = 10)
2212 14 for %st(3) (gcc regno = 11)
2213 15 for %st(4) (gcc regno = 12)
2214 16 for %st(5) (gcc regno = 13)
2215 17 for %st(6) (gcc regno = 14)
2216 18 for %st(7) (gcc regno = 15)
2218 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2220 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2221 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2222 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2223 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2224 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2225 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2226 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2227 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2228 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2229 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2232 /* Define parameter passing and return registers. */
2234 static int const x86_64_int_parameter_registers[6] =
2236 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2239 static int const x86_64_ms_abi_int_parameter_registers[4] =
2241 CX_REG, DX_REG, R8_REG, R9_REG
2244 static int const x86_64_int_return_registers[4] =
2246 AX_REG, DX_REG, DI_REG, SI_REG
2249 /* Additional registers that are clobbered by SYSV calls. */
2251 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2253 SI_REG, DI_REG,
2254 XMM6_REG, XMM7_REG,
2255 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2256 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2259 /* Define the structure for the machine field in struct function. */
2261 struct GTY(()) stack_local_entry {
2262 unsigned short mode;
2263 unsigned short n;
2264 rtx rtl;
2265 struct stack_local_entry *next;
2268 /* Structure describing stack frame layout.
2269 Stack grows downward:
2271 [arguments]
2272 <- ARG_POINTER
2273 saved pc
2275 saved static chain if ix86_static_chain_on_stack
2277 saved frame pointer if frame_pointer_needed
2278 <- HARD_FRAME_POINTER
2279 [saved regs]
2280 <- regs_save_offset
2281 [padding0]
2283 [saved SSE regs]
2284 <- sse_regs_save_offset
2285 [padding1] |
2286 | <- FRAME_POINTER
2287 [va_arg registers] |
2289 [frame] |
2291 [padding2] | = to_allocate
2292 <- STACK_POINTER
2294 struct ix86_frame
2296 int nsseregs;
2297 int nregs;
2298 int va_arg_size;
2299 int red_zone_size;
2300 int outgoing_arguments_size;
2302 /* The offsets relative to ARG_POINTER. */
2303 HOST_WIDE_INT frame_pointer_offset;
2304 HOST_WIDE_INT hard_frame_pointer_offset;
2305 HOST_WIDE_INT stack_pointer_offset;
2306 HOST_WIDE_INT hfp_save_offset;
2307 HOST_WIDE_INT reg_save_offset;
2308 HOST_WIDE_INT sse_reg_save_offset;
2310 /* When save_regs_using_mov is set, emit prologue using
2311 move instead of push instructions. */
2312 bool save_regs_using_mov;
2315 /* Which cpu are we scheduling for. */
2316 enum attr_cpu ix86_schedule;
2318 /* Which cpu are we optimizing for. */
2319 enum processor_type ix86_tune;
2321 /* Which instruction set architecture to use. */
2322 enum processor_type ix86_arch;
2324 /* True if processor has SSE prefetch instruction. */
2325 unsigned char x86_prefetch_sse;
2327 /* -mstackrealign option */
2328 static const char ix86_force_align_arg_pointer_string[]
2329 = "force_align_arg_pointer";
2331 static rtx (*ix86_gen_leave) (void);
2332 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2333 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2334 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2335 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2336 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2337 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2338 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2339 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2340 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2341 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2342 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2344 /* Preferred alignment for stack boundary in bits. */
2345 unsigned int ix86_preferred_stack_boundary;
2347 /* Alignment for incoming stack boundary in bits specified at
2348 command line. */
2349 static unsigned int ix86_user_incoming_stack_boundary;
2351 /* Default alignment for incoming stack boundary in bits. */
2352 static unsigned int ix86_default_incoming_stack_boundary;
2354 /* Alignment for incoming stack boundary in bits. */
2355 unsigned int ix86_incoming_stack_boundary;
2357 /* Calling abi specific va_list type nodes. */
2358 static GTY(()) tree sysv_va_list_type_node;
2359 static GTY(()) tree ms_va_list_type_node;
2361 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2362 char internal_label_prefix[16];
2363 int internal_label_prefix_len;
2365 /* Fence to use after loop using movnt. */
2366 tree x86_mfence;
2368 /* Register class used for passing a given 64-bit part of the argument.
2369 These represent classes as documented by the psABI, with the exception
2370 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2371 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
2373 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2374 whenever possible (the upper half contains padding). */
2375 enum x86_64_reg_class
2377 X86_64_NO_CLASS,
2378 X86_64_INTEGER_CLASS,
2379 X86_64_INTEGERSI_CLASS,
2380 X86_64_SSE_CLASS,
2381 X86_64_SSESF_CLASS,
2382 X86_64_SSEDF_CLASS,
2383 X86_64_SSEUP_CLASS,
2384 X86_64_X87_CLASS,
2385 X86_64_X87UP_CLASS,
2386 X86_64_COMPLEX_X87_CLASS,
2387 X86_64_MEMORY_CLASS
2390 #define MAX_CLASSES 8
2392 /* Table of constants used by fldpi, fldln2, etc.... */
2393 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2394 static bool ext_80387_constants_init = 0;
2397 static struct machine_function * ix86_init_machine_status (void);
2398 static rtx ix86_function_value (const_tree, const_tree, bool);
2399 static bool ix86_function_value_regno_p (const unsigned int);
2400 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2401 const_tree);
2402 static rtx ix86_static_chain (const_tree, bool);
2403 static int ix86_function_regparm (const_tree, const_tree);
2404 static void ix86_compute_frame_layout (struct ix86_frame *);
2405 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2406 rtx, rtx, int);
2407 static void ix86_add_new_builtins (HOST_WIDE_INT);
2408 static tree ix86_canonical_va_list_type (tree);
2409 static void predict_jump (int);
2410 static unsigned int split_stack_prologue_scratch_regno (void);
2411 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2413 enum ix86_function_specific_strings
2415 IX86_FUNCTION_SPECIFIC_ARCH,
2416 IX86_FUNCTION_SPECIFIC_TUNE,
2417 IX86_FUNCTION_SPECIFIC_MAX
2420 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2421 const char *, enum fpmath_unit, bool);
2422 static void ix86_function_specific_save (struct cl_target_option *,
2423 struct gcc_options *opts);
2424 static void ix86_function_specific_restore (struct gcc_options *opts,
2425 struct cl_target_option *);
2426 static void ix86_function_specific_print (FILE *, int,
2427 struct cl_target_option *);
2428 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2429 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2430 struct gcc_options *,
2431 struct gcc_options *,
2432 struct gcc_options *);
2433 static bool ix86_can_inline_p (tree, tree);
2434 static void ix86_set_current_function (tree);
2435 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2437 static enum calling_abi ix86_function_abi (const_tree);
2440 #ifndef SUBTARGET32_DEFAULT_CPU
2441 #define SUBTARGET32_DEFAULT_CPU "i386"
2442 #endif
2444 /* Whether -mtune= or -march= were specified */
2445 static int ix86_tune_defaulted;
2446 static int ix86_arch_specified;
2448 /* Vectorization library interface and handlers. */
2449 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2451 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2452 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2454 /* Processor target table, indexed by processor number */
2455 struct ptt
2457 const char *const name; /* processor name */
2458 const struct processor_costs *cost; /* Processor costs */
2459 const int align_loop; /* Default alignments. */
2460 const int align_loop_max_skip;
2461 const int align_jump;
2462 const int align_jump_max_skip;
2463 const int align_func;
2466 /* This table must be in sync with enum processor_type in i386.h. */
2467 static const struct ptt processor_target_table[PROCESSOR_max] =
2469 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2470 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2471 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2472 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2473 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2474 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2475 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2476 {"core2", &core_cost, 16, 10, 16, 10, 16},
2477 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2478 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2479 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2480 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2481 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2482 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2483 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2484 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2485 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2486 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2487 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2488 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2489 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2490 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2491 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2492 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2493 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2496 static bool
2497 gate_insert_vzeroupper (void)
2499 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2502 static unsigned int
2503 rest_of_handle_insert_vzeroupper (void)
2505 int i;
2507 /* vzeroupper instructions are inserted immediately after reload to
2508 account for possible spills from 256bit registers. The pass
2509 reuses mode switching infrastructure by re-running mode insertion
2510 pass, so disable entities that have already been processed. */
2511 for (i = 0; i < MAX_386_ENTITIES; i++)
2512 ix86_optimize_mode_switching[i] = 0;
2514 ix86_optimize_mode_switching[AVX_U128] = 1;
2516 /* Call optimize_mode_switching. */
2517 g->get_passes ()->execute_pass_mode_switching ();
2518 return 0;
2521 namespace {
2523 const pass_data pass_data_insert_vzeroupper =
2525 RTL_PASS, /* type */
2526 "vzeroupper", /* name */
2527 OPTGROUP_NONE, /* optinfo_flags */
2528 true, /* has_gate */
2529 true, /* has_execute */
2530 TV_NONE, /* tv_id */
2531 0, /* properties_required */
2532 0, /* properties_provided */
2533 0, /* properties_destroyed */
2534 0, /* todo_flags_start */
2535 ( TODO_df_finish | TODO_verify_rtl_sharing | 0 ), /* todo_flags_finish */
2538 class pass_insert_vzeroupper : public rtl_opt_pass
2540 public:
2541 pass_insert_vzeroupper(gcc::context *ctxt)
2542 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2545 /* opt_pass methods: */
2546 bool gate () { return gate_insert_vzeroupper (); }
2547 unsigned int execute () { return rest_of_handle_insert_vzeroupper (); }
2549 }; // class pass_insert_vzeroupper
2551 } // anon namespace
2553 rtl_opt_pass *
2554 make_pass_insert_vzeroupper (gcc::context *ctxt)
2556 return new pass_insert_vzeroupper (ctxt);
2559 /* Return true if a red-zone is in use. */
2561 static inline bool
2562 ix86_using_red_zone (void)
2564 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2567 /* Return a string that documents the current -m options. The caller is
2568 responsible for freeing the string. */
2570 static char *
2571 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2572 const char *tune, enum fpmath_unit fpmath,
2573 bool add_nl_p)
2575 struct ix86_target_opts
2577 const char *option; /* option string */
2578 HOST_WIDE_INT mask; /* isa mask options */
2581 /* This table is ordered so that options like -msse4.2 that imply
2582 preceding options are matched first. */
2583 static struct ix86_target_opts isa_opts[] =
2585 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2586 { "-mfma", OPTION_MASK_ISA_FMA },
2587 { "-mxop", OPTION_MASK_ISA_XOP },
2588 { "-mlwp", OPTION_MASK_ISA_LWP },
2589 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2590 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2591 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2592 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2593 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2594 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2595 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2596 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2597 { "-msse3", OPTION_MASK_ISA_SSE3 },
2598 { "-msse2", OPTION_MASK_ISA_SSE2 },
2599 { "-msse", OPTION_MASK_ISA_SSE },
2600 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2601 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2602 { "-mmmx", OPTION_MASK_ISA_MMX },
2603 { "-mabm", OPTION_MASK_ISA_ABM },
2604 { "-mbmi", OPTION_MASK_ISA_BMI },
2605 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2606 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2607 { "-mhle", OPTION_MASK_ISA_HLE },
2608 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2609 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2610 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2611 { "-madx", OPTION_MASK_ISA_ADX },
2612 { "-mtbm", OPTION_MASK_ISA_TBM },
2613 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2614 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2615 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2616 { "-maes", OPTION_MASK_ISA_AES },
2617 { "-msha", OPTION_MASK_ISA_SHA },
2618 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2619 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2620 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2621 { "-mf16c", OPTION_MASK_ISA_F16C },
2622 { "-mrtm", OPTION_MASK_ISA_RTM },
2623 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2624 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2625 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2628 /* Flag options. */
2629 static struct ix86_target_opts flag_opts[] =
2631 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2632 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2633 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2634 { "-m80387", MASK_80387 },
2635 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2636 { "-malign-double", MASK_ALIGN_DOUBLE },
2637 { "-mcld", MASK_CLD },
2638 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2639 { "-mieee-fp", MASK_IEEE_FP },
2640 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2641 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2642 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2643 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2644 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2645 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2646 { "-mno-red-zone", MASK_NO_RED_ZONE },
2647 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2648 { "-mrecip", MASK_RECIP },
2649 { "-mrtd", MASK_RTD },
2650 { "-msseregparm", MASK_SSEREGPARM },
2651 { "-mstack-arg-probe", MASK_STACK_PROBE },
2652 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2653 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2654 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2655 { "-mvzeroupper", MASK_VZEROUPPER },
2656 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2657 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2658 { "-mprefer-avx128", MASK_PREFER_AVX128},
2661 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2663 char isa_other[40];
2664 char target_other[40];
2665 unsigned num = 0;
2666 unsigned i, j;
2667 char *ret;
2668 char *ptr;
2669 size_t len;
2670 size_t line_len;
2671 size_t sep_len;
2672 const char *abi;
2674 memset (opts, '\0', sizeof (opts));
2676 /* Add -march= option. */
2677 if (arch)
2679 opts[num][0] = "-march=";
2680 opts[num++][1] = arch;
2683 /* Add -mtune= option. */
2684 if (tune)
2686 opts[num][0] = "-mtune=";
2687 opts[num++][1] = tune;
2690 /* Add -m32/-m64/-mx32. */
2691 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2693 if ((isa & OPTION_MASK_ABI_64) != 0)
2694 abi = "-m64";
2695 else
2696 abi = "-mx32";
2697 isa &= ~ (OPTION_MASK_ISA_64BIT
2698 | OPTION_MASK_ABI_64
2699 | OPTION_MASK_ABI_X32);
2701 else
2702 abi = "-m32";
2703 opts[num++][0] = abi;
2705 /* Pick out the options in isa options. */
2706 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2708 if ((isa & isa_opts[i].mask) != 0)
2710 opts[num++][0] = isa_opts[i].option;
2711 isa &= ~ isa_opts[i].mask;
2715 if (isa && add_nl_p)
2717 opts[num++][0] = isa_other;
2718 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2719 isa);
2722 /* Add flag options. */
2723 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2725 if ((flags & flag_opts[i].mask) != 0)
2727 opts[num++][0] = flag_opts[i].option;
2728 flags &= ~ flag_opts[i].mask;
2732 if (flags && add_nl_p)
2734 opts[num++][0] = target_other;
2735 sprintf (target_other, "(other flags: %#x)", flags);
2738 /* Add -fpmath= option. */
2739 if (fpmath)
2741 opts[num][0] = "-mfpmath=";
2742 switch ((int) fpmath)
2744 case FPMATH_387:
2745 opts[num++][1] = "387";
2746 break;
2748 case FPMATH_SSE:
2749 opts[num++][1] = "sse";
2750 break;
2752 case FPMATH_387 | FPMATH_SSE:
2753 opts[num++][1] = "sse+387";
2754 break;
2756 default:
2757 gcc_unreachable ();
2761 /* Any options? */
2762 if (num == 0)
2763 return NULL;
2765 gcc_assert (num < ARRAY_SIZE (opts));
2767 /* Size the string. */
2768 len = 0;
2769 sep_len = (add_nl_p) ? 3 : 1;
2770 for (i = 0; i < num; i++)
2772 len += sep_len;
2773 for (j = 0; j < 2; j++)
2774 if (opts[i][j])
2775 len += strlen (opts[i][j]);
2778 /* Build the string. */
2779 ret = ptr = (char *) xmalloc (len);
2780 line_len = 0;
2782 for (i = 0; i < num; i++)
2784 size_t len2[2];
2786 for (j = 0; j < 2; j++)
2787 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2789 if (i != 0)
2791 *ptr++ = ' ';
2792 line_len++;
2794 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2796 *ptr++ = '\\';
2797 *ptr++ = '\n';
2798 line_len = 0;
2802 for (j = 0; j < 2; j++)
2803 if (opts[i][j])
2805 memcpy (ptr, opts[i][j], len2[j]);
2806 ptr += len2[j];
2807 line_len += len2[j];
2811 *ptr = '\0';
2812 gcc_assert (ret + len >= ptr);
2814 return ret;
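/* As a rough illustration of the string built above (not a verbatim output):
   for a 64-bit SSE2 target with SSE math the result would look something like
   "-march=x86-64 -mtune=generic -m64 -msse2 -msse -mmmx -mfxsr -mfpmath=sse",
   i.e. -march=/-mtune= first, then the ABI switch, then ISA options in table
   order, then any flag options, and finally -mfpmath=. */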
2817 /* Return true if profiling code should be emitted before the
2818 prologue; otherwise return false.
2819 Note: for x86 this is the case only when -mfentry ("hotfix") is in use. */
2820 static bool
2821 ix86_profile_before_prologue (void)
2823 return flag_fentry != 0;
2826 /* Function that is callable from the debugger to print the current
2827 options. */
2828 void ATTRIBUTE_UNUSED
2829 ix86_debug_options (void)
2831 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2832 ix86_arch_string, ix86_tune_string,
2833 ix86_fpmath, true);
2835 if (opts)
2837 fprintf (stderr, "%s\n\n", opts);
2838 free (opts);
2840 else
2841 fputs ("<no options>\n\n", stderr);
2843 return;
2846 static const char *stringop_alg_names[] = {
2847 #define DEF_ENUM
2848 #define DEF_ALG(alg, name) #name,
2849 #include "stringop.def"
2850 #undef DEF_ENUM
2851 #undef DEF_ALG
2854 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2855 The string is of the following form (or a comma-separated list of such entries):
2857 strategy_alg:max_size:[align|noalign]
2859 where the full size range for the strategy is either [0, max_size] or
2860 [min_size, max_size], in which min_size is the max_size + 1 of the
2861 preceding range. The last size range must have max_size == -1.
2863 Examples:
2866 -mmemcpy-strategy=libcall:-1:noalign
2868 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2872 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2874 This is to tell the compiler to use the following strategy for memset
2875 1) when the expected size is between [1, 16], use rep_8byte strategy;
2876 2) when the size is between [17, 2048], use vector_loop;
2877 3) when the size is > 2048, use libcall. */
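/* As a sketch of what the parsing below produces, the second example above
   (memset) would override the default table roughly as follows, using the
   {max, alg, noalign} field order of stringop_algs:

     rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
       -> {{16, rep_prefix_8_byte, true},
           {2048, vector_loop, false},
           {-1, libcall, true}}

   assuming "rep_8byte" and "vector_loop" are the names stringop.def assigns
   to those algorithms (the names are matched against stringop_alg_names). */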
2879 struct stringop_size_range
2881 int max;
2882 stringop_alg alg;
2883 bool noalign;
2886 static void
2887 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2889 const struct stringop_algs *default_algs;
2890 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2891 char *curr_range_str, *next_range_str;
2892 int i = 0, n = 0;
2894 if (is_memset)
2895 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2896 else
2897 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2899 curr_range_str = strategy_str;
2903 int maxs;
2904 char alg_name[128];
2905 char align[16];
2906 next_range_str = strchr (curr_range_str, ',');
2907 if (next_range_str)
2908 *next_range_str++ = '\0';
2910 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2911 alg_name, &maxs, align))
2913 error ("wrong arg %s to option %s", curr_range_str,
2914 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2915 return;
2918 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2920 error ("size ranges of option %s should be increasing",
2921 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2922 return;
2925 for (i = 0; i < last_alg; i++)
2926 if (!strcmp (alg_name, stringop_alg_names[i]))
2927 break;
2929 if (i == last_alg)
2931 error ("wrong stringop strategy name %s specified for option %s",
2932 alg_name,
2933 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2934 return;
2937 input_ranges[n].max = maxs;
2938 input_ranges[n].alg = (stringop_alg) i;
2939 if (!strcmp (align, "align"))
2940 input_ranges[n].noalign = false;
2941 else if (!strcmp (align, "noalign"))
2942 input_ranges[n].noalign = true;
2943 else
2945 error ("unknown alignment %s specified for option %s",
2946 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2947 return;
2949 n++;
2950 curr_range_str = next_range_str;
2952 while (curr_range_str);
2954 if (input_ranges[n - 1].max != -1)
2956 error ("the max value for the last size range should be -1"
2957 " for option %s",
2958 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2959 return;
2962 if (n > MAX_STRINGOP_ALGS)
2964 error ("too many size ranges specified in option %s",
2965 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2966 return;
2969 /* Now override the default algs array. */
2970 for (i = 0; i < n; i++)
2972 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2973 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2974 = input_ranges[i].alg;
2975 *const_cast<int *>(&default_algs->size[i].noalign)
2976 = input_ranges[i].noalign;
2981 /* Parse the -mtune-ctrl= option.  When DUMP is true,
2982 print the features that are explicitly set. */
2984 static void
2985 parse_mtune_ctrl_str (bool dump)
2987 if (!ix86_tune_ctrl_string)
2988 return;
2990 char *next_feature_string = NULL;
2991 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
2992 char *orig = curr_feature_string;
2993 int i;
2996 bool clear = false;
2998 next_feature_string = strchr (curr_feature_string, ',');
2999 if (next_feature_string)
3000 *next_feature_string++ = '\0';
3001 if (*curr_feature_string == '^')
3003 curr_feature_string++;
3004 clear = true;
3006 for (i = 0; i < X86_TUNE_LAST; i++)
3008 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3010 ix86_tune_features[i] = !clear;
3011 if (dump)
3012 fprintf (stderr, "Explicitly %s feature %s\n",
3013 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3014 break;
3017 if (i == X86_TUNE_LAST)
3018 error ("Unknown parameter to option -mtune-ctrl: %s",
3019 clear ? curr_feature_string - 1 : curr_feature_string);
3020 curr_feature_string = next_feature_string;
3022 while (curr_feature_string);
3023 free (orig);
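/* As an illustration of the accepted syntax (the two feature names here are
   made up; the real ones come from x86-tune.def via ix86_tune_feature_names):
   a hypothetical -mtune-ctrl=use_gather,^avx256_split_regs would set the
   feature named "use_gather" and clear "avx256_split_regs"; a leading '^'
   clears the named feature instead of setting it. */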
3026 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3027 processor type. */
3029 static void
3030 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3032 unsigned int ix86_tune_mask = 1u << ix86_tune;
3033 int i;
3035 for (i = 0; i < X86_TUNE_LAST; ++i)
3037 if (ix86_tune_no_default)
3038 ix86_tune_features[i] = 0;
3039 else
3040 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3043 if (dump)
3045 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3046 for (i = 0; i < X86_TUNE_LAST; i++)
3047 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3048 ix86_tune_features[i] ? "on" : "off");
3051 parse_mtune_ctrl_str (dump);
3055 /* Override various settings based on options. If MAIN_ARGS_P, the
3056 options are from the command line, otherwise they are from
3057 attributes. */
3059 static void
3060 ix86_option_override_internal (bool main_args_p,
3061 struct gcc_options *opts,
3062 struct gcc_options *opts_set)
3064 int i;
3065 unsigned int ix86_arch_mask;
3066 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3067 const char *prefix;
3068 const char *suffix;
3069 const char *sw;
3071 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3072 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3073 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3074 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3075 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3076 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3077 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3078 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3079 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3080 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3081 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3082 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3083 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3084 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3085 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3086 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3087 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3088 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3089 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3090 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3091 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3092 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3093 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3094 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3095 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3096 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3097 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3098 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3099 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3100 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3101 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3102 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3103 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3104 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3105 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3106 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3107 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3108 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3109 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3110 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3111 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3112 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3113 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3114 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3115 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3116 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3118 #define PTA_CORE2 \
3119 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3120 | PTA_CX16 | PTA_FXSR)
3121 #define PTA_NEHALEM \
3122 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3123 #define PTA_WESTMERE \
3124 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3125 #define PTA_SANDYBRIDGE \
3126 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3127 #define PTA_IVYBRIDGE \
3128 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3129 #define PTA_HASWELL \
3130 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3131 | PTA_FMA | PTA_MOVBE | PTA_RTM | PTA_HLE)
3132 #define PTA_BROADWELL \
3133 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3134 #define PTA_BONNELL \
3135 (PTA_CORE2 | PTA_MOVBE)
3136 #define PTA_SILVERMONT \
3137 (PTA_WESTMERE | PTA_MOVBE)
3139 /* If this reaches 64, we need to widen the struct pta flags below. */
3141 static struct pta
3143 const char *const name; /* processor name or nickname. */
3144 const enum processor_type processor;
3145 const enum attr_cpu schedule;
3146 const unsigned HOST_WIDE_INT flags;
3148 const processor_alias_table[] =
3150 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3151 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3152 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3153 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3154 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3155 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3156 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3157 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3158 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3159 PTA_MMX | PTA_SSE | PTA_FXSR},
3160 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3161 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3162 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3163 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3164 PTA_MMX | PTA_SSE | PTA_FXSR},
3165 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3166 PTA_MMX | PTA_SSE | PTA_FXSR},
3167 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3168 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3169 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3170 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3171 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3172 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3173 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3174 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3175 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3176 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3177 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3178 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3179 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3180 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3181 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3182 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3183 PTA_SANDYBRIDGE},
3184 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3185 PTA_SANDYBRIDGE},
3186 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3187 PTA_IVYBRIDGE},
3188 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3189 PTA_IVYBRIDGE},
3190 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3191 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3192 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3193 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3194 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3195 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3196 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3197 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3198 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3199 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3200 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3201 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3202 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3203 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3204 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3205 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3206 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3207 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3208 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3209 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3210 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3211 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3212 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3213 {"x86-64", PROCESSOR_K8, CPU_K8,
3214 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3215 {"k8", PROCESSOR_K8, CPU_K8,
3216 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3217 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3218 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3219 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3220 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3221 {"opteron", PROCESSOR_K8, CPU_K8,
3222 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3223 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3224 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3225 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3226 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3227 {"athlon64", PROCESSOR_K8, CPU_K8,
3228 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3229 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3230 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3231 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3232 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3233 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3234 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3235 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3236 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3237 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3238 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3239 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3240 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3241 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3242 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3243 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3244 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3245 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3246 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3247 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3248 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3249 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3250 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3251 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3252 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3253 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3254 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3255 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3256 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3257 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3258 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3259 | PTA_XSAVEOPT | PTA_FSGSBASE},
3260 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3261 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3262 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3263 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3264 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3265 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3266 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE},
3267 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3268 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3269 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3270 | PTA_FXSR | PTA_XSAVE},
3271 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3272 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3273 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3274 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3275 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3276 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3278 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3279 PTA_64BIT
3280 | PTA_HLE /* flags are only used for -march switch. */ },
3283 /* -mrecip options. */
3284 static struct
3286 const char *string; /* option name */
3287 unsigned int mask; /* mask bits to set */
3289 const recip_options[] =
3291 { "all", RECIP_MASK_ALL },
3292 { "none", RECIP_MASK_NONE },
3293 { "div", RECIP_MASK_DIV },
3294 { "sqrt", RECIP_MASK_SQRT },
3295 { "vec-div", RECIP_MASK_VEC_DIV },
3296 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3299 int const pta_size = ARRAY_SIZE (processor_alias_table);
3301 /* Set up prefix/suffix so the error messages refer to either the command
3302 line argument, or the attribute(target). */
3303 if (main_args_p)
3305 prefix = "-m";
3306 suffix = "";
3307 sw = "switch";
3309 else
3311 prefix = "option(\"";
3312 suffix = "\")";
3313 sw = "attribute";
3316 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3317 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3318 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3319 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3320 #ifdef TARGET_BI_ARCH
3321 else
3323 #if TARGET_BI_ARCH == 1
3324 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3325 is on and OPTION_MASK_ABI_X32 is off. We turn off
3326 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3327 -mx32. */
3328 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3329 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3330 #else
3331 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3332 on and OPTION_MASK_ABI_64 is off. We turn off
3333 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3334 -m64. */
3335 if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3336 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3337 #endif
3339 #endif
3341 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3343 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3344 OPTION_MASK_ABI_64 for TARGET_X32. */
3345 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3346 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3348 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3349 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3350 | OPTION_MASK_ABI_X32
3351 | OPTION_MASK_ABI_64);
3352 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3354 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3355 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3356 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3357 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3360 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3361 SUBTARGET_OVERRIDE_OPTIONS;
3362 #endif
3364 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3365 SUBSUBTARGET_OVERRIDE_OPTIONS;
3366 #endif
3368 /* -fPIC is the default for x86_64. */
3369 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3370 opts->x_flag_pic = 2;
3372 /* Need to check -mtune=generic first. */
3373 if (opts->x_ix86_tune_string)
3375 /* As special support for cross compilers we read -mtune=native
3376 as -mtune=generic. With native compilers we won't see the
3377 -mtune=native, as it was changed by the driver. */
3378 if (!strcmp (opts->x_ix86_tune_string, "native"))
3380 opts->x_ix86_tune_string = "generic";
3382 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3383 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3384 "%stune=k8%s or %stune=generic%s instead as appropriate",
3385 prefix, suffix, prefix, suffix, prefix, suffix);
3387 else
3389 if (opts->x_ix86_arch_string)
3390 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3391 if (!opts->x_ix86_tune_string)
3393 opts->x_ix86_tune_string
3394 = processor_target_table[TARGET_CPU_DEFAULT].name;
3395 ix86_tune_defaulted = 1;
3398 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3399 or defaulted. We need to use a sensible tune option. */
3400 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3402 opts->x_ix86_tune_string = "generic";
3406 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3407 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3409 /* rep; movq isn't available in 32-bit code. */
3410 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3411 opts->x_ix86_stringop_alg = no_stringop;
3414 if (!opts->x_ix86_arch_string)
3415 opts->x_ix86_arch_string
3416 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3417 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3418 else
3419 ix86_arch_specified = 1;
3421 if (opts_set->x_ix86_pmode)
3423 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3424 && opts->x_ix86_pmode == PMODE_SI)
3425 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3426 && opts->x_ix86_pmode == PMODE_DI))
3427 error ("address mode %qs not supported in the %s bit mode",
3428 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3429 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3431 else
3432 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3433 ? PMODE_DI : PMODE_SI;
3435 if (!opts_set->x_ix86_abi)
3436 opts->x_ix86_abi = DEFAULT_ABI;
3438 /* For targets using the MS ABI, enable ms-extensions if not
3439 explicitly turned off. For non-MS ABIs we turn this
3440 option off. */
3441 if (!opts_set->x_flag_ms_extensions)
3442 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3444 if (opts_set->x_ix86_cmodel)
3446 switch (opts->x_ix86_cmodel)
3448 case CM_SMALL:
3449 case CM_SMALL_PIC:
3450 if (opts->x_flag_pic)
3451 opts->x_ix86_cmodel = CM_SMALL_PIC;
3452 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3453 error ("code model %qs not supported in the %s bit mode",
3454 "small", "32");
3455 break;
3457 case CM_MEDIUM:
3458 case CM_MEDIUM_PIC:
3459 if (opts->x_flag_pic)
3460 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3461 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3462 error ("code model %qs not supported in the %s bit mode",
3463 "medium", "32");
3464 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3465 error ("code model %qs not supported in x32 mode",
3466 "medium");
3467 break;
3469 case CM_LARGE:
3470 case CM_LARGE_PIC:
3471 if (opts->x_flag_pic)
3472 opts->x_ix86_cmodel = CM_LARGE_PIC;
3473 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3474 error ("code model %qs not supported in the %s bit mode",
3475 "large", "32");
3476 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3477 error ("code model %qs not supported in x32 mode",
3478 "large");
3479 break;
3481 case CM_32:
3482 if (opts->x_flag_pic)
3483 error ("code model %s does not support PIC mode", "32");
3484 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3485 error ("code model %qs not supported in the %s bit mode",
3486 "32", "64");
3487 break;
3489 case CM_KERNEL:
3490 if (opts->x_flag_pic)
3492 error ("code model %s does not support PIC mode", "kernel");
3493 opts->x_ix86_cmodel = CM_32;
3495 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3496 error ("code model %qs not supported in the %s bit mode",
3497 "kernel", "32");
3498 break;
3500 default:
3501 gcc_unreachable ();
3504 else
3506 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3507 use of rip-relative addressing. This eliminates fixups that
3508 would otherwise be needed if this object is to be placed in a
3509 DLL, and is essentially just as efficient as direct addressing. */
3510 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3511 && (TARGET_RDOS || TARGET_PECOFF))
3512 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3513 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3514 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3515 else
3516 opts->x_ix86_cmodel = CM_32;
3518 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3520 error ("-masm=intel not supported in this configuration");
3521 opts->x_ix86_asm_dialect = ASM_ATT;
3523 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3524 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3525 sorry ("%i-bit mode not compiled in",
3526 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3528 for (i = 0; i < pta_size; i++)
3529 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3531 ix86_schedule = processor_alias_table[i].schedule;
3532 ix86_arch = processor_alias_table[i].processor;
3533 /* Default cpu tuning to the architecture. */
3534 ix86_tune = ix86_arch;
3536 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3537 && !(processor_alias_table[i].flags & PTA_64BIT))
3538 error ("CPU you selected does not support x86-64 "
3539 "instruction set");
3541 if (processor_alias_table[i].flags & PTA_MMX
3542 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3543 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3544 if (processor_alias_table[i].flags & PTA_3DNOW
3545 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3546 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3547 if (processor_alias_table[i].flags & PTA_3DNOW_A
3548 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3549 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3550 if (processor_alias_table[i].flags & PTA_SSE
3551 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3552 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3553 if (processor_alias_table[i].flags & PTA_SSE2
3554 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3555 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3556 if (processor_alias_table[i].flags & PTA_SSE3
3557 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3558 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3559 if (processor_alias_table[i].flags & PTA_SSSE3
3560 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3561 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3562 if (processor_alias_table[i].flags & PTA_SSE4_1
3563 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3564 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3565 if (processor_alias_table[i].flags & PTA_SSE4_2
3566 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3567 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3568 if (processor_alias_table[i].flags & PTA_AVX
3569 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3570 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3571 if (processor_alias_table[i].flags & PTA_AVX2
3572 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3573 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3574 if (processor_alias_table[i].flags & PTA_FMA
3575 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3576 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3577 if (processor_alias_table[i].flags & PTA_SSE4A
3578 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3579 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3580 if (processor_alias_table[i].flags & PTA_FMA4
3581 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3582 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3583 if (processor_alias_table[i].flags & PTA_XOP
3584 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3585 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3586 if (processor_alias_table[i].flags & PTA_LWP
3587 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3588 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3589 if (processor_alias_table[i].flags & PTA_ABM
3590 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3591 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3592 if (processor_alias_table[i].flags & PTA_BMI
3593 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3594 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3595 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3596 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3597 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3598 if (processor_alias_table[i].flags & PTA_TBM
3599 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3600 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3601 if (processor_alias_table[i].flags & PTA_BMI2
3602 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3603 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3604 if (processor_alias_table[i].flags & PTA_CX16
3605 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3606 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3607 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3608 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3609 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3610 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3611 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3612 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3613 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3614 if (processor_alias_table[i].flags & PTA_MOVBE
3615 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3616 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3617 if (processor_alias_table[i].flags & PTA_AES
3618 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3619 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3620 if (processor_alias_table[i].flags & PTA_SHA
3621 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3622 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3623 if (processor_alias_table[i].flags & PTA_PCLMUL
3624 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3625 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3626 if (processor_alias_table[i].flags & PTA_FSGSBASE
3627 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3628 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3629 if (processor_alias_table[i].flags & PTA_RDRND
3630 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3631 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3632 if (processor_alias_table[i].flags & PTA_F16C
3633 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3634 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3635 if (processor_alias_table[i].flags & PTA_RTM
3636 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3637 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3638 if (processor_alias_table[i].flags & PTA_HLE
3639 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3640 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3641 if (processor_alias_table[i].flags & PTA_PRFCHW
3642 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3643 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3644 if (processor_alias_table[i].flags & PTA_RDSEED
3645 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3646 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3647 if (processor_alias_table[i].flags & PTA_ADX
3648 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3649 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3650 if (processor_alias_table[i].flags & PTA_FXSR
3651 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3652 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3653 if (processor_alias_table[i].flags & PTA_XSAVE
3654 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3655 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3656 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3657 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3658 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3659 if (processor_alias_table[i].flags & PTA_AVX512F
3660 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3661 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3662 if (processor_alias_table[i].flags & PTA_AVX512ER
3663 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3664 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3665 if (processor_alias_table[i].flags & PTA_AVX512PF
3666 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3667 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3668 if (processor_alias_table[i].flags & PTA_AVX512CD
3669 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3670 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3671 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3672 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3673 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3674 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3675 x86_prefetch_sse = true;
3677 break;
3680 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3681 error ("generic CPU can be used only for %stune=%s %s",
3682 prefix, suffix, sw);
3683 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3684 error ("intel CPU can be used only for %stune=%s %s",
3685 prefix, suffix, sw);
3686 else if (i == pta_size)
3687 error ("bad value (%s) for %sarch=%s %s",
3688 opts->x_ix86_arch_string, prefix, suffix, sw);
3690 ix86_arch_mask = 1u << ix86_arch;
3691 for (i = 0; i < X86_ARCH_LAST; ++i)
3692 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3694 for (i = 0; i < pta_size; i++)
3695 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3697 ix86_schedule = processor_alias_table[i].schedule;
3698 ix86_tune = processor_alias_table[i].processor;
3699 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3701 if (!(processor_alias_table[i].flags & PTA_64BIT))
3703 if (ix86_tune_defaulted)
3705 opts->x_ix86_tune_string = "x86-64";
3706 for (i = 0; i < pta_size; i++)
3707 if (! strcmp (opts->x_ix86_tune_string,
3708 processor_alias_table[i].name))
3709 break;
3710 ix86_schedule = processor_alias_table[i].schedule;
3711 ix86_tune = processor_alias_table[i].processor;
3713 else
3714 error ("CPU you selected does not support x86-64 "
3715 "instruction set");
3718 /* Intel CPUs have always interpreted SSE prefetch instructions as
3719 NOPs; so, we can enable SSE prefetch instructions even when
3720 -mtune (rather than -march) points us to a processor that has them.
3721 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3722 higher processors. */
3723 if (TARGET_CMOV
3724 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3725 x86_prefetch_sse = true;
3726 break;
3729 if (ix86_tune_specified && i == pta_size)
3730 error ("bad value (%s) for %stune=%s %s",
3731 opts->x_ix86_tune_string, prefix, suffix, sw);
3733 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3735 #ifndef USE_IX86_FRAME_POINTER
3736 #define USE_IX86_FRAME_POINTER 0
3737 #endif
3739 #ifndef USE_X86_64_FRAME_POINTER
3740 #define USE_X86_64_FRAME_POINTER 0
3741 #endif
3743 /* Set the default values for switches whose default depends on TARGET_64BIT
3744 in case they weren't overwritten by command line options. */
3745 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3747 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3748 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3749 if (opts->x_flag_asynchronous_unwind_tables
3750 && !opts_set->x_flag_unwind_tables
3751 && TARGET_64BIT_MS_ABI)
3752 opts->x_flag_unwind_tables = 1;
3753 if (opts->x_flag_asynchronous_unwind_tables == 2)
3754 opts->x_flag_unwind_tables
3755 = opts->x_flag_asynchronous_unwind_tables = 1;
3756 if (opts->x_flag_pcc_struct_return == 2)
3757 opts->x_flag_pcc_struct_return = 0;
3759 else
3761 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3762 opts->x_flag_omit_frame_pointer
3763 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3764 if (opts->x_flag_asynchronous_unwind_tables == 2)
3765 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3766 if (opts->x_flag_pcc_struct_return == 2)
3767 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3770 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3771 if (opts->x_optimize_size)
3772 ix86_cost = &ix86_size_cost;
3773 else
3774 ix86_cost = ix86_tune_cost;
3776 /* Arrange to set up i386_stack_locals for all functions. */
3777 init_machine_status = ix86_init_machine_status;
3779 /* Validate -mregparm= value. */
3780 if (opts_set->x_ix86_regparm)
3782 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3783 warning (0, "-mregparm is ignored in 64-bit mode");
3784 if (opts->x_ix86_regparm > REGPARM_MAX)
3786 error ("-mregparm=%d is not between 0 and %d",
3787 opts->x_ix86_regparm, REGPARM_MAX);
3788 opts->x_ix86_regparm = 0;
3791 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3792 opts->x_ix86_regparm = REGPARM_MAX;
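/* For example, -mregparm=3 makes the 32-bit calling convention pass the
   first three integral arguments in registers; values above REGPARM_MAX
   (3 for 32-bit targets) are rejected above, while in 64-bit mode the
   register-passing convention is fixed and REGPARM_MAX is forced
   unconditionally.  */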
3794 /* Default align_* from the processor table. */
3795 if (opts->x_align_loops == 0)
3797 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3798 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3800 if (opts->x_align_jumps == 0)
3802 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3803 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3805 if (opts->x_align_functions == 0)
3807 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3810 /* Provide default for -mbranch-cost= value. */
3811 if (!opts_set->x_ix86_branch_cost)
3812 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3814 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3816 opts->x_target_flags
3817 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3819 /* Enable the SSE and MMX builtins by default. Do allow the user to
3820 explicitly disable any of these. In particular, disabling SSE and
3821 MMX for kernel code is extremely useful. */
3822 if (!ix86_arch_specified)
3823 opts->x_ix86_isa_flags
3824 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3825 | TARGET_SUBTARGET64_ISA_DEFAULT)
3826 & ~opts->x_ix86_isa_flags_explicit);
3828 if (TARGET_RTD_P (opts->x_target_flags))
3829 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3831 else
3833 opts->x_target_flags
3834 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3836 if (!ix86_arch_specified)
3837 opts->x_ix86_isa_flags
3838 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3840 /* The i386 ABI does not specify a red zone. It still makes sense to use one
3841 when the programmer takes care to keep the stack from being destroyed. */
3842 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3843 opts->x_target_flags |= MASK_NO_RED_ZONE;
3846 /* Keep nonleaf frame pointers. */
3847 if (opts->x_flag_omit_frame_pointer)
3848 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3849 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3850 opts->x_flag_omit_frame_pointer = 1;
3852 /* If we're doing fast math, we don't care about comparison order
3853 wrt NaNs. This lets us use a shorter comparison sequence. */
3854 if (opts->x_flag_finite_math_only)
3855 opts->x_target_flags &= ~MASK_IEEE_FP;
3857 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3858 since the insns won't need emulation. */
3859 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3860 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3862 /* Likewise, if the target doesn't have a 387, or we've specified
3863 software floating point, don't use 387 inline intrinsics. */
3864 if (!TARGET_80387_P (opts->x_target_flags))
3865 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3867 /* Turn on MMX builtins for -msse. */
3868 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3869 opts->x_ix86_isa_flags
3870 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3872 /* Enable SSE prefetch. */
3873 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3874 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3875 x86_prefetch_sse = true;
3877 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3878 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3879 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3880 opts->x_ix86_isa_flags
3881 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3883 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3884 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3885 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3886 opts->x_ix86_isa_flags
3887 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3889 /* Enable lzcnt instruction for -mabm. */
3890 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3891 opts->x_ix86_isa_flags
3892 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3894 /* Validate -mpreferred-stack-boundary= value or default it to
3895 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3896 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3897 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3899 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3900 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3901 int max = (TARGET_SEH ? 4 : 12);
3903 if (opts->x_ix86_preferred_stack_boundary_arg < min
3904 || opts->x_ix86_preferred_stack_boundary_arg > max)
3906 if (min == max)
3907 error ("-mpreferred-stack-boundary is not supported "
3908 "for this target");
3909 else
3910 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3911 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3913 else
3914 ix86_preferred_stack_boundary
3915 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
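/* The option argument is the log2 of the alignment in bytes: for instance,
   a (hypothetical) -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. 16-byte stack alignment, which
   is also the minimum accepted above for 64-bit targets with SSE enabled.  */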
3918 /* Set the default value for -mstackrealign. */
3919 if (opts->x_ix86_force_align_arg_pointer == -1)
3920 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3922 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3924 /* Validate -mincoming-stack-boundary= value or default it to
3925 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3926 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3927 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3929 if (opts->x_ix86_incoming_stack_boundary_arg
3930 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3931 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3932 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3933 opts->x_ix86_incoming_stack_boundary_arg,
3934 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3935 else
3937 ix86_user_incoming_stack_boundary
3938 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3939 ix86_incoming_stack_boundary
3940 = ix86_user_incoming_stack_boundary;
3944 /* Accept -msseregparm only if at least SSE support is enabled. */
3945 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3946 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
3947 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3949 if (opts_set->x_ix86_fpmath)
3951 if (opts->x_ix86_fpmath & FPMATH_SSE)
3953 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
3955 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3956 opts->x_ix86_fpmath = FPMATH_387;
3958 else if ((opts->x_ix86_fpmath & FPMATH_387)
3959 && !TARGET_80387_P (opts->x_target_flags))
3961 warning (0, "387 instruction set disabled, using SSE arithmetics");
3962 opts->x_ix86_fpmath = FPMATH_SSE;
3966 /* For all chips supporting SSE2, -mfpmath=sse performs better than
3967 -mfpmath=387. The latter is however the default on many targets, since
3968 the extra 80-bit precision of temporaries is considered part of the ABI.
3969 Overwrite the default at least for -ffast-math.
3970 TODO: -mfpmath=both seems to produce similarly performing code with
3971 slightly smaller binaries. It is however not clear whether register
3972 allocation is ready for this setting.
3973 Also, -mfpmath=387 codegen is overall considerably more compact (about
3974 4-5%) than SSE codegen. We may switch to 387 with -ffast-math for
3975 size-optimized functions. */
3976 else if (fast_math_flags_set_p (&global_options)
3977 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
3978 opts->x_ix86_fpmath = FPMATH_SSE;
3979 else
3980 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
3982 /* If the i387 is disabled, then do not return values in it. */
3983 if (!TARGET_80387_P (opts->x_target_flags))
3984 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
3986 /* Use an external vectorized library when vectorizing intrinsics. */
3987 if (opts_set->x_ix86_veclibabi_type)
3988 switch (opts->x_ix86_veclibabi_type)
3990 case ix86_veclibabi_type_svml:
3991 ix86_veclib_handler = ix86_veclibabi_svml;
3992 break;
3994 case ix86_veclibabi_type_acml:
3995 ix86_veclib_handler = ix86_veclibabi_acml;
3996 break;
3998 default:
3999 gcc_unreachable ();
4002 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4003 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4004 && !opts->x_optimize_size)
4005 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4007 /* If stack probes are required, the space used for large function
4008 arguments on the stack must also be probed, so enable
4009 -maccumulate-outgoing-args so this happens in the prologue. */
4010 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4011 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4013 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4014 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4015 "for correctness", prefix, suffix);
4016 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4019 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4021 char *p;
4022 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4023 p = strchr (internal_label_prefix, 'X');
4024 internal_label_prefix_len = p - internal_label_prefix;
4025 *p = '\0';
4028 /* When a scheduling description is not available, disable the scheduler pass
4029 so it won't slow down compilation and make x87 code slower. */
4030 if (!TARGET_SCHEDULE)
4031 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4033 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4034 ix86_tune_cost->simultaneous_prefetches,
4035 opts->x_param_values,
4036 opts_set->x_param_values);
4037 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4038 ix86_tune_cost->prefetch_block,
4039 opts->x_param_values,
4040 opts_set->x_param_values);
4041 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4042 ix86_tune_cost->l1_cache_size,
4043 opts->x_param_values,
4044 opts_set->x_param_values);
4045 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4046 ix86_tune_cost->l2_cache_size,
4047 opts->x_param_values,
4048 opts_set->x_param_values);
4050 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4051 if (opts->x_flag_prefetch_loop_arrays < 0
4052 && HAVE_prefetch
4053 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4054 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4055 opts->x_flag_prefetch_loop_arrays = 1;
4057 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4058 can be optimized to ap = __builtin_next_arg (0). */
4059 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4060 targetm.expand_builtin_va_start = NULL;
4062 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4064 ix86_gen_leave = gen_leave_rex64;
4065 if (Pmode == DImode)
4067 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4068 ix86_gen_tls_local_dynamic_base_64
4069 = gen_tls_local_dynamic_base_64_di;
4071 else
4073 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4074 ix86_gen_tls_local_dynamic_base_64
4075 = gen_tls_local_dynamic_base_64_si;
4078 else
4079 ix86_gen_leave = gen_leave;
4081 if (Pmode == DImode)
4083 ix86_gen_add3 = gen_adddi3;
4084 ix86_gen_sub3 = gen_subdi3;
4085 ix86_gen_sub3_carry = gen_subdi3_carry;
4086 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4087 ix86_gen_andsp = gen_anddi3;
4088 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4089 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4090 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4091 ix86_gen_monitor = gen_sse3_monitor_di;
4093 else
4095 ix86_gen_add3 = gen_addsi3;
4096 ix86_gen_sub3 = gen_subsi3;
4097 ix86_gen_sub3_carry = gen_subsi3_carry;
4098 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4099 ix86_gen_andsp = gen_andsi3;
4100 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4101 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4102 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4103 ix86_gen_monitor = gen_sse3_monitor_si;
4106 #ifdef USE_IX86_CLD
4107 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4108 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4109 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4110 #endif
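/* -mfentry emits the profiling call (to __fentry__) before the prologue
   instead of the traditional mcount call after it; as the checks below
   reflect, that is not supported for 32-bit PIC code and is mandatory when
   targeting SEH.  */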
4112 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4114 if (opts->x_flag_fentry > 0)
4115 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4116 "with -fpic");
4117 opts->x_flag_fentry = 0;
4119 else if (TARGET_SEH)
4121 if (opts->x_flag_fentry == 0)
4122 sorry ("-mno-fentry isn%'t compatible with SEH");
4123 opts->x_flag_fentry = 1;
4125 else if (opts->x_flag_fentry < 0)
4127 #if defined(PROFILE_BEFORE_PROLOGUE)
4128 opts->x_flag_fentry = 1;
4129 #else
4130 opts->x_flag_fentry = 0;
4131 #endif
4134 /* When not optimizing for size, enable vzeroupper optimization for
4135 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4136 AVX unaligned load/store. */
4137 if (!opts->x_optimize_size)
4139 if (flag_expensive_optimizations
4140 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4141 opts->x_target_flags |= MASK_VZEROUPPER;
4142 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4143 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4144 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4145 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4146 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4147 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4148 /* Enable 128-bit AVX instruction generation
4149 for the auto-vectorizer. */
4150 if (TARGET_AVX128_OPTIMAL
4151 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4152 opts->x_target_flags |= MASK_PREFER_AVX128;
4155 if (opts->x_ix86_recip_name)
4157 char *p = ASTRDUP (opts->x_ix86_recip_name);
4158 char *q;
4159 unsigned int mask, i;
4160 bool invert;
4162 while ((q = strtok (p, ",")) != NULL)
4164 p = NULL;
4165 if (*q == '!')
4167 invert = true;
4168 q++;
4170 else
4171 invert = false;
4173 if (!strcmp (q, "default"))
4174 mask = RECIP_MASK_ALL;
4175 else
4177 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4178 if (!strcmp (q, recip_options[i].string))
4180 mask = recip_options[i].mask;
4181 break;
4184 if (i == ARRAY_SIZE (recip_options))
4186 error ("unknown option for -mrecip=%s", q);
4187 invert = false;
4188 mask = RECIP_MASK_NONE;
4192 opts->x_recip_mask_explicit |= mask;
4193 if (invert)
4194 opts->x_recip_mask &= ~mask;
4195 else
4196 opts->x_recip_mask |= mask;
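/* So, for example, a (hypothetical) -mrecip=all,!sqrt first sets every bit
   of RECIP_MASK_ALL and then clears RECIP_MASK_SQRT again: each
   comma-separated token is applied in order and a leading '!' inverts it,
   with "default" accepted as a synonym for "all".  */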
4200 if (TARGET_RECIP_P (opts->x_target_flags))
4201 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4202 else if (opts_set->x_target_flags & MASK_RECIP)
4203 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4205 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4206 for 64-bit Bionic. */
4207 if (TARGET_HAS_BIONIC
4208 && !(opts_set->x_target_flags
4209 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4210 opts->x_target_flags |= (TARGET_64BIT
4211 ? MASK_LONG_DOUBLE_128
4212 : MASK_LONG_DOUBLE_64);
4214 /* Only one of them can be active. */
4215 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4216 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4218 /* Save the initial options in case the user does function specific
4219 options. */
4220 if (main_args_p)
4221 target_option_default_node = target_option_current_node
4222 = build_target_option_node (opts);
4224 /* Handle stack protector */
4225 if (!opts_set->x_ix86_stack_protector_guard)
4226 opts->x_ix86_stack_protector_guard
4227 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
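/* With SSP_TLS the stack-protector canary is loaded from a thread-local
   slot, while SSP_GLOBAL makes the generated code reference the global
   __stack_chk_guard symbol instead, which is the arrangement Bionic-based
   targets default to here.  */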
4229 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4230 if (opts->x_ix86_tune_memcpy_strategy)
4232 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4233 ix86_parse_stringop_strategy_string (str, false);
4234 free (str);
4237 if (opts->x_ix86_tune_memset_strategy)
4239 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4240 ix86_parse_stringop_strategy_string (str, true);
4241 free (str);
4245 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4247 static void
4248 ix86_option_override (void)
4250 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4251 static struct register_pass_info insert_vzeroupper_info
4252 = { pass_insert_vzeroupper, "reload",
4253 1, PASS_POS_INSERT_AFTER
4256 ix86_option_override_internal (true, &global_options, &global_options_set);
4259 /* This needs to be done at startup. It's convenient to do it here. */
4260 register_pass (&insert_vzeroupper_info);
4263 /* Update register usage after having seen the compiler flags. */
4265 static void
4266 ix86_conditional_register_usage (void)
4268 int i, c_mask;
4269 unsigned int j;
4271 /* The PIC register, if it exists, is fixed. */
4272 j = PIC_OFFSET_TABLE_REGNUM;
4273 if (j != INVALID_REGNUM)
4274 fixed_regs[j] = call_used_regs[j] = 1;
4276 /* For 32-bit targets, squash the REX registers. */
4277 if (! TARGET_64BIT)
4279 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4280 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4281 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4282 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4283 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4284 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4287 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4288 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4289 : TARGET_64BIT ? (1 << 2)
4290 : (1 << 1));
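/* As a made-up illustration: a CALL_USED_REGISTERS entry initialized to 6
   (bits 1 and 2 set) would come out call-used for 32-bit and plain 64-bit
   targets but call-saved under the 64-bit MS ABI, because only the single
   bit selected by c_mask survives the !! reduction in the loop below.  */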
4292 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4294 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4296 /* Set/reset conditionally defined registers from
4297 CALL_USED_REGISTERS initializer. */
4298 if (call_used_regs[i] > 1)
4299 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4301 /* Build the CLOBBERED_REGS register set from the call-used
4302 registers of the GENERAL_REGS register set. */
4303 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4304 && call_used_regs[i])
4305 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4308 /* If MMX is disabled, squash the registers. */
4309 if (! TARGET_MMX)
4310 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4311 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4312 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4314 /* If SSE is disabled, squash the registers. */
4315 if (! TARGET_SSE)
4316 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4317 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4318 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4320 /* If the FPU is disabled, squash the registers. */
4321 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4322 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4323 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4324 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4326 /* If AVX512F is disabled, squash the registers. */
4327 if (! TARGET_AVX512F)
4329 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4330 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4332 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4333 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4338 /* Save the current options */
4340 static void
4341 ix86_function_specific_save (struct cl_target_option *ptr,
4342 struct gcc_options *opts)
4344 ptr->arch = ix86_arch;
4345 ptr->schedule = ix86_schedule;
4346 ptr->tune = ix86_tune;
4347 ptr->branch_cost = ix86_branch_cost;
4348 ptr->tune_defaulted = ix86_tune_defaulted;
4349 ptr->arch_specified = ix86_arch_specified;
4350 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4351 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4352 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4353 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4354 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4355 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4356 ptr->x_ix86_abi = opts->x_ix86_abi;
4357 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4358 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4359 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4360 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4361 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4362 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4363 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4364 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4365 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4366 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4367 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4368 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4369 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4370 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4371 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4372 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4373 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4374 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4375 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4376 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4378 /* The fields are char but the variables are not; make sure the
4379 values fit in the fields. */
4380 gcc_assert (ptr->arch == ix86_arch);
4381 gcc_assert (ptr->schedule == ix86_schedule);
4382 gcc_assert (ptr->tune == ix86_tune);
4383 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4386 /* Restore the current options */
4388 static void
4389 ix86_function_specific_restore (struct gcc_options *opts,
4390 struct cl_target_option *ptr)
4392 enum processor_type old_tune = ix86_tune;
4393 enum processor_type old_arch = ix86_arch;
4394 unsigned int ix86_arch_mask;
4395 int i;
4397 /* We don't change -fPIC. */
4398 opts->x_flag_pic = flag_pic;
4400 ix86_arch = (enum processor_type) ptr->arch;
4401 ix86_schedule = (enum attr_cpu) ptr->schedule;
4402 ix86_tune = (enum processor_type) ptr->tune;
4403 opts->x_ix86_branch_cost = ptr->branch_cost;
4404 ix86_tune_defaulted = ptr->tune_defaulted;
4405 ix86_arch_specified = ptr->arch_specified;
4406 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4407 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4408 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4409 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4410 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4411 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4412 opts->x_ix86_abi = ptr->x_ix86_abi;
4413 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4414 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4415 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4416 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4417 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4418 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4419 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4420 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4421 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4422 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4423 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4424 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4425 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4426 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4427 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4428 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4429 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4430 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4431 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4432 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4434 /* Recreate the arch feature tests if the arch changed */
4435 if (old_arch != ix86_arch)
4437 ix86_arch_mask = 1u << ix86_arch;
4438 for (i = 0; i < X86_ARCH_LAST; ++i)
4439 ix86_arch_features[i]
4440 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4443 /* Recreate the tune optimization tests */
4444 if (old_tune != ix86_tune)
4445 set_ix86_tune_features (ix86_tune, false);
4448 /* Print the current options */
4450 static void
4451 ix86_function_specific_print (FILE *file, int indent,
4452 struct cl_target_option *ptr)
4454 char *target_string
4455 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4456 NULL, NULL, ptr->x_ix86_fpmath, false);
4458 gcc_assert (ptr->arch < PROCESSOR_max);
4459 fprintf (file, "%*sarch = %d (%s)\n",
4460 indent, "",
4461 ptr->arch, processor_target_table[ptr->arch].name);
4463 gcc_assert (ptr->tune < PROCESSOR_max);
4464 fprintf (file, "%*stune = %d (%s)\n",
4465 indent, "",
4466 ptr->tune, processor_target_table[ptr->tune].name);
4468 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4470 if (target_string)
4472 fprintf (file, "%*s%s\n", indent, "", target_string);
4473 free (target_string);
4478 /* Inner function to process the attribute((target(...))), take an argument and
4479 set the current options from the argument. If we have a list, recursively go
4480 over the list. */
4482 static bool
4483 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4484 struct gcc_options *opts,
4485 struct gcc_options *opts_set,
4486 struct gcc_options *enum_opts_set)
4488 char *next_optstr;
4489 bool ret = true;
4491 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4492 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4493 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4494 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4495 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4497 enum ix86_opt_type
4499 ix86_opt_unknown,
4500 ix86_opt_yes,
4501 ix86_opt_no,
4502 ix86_opt_str,
4503 ix86_opt_enum,
4504 ix86_opt_isa
4507 static const struct
4509 const char *string;
4510 size_t len;
4511 enum ix86_opt_type type;
4512 int opt;
4513 int mask;
4514 } attrs[] = {
4515 /* isa options */
4516 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4517 IX86_ATTR_ISA ("abm", OPT_mabm),
4518 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4519 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4520 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4521 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4522 IX86_ATTR_ISA ("aes", OPT_maes),
4523 IX86_ATTR_ISA ("sha", OPT_msha),
4524 IX86_ATTR_ISA ("avx", OPT_mavx),
4525 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4526 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4527 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4528 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4529 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4530 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4531 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4532 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4533 IX86_ATTR_ISA ("sse", OPT_msse),
4534 IX86_ATTR_ISA ("sse2", OPT_msse2),
4535 IX86_ATTR_ISA ("sse3", OPT_msse3),
4536 IX86_ATTR_ISA ("sse4", OPT_msse4),
4537 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4538 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4539 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4540 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4541 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4542 IX86_ATTR_ISA ("fma", OPT_mfma),
4543 IX86_ATTR_ISA ("xop", OPT_mxop),
4544 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4545 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4546 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4547 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4548 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4549 IX86_ATTR_ISA ("hle", OPT_mhle),
4550 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4551 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4552 IX86_ATTR_ISA ("adx", OPT_madx),
4553 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4554 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4555 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4556 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4558 /* enum options */
4559 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4561 /* string options */
4562 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4563 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4565 /* flag options */
4566 IX86_ATTR_YES ("cld",
4567 OPT_mcld,
4568 MASK_CLD),
4570 IX86_ATTR_NO ("fancy-math-387",
4571 OPT_mfancy_math_387,
4572 MASK_NO_FANCY_MATH_387),
4574 IX86_ATTR_YES ("ieee-fp",
4575 OPT_mieee_fp,
4576 MASK_IEEE_FP),
4578 IX86_ATTR_YES ("inline-all-stringops",
4579 OPT_minline_all_stringops,
4580 MASK_INLINE_ALL_STRINGOPS),
4582 IX86_ATTR_YES ("inline-stringops-dynamically",
4583 OPT_minline_stringops_dynamically,
4584 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4586 IX86_ATTR_NO ("align-stringops",
4587 OPT_mno_align_stringops,
4588 MASK_NO_ALIGN_STRINGOPS),
4590 IX86_ATTR_YES ("recip",
4591 OPT_mrecip,
4592 MASK_RECIP),
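/* Illustrative sketch (editor's addition, not part of the original source;
   the declaration below is hypothetical): given the table above, a
   declaration such as

     int foo (void) __attribute__ ((target ("avx2,no-fma,arch=core-avx2")));

   is split on commas by the loop below; "avx2" matches an ix86_opt_isa
   entry and is handed to ix86_handle_option as OPT_mavx2, the "no-"
   prefix of "no-fma" clears that option, and "arch=" matches an
   ix86_opt_str entry whose value is stashed in p_strings.  */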
4596 /* If this is a list, recurse to get the options. */
4597 if (TREE_CODE (args) == TREE_LIST)
4599 bool ret = true;
4601 for (; args; args = TREE_CHAIN (args))
4602 if (TREE_VALUE (args)
4603 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4604 p_strings, opts, opts_set,
4605 enum_opts_set))
4606 ret = false;
4608 return ret;
4611 else if (TREE_CODE (args) != STRING_CST)
4613 error ("attribute %<target%> argument not a string");
4614 return false;
4617 /* Handle multiple arguments separated by commas. */
4618 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4620 while (next_optstr && *next_optstr != '\0')
4622 char *p = next_optstr;
4623 char *orig_p = p;
4624 char *comma = strchr (next_optstr, ',');
4625 const char *opt_string;
4626 size_t len, opt_len;
4627 int opt;
4628 bool opt_set_p;
4629 char ch;
4630 unsigned i;
4631 enum ix86_opt_type type = ix86_opt_unknown;
4632 int mask = 0;
4634 if (comma)
4636 *comma = '\0';
4637 len = comma - next_optstr;
4638 next_optstr = comma + 1;
4640 else
4642 len = strlen (p);
4643 next_optstr = NULL;
4646 /* Recognize no-xxx. */
4647 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4649 opt_set_p = false;
4650 p += 3;
4651 len -= 3;
4653 else
4654 opt_set_p = true;
4656 /* Find the option. */
4657 ch = *p;
4658 opt = N_OPTS;
4659 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4661 type = attrs[i].type;
4662 opt_len = attrs[i].len;
4663 if (ch == attrs[i].string[0]
4664 && ((type != ix86_opt_str && type != ix86_opt_enum)
4665 ? len == opt_len
4666 : len > opt_len)
4667 && memcmp (p, attrs[i].string, opt_len) == 0)
4669 opt = attrs[i].opt;
4670 mask = attrs[i].mask;
4671 opt_string = attrs[i].string;
4672 break;
4676 /* Process the option. */
4677 if (opt == N_OPTS)
4679 error ("attribute(target(\"%s\")) is unknown", orig_p);
4680 ret = false;
4683 else if (type == ix86_opt_isa)
4685 struct cl_decoded_option decoded;
4687 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4688 ix86_handle_option (opts, opts_set,
4689 &decoded, input_location);
4692 else if (type == ix86_opt_yes || type == ix86_opt_no)
4694 if (type == ix86_opt_no)
4695 opt_set_p = !opt_set_p;
4697 if (opt_set_p)
4698 opts->x_target_flags |= mask;
4699 else
4700 opts->x_target_flags &= ~mask;
4703 else if (type == ix86_opt_str)
4705 if (p_strings[opt])
4707 error ("option(\"%s\") was already specified", opt_string);
4708 ret = false;
4710 else
4711 p_strings[opt] = xstrdup (p + opt_len);
4714 else if (type == ix86_opt_enum)
4716 bool arg_ok;
4717 int value;
4719 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4720 if (arg_ok)
4721 set_option (opts, enum_opts_set, opt, value,
4722 p + opt_len, DK_UNSPECIFIED, input_location,
4723 global_dc);
4724 else
4726 error ("attribute(target(\"%s\")) is unknown", orig_p);
4727 ret = false;
4731 else
4732 gcc_unreachable ();
4735 return ret;
4738 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4740 tree
4741 ix86_valid_target_attribute_tree (tree args,
4742 struct gcc_options *opts,
4743 struct gcc_options *opts_set)
4745 const char *orig_arch_string = opts->x_ix86_arch_string;
4746 const char *orig_tune_string = opts->x_ix86_tune_string;
4747 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4748 int orig_tune_defaulted = ix86_tune_defaulted;
4749 int orig_arch_specified = ix86_arch_specified;
4750 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4751 tree t = NULL_TREE;
4752 int i;
4753 struct cl_target_option *def
4754 = TREE_TARGET_OPTION (target_option_default_node);
4755 struct gcc_options enum_opts_set;
4757 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4759 /* Process each of the options on the chain. */
4760 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4761 opts_set, &enum_opts_set))
4762 return error_mark_node;
4764 /* If the changed options are different from the default, rerun
4765 ix86_option_override_internal, and then save the options away.
4766 The string options are attribute options, and will be undone
4767 when we copy the save structure. */
4768 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4769 || opts->x_target_flags != def->x_target_flags
4770 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4771 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4772 || enum_opts_set.x_ix86_fpmath)
4774 /* If we are using the default tune= or arch=, undo the string assigned,
4775 and use the default. */
4776 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4777 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4778 else if (!orig_arch_specified)
4779 opts->x_ix86_arch_string = NULL;
4781 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4782 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4783 else if (orig_tune_defaulted)
4784 opts->x_ix86_tune_string = NULL;
4786 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4787 if (enum_opts_set.x_ix86_fpmath)
4788 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4789 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4790 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4792 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4793 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4796 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4797 ix86_option_override_internal (false, opts, opts_set);
4799 /* Add any builtin functions with the new isa if any. */
4800 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4802 /* Save the current options unless we are validating options for
4803 #pragma. */
4804 t = build_target_option_node (opts);
4806 opts->x_ix86_arch_string = orig_arch_string;
4807 opts->x_ix86_tune_string = orig_tune_string;
4808 opts_set->x_ix86_fpmath = orig_fpmath_set;
4810 /* Free up memory allocated to hold the strings */
4811 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4812 free (option_strings[i]);
4815 return t;
4818 /* Hook to validate attribute((target("string"))). */
4820 static bool
4821 ix86_valid_target_attribute_p (tree fndecl,
4822 tree ARG_UNUSED (name),
4823 tree args,
4824 int ARG_UNUSED (flags))
4826 struct gcc_options func_options;
4827 tree new_target, new_optimize;
4828 bool ret = true;
4830 /* attribute((target("default"))) does nothing, beyond
4831 affecting multi-versioning. */
4832 if (TREE_VALUE (args)
4833 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4834 && TREE_CHAIN (args) == NULL_TREE
4835 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4836 return true;
4838 tree old_optimize = build_optimization_node (&global_options);
4840 /* Get the optimization options of the current function. */
4841 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4843 if (!func_optimize)
4844 func_optimize = old_optimize;
4846 /* Init func_options. */
4847 memset (&func_options, 0, sizeof (func_options));
4848 init_options_struct (&func_options, NULL);
4849 lang_hooks.init_options_struct (&func_options);
4851 cl_optimization_restore (&func_options,
4852 TREE_OPTIMIZATION (func_optimize));
4854 /* Initialize func_options to the default before its target options can
4855 be set. */
4856 cl_target_option_restore (&func_options,
4857 TREE_TARGET_OPTION (target_option_default_node));
4859 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4860 &global_options_set);
4862 new_optimize = build_optimization_node (&func_options);
4864 if (new_target == error_mark_node)
4865 ret = false;
4867 else if (fndecl && new_target)
4869 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4871 if (old_optimize != new_optimize)
4872 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4875 return ret;
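/* Usage sketch (editor's addition; the dispatch functions are
   hypothetical): with function multi-versioning this hook runs once per
   version, e.g.

     __attribute__ ((target ("default"))) int dispatch (void);
     __attribute__ ((target ("avx2")))    int dispatch (void);

   The "default" version is accepted early above without building a new
   target node, while the "avx2" version gets a
   DECL_FUNCTION_SPECIFIC_TARGET tree from
   ix86_valid_target_attribute_tree.  */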
4879 /* Hook to determine if one function can safely inline another. */
4881 static bool
4882 ix86_can_inline_p (tree caller, tree callee)
4884 bool ret = false;
4885 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4886 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4888 /* If callee has no option attributes, then it is ok to inline. */
4889 if (!callee_tree)
4890 ret = true;
4892 /* If caller has no option attributes, but callee does then it is not ok to
4893 inline. */
4894 else if (!caller_tree)
4895 ret = false;
4897 else
4899 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4900 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4902 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
4903 function can inline an SSE2 function but an SSE2 function can't inline
4904 an SSE4 function. */
4905 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4906 != callee_opts->x_ix86_isa_flags)
4907 ret = false;
4909 /* See if we have the same non-isa options. */
4910 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4911 ret = false;
4913 /* See if arch, tune, etc. are the same. */
4914 else if (caller_opts->arch != callee_opts->arch)
4915 ret = false;
4917 else if (caller_opts->tune != callee_opts->tune)
4918 ret = false;
4920 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4921 ret = false;
4923 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4924 ret = false;
4926 else
4927 ret = true;
4930 return ret;
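/* Example of the rule above (editor's illustration): a caller built with
   -mavx2, or carrying attribute ((target ("avx2"))), may inline a plain
   SSE2 callee because the callee's ISA flags are a subset of the
   caller's; an SSE2-only caller must not inline an AVX2 callee.  */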
4934 /* Remember the last target of ix86_set_current_function. */
4935 static GTY(()) tree ix86_previous_fndecl;
4937 /* Invalidate ix86_previous_fndecl cache. */
4938 void
4939 ix86_reset_previous_fndecl (void)
4941 ix86_previous_fndecl = NULL_TREE;
4944 /* Establish appropriate back-end context for processing the function
4945 FNDECL. The argument might be NULL to indicate processing at top
4946 level, outside of any function scope. */
4947 static void
4948 ix86_set_current_function (tree fndecl)
4950 /* Only change the context if the function changes. This hook is called
4951 several times in the course of compiling a function, and we don't want to
4952 slow things down too much or call target_reinit when it isn't safe. */
4953 if (fndecl && fndecl != ix86_previous_fndecl)
4955 tree old_tree = (ix86_previous_fndecl
4956 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4957 : NULL_TREE);
4959 tree new_tree = (fndecl
4960 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4961 : NULL_TREE);
4963 ix86_previous_fndecl = fndecl;
4964 if (old_tree == new_tree)
4967 else if (new_tree)
4969 cl_target_option_restore (&global_options,
4970 TREE_TARGET_OPTION (new_tree));
4971 if (TREE_TARGET_GLOBALS (new_tree))
4972 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
4973 else
4974 TREE_TARGET_GLOBALS (new_tree)
4975 = save_target_globals_default_opts ();
4978 else if (old_tree)
4980 new_tree = target_option_current_node;
4981 cl_target_option_restore (&global_options,
4982 TREE_TARGET_OPTION (new_tree));
4983 if (TREE_TARGET_GLOBALS (new_tree))
4984 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
4985 else if (new_tree == target_option_default_node)
4986 restore_target_globals (&default_target_globals);
4987 else
4988 TREE_TARGET_GLOBALS (new_tree)
4989 = save_target_globals_default_opts ();
4995 /* Return true if this goes in large data/bss. */
4997 static bool
4998 ix86_in_large_data_p (tree exp)
5000 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5001 return false;
5003 /* Functions are never large data. */
5004 if (TREE_CODE (exp) == FUNCTION_DECL)
5005 return false;
5007 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5009 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
5010 if (strcmp (section, ".ldata") == 0
5011 || strcmp (section, ".lbss") == 0)
5012 return true;
5013 return false;
5015 else
5017 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5019 /* If this is an incomplete type with size 0, then we can't put it
5020 in data because it might be too big when completed. */
5021 if (!size || size > ix86_section_threshold)
5022 return true;
5025 return false;
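/* Illustrative example (editor's addition; big_buf is hypothetical): with
   -mcmodel=medium and the default -mlarge-data-threshold of 65536, a
   definition such as

     static char big_buf[1 << 20];

   exceeds ix86_section_threshold, so it is treated as large data and is
   placed in .lbss/.ldata by the section hooks below.  */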
5028 /* Switch to the appropriate section for output of DECL.
5029 DECL is either a `VAR_DECL' node or a constant of some sort.
5030 RELOC indicates whether forming the initial value of DECL requires
5031 link-time relocations. */
5033 ATTRIBUTE_UNUSED static section *
5034 x86_64_elf_select_section (tree decl, int reloc,
5035 unsigned HOST_WIDE_INT align)
5037 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5038 && ix86_in_large_data_p (decl))
5040 const char *sname = NULL;
5041 unsigned int flags = SECTION_WRITE;
5042 switch (categorize_decl_for_section (decl, reloc))
5044 case SECCAT_DATA:
5045 sname = ".ldata";
5046 break;
5047 case SECCAT_DATA_REL:
5048 sname = ".ldata.rel";
5049 break;
5050 case SECCAT_DATA_REL_LOCAL:
5051 sname = ".ldata.rel.local";
5052 break;
5053 case SECCAT_DATA_REL_RO:
5054 sname = ".ldata.rel.ro";
5055 break;
5056 case SECCAT_DATA_REL_RO_LOCAL:
5057 sname = ".ldata.rel.ro.local";
5058 break;
5059 case SECCAT_BSS:
5060 sname = ".lbss";
5061 flags |= SECTION_BSS;
5062 break;
5063 case SECCAT_RODATA:
5064 case SECCAT_RODATA_MERGE_STR:
5065 case SECCAT_RODATA_MERGE_STR_INIT:
5066 case SECCAT_RODATA_MERGE_CONST:
5067 sname = ".lrodata";
5068 flags = 0;
5069 break;
5070 case SECCAT_SRODATA:
5071 case SECCAT_SDATA:
5072 case SECCAT_SBSS:
5073 gcc_unreachable ();
5074 case SECCAT_TEXT:
5075 case SECCAT_TDATA:
5076 case SECCAT_TBSS:
5077 /* We don't split these for the medium model. Place them into
5078 default sections and hope for the best. */
5079 break;
5081 if (sname)
5083 /* We might get called with string constants, but get_named_section
5084 doesn't like them as they are not DECLs. Also, we need to set
5085 flags in that case. */
5086 if (!DECL_P (decl))
5087 return get_section (sname, flags, NULL);
5088 return get_named_section (decl, sname, reloc);
5091 return default_elf_select_section (decl, reloc, align);
5094 /* Select a set of attributes for section NAME based on the properties
5095 of DECL and whether or not RELOC indicates that DECL's initializer
5096 might contain runtime relocations. */
5098 static unsigned int ATTRIBUTE_UNUSED
5099 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5101 unsigned int flags = default_section_type_flags (decl, name, reloc);
5103 if (decl == NULL_TREE
5104 && (strcmp (name, ".ldata.rel.ro") == 0
5105 || strcmp (name, ".ldata.rel.ro.local") == 0))
5106 flags |= SECTION_RELRO;
5108 if (strcmp (name, ".lbss") == 0
5109 || strncmp (name, ".lbss.", 5) == 0
5110 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5111 flags |= SECTION_BSS;
5113 return flags;
5116 /* Build up a unique section name, expressed as a
5117 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5118 RELOC indicates whether the initial value of EXP requires
5119 link-time relocations. */
5121 static void ATTRIBUTE_UNUSED
5122 x86_64_elf_unique_section (tree decl, int reloc)
5124 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5125 && ix86_in_large_data_p (decl))
5127 const char *prefix = NULL;
5128 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5129 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
5131 switch (categorize_decl_for_section (decl, reloc))
5133 case SECCAT_DATA:
5134 case SECCAT_DATA_REL:
5135 case SECCAT_DATA_REL_LOCAL:
5136 case SECCAT_DATA_REL_RO:
5137 case SECCAT_DATA_REL_RO_LOCAL:
5138 prefix = one_only ? ".ld" : ".ldata";
5139 break;
5140 case SECCAT_BSS:
5141 prefix = one_only ? ".lb" : ".lbss";
5142 break;
5143 case SECCAT_RODATA:
5144 case SECCAT_RODATA_MERGE_STR:
5145 case SECCAT_RODATA_MERGE_STR_INIT:
5146 case SECCAT_RODATA_MERGE_CONST:
5147 prefix = one_only ? ".lr" : ".lrodata";
5148 break;
5149 case SECCAT_SRODATA:
5150 case SECCAT_SDATA:
5151 case SECCAT_SBSS:
5152 gcc_unreachable ();
5153 case SECCAT_TEXT:
5154 case SECCAT_TDATA:
5155 case SECCAT_TBSS:
5156 /* We don't split these for the medium model. Place them into
5157 default sections and hope for the best. */
5158 break;
5160 if (prefix)
5162 const char *name, *linkonce;
5163 char *string;
5165 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5166 name = targetm.strip_name_encoding (name);
5168 /* If we're using one_only, then there needs to be a .gnu.linkonce
5169 prefix to the section name. */
5170 linkonce = one_only ? ".gnu.linkonce" : "";
5172 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5174 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
5175 return;
5178 default_unique_section (decl, reloc);
5181 #ifdef COMMON_ASM_OP
5182 /* This says how to output assembler code to declare an
5183 uninitialized external linkage data object.
5185 For medium model x86-64 we need to use .largecomm opcode for
5186 large objects. */
5187 void
5188 x86_elf_aligned_common (FILE *file,
5189 const char *name, unsigned HOST_WIDE_INT size,
5190 int align)
5192 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5193 && size > (unsigned int)ix86_section_threshold)
5194 fputs (".largecomm\t", file);
5195 else
5196 fputs (COMMON_ASM_OP, file);
5197 assemble_name (file, name);
5198 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5199 size, align / BITS_PER_UNIT);
5201 #endif
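/* Sketch of the output above (editor's addition; big_buf is hypothetical):
   for a 128 KiB common symbol under -mcmodel=medium the directive emitted
   would be roughly

	.largecomm	big_buf,131072,32

   while small objects keep the ordinary COMMON_ASM_OP (".comm") form.  */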
5203 /* Utility function for targets to use in implementing
5204 ASM_OUTPUT_ALIGNED_BSS. */
5206 void
5207 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
5208 const char *name, unsigned HOST_WIDE_INT size,
5209 int align)
5211 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5212 && size > (unsigned int)ix86_section_threshold)
5213 switch_to_section (get_named_section (decl, ".lbss", 0));
5214 else
5215 switch_to_section (bss_section);
5216 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5217 #ifdef ASM_DECLARE_OBJECT_NAME
5218 last_assemble_variable_decl = decl;
5219 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5220 #else
5221 /* The standard thing is to just output a label for the object. */
5222 ASM_OUTPUT_LABEL (file, name);
5223 #endif /* ASM_DECLARE_OBJECT_NAME */
5224 ASM_OUTPUT_SKIP (file, size ? size : 1);
5227 /* Decide whether we must probe the stack before any space allocation
5228 on this target. It's essentially TARGET_STACK_PROBE except when
5229 -fstack-check causes the stack to be already probed differently. */
5231 bool
5232 ix86_target_stack_probe (void)
5234 /* Do not probe the stack twice if static stack checking is enabled. */
5235 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5236 return false;
5238 return TARGET_STACK_PROBE;
5241 /* Decide whether we can make a sibling call to a function. DECL is the
5242 declaration of the function being targeted by the call and EXP is the
5243 CALL_EXPR representing the call. */
5245 static bool
5246 ix86_function_ok_for_sibcall (tree decl, tree exp)
5248 tree type, decl_or_type;
5249 rtx a, b;
5251 /* If we are generating position-independent code, we cannot sibcall
5252 optimize any indirect call, or a direct call to a global function,
5253 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5254 if (!TARGET_MACHO
5255 && !TARGET_64BIT
5256 && flag_pic
5257 && (!decl || !targetm.binds_local_p (decl)))
5258 return false;
5260 /* If we need to align the outgoing stack, then sibcalling would
5261 unalign the stack, which may break the called function. */
5262 if (ix86_minimum_incoming_stack_boundary (true)
5263 < PREFERRED_STACK_BOUNDARY)
5264 return false;
5266 if (decl)
5268 decl_or_type = decl;
5269 type = TREE_TYPE (decl);
5271 else
5273 /* We're looking at the CALL_EXPR, we need the type of the function. */
5274 type = CALL_EXPR_FN (exp); /* pointer expression */
5275 type = TREE_TYPE (type); /* pointer type */
5276 type = TREE_TYPE (type); /* function type */
5277 decl_or_type = type;
5280 /* Check that the return value locations are the same. Like
5281 if we are returning floats on the 80387 register stack, we cannot
5282 make a sibcall from a function that doesn't return a float to a
5283 function that does or, conversely, from a function that does return
5284 a float to a function that doesn't; the necessary stack adjustment
5285 would not be executed. This is also the place we notice
5286 differences in the return value ABI. Note that it is ok for one
5287 of the functions to have void return type as long as the return
5288 value of the other is passed in a register. */
5289 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5290 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5291 cfun->decl, false);
5292 if (STACK_REG_P (a) || STACK_REG_P (b))
5294 if (!rtx_equal_p (a, b))
5295 return false;
5297 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5299 else if (!rtx_equal_p (a, b))
5300 return false;
5302 if (TARGET_64BIT)
5304 /* The SYSV ABI has more call-clobbered registers;
5305 disallow sibcalls from MS to SYSV. */
5306 if (cfun->machine->call_abi == MS_ABI
5307 && ix86_function_type_abi (type) == SYSV_ABI)
5308 return false;
5310 else
5312 /* If this call is indirect, we'll need to be able to use a
5313 call-clobbered register for the address of the target function.
5314 Make sure that all such registers are not used for passing
5315 parameters. Note that DLLIMPORT functions are indirect. */
5316 if (!decl
5317 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5319 if (ix86_function_regparm (type, NULL) >= 3)
5321 /* ??? Need to count the actual number of registers to be used,
5322 not the possible number of registers. Fix later. */
5323 return false;
5328 /* Otherwise okay. That also includes certain types of indirect calls. */
5329 return true;
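/* Example (editor's illustration; foo and bar are hypothetical): on ia32
   with -fpic, a tail call from foo to an extern function bar is rejected
   by the first test above, because bar does not bind locally and the PLT
   call needs %ebx live, so an ordinary call plus return is emitted
   instead of a sibcall.  */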
5332 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5333 and "sseregparm" calling convention attributes;
5334 arguments as in struct attribute_spec.handler. */
5336 static tree
5337 ix86_handle_cconv_attribute (tree *node, tree name,
5338 tree args,
5339 int flags ATTRIBUTE_UNUSED,
5340 bool *no_add_attrs)
5342 if (TREE_CODE (*node) != FUNCTION_TYPE
5343 && TREE_CODE (*node) != METHOD_TYPE
5344 && TREE_CODE (*node) != FIELD_DECL
5345 && TREE_CODE (*node) != TYPE_DECL)
5347 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5348 name);
5349 *no_add_attrs = true;
5350 return NULL_TREE;
5353 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5354 if (is_attribute_p ("regparm", name))
5356 tree cst;
5358 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5360 error ("fastcall and regparm attributes are not compatible");
5363 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5365 error ("regparam and thiscall attributes are not compatible");
5368 cst = TREE_VALUE (args);
5369 if (TREE_CODE (cst) != INTEGER_CST)
5371 warning (OPT_Wattributes,
5372 "%qE attribute requires an integer constant argument",
5373 name);
5374 *no_add_attrs = true;
5376 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5378 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5379 name, REGPARM_MAX);
5380 *no_add_attrs = true;
5383 return NULL_TREE;
5386 if (TARGET_64BIT)
5388 /* Do not warn when emulating the MS ABI. */
5389 if ((TREE_CODE (*node) != FUNCTION_TYPE
5390 && TREE_CODE (*node) != METHOD_TYPE)
5391 || ix86_function_type_abi (*node) != MS_ABI)
5392 warning (OPT_Wattributes, "%qE attribute ignored",
5393 name);
5394 *no_add_attrs = true;
5395 return NULL_TREE;
5398 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5399 if (is_attribute_p ("fastcall", name))
5401 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5403 error ("fastcall and cdecl attributes are not compatible");
5405 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5407 error ("fastcall and stdcall attributes are not compatible");
5409 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5411 error ("fastcall and regparm attributes are not compatible");
5413 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5415 error ("fastcall and thiscall attributes are not compatible");
5419 /* Can combine stdcall with fastcall (redundant), regparm and
5420 sseregparm. */
5421 else if (is_attribute_p ("stdcall", name))
5423 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5425 error ("stdcall and cdecl attributes are not compatible");
5427 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5429 error ("stdcall and fastcall attributes are not compatible");
5431 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5433 error ("stdcall and thiscall attributes are not compatible");
5437 /* Can combine cdecl with regparm and sseregparm. */
5438 else if (is_attribute_p ("cdecl", name))
5440 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5442 error ("stdcall and cdecl attributes are not compatible");
5444 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5446 error ("fastcall and cdecl attributes are not compatible");
5448 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5450 error ("cdecl and thiscall attributes are not compatible");
5453 else if (is_attribute_p ("thiscall", name))
5455 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5456 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5457 name);
5458 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5460 error ("stdcall and thiscall attributes are not compatible");
5462 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5464 error ("fastcall and thiscall attributes are not compatible");
5466 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5468 error ("cdecl and thiscall attributes are not compatible");
5472 /* Can combine sseregparm with all attributes. */
5474 return NULL_TREE;
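/* Illustrative declarations checked by the handler above (editor's
   addition; f, g and h are hypothetical):

     void f (int, int) __attribute__ ((fastcall));		accepted
     void g (int) __attribute__ ((fastcall, stdcall));		rejected above
     void h (int, int, int) __attribute__ ((regparm (3)));	accepted, <= REGPARM_MAX

   The second one triggers the "fastcall and stdcall attributes are not
   compatible" error.  */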
5477 /* The transactional memory builtins are implicitly regparm or fastcall
5478 depending on the ABI. Override the generic do-nothing attribute that
5479 these builtins were declared with, and replace it with one of the two
5480 attributes that we expect elsewhere. */
5482 static tree
5483 ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
5484 tree args ATTRIBUTE_UNUSED,
5485 int flags, bool *no_add_attrs)
5487 tree alt;
5489 /* In no case do we want to add the placeholder attribute. */
5490 *no_add_attrs = true;
5492 /* The 64-bit ABI is unchanged for transactional memory. */
5493 if (TARGET_64BIT)
5494 return NULL_TREE;
5496 /* ??? Is there a better way to validate 32-bit windows? We have
5497 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5498 if (CHECK_STACK_LIMIT > 0)
5499 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5500 else
5502 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5503 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5505 decl_attributes (node, alt, flags);
5507 return NULL_TREE;
5510 /* This function determines from TYPE the calling-convention. */
5512 unsigned int
5513 ix86_get_callcvt (const_tree type)
5515 unsigned int ret = 0;
5516 bool is_stdarg;
5517 tree attrs;
5519 if (TARGET_64BIT)
5520 return IX86_CALLCVT_CDECL;
5522 attrs = TYPE_ATTRIBUTES (type);
5523 if (attrs != NULL_TREE)
5525 if (lookup_attribute ("cdecl", attrs))
5526 ret |= IX86_CALLCVT_CDECL;
5527 else if (lookup_attribute ("stdcall", attrs))
5528 ret |= IX86_CALLCVT_STDCALL;
5529 else if (lookup_attribute ("fastcall", attrs))
5530 ret |= IX86_CALLCVT_FASTCALL;
5531 else if (lookup_attribute ("thiscall", attrs))
5532 ret |= IX86_CALLCVT_THISCALL;
5534 /* Regparm isn't allowed for thiscall and fastcall. */
5535 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5537 if (lookup_attribute ("regparm", attrs))
5538 ret |= IX86_CALLCVT_REGPARM;
5539 if (lookup_attribute ("sseregparm", attrs))
5540 ret |= IX86_CALLCVT_SSEREGPARM;
5543 if (IX86_BASE_CALLCVT(ret) != 0)
5544 return ret;
5547 is_stdarg = stdarg_p (type);
5548 if (TARGET_RTD && !is_stdarg)
5549 return IX86_CALLCVT_STDCALL | ret;
5551 if (ret != 0
5552 || is_stdarg
5553 || TREE_CODE (type) != METHOD_TYPE
5554 || ix86_function_type_abi (type) != MS_ABI)
5555 return IX86_CALLCVT_CDECL | ret;
5557 return IX86_CALLCVT_THISCALL;
5560 /* Return 0 if the attributes for two types are incompatible, 1 if they
5561 are compatible, and 2 if they are nearly compatible (which causes a
5562 warning to be generated). */
5564 static int
5565 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5567 unsigned int ccvt1, ccvt2;
5569 if (TREE_CODE (type1) != FUNCTION_TYPE
5570 && TREE_CODE (type1) != METHOD_TYPE)
5571 return 1;
5573 ccvt1 = ix86_get_callcvt (type1);
5574 ccvt2 = ix86_get_callcvt (type2);
5575 if (ccvt1 != ccvt2)
5576 return 0;
5577 if (ix86_function_regparm (type1, NULL)
5578 != ix86_function_regparm (type2, NULL))
5579 return 0;
5581 return 1;
5584 /* Return the regparm value for a function with the indicated TYPE and DECL.
5585 DECL may be NULL when calling function indirectly
5586 or considering a libcall. */
5588 static int
5589 ix86_function_regparm (const_tree type, const_tree decl)
5591 tree attr;
5592 int regparm;
5593 unsigned int ccvt;
5595 if (TARGET_64BIT)
5596 return (ix86_function_type_abi (type) == SYSV_ABI
5597 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5598 ccvt = ix86_get_callcvt (type);
5599 regparm = ix86_regparm;
5601 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5603 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5604 if (attr)
5606 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5607 return regparm;
5610 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5611 return 2;
5612 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5613 return 1;
5615 /* Use register calling convention for local functions when possible. */
5616 if (decl
5617 && TREE_CODE (decl) == FUNCTION_DECL
5618 /* Caller and callee must agree on the calling convention, so
5619 checking here just optimize means that with
5620 __attribute__((optimize (...))) caller could use regparm convention
5621 and callee not, or vice versa. Instead look at whether the callee
5622 is optimized or not. */
5623 && opt_for_fn (decl, optimize)
5624 && !(profile_flag && !flag_fentry))
5626 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5627 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5628 if (i && i->local && i->can_change_signature)
5630 int local_regparm, globals = 0, regno;
5632 /* Make sure no regparm register is taken by a
5633 fixed register variable. */
5634 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5635 if (fixed_regs[local_regparm])
5636 break;
5638 /* We don't want to use regparm(3) for nested functions as
5639 these use a static chain pointer in the third argument. */
5640 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5641 local_regparm = 2;
5643 /* In 32-bit mode save a register for the split stack. */
5644 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5645 local_regparm = 2;
5647 /* Each fixed register usage increases register pressure,
5648 so fewer registers should be used for argument passing.
5649 This functionality can be overridden by an explicit
5650 regparm value. */
5651 for (regno = AX_REG; regno <= DI_REG; regno++)
5652 if (fixed_regs[regno])
5653 globals++;
5655 local_regparm
5656 = globals < local_regparm ? local_regparm - globals : 0;
5658 if (local_regparm > regparm)
5659 regparm = local_regparm;
5663 return regparm;
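/* Worked example (editor's addition; add3 is hypothetical): for

     int __attribute__ ((regparm (3))) add3 (int a, int b, int c);

   the value returned above is 3, so A, B and C arrive in %eax, %edx and
   %ecx; a local optimized static function may be promoted to the same
   convention by the cgraph-based code above.  */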
5666 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5667 DFmode (2) arguments in SSE registers for a function with the
5668 indicated TYPE and DECL. DECL may be NULL when calling function
5669 indirectly or considering a libcall. Otherwise return 0. */
5671 static int
5672 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5674 gcc_assert (!TARGET_64BIT);
5676 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5677 by the sseregparm attribute. */
5678 if (TARGET_SSEREGPARM
5679 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5681 if (!TARGET_SSE)
5683 if (warn)
5685 if (decl)
5686 error ("calling %qD with attribute sseregparm without "
5687 "SSE/SSE2 enabled", decl);
5688 else
5689 error ("calling %qT with attribute sseregparm without "
5690 "SSE/SSE2 enabled", type);
5692 return 0;
5695 return 2;
5698 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5699 (and DFmode for SSE2) arguments in SSE registers. */
5700 if (decl && TARGET_SSE_MATH && optimize
5701 && !(profile_flag && !flag_fentry))
5703 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5704 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5705 if (i && i->local && i->can_change_signature)
5706 return TARGET_SSE2 ? 2 : 1;
5709 return 0;
5712 /* Return true if EAX is live at the start of the function. Used by
5713 ix86_expand_prologue to determine if we need special help before
5714 calling allocate_stack_worker. */
5716 static bool
5717 ix86_eax_live_at_start_p (void)
5719 /* Cheat. Don't bother working forward from ix86_function_regparm
5720 to the function type to whether an actual argument is located in
5721 eax. Instead just look at cfg info, which is still close enough
5722 to correct at this point. This gives false positives for broken
5723 functions that might use uninitialized data that happens to be
5724 allocated in eax, but who cares? */
5725 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5728 static bool
5729 ix86_keep_aggregate_return_pointer (tree fntype)
5731 tree attr;
5733 if (!TARGET_64BIT)
5735 attr = lookup_attribute ("callee_pop_aggregate_return",
5736 TYPE_ATTRIBUTES (fntype));
5737 if (attr)
5738 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5740 /* For 32-bit MS-ABI the default is to keep aggregate
5741 return pointer. */
5742 if (ix86_function_type_abi (fntype) == MS_ABI)
5743 return true;
5745 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5748 /* Value is the number of bytes of arguments automatically
5749 popped when returning from a subroutine call.
5750 FUNDECL is the declaration node of the function (as a tree),
5751 FUNTYPE is the data type of the function (as a tree),
5752 or for a library call it is an identifier node for the subroutine name.
5753 SIZE is the number of bytes of arguments passed on the stack.
5755 On the 80386, the RTD insn may be used to pop them if the number
5756 of args is fixed, but if the number is variable then the caller
5757 must pop them all. RTD can't be used for library calls now
5758 because the library is compiled with the Unix compiler.
5759 Use of RTD is a selectable option, since it is incompatible with
5760 standard Unix calling sequences. If the option is not selected,
5761 the caller must always pop the args.
5763 The attribute stdcall is equivalent to RTD on a per module basis. */
5765 static int
5766 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5768 unsigned int ccvt;
5770 /* None of the 64-bit ABIs pop arguments. */
5771 if (TARGET_64BIT)
5772 return 0;
5774 ccvt = ix86_get_callcvt (funtype);
5776 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5777 | IX86_CALLCVT_THISCALL)) != 0
5778 && ! stdarg_p (funtype))
5779 return size;
5781 /* Lose any fake structure return argument if it is passed on the stack. */
5782 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5783 && !ix86_keep_aggregate_return_pointer (funtype))
5785 int nregs = ix86_function_regparm (funtype, fundecl);
5786 if (nregs == 0)
5787 return GET_MODE_SIZE (Pmode);
5790 return 0;
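/* Illustrative consequence (editor's addition; cb is hypothetical): for

     void __attribute__ ((stdcall)) cb (int a, int b);

   SIZE is 8 on ia32 and is returned above, so the callee pops its
   arguments with "ret $8"; a cdecl function returns 0 here and the
   caller pops the 8 bytes itself.  */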
5793 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5795 static bool
5796 ix86_legitimate_combined_insn (rtx insn)
5798 /* Check operand constraints in case hard registers were propagated
5799 into insn pattern. This check prevents combine pass from
5800 generating insn patterns with invalid hard register operands.
5801 These invalid insns can eventually confuse reload to error out
5802 with a spill failure. See also PRs 46829 and 46843. */
5803 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5805 int i;
5807 extract_insn (insn);
5808 preprocess_constraints ();
5810 for (i = 0; i < recog_data.n_operands; i++)
5812 rtx op = recog_data.operand[i];
5813 enum machine_mode mode = GET_MODE (op);
5814 struct operand_alternative *op_alt;
5815 int offset = 0;
5816 bool win;
5817 int j;
5819 /* For pre-AVX disallow unaligned loads/stores where the
5820 instructions don't support it. */
5821 if (!TARGET_AVX
5822 && VECTOR_MODE_P (GET_MODE (op))
5823 && misaligned_operand (op, GET_MODE (op)))
5825 int min_align = get_attr_ssememalign (insn);
5826 if (min_align == 0)
5827 return false;
5830 /* A unary operator may be accepted by the predicate, but it
5831 is irrelevant for matching constraints. */
5832 if (UNARY_P (op))
5833 op = XEXP (op, 0);
5835 if (GET_CODE (op) == SUBREG)
5837 if (REG_P (SUBREG_REG (op))
5838 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5839 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5840 GET_MODE (SUBREG_REG (op)),
5841 SUBREG_BYTE (op),
5842 GET_MODE (op));
5843 op = SUBREG_REG (op);
5846 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5847 continue;
5849 op_alt = recog_op_alt[i];
5851 /* Operand has no constraints, anything is OK. */
5852 win = !recog_data.n_alternatives;
5854 for (j = 0; j < recog_data.n_alternatives; j++)
5856 if (op_alt[j].anything_ok
5857 || (op_alt[j].matches != -1
5858 && operands_match_p
5859 (recog_data.operand[i],
5860 recog_data.operand[op_alt[j].matches]))
5861 || reg_fits_class_p (op, op_alt[j].cl, offset, mode))
5863 win = true;
5864 break;
5868 if (!win)
5869 return false;
5873 return true;
5876 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5878 static unsigned HOST_WIDE_INT
5879 ix86_asan_shadow_offset (void)
5881 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5882 : HOST_WIDE_INT_C (0x7fff8000))
5883 : (HOST_WIDE_INT_1 << 29);
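/* For reference (editor's note): AddressSanitizer maps an application
   address as  shadow = (addr >> 3) + ix86_asan_shadow_offset (), i.e.
   0x7fff8000 on x86-64 LP64 (non-Mach-O) and 1 << 29 on ia32.  */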
5886 /* Argument support functions. */
5888 /* Return true when register may be used to pass function parameters. */
5889 bool
5890 ix86_function_arg_regno_p (int regno)
5892 int i;
5893 const int *parm_regs;
5895 if (!TARGET_64BIT)
5897 if (TARGET_MACHO)
5898 return (regno < REGPARM_MAX
5899 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5900 else
5901 return (regno < REGPARM_MAX
5902 || (TARGET_MMX && MMX_REGNO_P (regno)
5903 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5904 || (TARGET_SSE && SSE_REGNO_P (regno)
5905 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5908 if (TARGET_SSE && SSE_REGNO_P (regno)
5909 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5910 return true;
5912 /* TODO: The function should depend on current function ABI but
5913 builtins.c would need updating then. Therefore we use the
5914 default ABI. */
5916 /* RAX is used as hidden argument to va_arg functions. */
5917 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5918 return true;
5920 if (ix86_abi == MS_ABI)
5921 parm_regs = x86_64_ms_abi_int_parameter_registers;
5922 else
5923 parm_regs = x86_64_int_parameter_registers;
5924 for (i = 0; i < (ix86_abi == MS_ABI
5925 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5926 if (regno == parm_regs[i])
5927 return true;
5928 return false;
5931 /* Return if we do not know how to pass TYPE solely in registers. */
5933 static bool
5934 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5936 if (must_pass_in_stack_var_size_or_pad (mode, type))
5937 return true;
5939 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5940 The layout_type routine is crafty and tries to trick us into passing
5941 currently unsupported vector types on the stack by using TImode. */
5942 return (!TARGET_64BIT && mode == TImode
5943 && type && TREE_CODE (type) != VECTOR_TYPE);
5946 /* Return the size, in bytes, of the area reserved for arguments passed
5947 in registers for the function represented by FNDECL, depending on the
5948 ABI format used. */
5949 int
5950 ix86_reg_parm_stack_space (const_tree fndecl)
5952 enum calling_abi call_abi = SYSV_ABI;
5953 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5954 call_abi = ix86_function_abi (fndecl);
5955 else
5956 call_abi = ix86_function_type_abi (fndecl);
5957 if (TARGET_64BIT && call_abi == MS_ABI)
5958 return 32;
5959 return 0;
5962 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5963 call abi used. */
5964 enum calling_abi
5965 ix86_function_type_abi (const_tree fntype)
5967 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5969 enum calling_abi abi = ix86_abi;
5970 if (abi == SYSV_ABI)
5972 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5973 abi = MS_ABI;
5975 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5976 abi = SYSV_ABI;
5977 return abi;
5979 return ix86_abi;
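/* Example (editor's addition; wincall is hypothetical): on x86-64 Linux,
   where ix86_abi is SYSV_ABI,

     int __attribute__ ((ms_abi)) wincall (int, int, int, int);

   makes this routine return MS_ABI, so the first four integer arguments
   go in %rcx, %rdx, %r8 and %r9 and 32 bytes of shadow space are
   reserved (see ix86_reg_parm_stack_space above).  */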
5982 /* We add this as a workaround in order to use libc_has_function
5983 hook in i386.md. */
5984 bool
5985 ix86_libc_has_function (enum function_class fn_class)
5987 return targetm.libc_has_function (fn_class);
5990 static bool
5991 ix86_function_ms_hook_prologue (const_tree fn)
5993 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5995 if (decl_function_context (fn) != NULL_TREE)
5996 error_at (DECL_SOURCE_LOCATION (fn),
5997 "ms_hook_prologue is not compatible with nested function");
5998 else
5999 return true;
6001 return false;
6004 static enum calling_abi
6005 ix86_function_abi (const_tree fndecl)
6007 if (! fndecl)
6008 return ix86_abi;
6009 return ix86_function_type_abi (TREE_TYPE (fndecl));
6012 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
6013 call abi used. */
6014 enum calling_abi
6015 ix86_cfun_abi (void)
6017 if (! cfun)
6018 return ix86_abi;
6019 return cfun->machine->call_abi;
6022 /* Write the extra assembler code needed to declare a function properly. */
6024 void
6025 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6026 tree decl)
6028 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6030 if (is_ms_hook)
6032 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6033 unsigned int filler_cc = 0xcccccccc;
6035 for (i = 0; i < filler_count; i += 4)
6036 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6039 #ifdef SUBTARGET_ASM_UNWIND_INIT
6040 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6041 #endif
6043 ASM_OUTPUT_LABEL (asm_out_file, fname);
6045 /* Output magic byte marker, if hot-patch attribute is set. */
6046 if (is_ms_hook)
6048 if (TARGET_64BIT)
6050 /* leaq [%rsp + 0], %rsp */
6051 asm_fprintf (asm_out_file, ASM_BYTE
6052 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6054 else
6056 /* movl.s %edi, %edi
6057 push %ebp
6058 movl.s %esp, %ebp */
6059 asm_fprintf (asm_out_file, ASM_BYTE
6060 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6065 /* regclass.c */
6066 extern void init_regs (void);
6068 /* Implementation of the call ABI switching target hook. The call
6069 register sets specific to FNDECL are set up. See also
6070 ix86_conditional_register_usage for more details. */
6071 void
6072 ix86_call_abi_override (const_tree fndecl)
6074 if (fndecl == NULL_TREE)
6075 cfun->machine->call_abi = ix86_abi;
6076 else
6077 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6080 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
6081 Avoid the expensive re-initialization of init_regs each time we switch
6082 function context, since this is needed only during RTL expansion. */
6083 static void
6084 ix86_maybe_switch_abi (void)
6086 if (TARGET_64BIT &&
6087 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6088 reinit_regs ();
6091 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6092 for a call to a function whose data type is FNTYPE.
6093 For a library call, FNTYPE is 0. */
6095 void
6096 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6097 tree fntype, /* tree ptr for function decl */
6098 rtx libname, /* SYMBOL_REF of library name or 0 */
6099 tree fndecl,
6100 int caller)
6102 struct cgraph_local_info *i;
6104 memset (cum, 0, sizeof (*cum));
6106 if (fndecl)
6108 i = cgraph_local_info (fndecl);
6109 cum->call_abi = ix86_function_abi (fndecl);
6111 else
6113 i = NULL;
6114 cum->call_abi = ix86_function_type_abi (fntype);
6117 cum->caller = caller;
6119 /* Set up the number of registers to use for passing arguments. */
6120 cum->nregs = ix86_regparm;
6121 if (TARGET_64BIT)
6123 cum->nregs = (cum->call_abi == SYSV_ABI
6124 ? X86_64_REGPARM_MAX
6125 : X86_64_MS_REGPARM_MAX);
6127 if (TARGET_SSE)
6129 cum->sse_nregs = SSE_REGPARM_MAX;
6130 if (TARGET_64BIT)
6132 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6133 ? X86_64_SSE_REGPARM_MAX
6134 : X86_64_MS_SSE_REGPARM_MAX);
6137 if (TARGET_MMX)
6138 cum->mmx_nregs = MMX_REGPARM_MAX;
6139 cum->warn_avx512f = true;
6140 cum->warn_avx = true;
6141 cum->warn_sse = true;
6142 cum->warn_mmx = true;
6144 /* Because the type might mismatch between caller and callee, we need to
6145 use the actual type of the function for local calls.
6146 FIXME: cgraph_analyze can be told to actually record if a function uses
6147 va_start, so for local functions maybe_vaarg can be made more aggressive,
6148 helping K&R code.
6149 FIXME: once the type system is fixed, we won't need this code anymore. */
6150 if (i && i->local && i->can_change_signature)
6151 fntype = TREE_TYPE (fndecl);
6152 cum->maybe_vaarg = (fntype
6153 ? (!prototype_p (fntype) || stdarg_p (fntype))
6154 : !libname);
6156 if (!TARGET_64BIT)
6158 /* If there are variable arguments, then we won't pass anything
6159 in registers in 32-bit mode. */
6160 if (stdarg_p (fntype))
6162 cum->nregs = 0;
6163 cum->sse_nregs = 0;
6164 cum->mmx_nregs = 0;
6165 cum->warn_avx512f = false;
6166 cum->warn_avx = false;
6167 cum->warn_sse = false;
6168 cum->warn_mmx = false;
6169 return;
6172 /* Use ecx and edx registers if function has fastcall attribute,
6173 else look for regparm information. */
6174 if (fntype)
6176 unsigned int ccvt = ix86_get_callcvt (fntype);
6177 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6179 cum->nregs = 1;
6180 cum->fastcall = 1; /* Same first register as in fastcall. */
6182 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6184 cum->nregs = 2;
6185 cum->fastcall = 1;
6187 else
6188 cum->nregs = ix86_function_regparm (fntype, fndecl);
6191 /* Set up the number of SSE registers used for passing SFmode
6192 and DFmode arguments. Warn for mismatching ABI. */
6193 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6197 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6198 But in the case of vector types, it is some vector mode.
6200 When we have only some of our vector isa extensions enabled, then there
6201 are some modes for which vector_mode_supported_p is false. For these
6202 modes, the generic vector support in gcc will choose some non-vector mode
6203 in order to implement the type. By computing the natural mode, we'll
6204 select the proper ABI location for the operand and not depend on whatever
6205 the middle-end decides to do with these vector types.
6207 The middle-end can't deal with vector types > 16 bytes. In this
6208 case, we return the original mode and warn ABI change if CUM isn't
6209 NULL.
6211 If IN_RETURN is true, warn about an ABI change if the vector mode isn't
6212 available for the function return value. */
6214 static enum machine_mode
6215 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6216 bool in_return)
6218 enum machine_mode mode = TYPE_MODE (type);
6220 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6222 HOST_WIDE_INT size = int_size_in_bytes (type);
6223 if ((size == 8 || size == 16 || size == 32 || size == 64)
6224 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6225 && TYPE_VECTOR_SUBPARTS (type) > 1)
6227 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6229 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6230 mode = MIN_MODE_VECTOR_FLOAT;
6231 else
6232 mode = MIN_MODE_VECTOR_INT;
6234 /* Get the mode which has this inner mode and number of units. */
6235 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6236 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6237 && GET_MODE_INNER (mode) == innermode)
6239 if (size == 64 && !TARGET_AVX512F)
6241 static bool warnedavx512f;
6242 static bool warnedavx512f_ret;
6244 if (cum && cum->warn_avx512f && !warnedavx512f)
6246 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6247 "without AVX512F enabled changes the ABI"))
6248 warnedavx512f = true;
6250 else if (in_return && !warnedavx512f_ret)
6252 if (warning (OPT_Wpsabi, "AVX512F vector return "
6253 "without AVX512F enabled changes the ABI"))
6254 warnedavx512f_ret = true;
6257 return TYPE_MODE (type);
6259 else if (size == 32 && !TARGET_AVX)
6261 static bool warnedavx;
6262 static bool warnedavx_ret;
6264 if (cum && cum->warn_avx && !warnedavx)
6266 if (warning (OPT_Wpsabi, "AVX vector argument "
6267 "without AVX enabled changes the ABI"))
6268 warnedavx = true;
6270 else if (in_return && !warnedavx_ret)
6272 if (warning (OPT_Wpsabi, "AVX vector return "
6273 "without AVX enabled changes the ABI"))
6274 warnedavx_ret = true;
6277 return TYPE_MODE (type);
6279 else if (((size == 8 && TARGET_64BIT) || size == 16)
6280 && !TARGET_SSE)
6282 static bool warnedsse;
6283 static bool warnedsse_ret;
6285 if (cum && cum->warn_sse && !warnedsse)
6287 if (warning (OPT_Wpsabi, "SSE vector argument "
6288 "without SSE enabled changes the ABI"))
6289 warnedsse = true;
6291 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6293 if (warning (OPT_Wpsabi, "SSE vector return "
6294 "without SSE enabled changes the ABI"))
6295 warnedsse_ret = true;
6298 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6300 static bool warnedmmx;
6301 static bool warnedmmx_ret;
6303 if (cum && cum->warn_mmx && !warnedmmx)
6305 if (warning (OPT_Wpsabi, "MMX vector argument "
6306 "without MMX enabled changes the ABI"))
6307 warnedmmx = true;
6309 else if (in_return && !warnedmmx_ret)
6311 if (warning (OPT_Wpsabi, "MMX vector return "
6312 "without MMX enabled changes the ABI"))
6313 warnedmmx_ret = true;
6316 return mode;
6319 gcc_unreachable ();
6323 return mode;
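/* Worked sketch (editor's addition; v8si is hypothetical): for

     typedef int v8si __attribute__ ((vector_size (32)));

   compiled without -mavx, the loop above finds the 32-byte vector mode,
   notices !TARGET_AVX, issues the -Wpsabi "AVX vector argument without
   AVX enabled changes the ABI" warning once, and falls back to
   TYPE_MODE (type).  */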
6326 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6327 this may not agree with the mode that the type system has chosen for the
6328 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6329 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6331 static rtx
6332 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6333 unsigned int regno)
6335 rtx tmp;
6337 if (orig_mode != BLKmode)
6338 tmp = gen_rtx_REG (orig_mode, regno);
6339 else
6341 tmp = gen_rtx_REG (mode, regno);
6342 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6343 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6346 return tmp;
6349 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6350 of this code is to classify each 8bytes of incoming argument by the register
6351 class and assign registers accordingly. */
6353 /* Return the union class of CLASS1 and CLASS2.
6354 See the x86-64 PS ABI for details. */
6356 static enum x86_64_reg_class
6357 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6359 /* Rule #1: If both classes are equal, this is the resulting class. */
6360 if (class1 == class2)
6361 return class1;
6363 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6364 the other class. */
6365 if (class1 == X86_64_NO_CLASS)
6366 return class2;
6367 if (class2 == X86_64_NO_CLASS)
6368 return class1;
6370 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6371 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6372 return X86_64_MEMORY_CLASS;
6374 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6375 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6376 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6377 return X86_64_INTEGERSI_CLASS;
6378 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6379 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6380 return X86_64_INTEGER_CLASS;
6382 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6383 MEMORY is used. */
6384 if (class1 == X86_64_X87_CLASS
6385 || class1 == X86_64_X87UP_CLASS
6386 || class1 == X86_64_COMPLEX_X87_CLASS
6387 || class2 == X86_64_X87_CLASS
6388 || class2 == X86_64_X87UP_CLASS
6389 || class2 == X86_64_COMPLEX_X87_CLASS)
6390 return X86_64_MEMORY_CLASS;
6392 /* Rule #6: Otherwise class SSE is used. */
6393 return X86_64_SSE_CLASS;
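/* Worked example of the rules above (editor's addition; struct s is
   hypothetical):

     struct s { double d; int i; int j; };

   spans two eightbytes; the first classifies as an SSE class for D and
   the second merges the two ints into X86_64_INTEGER_CLASS, so the
   struct is passed in one SSE register and one integer register.  */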
6396 /* Classify the argument of type TYPE and mode MODE.
6397 CLASSES will be filled by the register class used to pass each word
6398 of the operand. The number of words is returned. In case the parameter
6399 should be passed in memory, 0 is returned. As a special case for zero
6400 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6402 BIT_OFFSET is used internally for handling records and specifies the
6403 offset in bits modulo 512 to avoid overflow cases.
6405 See the x86-64 PS ABI for details.
6408 static int
6409 classify_argument (enum machine_mode mode, const_tree type,
6410 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6412 HOST_WIDE_INT bytes =
6413 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6414 int words
6415 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6417 /* Variable sized entities are always passed/returned in memory. */
6418 if (bytes < 0)
6419 return 0;
6421 if (mode != VOIDmode
6422 && targetm.calls.must_pass_in_stack (mode, type))
6423 return 0;
6425 /* Special case check for pointer to shared, on 64-bit target. */
6426 if (TARGET_64BIT && mode == TImode
6427 && type && TREE_CODE (type) == POINTER_TYPE
6428 && upc_shared_type_p (TREE_TYPE (type)))
6430 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6431 return 2;
6434 if (type && AGGREGATE_TYPE_P (type))
6436 int i;
6437 tree field;
6438 enum x86_64_reg_class subclasses[MAX_CLASSES];
6440 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6441 if (bytes > 64)
6442 return 0;
6444 for (i = 0; i < words; i++)
6445 classes[i] = X86_64_NO_CLASS;
6447 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6448 signal the memory class, so handle it as a special case. */
6449 if (!words)
6451 classes[0] = X86_64_NO_CLASS;
6452 return 1;
6455 /* Classify each field of record and merge classes. */
6456 switch (TREE_CODE (type))
6458 case RECORD_TYPE:
6459 /* And now merge the fields of structure. */
6460 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6462 if (TREE_CODE (field) == FIELD_DECL)
6464 int num;
6466 if (TREE_TYPE (field) == error_mark_node)
6467 continue;
6469 /* Bitfields are always classified as integer. Handle them
6470 early, since later code would consider them to be
6471 misaligned integers. */
6472 if (DECL_BIT_FIELD (field))
6474 for (i = (int_bit_position (field)
6475 + (bit_offset % 64)) / 8 / 8;
6476 i < ((int_bit_position (field) + (bit_offset % 64))
6477 + tree_to_shwi (DECL_SIZE (field))
6478 + 63) / 8 / 8; i++)
6479 classes[i] =
6480 merge_classes (X86_64_INTEGER_CLASS,
6481 classes[i]);
6483 else
6485 int pos;
6487 type = TREE_TYPE (field);
6489 /* Flexible array member is ignored. */
6490 if (TYPE_MODE (type) == BLKmode
6491 && TREE_CODE (type) == ARRAY_TYPE
6492 && TYPE_SIZE (type) == NULL_TREE
6493 && TYPE_DOMAIN (type) != NULL_TREE
6494 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6495 == NULL_TREE))
6497 static bool warned;
6499 if (!warned && warn_psabi)
6501 warned = true;
6502 inform (input_location,
6503 "the ABI of passing struct with"
6504 " a flexible array member has"
6505 " changed in GCC 4.4");
6507 continue;
6509 num = classify_argument (TYPE_MODE (type), type,
6510 subclasses,
6511 (int_bit_position (field)
6512 + bit_offset) % 512);
6513 if (!num)
6514 return 0;
6515 pos = (int_bit_position (field)
6516 + (bit_offset % 64)) / 8 / 8;
6517 for (i = 0; i < num && (i + pos) < words; i++)
6518 classes[i + pos] =
6519 merge_classes (subclasses[i], classes[i + pos]);
6523 break;
6525 case ARRAY_TYPE:
6526 /* Arrays are handled as small records. */
6528 int num;
6529 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6530 TREE_TYPE (type), subclasses, bit_offset);
6531 if (!num)
6532 return 0;
6534 /* The partial classes are now full classes. */
6535 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6536 subclasses[0] = X86_64_SSE_CLASS;
6537 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6538 && !((bit_offset % 64) == 0 && bytes == 4))
6539 subclasses[0] = X86_64_INTEGER_CLASS;
6541 for (i = 0; i < words; i++)
6542 classes[i] = subclasses[i % num];
6544 break;
6546 case UNION_TYPE:
6547 case QUAL_UNION_TYPE:
6548 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
6550 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6552 if (TREE_CODE (field) == FIELD_DECL)
6554 int num;
6556 if (TREE_TYPE (field) == error_mark_node)
6557 continue;
6559 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6560 TREE_TYPE (field), subclasses,
6561 bit_offset);
6562 if (!num)
6563 return 0;
6564 for (i = 0; i < num; i++)
6565 classes[i] = merge_classes (subclasses[i], classes[i]);
6568 break;
6570 default:
6571 gcc_unreachable ();
6574 if (words > 2)
6576 /* When the size exceeds 16 bytes, if the first eightbyte isn't
6577 X86_64_SSE_CLASS or any of the remaining ones isn't
6578 X86_64_SSEUP_CLASS, everything should be passed in
6579 memory. */
6580 if (classes[0] != X86_64_SSE_CLASS)
6581 return 0;
6583 for (i = 1; i < words; i++)
6584 if (classes[i] != X86_64_SSEUP_CLASS)
6585 return 0;
6588 /* Final merger cleanup. */
6589 for (i = 0; i < words; i++)
6591 /* If one class is MEMORY, everything should be passed in
6592 memory. */
6593 if (classes[i] == X86_64_MEMORY_CLASS)
6594 return 0;
6596 /* X86_64_SSEUP_CLASS should always be preceded by
6597 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6598 if (classes[i] == X86_64_SSEUP_CLASS
6599 && classes[i - 1] != X86_64_SSE_CLASS
6600 && classes[i - 1] != X86_64_SSEUP_CLASS)
6602 /* The first one should never be X86_64_SSEUP_CLASS. */
6603 gcc_assert (i != 0);
6604 classes[i] = X86_64_SSE_CLASS;
6607 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6608 everything should be passed in memory. */
6609 if (classes[i] == X86_64_X87UP_CLASS
6610 && (classes[i - 1] != X86_64_X87_CLASS))
6612 static bool warned;
6614 /* The first one should never be X86_64_X87UP_CLASS. */
6615 gcc_assert (i != 0);
6616 if (!warned && warn_psabi)
6618 warned = true;
6619 inform (input_location,
6620 "the ABI of passing union with long double"
6621 " has changed in GCC 4.4");
6623 return 0;
6626 return words;
6629 /* Compute the alignment needed. We align all types to natural boundaries with
6630 the exception of XFmode, which is aligned to 64 bits. */
6631 if (mode != VOIDmode && mode != BLKmode)
6633 int mode_alignment = GET_MODE_BITSIZE (mode);
6635 if (mode == XFmode)
6636 mode_alignment = 128;
6637 else if (mode == XCmode)
6638 mode_alignment = 256;
6639 if (COMPLEX_MODE_P (mode))
6640 mode_alignment /= 2;
6641 /* Misaligned fields are always returned in memory. */
6642 if (bit_offset % mode_alignment)
6643 return 0;
6646 /* For V1xx modes, just use the base mode. */
6647 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6648 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6649 mode = GET_MODE_INNER (mode);
6651 /* Classification of atomic types. */
6652 switch (mode)
6654 case SDmode:
6655 case DDmode:
6656 classes[0] = X86_64_SSE_CLASS;
6657 return 1;
6658 case TDmode:
6659 classes[0] = X86_64_SSE_CLASS;
6660 classes[1] = X86_64_SSEUP_CLASS;
6661 return 2;
6662 case DImode:
6663 case SImode:
6664 case HImode:
6665 case QImode:
6666 case CSImode:
6667 case CHImode:
6668 case CQImode:
6670 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6672 /* Analyze last 128 bits only. */
6673 size = (size - 1) & 0x7f;
6675 if (size < 32)
6677 classes[0] = X86_64_INTEGERSI_CLASS;
6678 return 1;
6680 else if (size < 64)
6682 classes[0] = X86_64_INTEGER_CLASS;
6683 return 1;
6685 else if (size < 64+32)
6687 classes[0] = X86_64_INTEGER_CLASS;
6688 classes[1] = X86_64_INTEGERSI_CLASS;
6689 return 2;
6691 else if (size < 64+64)
6693 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6694 return 2;
6696 else
6697 gcc_unreachable ();
6699 case CDImode:
6700 case TImode:
6701 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6702 return 2;
6703 case COImode:
6704 case OImode:
6705 /* OImode shouldn't be used directly. */
6706 gcc_unreachable ();
6707 case CTImode:
6708 return 0;
6709 case SFmode:
6710 if (!(bit_offset % 64))
6711 classes[0] = X86_64_SSESF_CLASS;
6712 else
6713 classes[0] = X86_64_SSE_CLASS;
6714 return 1;
6715 case DFmode:
6716 classes[0] = X86_64_SSEDF_CLASS;
6717 return 1;
6718 case XFmode:
6719 classes[0] = X86_64_X87_CLASS;
6720 classes[1] = X86_64_X87UP_CLASS;
6721 return 2;
6722 case TFmode:
6723 classes[0] = X86_64_SSE_CLASS;
6724 classes[1] = X86_64_SSEUP_CLASS;
6725 return 2;
6726 case SCmode:
6727 classes[0] = X86_64_SSE_CLASS;
6728 if (!(bit_offset % 64))
6729 return 1;
6730 else
6732 static bool warned;
6734 if (!warned && warn_psabi)
6736 warned = true;
6737 inform (input_location,
6738 "the ABI of passing structure with complex float"
6739 " member has changed in GCC 4.4");
6741 classes[1] = X86_64_SSESF_CLASS;
6742 return 2;
6744 case DCmode:
6745 classes[0] = X86_64_SSEDF_CLASS;
6746 classes[1] = X86_64_SSEDF_CLASS;
6747 return 2;
6748 case XCmode:
6749 classes[0] = X86_64_COMPLEX_X87_CLASS;
6750 return 1;
6751 case TCmode:
6752 /* This mode is larger than 16 bytes. */
6753 return 0;
6754 case V8SFmode:
6755 case V8SImode:
6756 case V32QImode:
6757 case V16HImode:
6758 case V4DFmode:
6759 case V4DImode:
6760 classes[0] = X86_64_SSE_CLASS;
6761 classes[1] = X86_64_SSEUP_CLASS;
6762 classes[2] = X86_64_SSEUP_CLASS;
6763 classes[3] = X86_64_SSEUP_CLASS;
6764 return 4;
6765 case V8DFmode:
6766 case V16SFmode:
6767 case V8DImode:
6768 case V16SImode:
6769 case V32HImode:
6770 case V64QImode:
6771 classes[0] = X86_64_SSE_CLASS;
6772 classes[1] = X86_64_SSEUP_CLASS;
6773 classes[2] = X86_64_SSEUP_CLASS;
6774 classes[3] = X86_64_SSEUP_CLASS;
6775 classes[4] = X86_64_SSEUP_CLASS;
6776 classes[5] = X86_64_SSEUP_CLASS;
6777 classes[6] = X86_64_SSEUP_CLASS;
6778 classes[7] = X86_64_SSEUP_CLASS;
6779 return 8;
6780 case V4SFmode:
6781 case V4SImode:
6782 case V16QImode:
6783 case V8HImode:
6784 case V2DFmode:
6785 case V2DImode:
6786 classes[0] = X86_64_SSE_CLASS;
6787 classes[1] = X86_64_SSEUP_CLASS;
6788 return 2;
6789 case V1TImode:
6790 case V1DImode:
6791 case V2SFmode:
6792 case V2SImode:
6793 case V4HImode:
6794 case V8QImode:
6795 classes[0] = X86_64_SSE_CLASS;
6796 return 1;
6797 case BLKmode:
6798 case VOIDmode:
6799 return 0;
6800 default:
6801 gcc_assert (VECTOR_MODE_P (mode));
6803 if (bytes > 16)
6804 return 0;
6806 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6808 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6809 classes[0] = X86_64_INTEGERSI_CLASS;
6810 else
6811 classes[0] = X86_64_INTEGER_CLASS;
6812 classes[1] = X86_64_INTEGER_CLASS;
6813 return 1 + (bytes > 8);
6817 /* Examine the argument and set the number of registers required in each
6818 class. Return 0 iff the parameter should be passed in memory. */
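/* For example, a long double (XFmode) argument classifies as
   {X86_64_X87_CLASS, X86_64_X87UP_CLASS}; since the x87 classes are only
   allowed for return values, examine_argument returns 0 for it as an
   argument (it goes on the stack) but nonzero when IN_RETURN is set.  */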
6819 static int
6820 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6821 int *int_nregs, int *sse_nregs)
6823 enum x86_64_reg_class regclass[MAX_CLASSES];
6824 int n = classify_argument (mode, type, regclass, 0);
6826 *int_nregs = 0;
6827 *sse_nregs = 0;
6828 if (!n)
6829 return 0;
6830 for (n--; n >= 0; n--)
6831 switch (regclass[n])
6833 case X86_64_INTEGER_CLASS:
6834 case X86_64_INTEGERSI_CLASS:
6835 (*int_nregs)++;
6836 break;
6837 case X86_64_SSE_CLASS:
6838 case X86_64_SSESF_CLASS:
6839 case X86_64_SSEDF_CLASS:
6840 (*sse_nregs)++;
6841 break;
6842 case X86_64_NO_CLASS:
6843 case X86_64_SSEUP_CLASS:
6844 break;
6845 case X86_64_X87_CLASS:
6846 case X86_64_X87UP_CLASS:
6847 if (!in_return)
6848 return 0;
6849 break;
6850 case X86_64_COMPLEX_X87_CLASS:
6851 return in_return ? 2 : 0;
6852 case X86_64_MEMORY_CLASS:
6853 gcc_unreachable ();
6855 return 1;
6858 /* Construct container for the argument used by GCC interface. See
6859 FUNCTION_ARG for the detailed description. */
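/* For example, a __int128 (TImode) argument classifies as two
   X86_64_INTEGER_CLASS eightbytes; when the two integer registers handed
   to us are consecutive, the simple cases below return a single TImode
   REG instead of building a PARALLEL.  */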
6861 static rtx
6862 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6863 const_tree type, int in_return, int nintregs, int nsseregs,
6864 const int *intreg, int sse_regno)
6866 /* The following variables hold the static issued_error state. */
6867 static bool issued_sse_arg_error;
6868 static bool issued_sse_ret_error;
6869 static bool issued_x87_ret_error;
6871 enum machine_mode tmpmode;
6872 int bytes =
6873 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6874 enum x86_64_reg_class regclass[MAX_CLASSES];
6875 int n;
6876 int i;
6877 int nexps = 0;
6878 int needed_sseregs, needed_intregs;
6879 rtx exp[MAX_CLASSES];
6880 rtx ret;
6882 n = classify_argument (mode, type, regclass, 0);
6883 if (!n)
6884 return NULL;
6885 if (!examine_argument (mode, type, in_return, &needed_intregs,
6886 &needed_sseregs))
6887 return NULL;
6888 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6889 return NULL;
6891 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6892 some less clueful developer tries to use floating-point anyway. */
6893 if (needed_sseregs && !TARGET_SSE)
6895 if (in_return)
6897 if (!issued_sse_ret_error)
6899 error ("SSE register return with SSE disabled");
6900 issued_sse_ret_error = true;
6903 else if (!issued_sse_arg_error)
6905 error ("SSE register argument with SSE disabled");
6906 issued_sse_arg_error = true;
6908 return NULL;
6911 /* Likewise, error if the ABI requires us to return values in the
6912 x87 registers and the user specified -mno-80387. */
6913 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
6914 for (i = 0; i < n; i++)
6915 if (regclass[i] == X86_64_X87_CLASS
6916 || regclass[i] == X86_64_X87UP_CLASS
6917 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6919 if (!issued_x87_ret_error)
6921 error ("x87 register return with x87 disabled");
6922 issued_x87_ret_error = true;
6924 return NULL;
6927 /* First construct simple cases. Avoid SCmode, since we want to use
6928 a single register to pass this type. */
6929 if (n == 1 && mode != SCmode)
6930 switch (regclass[0])
6932 case X86_64_INTEGER_CLASS:
6933 case X86_64_INTEGERSI_CLASS:
6934 return gen_rtx_REG (mode, intreg[0]);
6935 case X86_64_SSE_CLASS:
6936 case X86_64_SSESF_CLASS:
6937 case X86_64_SSEDF_CLASS:
6938 if (mode != BLKmode)
6939 return gen_reg_or_parallel (mode, orig_mode,
6940 SSE_REGNO (sse_regno));
6941 break;
6942 case X86_64_X87_CLASS:
6943 case X86_64_COMPLEX_X87_CLASS:
6944 return gen_rtx_REG (mode, FIRST_STACK_REG);
6945 case X86_64_NO_CLASS:
6946 /* Zero sized array, struct or class. */
6947 return NULL;
6948 default:
6949 gcc_unreachable ();
6951 if (n == 2
6952 && regclass[0] == X86_64_SSE_CLASS
6953 && regclass[1] == X86_64_SSEUP_CLASS
6954 && mode != BLKmode)
6955 return gen_reg_or_parallel (mode, orig_mode,
6956 SSE_REGNO (sse_regno));
6957 if (n == 4
6958 && regclass[0] == X86_64_SSE_CLASS
6959 && regclass[1] == X86_64_SSEUP_CLASS
6960 && regclass[2] == X86_64_SSEUP_CLASS
6961 && regclass[3] == X86_64_SSEUP_CLASS
6962 && mode != BLKmode)
6963 return gen_reg_or_parallel (mode, orig_mode,
6964 SSE_REGNO (sse_regno));
6965 if (n == 8
6966 && regclass[0] == X86_64_SSE_CLASS
6967 && regclass[1] == X86_64_SSEUP_CLASS
6968 && regclass[2] == X86_64_SSEUP_CLASS
6969 && regclass[3] == X86_64_SSEUP_CLASS
6970 && regclass[4] == X86_64_SSEUP_CLASS
6971 && regclass[5] == X86_64_SSEUP_CLASS
6972 && regclass[6] == X86_64_SSEUP_CLASS
6973 && regclass[7] == X86_64_SSEUP_CLASS
6974 && mode != BLKmode)
6975 return gen_reg_or_parallel (mode, orig_mode,
6976 SSE_REGNO (sse_regno));
6977 if (n == 2
6978 && regclass[0] == X86_64_X87_CLASS
6979 && regclass[1] == X86_64_X87UP_CLASS)
6980 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6982 if (n == 2
6983 && regclass[0] == X86_64_INTEGER_CLASS
6984 && regclass[1] == X86_64_INTEGER_CLASS
6985 && (mode == CDImode || mode == TImode)
6986 && intreg[0] + 1 == intreg[1])
6987 return gen_rtx_REG (mode, intreg[0]);
6989 /* Otherwise figure out the entries of the PARALLEL. */
6990 for (i = 0; i < n; i++)
6992 int pos;
6994 switch (regclass[i])
6996 case X86_64_NO_CLASS:
6997 break;
6998 case X86_64_INTEGER_CLASS:
6999 case X86_64_INTEGERSI_CLASS:
7000 /* Merge TImodes on aligned occasions here too. */
7001 if (i * 8 + 8 > bytes)
7002 tmpmode
7003 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7004 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7005 tmpmode = SImode;
7006 else
7007 tmpmode = DImode;
7008 /* We've requested 24 bytes that we
7009 don't have a mode for. Use DImode. */
7010 if (tmpmode == BLKmode)
7011 tmpmode = DImode;
7012 exp [nexps++]
7013 = gen_rtx_EXPR_LIST (VOIDmode,
7014 gen_rtx_REG (tmpmode, *intreg),
7015 GEN_INT (i*8));
7016 intreg++;
7017 break;
7018 case X86_64_SSESF_CLASS:
7019 exp [nexps++]
7020 = gen_rtx_EXPR_LIST (VOIDmode,
7021 gen_rtx_REG (SFmode,
7022 SSE_REGNO (sse_regno)),
7023 GEN_INT (i*8));
7024 sse_regno++;
7025 break;
7026 case X86_64_SSEDF_CLASS:
7027 exp [nexps++]
7028 = gen_rtx_EXPR_LIST (VOIDmode,
7029 gen_rtx_REG (DFmode,
7030 SSE_REGNO (sse_regno)),
7031 GEN_INT (i*8));
7032 sse_regno++;
7033 break;
7034 case X86_64_SSE_CLASS:
7035 pos = i;
7036 switch (n)
7038 case 1:
7039 tmpmode = DImode;
7040 break;
7041 case 2:
7042 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7044 tmpmode = TImode;
7045 i++;
7047 else
7048 tmpmode = DImode;
7049 break;
7050 case 4:
7051 gcc_assert (i == 0
7052 && regclass[1] == X86_64_SSEUP_CLASS
7053 && regclass[2] == X86_64_SSEUP_CLASS
7054 && regclass[3] == X86_64_SSEUP_CLASS);
7055 tmpmode = OImode;
7056 i += 3;
7057 break;
7058 case 8:
7059 gcc_assert (i == 0
7060 && regclass[1] == X86_64_SSEUP_CLASS
7061 && regclass[2] == X86_64_SSEUP_CLASS
7062 && regclass[3] == X86_64_SSEUP_CLASS
7063 && regclass[4] == X86_64_SSEUP_CLASS
7064 && regclass[5] == X86_64_SSEUP_CLASS
7065 && regclass[6] == X86_64_SSEUP_CLASS
7066 && regclass[7] == X86_64_SSEUP_CLASS);
7067 tmpmode = XImode;
7068 i += 7;
7069 break;
7070 default:
7071 gcc_unreachable ();
7073 exp [nexps++]
7074 = gen_rtx_EXPR_LIST (VOIDmode,
7075 gen_rtx_REG (tmpmode,
7076 SSE_REGNO (sse_regno)),
7077 GEN_INT (pos*8));
7078 sse_regno++;
7079 break;
7080 default:
7081 gcc_unreachable ();
7085 /* Empty aligned struct, union or class. */
7086 if (nexps == 0)
7087 return NULL;
7089 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7090 for (i = 0; i < nexps; i++)
7091 XVECEXP (ret, 0, i) = exp [i];
7092 return ret;
7095 /* Update the data in CUM to advance over an argument of mode MODE
7096 and data type TYPE. (TYPE is null for libcalls where that information
7097 may not be available.) */
7099 static void
7100 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7101 const_tree type, HOST_WIDE_INT bytes,
7102 HOST_WIDE_INT words)
7104 switch (mode)
7106 default:
7107 break;
7109 case BLKmode:
7110 if (bytes < 0)
7111 break;
7112 /* FALLTHRU */
7114 case DImode:
7115 case SImode:
7116 case HImode:
7117 case QImode:
7118 cum->words += words;
7119 cum->nregs -= words;
7120 cum->regno += words;
7122 if (cum->nregs <= 0)
7124 cum->nregs = 0;
7125 cum->regno = 0;
7127 break;
7129 case OImode:
7130 /* OImode shouldn't be used directly. */
7131 gcc_unreachable ();
7133 case DFmode:
7134 if (cum->float_in_sse < 2)
7135 break;
7136 case SFmode:
7137 if (cum->float_in_sse < 1)
7138 break;
7139 /* FALLTHRU */
7141 case V8SFmode:
7142 case V8SImode:
7143 case V64QImode:
7144 case V32HImode:
7145 case V16SImode:
7146 case V8DImode:
7147 case V16SFmode:
7148 case V8DFmode:
7149 case V32QImode:
7150 case V16HImode:
7151 case V4DFmode:
7152 case V4DImode:
7153 case TImode:
7154 case V16QImode:
7155 case V8HImode:
7156 case V4SImode:
7157 case V2DImode:
7158 case V4SFmode:
7159 case V2DFmode:
7160 if (!type || !AGGREGATE_TYPE_P (type))
7162 cum->sse_words += words;
7163 cum->sse_nregs -= 1;
7164 cum->sse_regno += 1;
7165 if (cum->sse_nregs <= 0)
7167 cum->sse_nregs = 0;
7168 cum->sse_regno = 0;
7171 break;
7173 case V8QImode:
7174 case V4HImode:
7175 case V2SImode:
7176 case V2SFmode:
7177 case V1TImode:
7178 case V1DImode:
7179 if (!type || !AGGREGATE_TYPE_P (type))
7181 cum->mmx_words += words;
7182 cum->mmx_nregs -= 1;
7183 cum->mmx_regno += 1;
7184 if (cum->mmx_nregs <= 0)
7186 cum->mmx_nregs = 0;
7187 cum->mmx_regno = 0;
7190 break;
7194 static void
7195 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7196 const_tree type, HOST_WIDE_INT words, bool named)
7198 int int_nregs, sse_nregs;
7200 /* Unnamed 512 and 256bit vector mode parameters are passed on the stack. */
7201 if (!named && (VALID_AVX512F_REG_MODE (mode)
7202 || VALID_AVX256_REG_MODE (mode)))
7203 return;
7205 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7206 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7208 cum->nregs -= int_nregs;
7209 cum->sse_nregs -= sse_nregs;
7210 cum->regno += int_nregs;
7211 cum->sse_regno += sse_nregs;
7213 else
7215 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7216 cum->words = (cum->words + align - 1) & ~(align - 1);
7217 cum->words += words;
7221 static void
7222 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7223 HOST_WIDE_INT words)
7225 /* Otherwise, this should be passed indirect. */
7226 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7228 cum->words += words;
7229 if (cum->nregs > 0)
7231 cum->nregs -= 1;
7232 cum->regno += 1;
7236 /* Update the data in CUM to advance over an argument of mode MODE and
7237 data type TYPE. (TYPE is null for libcalls where that information
7238 may not be available.) */
7240 static void
7241 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7242 const_tree type, bool named)
7244 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7245 HOST_WIDE_INT bytes, words;
7247 if (mode == BLKmode)
7248 bytes = int_size_in_bytes (type);
7249 else
7250 bytes = GET_MODE_SIZE (mode);
7251 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7253 if (type)
7254 mode = type_natural_mode (type, NULL, false);
7256 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7257 function_arg_advance_ms_64 (cum, bytes, words);
7258 else if (TARGET_64BIT)
7259 function_arg_advance_64 (cum, mode, type, words, named);
7260 else
7261 function_arg_advance_32 (cum, mode, type, bytes, words);
7264 /* Define where to put the arguments to a function.
7265 Value is zero to push the argument on the stack,
7266 or a hard register in which to store the argument.
7268 MODE is the argument's machine mode.
7269 TYPE is the data type of the argument (as a tree).
7270 This is null for libcalls where that information may
7271 not be available.
7272 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7273 the preceding args and about the function being called.
7274 NAMED is nonzero if this argument is a named parameter
7275 (otherwise it is an extra parameter matching an ellipsis). */
7277 static rtx
7278 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7279 enum machine_mode orig_mode, const_tree type,
7280 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7282 /* Avoid the AL settings for the Unix64 ABI. */
7283 if (mode == VOIDmode)
7284 return constm1_rtx;
7286 switch (mode)
7288 default:
7289 break;
7291 case BLKmode:
7292 if (bytes < 0)
7293 break;
7294 /* FALLTHRU */
7295 case DImode:
7296 case SImode:
7297 case HImode:
7298 case QImode:
7299 if (words <= cum->nregs)
7301 int regno = cum->regno;
7303 /* Fastcall allocates the first two DWORD (SImode) or
7304 smaller arguments to ECX and EDX if the argument isn't an
7305 aggregate type. */
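/* For example, with
     __attribute__((fastcall)) int f (int a, int b, int c);
   a lands in %ecx, b in %edx, and c is pushed on the stack.  */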
7306 if (cum->fastcall)
7308 if (mode == BLKmode
7309 || mode == DImode
7310 || (type && AGGREGATE_TYPE_P (type)))
7311 break;
7313 /* ECX, not EAX, is the first allocated register. */
7314 if (regno == AX_REG)
7315 regno = CX_REG;
7317 return gen_rtx_REG (mode, regno);
7319 break;
7321 case DFmode:
7322 if (cum->float_in_sse < 2)
7323 break;
7324 case SFmode:
7325 if (cum->float_in_sse < 1)
7326 break;
7327 /* FALLTHRU */
7328 case TImode:
7329 /* In 32bit, we pass TImode in xmm registers. */
7330 case V16QImode:
7331 case V8HImode:
7332 case V4SImode:
7333 case V2DImode:
7334 case V4SFmode:
7335 case V2DFmode:
7336 if (!type || !AGGREGATE_TYPE_P (type))
7338 if (cum->sse_nregs)
7339 return gen_reg_or_parallel (mode, orig_mode,
7340 cum->sse_regno + FIRST_SSE_REG);
7342 break;
7344 case OImode:
7345 case XImode:
7346 /* OImode and XImode shouldn't be used directly. */
7347 gcc_unreachable ();
7349 case V64QImode:
7350 case V32HImode:
7351 case V16SImode:
7352 case V8DImode:
7353 case V16SFmode:
7354 case V8DFmode:
7355 case V8SFmode:
7356 case V8SImode:
7357 case V32QImode:
7358 case V16HImode:
7359 case V4DFmode:
7360 case V4DImode:
7361 if (!type || !AGGREGATE_TYPE_P (type))
7363 if (cum->sse_nregs)
7364 return gen_reg_or_parallel (mode, orig_mode,
7365 cum->sse_regno + FIRST_SSE_REG);
7367 break;
7369 case V8QImode:
7370 case V4HImode:
7371 case V2SImode:
7372 case V2SFmode:
7373 case V1TImode:
7374 case V1DImode:
7375 if (!type || !AGGREGATE_TYPE_P (type))
7377 if (cum->mmx_nregs)
7378 return gen_reg_or_parallel (mode, orig_mode,
7379 cum->mmx_regno + FIRST_MMX_REG);
7381 break;
7384 return NULL_RTX;
7387 static rtx
7388 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7389 enum machine_mode orig_mode, const_tree type, bool named)
7391 /* Handle a hidden AL argument containing number of registers
7392 for varargs x86-64 functions. */
7393 if (mode == VOIDmode)
7394 return GEN_INT (cum->maybe_vaarg
7395 ? (cum->sse_nregs < 0
7396 ? X86_64_SSE_REGPARM_MAX
7397 : cum->sse_regno)
7398 : -1);
7400 switch (mode)
7402 default:
7403 break;
7405 case V8SFmode:
7406 case V8SImode:
7407 case V32QImode:
7408 case V16HImode:
7409 case V4DFmode:
7410 case V4DImode:
7411 case V16SFmode:
7412 case V16SImode:
7413 case V64QImode:
7414 case V32HImode:
7415 case V8DFmode:
7416 case V8DImode:
7417 /* Unnamed 256 and 512bit vector mode parameters are passed on the stack. */
7418 if (!named)
7419 return NULL;
7420 break;
7423 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7424 cum->sse_nregs,
7425 &x86_64_int_parameter_registers [cum->regno],
7426 cum->sse_regno);
7429 static rtx
7430 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7431 enum machine_mode orig_mode, bool named,
7432 HOST_WIDE_INT bytes)
7434 unsigned int regno;
7436 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7437 We use the value -2 to specify that the current function call is MSABI. */
7438 if (mode == VOIDmode)
7439 return GEN_INT (-2);
7441 /* If we've run out of registers, it goes on the stack. */
7442 if (cum->nregs == 0)
7443 return NULL_RTX;
7445 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7447 /* Only floating point modes are passed in anything but integer regs. */
7448 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7450 if (named)
7451 regno = cum->regno + FIRST_SSE_REG;
7452 else
7454 rtx t1, t2;
7456 /* Unnamed floating parameters are passed in both the
7457 SSE and integer registers. */
7458 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7459 t2 = gen_rtx_REG (mode, regno);
7460 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7461 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7462 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7465 /* Handle aggregate types passed in registers. */
7466 if (orig_mode == BLKmode)
7468 if (bytes > 0 && bytes <= 8)
7469 mode = (bytes > 4 ? DImode : SImode);
7470 if (mode == BLKmode)
7471 mode = DImode;
7474 return gen_reg_or_parallel (mode, orig_mode, regno);
7477 /* Return where to put the arguments to a function.
7478 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7480 MODE is the argument's machine mode. TYPE is the data type of the
7481 argument. It is null for libcalls where that information may not be
7482 available. CUM gives information about the preceding args and about
7483 the function being called. NAMED is nonzero if this argument is a
7484 named parameter (otherwise it is an extra parameter matching an
7485 ellipsis). */
7487 static rtx
7488 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7489 const_tree type, bool named)
7491 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7492 enum machine_mode mode = omode;
7493 HOST_WIDE_INT bytes, words;
7494 rtx arg;
7496 if (mode == BLKmode)
7497 bytes = int_size_in_bytes (type);
7498 else
7499 bytes = GET_MODE_SIZE (mode);
7500 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7502 /* To simplify the code below, represent vector types with a vector mode
7503 even if MMX/SSE are not active. */
7504 if (type && TREE_CODE (type) == VECTOR_TYPE)
7505 mode = type_natural_mode (type, cum, false);
7507 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7508 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7509 else if (TARGET_64BIT)
7510 arg = function_arg_64 (cum, mode, omode, type, named);
7511 else
7512 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7514 return arg;
7517 /* A C expression that indicates when an argument must be passed by
7518 reference. If nonzero for an argument, a copy of that argument is
7519 made in memory and a pointer to the argument is passed instead of
7520 the argument itself. The pointer is passed in whatever way is
7521 appropriate for passing a pointer to that type. */
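/* For example, under the MS x64 convention an argument whose size is not
   1, 2, 4 or 8 bytes (a 12-byte struct, a __m128, any array type) is
   passed by reference, while an 8-byte struct is passed by value in a
   register; this is what the MS_ABI branch below implements.  */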
7523 static bool
7524 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7525 const_tree type, bool named ATTRIBUTE_UNUSED)
7527 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7529 /* See Windows x64 Software Convention. */
7530 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7532 int msize = (int) GET_MODE_SIZE (mode);
7533 if (type)
7535 /* Arrays are passed by reference. */
7536 if (TREE_CODE (type) == ARRAY_TYPE)
7537 return true;
7539 if (AGGREGATE_TYPE_P (type))
7541 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7542 are passed by reference. */
7543 msize = int_size_in_bytes (type);
7547 /* __m128 is passed by reference. */
7548 switch (msize) {
7549 case 1: case 2: case 4: case 8:
7550 break;
7551 default:
7552 return true;
7555 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7556 return 1;
7558 return 0;
7561 /* Return true when TYPE should be 128bit aligned for 32bit argument
7562 passing ABI. XXX: This function is obsolete and is only used for
7563 checking psABI compatibility with previous versions of GCC. */
7565 static bool
7566 ix86_compat_aligned_value_p (const_tree type)
7568 enum machine_mode mode = TYPE_MODE (type);
7569 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7570 || mode == TDmode
7571 || mode == TFmode
7572 || mode == TCmode)
7573 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7574 return true;
7575 if (TYPE_ALIGN (type) < 128)
7576 return false;
7578 if (AGGREGATE_TYPE_P (type))
7580 /* Walk the aggregates recursively. */
7581 switch (TREE_CODE (type))
7583 case RECORD_TYPE:
7584 case UNION_TYPE:
7585 case QUAL_UNION_TYPE:
7587 tree field;
7589 /* Walk all the structure fields. */
7590 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7592 if (TREE_CODE (field) == FIELD_DECL
7593 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7594 return true;
7596 break;
7599 case ARRAY_TYPE:
7600 /* Just for use if some language passes arrays by value. */
7601 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7602 return true;
7603 break;
7605 default:
7606 gcc_unreachable ();
7609 return false;
7612 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7613 XXX: This function is obsolete and is only used for checking psABI
7614 compatibility with previous versions of GCC. */
7616 static unsigned int
7617 ix86_compat_function_arg_boundary (enum machine_mode mode,
7618 const_tree type, unsigned int align)
7620 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7621 natural boundaries. */
7622 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7624 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7625 make an exception for SSE modes since these require 128bit
7626 alignment.
7628 The handling here differs from field_alignment. ICC aligns MMX
7629 arguments to 4 byte boundaries, while structure fields are aligned
7630 to 8 byte boundaries. */
7631 if (!type)
7633 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7634 align = PARM_BOUNDARY;
7636 else
7638 if (!ix86_compat_aligned_value_p (type))
7639 align = PARM_BOUNDARY;
7642 if (align > BIGGEST_ALIGNMENT)
7643 align = BIGGEST_ALIGNMENT;
7644 return align;
7647 /* Return true when TYPE should be 128bit aligned for 32bit argument
7648 passing ABI. */
7650 static bool
7651 ix86_contains_aligned_value_p (const_tree type)
7653 enum machine_mode mode = TYPE_MODE (type);
7655 if (mode == XFmode || mode == XCmode)
7656 return false;
7658 if (TYPE_ALIGN (type) < 128)
7659 return false;
7661 if (AGGREGATE_TYPE_P (type))
7663 /* Walk the aggregates recursively. */
7664 switch (TREE_CODE (type))
7666 case RECORD_TYPE:
7667 case UNION_TYPE:
7668 case QUAL_UNION_TYPE:
7670 tree field;
7672 /* Walk all the structure fields. */
7673 for (field = TYPE_FIELDS (type);
7674 field;
7675 field = DECL_CHAIN (field))
7677 if (TREE_CODE (field) == FIELD_DECL
7678 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7679 return true;
7681 break;
7684 case ARRAY_TYPE:
7685 /* Just for use if some language passes arrays by value. */
7686 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7687 return true;
7688 break;
7690 default:
7691 gcc_unreachable ();
7694 else
7695 return TYPE_ALIGN (type) >= 128;
7697 return false;
7700 /* Gives the alignment boundary, in bits, of an argument with the
7701 specified mode and type. */
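/* For example, on ia32 a plain int argument keeps the default
   PARM_BOUNDARY (32-bit) alignment, whereas an argument of type __m128,
   or an aggregate containing one, is aligned to 128 bits on the stack.  */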
7703 static unsigned int
7704 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7706 unsigned int align;
7707 if (type)
7709 /* Since the main variant type is used for the call, convert the
7710 passed type to its main variant. */
7711 type = TYPE_MAIN_VARIANT (type);
7712 align = TYPE_ALIGN (type);
7714 else
7715 align = GET_MODE_ALIGNMENT (mode);
7716 if (align < PARM_BOUNDARY)
7717 align = PARM_BOUNDARY;
7718 else
7720 static bool warned;
7721 unsigned int saved_align = align;
7723 if (!TARGET_64BIT)
7725 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7726 if (!type)
7728 if (mode == XFmode || mode == XCmode)
7729 align = PARM_BOUNDARY;
7731 else if (!ix86_contains_aligned_value_p (type))
7732 align = PARM_BOUNDARY;
7734 if (align < 128)
7735 align = PARM_BOUNDARY;
7738 if (warn_psabi
7739 && !warned
7740 && align != ix86_compat_function_arg_boundary (mode, type,
7741 saved_align))
7743 warned = true;
7744 inform (input_location,
7745 "The ABI for passing parameters with %d-byte"
7746 " alignment has changed in GCC 4.6",
7747 align / BITS_PER_UNIT);
7751 return align;
7754 /* Return true if N is a possible register number of function value. */
7756 static bool
7757 ix86_function_value_regno_p (const unsigned int regno)
7759 switch (regno)
7761 case AX_REG:
7762 case DX_REG:
7763 return true;
7764 case DI_REG:
7765 case SI_REG:
7766 return TARGET_64BIT && ix86_abi != MS_ABI;
7768 /* Complex values are returned in %st(0)/%st(1) pair. */
7769 case ST0_REG:
7770 case ST1_REG:
7771 /* TODO: The function should depend on current function ABI but
7772 builtins.c would need updating then. Therefore we use the
7773 default ABI. */
7774 if (TARGET_64BIT && ix86_abi == MS_ABI)
7775 return false;
7776 return TARGET_FLOAT_RETURNS_IN_80387;
7778 /* Complex values are returned in %xmm0/%xmm1 pair. */
7779 case XMM0_REG:
7780 case XMM1_REG:
7781 return TARGET_SSE;
7783 case MM0_REG:
7784 if (TARGET_MACHO || TARGET_64BIT)
7785 return false;
7786 return TARGET_MMX;
7789 return false;
7792 /* Define how to find the value returned by a function.
7793 VALTYPE is the data type of the value (as a tree).
7794 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7795 otherwise, FUNC is 0. */
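/* For example, on ia32 a function returning float or double normally
   returns the value in %st(0); if the function uses the sseregparm
   convention (see ix86_function_sseregparm), the value is returned in
   %xmm0 instead.  */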
7797 static rtx
7798 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7799 const_tree fntype, const_tree fn)
7801 unsigned int regno;
7803 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7804 we normally prevent this case when mmx is not available. However
7805 some ABIs may require the result to be returned like DImode. */
7806 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7807 regno = FIRST_MMX_REG;
7809 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7810 we prevent this case when sse is not available. However some ABIs
7811 may require the result to be returned like integer TImode. */
7812 else if (mode == TImode
7813 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7814 regno = FIRST_SSE_REG;
7816 /* 32-byte vector modes in %ymm0. */
7817 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7818 regno = FIRST_SSE_REG;
7820 /* 64-byte vector modes in %zmm0. */
7821 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7822 regno = FIRST_SSE_REG;
7824 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7825 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7826 regno = FIRST_FLOAT_REG;
7827 else
7828 /* Most things go in %eax. */
7829 regno = AX_REG;
7831 /* Override FP return register with %xmm0 for local functions when
7832 SSE math is enabled or for functions with sseregparm attribute. */
7833 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7835 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7836 if ((sse_level >= 1 && mode == SFmode)
7837 || (sse_level == 2 && mode == DFmode))
7838 regno = FIRST_SSE_REG;
7841 /* OImode shouldn't be used directly. */
7842 gcc_assert (mode != OImode);
7844 return gen_rtx_REG (orig_mode, regno);
7847 static rtx
7848 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7849 const_tree valtype)
7851 rtx ret;
7853 /* Handle libcalls, which don't provide a type node. */
7854 if (valtype == NULL)
7856 unsigned int regno;
7858 switch (mode)
7860 case SFmode:
7861 case SCmode:
7862 case DFmode:
7863 case DCmode:
7864 case TFmode:
7865 case SDmode:
7866 case DDmode:
7867 case TDmode:
7868 regno = FIRST_SSE_REG;
7869 break;
7870 case XFmode:
7871 case XCmode:
7872 regno = FIRST_FLOAT_REG;
7873 break;
7874 case TCmode:
7875 return NULL;
7876 default:
7877 regno = AX_REG;
7880 return gen_rtx_REG (mode, regno);
7882 else if (POINTER_TYPE_P (valtype)
7883 && !upc_shared_type_p (TREE_TYPE (valtype)))
7885 /* Pointers are always returned in word_mode. */
7886 mode = word_mode;
7889 ret = construct_container (mode, orig_mode, valtype, 1,
7890 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7891 x86_64_int_return_registers, 0);
7893 /* For zero sized structures, construct_container returns NULL, but we
7894 need to keep the rest of the compiler happy by returning a meaningful value. */
7895 if (!ret)
7896 ret = gen_rtx_REG (orig_mode, AX_REG);
7898 return ret;
7901 static rtx
7902 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
7903 const_tree valtype)
7905 unsigned int regno = AX_REG;
7907 if (TARGET_SSE)
7909 switch (GET_MODE_SIZE (mode))
7911 case 16:
7912 if (valtype != NULL_TREE
7913 && !VECTOR_INTEGER_TYPE_P (valtype)
7915 && !INTEGRAL_TYPE_P (valtype)
7916 && !VECTOR_FLOAT_TYPE_P (valtype))
7917 break;
7918 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7919 && !COMPLEX_MODE_P (mode))
7920 regno = FIRST_SSE_REG;
7921 break;
7922 case 8:
7923 case 4:
7924 if (mode == SFmode || mode == DFmode)
7925 regno = FIRST_SSE_REG;
7926 break;
7927 default:
7928 break;
7931 return gen_rtx_REG (orig_mode, regno);
7934 static rtx
7935 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7936 enum machine_mode orig_mode, enum machine_mode mode)
7938 const_tree fn, fntype;
7940 fn = NULL_TREE;
7941 if (fntype_or_decl && DECL_P (fntype_or_decl))
7942 fn = fntype_or_decl;
7943 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7945 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7946 return function_value_ms_64 (orig_mode, mode, valtype);
7947 else if (TARGET_64BIT)
7948 return function_value_64 (orig_mode, mode, valtype);
7949 else
7950 return function_value_32 (orig_mode, mode, fntype, fn);
7953 static rtx
7954 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7955 bool outgoing ATTRIBUTE_UNUSED)
7957 enum machine_mode mode, orig_mode;
7959 orig_mode = TYPE_MODE (valtype);
7960 mode = type_natural_mode (valtype, NULL, true);
7961 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7964 /* Pointer function arguments and return values are promoted to
7965 word_mode. */
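/* For example, when ptr_mode is narrower than word_mode (as with -mx32),
   a pointer argument or return value is zero-extended
   (POINTERS_EXTEND_UNSIGNED) and passed as a full word_mode value; UPC
   pointers-to-shared instead keep their representation mode.  */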
7967 static enum machine_mode
7968 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7969 int *punsignedp, const_tree fntype,
7970 int for_return)
7972 if (type != NULL_TREE && POINTER_TYPE_P (type))
7974 if (upc_shared_type_p (TREE_TYPE (type)))
7976 *punsignedp = 1;
7977 return TYPE_MODE (upc_pts_rep_type_node);
7979 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7980 return word_mode;
7982 return default_promote_function_mode (type, mode, punsignedp, fntype,
7983 for_return);
7986 /* Return true if a structure, union or array with MODE containing FIELD
7987 should be accessed using BLKmode. */
7989 static bool
7990 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
7992 /* Union with XFmode must be in BLKmode. */
7993 return (mode == XFmode
7994 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
7995 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
7998 static rtx
7999 ix86_libcall_value (enum machine_mode mode)
8001 return ix86_function_value_1 (NULL, NULL, mode, mode);
8004 /* Return true iff type is returned in memory. */
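/* For example, on ia32 a 16-byte vector value is returned in %xmm0 when
   SSE is available and in memory otherwise, and an XFmode long double is
   never returned in memory (see return_in_memory_32 below).  */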
8006 static bool ATTRIBUTE_UNUSED
8007 return_in_memory_32 (const_tree type, enum machine_mode mode)
8009 HOST_WIDE_INT size;
8011 if (mode == BLKmode)
8012 return true;
8014 size = int_size_in_bytes (type);
8016 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8017 return false;
8019 if (VECTOR_MODE_P (mode) || mode == TImode)
8021 /* User-created vectors small enough to fit in EAX. */
8022 if (size < 8)
8023 return false;
8025 /* MMX/3dNow values are returned in MM0,
8026 except when it doesn't exist or the ABI prescribes otherwise. */
8027 if (size == 8)
8028 return !TARGET_MMX || TARGET_VECT8_RETURNS;
8030 /* SSE values are returned in XMM0, except when it doesn't exist. */
8031 if (size == 16)
8032 return !TARGET_SSE;
8034 /* AVX values are returned in YMM0, except when it doesn't exist. */
8035 if (size == 32)
8036 return !TARGET_AVX;
8038 /* AVX512F values are returned in ZMM0, except when it doesn't exist. */
8039 if (size == 64)
8040 return !TARGET_AVX512F;
8043 if (mode == XFmode)
8044 return false;
8046 if (size > 12)
8047 return true;
8049 /* OImode shouldn't be used directly. */
8050 gcc_assert (mode != OImode);
8052 return false;
8055 static bool ATTRIBUTE_UNUSED
8056 return_in_memory_64 (const_tree type, enum machine_mode mode)
8058 int needed_intregs, needed_sseregs;
8059 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
8062 static bool ATTRIBUTE_UNUSED
8063 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
8065 HOST_WIDE_INT size = int_size_in_bytes (type);
8067 /* __m128 is returned in xmm0. */
8068 if ((!type || VECTOR_INTEGER_TYPE_P (type) || INTEGRAL_TYPE_P (type)
8069 || VECTOR_FLOAT_TYPE_P (type))
8070 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8071 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
8072 return false;
8074 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
8075 return size != 1 && size != 2 && size != 4 && size != 8;
8078 static bool
8079 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8081 #ifdef SUBTARGET_RETURN_IN_MEMORY
8082 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8083 #else
8084 const enum machine_mode mode = type_natural_mode (type, NULL, true);
8086 if (TARGET_64BIT)
8088 if (ix86_function_type_abi (fntype) == MS_ABI)
8089 return return_in_memory_ms_64 (type, mode);
8090 else
8091 return return_in_memory_64 (type, mode);
8093 else
8094 return return_in_memory_32 (type, mode);
8095 #endif
8099 /* Create the va_list data type. */
8101 /* Return the calling-convention-specific va_list data type.
8102 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8104 static tree
8105 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8107 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8109 /* For i386 we use a plain pointer to the argument area. */
8110 if (!TARGET_64BIT || abi == MS_ABI)
8111 return build_pointer_type (char_type_node);
8113 record = lang_hooks.types.make_type (RECORD_TYPE);
8114 type_decl = build_decl (BUILTINS_LOCATION,
8115 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8117 f_gpr = build_decl (BUILTINS_LOCATION,
8118 FIELD_DECL, get_identifier ("gp_offset"),
8119 unsigned_type_node);
8120 f_fpr = build_decl (BUILTINS_LOCATION,
8121 FIELD_DECL, get_identifier ("fp_offset"),
8122 unsigned_type_node);
8123 f_ovf = build_decl (BUILTINS_LOCATION,
8124 FIELD_DECL, get_identifier ("overflow_arg_area"),
8125 ptr_type_node);
8126 f_sav = build_decl (BUILTINS_LOCATION,
8127 FIELD_DECL, get_identifier ("reg_save_area"),
8128 ptr_type_node);
8130 va_list_gpr_counter_field = f_gpr;
8131 va_list_fpr_counter_field = f_fpr;
8133 DECL_FIELD_CONTEXT (f_gpr) = record;
8134 DECL_FIELD_CONTEXT (f_fpr) = record;
8135 DECL_FIELD_CONTEXT (f_ovf) = record;
8136 DECL_FIELD_CONTEXT (f_sav) = record;
8138 TYPE_STUB_DECL (record) = type_decl;
8139 TYPE_NAME (record) = type_decl;
8140 TYPE_FIELDS (record) = f_gpr;
8141 DECL_CHAIN (f_gpr) = f_fpr;
8142 DECL_CHAIN (f_fpr) = f_ovf;
8143 DECL_CHAIN (f_ovf) = f_sav;
8145 layout_type (record);
8147 /* The correct type is an array type of one element. */
8148 return build_array_type (record, build_index_type (size_zero_node));
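/* The record built above corresponds to the va_list layout required by
   the x86-64 psABI, roughly:

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];  */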
8151 /* Setup the builtin va_list data type and for 64-bit the additional
8152 calling convention specific va_list data types. */
8154 static tree
8155 ix86_build_builtin_va_list (void)
8157 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8159 /* Initialize abi specific va_list builtin types. */
8160 if (TARGET_64BIT)
8162 tree t;
8163 if (ix86_abi == MS_ABI)
8165 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8166 if (TREE_CODE (t) != RECORD_TYPE)
8167 t = build_variant_type_copy (t);
8168 sysv_va_list_type_node = t;
8170 else
8172 t = ret;
8173 if (TREE_CODE (t) != RECORD_TYPE)
8174 t = build_variant_type_copy (t);
8175 sysv_va_list_type_node = t;
8177 if (ix86_abi != MS_ABI)
8179 t = ix86_build_builtin_va_list_abi (MS_ABI);
8180 if (TREE_CODE (t) != RECORD_TYPE)
8181 t = build_variant_type_copy (t);
8182 ms_va_list_type_node = t;
8184 else
8186 t = ret;
8187 if (TREE_CODE (t) != RECORD_TYPE)
8188 t = build_variant_type_copy (t);
8189 ms_va_list_type_node = t;
8193 return ret;
8196 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8198 static void
8199 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8201 rtx save_area, mem;
8202 alias_set_type set;
8203 int i, max;
8205 /* GPR size of varargs save area. */
8206 if (cfun->va_list_gpr_size)
8207 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8208 else
8209 ix86_varargs_gpr_size = 0;
8211 /* FPR size of varargs save area. We don't need it if we don't pass
8212 anything in SSE registers. */
8213 if (TARGET_SSE && cfun->va_list_fpr_size)
8214 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8215 else
8216 ix86_varargs_fpr_size = 0;
8218 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8219 return;
8221 save_area = frame_pointer_rtx;
8222 set = get_varargs_alias_set ();
8224 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8225 if (max > X86_64_REGPARM_MAX)
8226 max = X86_64_REGPARM_MAX;
8228 for (i = cum->regno; i < max; i++)
8230 mem = gen_rtx_MEM (word_mode,
8231 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8232 MEM_NOTRAP_P (mem) = 1;
8233 set_mem_alias_set (mem, set);
8234 emit_move_insn (mem,
8235 gen_rtx_REG (word_mode,
8236 x86_64_int_parameter_registers[i]));
8239 if (ix86_varargs_fpr_size)
8241 enum machine_mode smode;
8242 rtx label, test;
8244 /* Now emit code to save SSE registers. The AX parameter contains the number
8245 of SSE parameter registers used to call this function, though all we
8246 actually check here is the zero/non-zero status. */
8248 label = gen_label_rtx ();
8249 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8250 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8251 label));
8253 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8254 we used movdqa (i.e. TImode) instead? Perhaps even better would
8255 be if we could determine the real mode of the data, via a hook
8256 into pass_stdarg. Ignore all that for now. */
8257 smode = V4SFmode;
8258 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8259 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8261 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8262 if (max > X86_64_SSE_REGPARM_MAX)
8263 max = X86_64_SSE_REGPARM_MAX;
8265 for (i = cum->sse_regno; i < max; ++i)
8267 mem = plus_constant (Pmode, save_area,
8268 i * 16 + ix86_varargs_gpr_size);
8269 mem = gen_rtx_MEM (smode, mem);
8270 MEM_NOTRAP_P (mem) = 1;
8271 set_mem_alias_set (mem, set);
8272 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8274 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8277 emit_label (label);
8281 static void
8282 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8284 alias_set_type set = get_varargs_alias_set ();
8285 int i;
8287 /* Reset to zero, as a SysV va_arg may have been used
8288 before. */
8289 ix86_varargs_gpr_size = 0;
8290 ix86_varargs_fpr_size = 0;
8292 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8294 rtx reg, mem;
8296 mem = gen_rtx_MEM (Pmode,
8297 plus_constant (Pmode, virtual_incoming_args_rtx,
8298 i * UNITS_PER_WORD));
8299 MEM_NOTRAP_P (mem) = 1;
8300 set_mem_alias_set (mem, set);
8302 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8303 emit_move_insn (mem, reg);
8307 static void
8308 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8309 tree type, int *pretend_size ATTRIBUTE_UNUSED,
8310 int no_rtl)
8312 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8313 CUMULATIVE_ARGS next_cum;
8314 tree fntype;
8316 /* This argument doesn't appear to be used anymore, which is good,
8317 because the old code here didn't suppress rtl generation. */
8318 gcc_assert (!no_rtl);
8320 if (!TARGET_64BIT)
8321 return;
8323 fntype = TREE_TYPE (current_function_decl);
8325 /* For varargs, we do not want to skip the dummy va_dcl argument.
8326 For stdargs, we do want to skip the last named argument. */
8327 next_cum = *cum;
8328 if (stdarg_p (fntype))
8329 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8330 true);
8332 if (cum->call_abi == MS_ABI)
8333 setup_incoming_varargs_ms_64 (&next_cum);
8334 else
8335 setup_incoming_varargs_64 (&next_cum);
8338 /* Check whether TYPE is a va_list of the plain char * kind. */
8340 static bool
8341 is_va_list_char_pointer (tree type)
8343 tree canonic;
8345 /* For 32-bit it is always true. */
8346 if (!TARGET_64BIT)
8347 return true;
8348 canonic = ix86_canonical_va_list_type (type);
8349 return (canonic == ms_va_list_type_node
8350 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8353 /* Implement va_start. */
8355 static void
8356 ix86_va_start (tree valist, rtx nextarg)
8358 HOST_WIDE_INT words, n_gpr, n_fpr;
8359 tree f_gpr, f_fpr, f_ovf, f_sav;
8360 tree gpr, fpr, ovf, sav, t;
8361 tree type;
8362 rtx ovf_rtx;
8364 if (flag_split_stack
8365 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8367 unsigned int scratch_regno;
8369 /* When we are splitting the stack, we can't refer to the stack
8370 arguments using internal_arg_pointer, because they may be on
8371 the old stack. The split stack prologue will arrange to
8372 leave a pointer to the old stack arguments in a scratch
8373 register, which we here copy to a pseudo-register. The split
8374 stack prologue can't set the pseudo-register directly because
8375 it (the prologue) runs before any registers have been saved. */
8377 scratch_regno = split_stack_prologue_scratch_regno ();
8378 if (scratch_regno != INVALID_REGNUM)
8380 rtx reg, seq;
8382 reg = gen_reg_rtx (Pmode);
8383 cfun->machine->split_stack_varargs_pointer = reg;
8385 start_sequence ();
8386 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8387 seq = get_insns ();
8388 end_sequence ();
8390 push_topmost_sequence ();
8391 emit_insn_after (seq, entry_of_function ());
8392 pop_topmost_sequence ();
8396 /* Only 64bit target needs something special. */
8397 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8399 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8400 std_expand_builtin_va_start (valist, nextarg);
8401 else
8403 rtx va_r, next;
8405 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8406 next = expand_binop (ptr_mode, add_optab,
8407 cfun->machine->split_stack_varargs_pointer,
8408 crtl->args.arg_offset_rtx,
8409 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8410 convert_move (va_r, next, 0);
8412 return;
8415 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8416 f_fpr = DECL_CHAIN (f_gpr);
8417 f_ovf = DECL_CHAIN (f_fpr);
8418 f_sav = DECL_CHAIN (f_ovf);
8420 valist = build_simple_mem_ref (valist);
8421 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8422 /* The following should be folded into the MEM_REF offset. */
8423 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8424 f_gpr, NULL_TREE);
8425 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8426 f_fpr, NULL_TREE);
8427 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8428 f_ovf, NULL_TREE);
8429 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8430 f_sav, NULL_TREE);
8432 /* Count number of gp and fp argument registers used. */
8433 words = crtl->args.info.words;
8434 n_gpr = crtl->args.info.regno;
8435 n_fpr = crtl->args.info.sse_regno;
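/* For example, for  int f (int a, double b, ...)  one GPR and one SSE
   register are consumed by the named arguments, so (assuming the usual
   X86_64_REGPARM_MAX of 6) gp_offset below starts at 8 and fp_offset at
   6*8 + 1*16 = 64.  */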
8437 if (cfun->va_list_gpr_size)
8439 type = TREE_TYPE (gpr);
8440 t = build2 (MODIFY_EXPR, type,
8441 gpr, build_int_cst (type, n_gpr * 8));
8442 TREE_SIDE_EFFECTS (t) = 1;
8443 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8446 if (TARGET_SSE && cfun->va_list_fpr_size)
8448 type = TREE_TYPE (fpr);
8449 t = build2 (MODIFY_EXPR, type, fpr,
8450 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8451 TREE_SIDE_EFFECTS (t) = 1;
8452 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8455 /* Find the overflow area. */
8456 type = TREE_TYPE (ovf);
8457 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8458 ovf_rtx = crtl->args.internal_arg_pointer;
8459 else
8460 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8461 t = make_tree (type, ovf_rtx);
8462 if (words != 0)
8463 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8464 t = build2 (MODIFY_EXPR, type, ovf, t);
8465 TREE_SIDE_EFFECTS (t) = 1;
8466 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8468 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8470 /* Find the register save area.
8471 The function prologue saves it right above the stack frame. */
8472 type = TREE_TYPE (sav);
8473 t = make_tree (type, frame_pointer_rtx);
8474 if (!ix86_varargs_gpr_size)
8475 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8476 t = build2 (MODIFY_EXPR, type, sav, t);
8477 TREE_SIDE_EFFECTS (t) = 1;
8478 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8482 /* Implement va_arg. */
8484 static tree
8485 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8486 gimple_seq *post_p)
8488 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8489 tree f_gpr, f_fpr, f_ovf, f_sav;
8490 tree gpr, fpr, ovf, sav, t;
8491 int size, rsize;
8492 tree lab_false, lab_over = NULL_TREE;
8493 tree addr, t2;
8494 rtx container;
8495 int indirect_p = 0;
8496 tree ptrtype;
8497 enum machine_mode nat_mode;
8498 unsigned int arg_boundary;
8500 /* Only 64bit target needs something special. */
8501 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8502 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8504 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8505 f_fpr = DECL_CHAIN (f_gpr);
8506 f_ovf = DECL_CHAIN (f_fpr);
8507 f_sav = DECL_CHAIN (f_ovf);
8509 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8510 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8511 valist = build_va_arg_indirect_ref (valist);
8512 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8513 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8514 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8516 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8517 if (indirect_p)
8518 type = build_pointer_type (type);
8519 size = int_size_in_bytes (type);
8520 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8522 nat_mode = type_natural_mode (type, NULL, false);
8523 switch (nat_mode)
8525 case V8SFmode:
8526 case V8SImode:
8527 case V32QImode:
8528 case V16HImode:
8529 case V4DFmode:
8530 case V4DImode:
8531 case V16SFmode:
8532 case V16SImode:
8533 case V64QImode:
8534 case V32HImode:
8535 case V8DFmode:
8536 case V8DImode:
8537 /* Unnamed 256 and 512bit vector mode parameters are passed on the stack. */
8538 if (!TARGET_64BIT_MS_ABI)
8540 container = NULL;
8541 break;
8544 default:
8545 container = construct_container (nat_mode, TYPE_MODE (type),
8546 type, 0, X86_64_REGPARM_MAX,
8547 X86_64_SSE_REGPARM_MAX, intreg,
8549 break;
8552 /* Pull the value out of the saved registers. */
8554 addr = create_tmp_var (ptr_type_node, "addr");
8556 if (container)
8558 int needed_intregs, needed_sseregs;
8559 bool need_temp;
8560 tree int_addr, sse_addr;
8562 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8563 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8565 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8567 need_temp = (!REG_P (container)
8568 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8569 || TYPE_ALIGN (type) > 128));
8571 /* When passing a structure, verify that it occupies a consecutive block
8572 in the register save area. If not, we need to do moves. */
8573 if (!need_temp && !REG_P (container))
8575 /* Verify that all registers are strictly consecutive. */
8576 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8578 int i;
8580 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8582 rtx slot = XVECEXP (container, 0, i);
8583 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8584 || INTVAL (XEXP (slot, 1)) != i * 16)
8585 need_temp = 1;
8588 else
8590 int i;
8592 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8594 rtx slot = XVECEXP (container, 0, i);
8595 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8596 || INTVAL (XEXP (slot, 1)) != i * 8)
8597 need_temp = 1;
8601 if (!need_temp)
8603 int_addr = addr;
8604 sse_addr = addr;
8606 else
8608 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8609 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8612 /* First ensure that we fit completely in registers. */
8613 if (needed_intregs)
8615 t = build_int_cst (TREE_TYPE (gpr),
8616 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8617 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8618 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8619 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8620 gimplify_and_add (t, pre_p);
8622 if (needed_sseregs)
8624 t = build_int_cst (TREE_TYPE (fpr),
8625 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8626 + X86_64_REGPARM_MAX * 8);
8627 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8628 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8629 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8630 gimplify_and_add (t, pre_p);
8633 /* Compute index to start of area used for integer regs. */
8634 if (needed_intregs)
8636 /* int_addr = gpr + sav; */
8637 t = fold_build_pointer_plus (sav, gpr);
8638 gimplify_assign (int_addr, t, pre_p);
8640 if (needed_sseregs)
8642 /* sse_addr = fpr + sav; */
8643 t = fold_build_pointer_plus (sav, fpr);
8644 gimplify_assign (sse_addr, t, pre_p);
8646 if (need_temp)
8648 int i, prev_size = 0;
8649 tree temp = create_tmp_var (type, "va_arg_tmp");
8651 /* addr = &temp; */
8652 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8653 gimplify_assign (addr, t, pre_p);
8655 for (i = 0; i < XVECLEN (container, 0); i++)
8657 rtx slot = XVECEXP (container, 0, i);
8658 rtx reg = XEXP (slot, 0);
8659 enum machine_mode mode = GET_MODE (reg);
8660 tree piece_type;
8661 tree addr_type;
8662 tree daddr_type;
8663 tree src_addr, src;
8664 int src_offset;
8665 tree dest_addr, dest;
8666 int cur_size = GET_MODE_SIZE (mode);
8668 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8669 prev_size = INTVAL (XEXP (slot, 1));
8670 if (prev_size + cur_size > size)
8672 cur_size = size - prev_size;
8673 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8674 if (mode == BLKmode)
8675 mode = QImode;
8677 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8678 if (mode == GET_MODE (reg))
8679 addr_type = build_pointer_type (piece_type);
8680 else
8681 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8682 true);
8683 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8684 true);
8686 if (SSE_REGNO_P (REGNO (reg)))
8688 src_addr = sse_addr;
8689 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8691 else
8693 src_addr = int_addr;
8694 src_offset = REGNO (reg) * 8;
8696 src_addr = fold_convert (addr_type, src_addr);
8697 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8699 dest_addr = fold_convert (daddr_type, addr);
8700 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8701 if (cur_size == GET_MODE_SIZE (mode))
8703 src = build_va_arg_indirect_ref (src_addr);
8704 dest = build_va_arg_indirect_ref (dest_addr);
8706 gimplify_assign (dest, src, pre_p);
8708 else
8710 tree copy
8711 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8712 3, dest_addr, src_addr,
8713 size_int (cur_size));
8714 gimplify_and_add (copy, pre_p);
8716 prev_size += cur_size;
8720 if (needed_intregs)
8722 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8723 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8724 gimplify_assign (gpr, t, pre_p);
8727 if (needed_sseregs)
8729 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8730 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8731 gimplify_assign (fpr, t, pre_p);
8734 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8736 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8739 /* ... otherwise out of the overflow area. */
8741 /* When we align a parameter on the stack for the caller, if its
8742 alignment exceeds MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8743 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here
8744 with the caller. */
8745 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8746 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8747 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8749 /* Care for on-stack alignment if needed. */
8750 if (arg_boundary <= 64 || size == 0)
8751 t = ovf;
8752 else
8754 HOST_WIDE_INT align = arg_boundary / 8;
8755 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8756 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8757 build_int_cst (TREE_TYPE (t), -align));
8760 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8761 gimplify_assign (addr, t, pre_p);
8763 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8764 gimplify_assign (unshare_expr (ovf), t, pre_p);
8766 if (container)
8767 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8769 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8770 addr = fold_convert (ptrtype, addr);
8772 if (indirect_p)
8773 addr = build_va_arg_indirect_ref (addr);
8774 return build_va_arg_indirect_ref (addr);
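/* For illustration only: for a plain "int" argument the code above expands
   to roughly the following pseudo code, which is the SysV AMD64 va_arg
   algorithm (field names follow the standard va_list layout and are used
   here purely as a sketch):

       if (ap->gp_offset >= 6 * 8)
         goto lab_false;
       addr = ap->reg_save_area + ap->gp_offset;
       ap->gp_offset += 8;
       goto lab_over;
     lab_false:
       addr = ap->overflow_arg_area;
       ap->overflow_arg_area += 8;
     lab_over:
       result = *(int *) addr;  */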
8777 /* Return true if OPNUM's MEM should be matched
8778 in movabs* patterns. */
8780 bool
8781 ix86_check_movabs (rtx insn, int opnum)
8783 rtx set, mem;
8785 set = PATTERN (insn);
8786 if (GET_CODE (set) == PARALLEL)
8787 set = XVECEXP (set, 0, 0);
8788 gcc_assert (GET_CODE (set) == SET);
8789 mem = XEXP (set, opnum);
8790 while (GET_CODE (mem) == SUBREG)
8791 mem = SUBREG_REG (mem);
8792 gcc_assert (MEM_P (mem));
8793 return volatile_ok || !MEM_VOLATILE_P (mem);
8796 /* Initialize the table of extra 80387 mathematical constants. */
8798 static void
8799 init_ext_80387_constants (void)
8801 static const char * cst[5] =
8803 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8804 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8805 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8806 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8807 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8809 int i;
8811 for (i = 0; i < 5; i++)
8813 real_from_string (&ext_80387_constants_table[i], cst[i]);
8814 /* Ensure each constant is rounded to XFmode precision. */
8815 real_convert (&ext_80387_constants_table[i],
8816 XFmode, &ext_80387_constants_table[i]);
8819 ext_80387_constants_init = 1;
8822 /* Return non-zero if the constant is something that
8823 can be loaded with a special instruction. */
8826 standard_80387_constant_p (rtx x)
8828 enum machine_mode mode = GET_MODE (x);
8830 REAL_VALUE_TYPE r;
8832 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8833 return -1;
8835 if (x == CONST0_RTX (mode))
8836 return 1;
8837 if (x == CONST1_RTX (mode))
8838 return 2;
8840 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8842 /* For XFmode constants, try to find a special 80387 instruction when
8843 optimizing for size or on those CPUs that benefit from them. */
8844 if (mode == XFmode
8845 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8847 int i;
8849 if (! ext_80387_constants_init)
8850 init_ext_80387_constants ();
8852 for (i = 0; i < 5; i++)
8853 if (real_identical (&r, &ext_80387_constants_table[i]))
8854 return i + 3;
8857 /* A load of the constant -0.0 or -1.0 will be split into an
8858 fldz;fchs or fld1;fchs sequence. */
8859 if (real_isnegzero (&r))
8860 return 8;
8861 if (real_identical (&r, &dconstm1))
8862 return 9;
8864 return 0;
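/* For reference, the return values above map to

     -1  not an 80387 floating-point constant
      0  no special instruction available
      1  0.0 (fldz)               2  1.0 (fld1)
      3  log10(2) (fldlg2)        4  ln(2) (fldln2)
      5  log2(e) (fldl2e)         6  log2(10) (fldl2t)
      7  pi (fldpi)
      8  -0.0 (fldz; fchs)        9  -1.0 (fld1; fchs)

   with the opcode strings produced by standard_80387_constant_opcode
   below.  */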
8867 /* Return the opcode of the special instruction to be used to load
8868 the constant X. */
8870 const char *
8871 standard_80387_constant_opcode (rtx x)
8873 switch (standard_80387_constant_p (x))
8875 case 1:
8876 return "fldz";
8877 case 2:
8878 return "fld1";
8879 case 3:
8880 return "fldlg2";
8881 case 4:
8882 return "fldln2";
8883 case 5:
8884 return "fldl2e";
8885 case 6:
8886 return "fldl2t";
8887 case 7:
8888 return "fldpi";
8889 case 8:
8890 case 9:
8891 return "#";
8892 default:
8893 gcc_unreachable ();
8897 /* Return the CONST_DOUBLE representing the 80387 constant that is
8898 loaded by the specified special instruction. The argument IDX
8899 matches the return value from standard_80387_constant_p. */
8902 standard_80387_constant_rtx (int idx)
8904 int i;
8906 if (! ext_80387_constants_init)
8907 init_ext_80387_constants ();
8909 switch (idx)
8911 case 3:
8912 case 4:
8913 case 5:
8914 case 6:
8915 case 7:
8916 i = idx - 3;
8917 break;
8919 default:
8920 gcc_unreachable ();
8923 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8924 XFmode);
8927 /* Return 1 if X is all 0s and 2 if X is all 1s
8928 in a supported SSE/AVX vector mode. */
8931 standard_sse_constant_p (rtx x)
8933 enum machine_mode mode = GET_MODE (x);
8935 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8936 return 1;
8937 if (vector_all_ones_operand (x, mode))
8938 switch (mode)
8940 case V16QImode:
8941 case V8HImode:
8942 case V4SImode:
8943 case V2DImode:
8944 if (TARGET_SSE2)
8945 return 2;
8946 case V32QImode:
8947 case V16HImode:
8948 case V8SImode:
8949 case V4DImode:
8950 if (TARGET_AVX2)
8951 return 2;
8952 case V64QImode:
8953 case V32HImode:
8954 case V16SImode:
8955 case V8DImode:
8956 if (TARGET_AVX512F)
8957 return 2;
8958 default:
8959 break;
8962 return 0;
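/* For illustration: the two interesting return values correspond to very
   cheap idioms, e.g. for a V4SImode operand in an SSE2 register

       return 1 (all zeros)  ->  pxor    %xmm0, %xmm0
       return 2 (all ones)   ->  pcmpeqd %xmm0, %xmm0

   though the exact mnemonic is chosen by standard_sse_constant_opcode
   below, which also covers the AVX and AVX-512 forms.  */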
8965 /* Return the opcode of the special instruction to be used to load
8966 the constant X. */
8968 const char *
8969 standard_sse_constant_opcode (rtx insn, rtx x)
8971 switch (standard_sse_constant_p (x))
8973 case 1:
8974 switch (get_attr_mode (insn))
8976 case MODE_XI:
8977 case MODE_V16SF:
8978 return "vpxord\t%g0, %g0, %g0";
8979 case MODE_V8DF:
8980 return "vpxorq\t%g0, %g0, %g0";
8981 case MODE_TI:
8982 return "%vpxor\t%0, %d0";
8983 case MODE_V2DF:
8984 return "%vxorpd\t%0, %d0";
8985 case MODE_V4SF:
8986 return "%vxorps\t%0, %d0";
8988 case MODE_OI:
8989 return "vpxor\t%x0, %x0, %x0";
8990 case MODE_V4DF:
8991 return "vxorpd\t%x0, %x0, %x0";
8992 case MODE_V8SF:
8993 return "vxorps\t%x0, %x0, %x0";
8995 default:
8996 break;
8999 case 2:
9000 if (get_attr_mode (insn) == MODE_XI
9001 || get_attr_mode (insn) == MODE_V8DF
9002 || get_attr_mode (insn) == MODE_V16SF)
9003 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9004 if (TARGET_AVX)
9005 return "vpcmpeqd\t%0, %0, %0";
9006 else
9007 return "pcmpeqd\t%0, %0";
9009 default:
9010 break;
9012 gcc_unreachable ();
9015 /* Return true if OP contains a symbol reference. */
9017 bool
9018 symbolic_reference_mentioned_p (rtx op)
9020 const char *fmt;
9021 int i;
9023 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9024 return true;
9026 fmt = GET_RTX_FORMAT (GET_CODE (op));
9027 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9029 if (fmt[i] == 'E')
9031 int j;
9033 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9034 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9035 return true;
9038 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9039 return true;
9042 return false;
9045 /* Return true if it is appropriate to emit `ret' instructions in the
9046 body of a function. Do this only if the epilogue is simple, needing a
9047 couple of insns. Prior to reloading, we can't tell how many registers
9048 must be saved, so return false then. Return false if there is no frame
9049 marker to de-allocate. */
9051 bool
9052 ix86_can_use_return_insn_p (void)
9054 struct ix86_frame frame;
9056 if (! reload_completed || frame_pointer_needed)
9057 return 0;
9059 /* Don't allow more than 32k pop, since that's all we can do
9060 with one instruction. */
9061 if (crtl->args.pops_args && crtl->args.size >= 32768)
9062 return 0;
9064 ix86_compute_frame_layout (&frame);
9065 return (frame.stack_pointer_offset == UNITS_PER_WORD
9066 && (frame.nregs + frame.nsseregs) == 0);
9069 /* Value should be nonzero if functions must have frame pointers.
9070 Zero means the frame pointer need not be set up (and parms may
9071 be accessed via the stack pointer) in functions that seem suitable. */
9073 static bool
9074 ix86_frame_pointer_required (void)
9076 /* If we accessed previous frames, then the generated code expects
9077 to be able to access the saved ebp value in our frame. */
9078 if (cfun->machine->accesses_prev_frame)
9079 return true;
9081 /* Several x86 OSes need a frame pointer for other reasons,
9082 usually pertaining to setjmp. */
9083 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9084 return true;
9086 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
9087 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9088 return true;
9090 /* For Win64 SEH, very large frames need a frame pointer, as the maximum
9091 stack allocation is 4GB. */
9092 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9093 return true;
9095 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9096 turns off the frame pointer by default. Turn it back on now if
9097 we've not got a leaf function. */
9098 if (TARGET_OMIT_LEAF_FRAME_POINTER
9099 && (!crtl->is_leaf
9100 || ix86_current_function_calls_tls_descriptor))
9101 return true;
9103 if (crtl->profile && !flag_fentry)
9104 return true;
9106 return false;
9109 /* Record that the current function accesses previous call frames. */
9111 void
9112 ix86_setup_frame_addresses (void)
9114 cfun->machine->accesses_prev_frame = 1;
9117 #ifndef USE_HIDDEN_LINKONCE
9118 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9119 # define USE_HIDDEN_LINKONCE 1
9120 # else
9121 # define USE_HIDDEN_LINKONCE 0
9122 # endif
9123 #endif
9125 static int pic_labels_used;
9127 /* Fill in the label name that should be used for a pc thunk for
9128 the given register. */
9130 static void
9131 get_pc_thunk_name (char name[32], unsigned int regno)
9133 gcc_assert (!TARGET_64BIT);
9135 if (USE_HIDDEN_LINKONCE)
9136 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9137 else
9138 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9142 /* This function generates code for -fpic that loads %ebx with
9143 the return address of the caller and then returns. */
9145 static void
9146 ix86_code_end (void)
9148 rtx xops[2];
9149 int regno;
9151 for (regno = AX_REG; regno <= SP_REG; regno++)
9153 char name[32];
9154 tree decl;
9156 if (!(pic_labels_used & (1 << regno)))
9157 continue;
9159 get_pc_thunk_name (name, regno);
9161 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9162 get_identifier (name),
9163 build_function_type_list (void_type_node, NULL_TREE));
9164 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9165 NULL_TREE, void_type_node);
9166 TREE_PUBLIC (decl) = 1;
9167 TREE_STATIC (decl) = 1;
9168 DECL_IGNORED_P (decl) = 1;
9170 #if TARGET_MACHO
9171 if (TARGET_MACHO)
9173 switch_to_section (darwin_sections[text_coal_section]);
9174 fputs ("\t.weak_definition\t", asm_out_file);
9175 assemble_name (asm_out_file, name);
9176 fputs ("\n\t.private_extern\t", asm_out_file);
9177 assemble_name (asm_out_file, name);
9178 putc ('\n', asm_out_file);
9179 ASM_OUTPUT_LABEL (asm_out_file, name);
9180 DECL_WEAK (decl) = 1;
9182 else
9183 #endif
9184 if (USE_HIDDEN_LINKONCE)
9186 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
9188 targetm.asm_out.unique_section (decl, 0);
9189 switch_to_section (get_named_section (decl, NULL, 0));
9191 targetm.asm_out.globalize_label (asm_out_file, name);
9192 fputs ("\t.hidden\t", asm_out_file);
9193 assemble_name (asm_out_file, name);
9194 putc ('\n', asm_out_file);
9195 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9197 else
9199 switch_to_section (text_section);
9200 ASM_OUTPUT_LABEL (asm_out_file, name);
9203 DECL_INITIAL (decl) = make_node (BLOCK);
9204 current_function_decl = decl;
9205 init_function_start (decl);
9206 first_function_block_is_cold = false;
9207 /* Make sure unwind info is emitted for the thunk if needed. */
9208 final_start_function (emit_barrier (), asm_out_file, 1);
9210 /* Pad stack IP move with 4 instructions (two NOPs count
9211 as one instruction). */
9212 if (TARGET_PAD_SHORT_FUNCTION)
9214 int i = 8;
9216 while (i--)
9217 fputs ("\tnop\n", asm_out_file);
9220 xops[0] = gen_rtx_REG (Pmode, regno);
9221 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9222 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9223 fputs ("\tret\n", asm_out_file);
9224 final_end_function ();
9225 init_insn_lengths ();
9226 free_after_compilation (cfun);
9227 set_cfun (NULL);
9228 current_function_decl = NULL;
9231 if (flag_split_stack)
9232 file_end_indicate_split_stack ();
9235 /* Emit code for the SET_GOT patterns. */
9237 const char *
9238 output_set_got (rtx dest, rtx label)
9240 rtx xops[3];
9242 xops[0] = dest;
9244 if (TARGET_VXWORKS_RTP && flag_pic)
9246 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9247 xops[2] = gen_rtx_MEM (Pmode,
9248 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9249 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9251 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9252 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9253 an unadorned address. */
9254 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9255 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9256 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9257 return "";
9260 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9262 if (!flag_pic)
9264 if (TARGET_MACHO)
9265 /* We don't need a pic base, we're not producing pic. */
9266 gcc_unreachable ();
9268 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9269 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9270 targetm.asm_out.internal_label (asm_out_file, "L",
9271 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9273 else
9275 char name[32];
9276 get_pc_thunk_name (name, REGNO (dest));
9277 pic_labels_used |= 1 << REGNO (dest);
9279 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9280 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9281 output_asm_insn ("call\t%X2", xops);
9283 #if TARGET_MACHO
9284 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9285 This is what will be referenced by the Mach-O PIC subsystem. */
9286 if (machopic_should_output_picbase_label () || !label)
9287 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9289 /* When we are restoring the pic base at the site of a nonlocal label,
9290 and we decided to emit the pic base above, we will still output a
9291 local label used for calculating the correction offset (even though
9292 the offset will be 0 in that case). */
9293 if (label)
9294 targetm.asm_out.internal_label (asm_out_file, "L",
9295 CODE_LABEL_NUMBER (label));
9296 #endif
9299 if (!TARGET_MACHO)
9300 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9302 return "";
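/* For illustration: on a typical 32-bit ELF target the thunk emitted by
   ix86_code_end and the SET_GOT sequence above combine into roughly

       call  __x86.get_pc_thunk.bx
       addl  $_GLOBAL_OFFSET_TABLE_, %ebx
       ...
     __x86.get_pc_thunk.bx:
       movl  (%esp), %ebx
       ret

   The register, the label and the add operand depend on the PIC base
   register chosen and on the target's GOT_SYMBOL_NAME.  */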
9305 /* Generate a "push" pattern for input ARG. */
9307 static rtx
9308 gen_push (rtx arg)
9310 struct machine_function *m = cfun->machine;
9312 if (m->fs.cfa_reg == stack_pointer_rtx)
9313 m->fs.cfa_offset += UNITS_PER_WORD;
9314 m->fs.sp_offset += UNITS_PER_WORD;
9316 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9317 arg = gen_rtx_REG (word_mode, REGNO (arg));
9319 return gen_rtx_SET (VOIDmode,
9320 gen_rtx_MEM (word_mode,
9321 gen_rtx_PRE_DEC (Pmode,
9322 stack_pointer_rtx)),
9323 arg);
9326 /* Generate a "pop" pattern for input ARG. */
9328 static rtx
9329 gen_pop (rtx arg)
9331 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9332 arg = gen_rtx_REG (word_mode, REGNO (arg));
9334 return gen_rtx_SET (VOIDmode,
9335 arg,
9336 gen_rtx_MEM (word_mode,
9337 gen_rtx_POST_INC (Pmode,
9338 stack_pointer_rtx)));
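/* As a rough sketch, on x86-64 gen_push above produces RTL of the shape

       (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI arg))

   and gen_pop its mirror image using post_inc; this is how prologue and
   epilogue push/pop insns are represented before assembly output.  */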
9341 /* Return >= 0 if there is an unused call-clobbered register available
9342 for the entire function. */
9344 static unsigned int
9345 ix86_select_alt_pic_regnum (void)
9347 if (crtl->is_leaf
9348 && !crtl->profile
9349 && !ix86_current_function_calls_tls_descriptor)
9351 int i, drap;
9352 /* Can't use the same register for both PIC and DRAP. */
9353 if (crtl->drap_reg)
9354 drap = REGNO (crtl->drap_reg);
9355 else
9356 drap = -1;
9357 for (i = 2; i >= 0; --i)
9358 if (i != drap && !df_regs_ever_live_p (i))
9359 return i;
9362 return INVALID_REGNUM;
9365 /* Return TRUE if we need to save REGNO. */
9367 static bool
9368 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9370 if (pic_offset_table_rtx
9371 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9372 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9373 || crtl->profile
9374 || crtl->calls_eh_return
9375 || crtl->uses_const_pool
9376 || cfun->has_nonlocal_label))
9377 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9379 if (crtl->calls_eh_return && maybe_eh_return)
9381 unsigned i;
9382 for (i = 0; ; i++)
9384 unsigned test = EH_RETURN_DATA_REGNO (i);
9385 if (test == INVALID_REGNUM)
9386 break;
9387 if (test == regno)
9388 return true;
9392 if (crtl->drap_reg
9393 && regno == REGNO (crtl->drap_reg)
9394 && !cfun->machine->no_drap_save_restore)
9395 return true;
9397 return (df_regs_ever_live_p (regno)
9398 && !call_used_regs[regno]
9399 && !fixed_regs[regno]
9400 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9403 /* Return the number of saved general purpose registers. */
9405 static int
9406 ix86_nsaved_regs (void)
9408 int nregs = 0;
9409 int regno;
9411 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9412 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9413 nregs ++;
9414 return nregs;
9417 /* Return the number of saved SSE registers. */
9419 static int
9420 ix86_nsaved_sseregs (void)
9422 int nregs = 0;
9423 int regno;
9425 if (!TARGET_64BIT_MS_ABI)
9426 return 0;
9427 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9428 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9429 nregs ++;
9430 return nregs;
9433 /* Given FROM and TO register numbers, say whether this elimination is
9434 allowed. If stack alignment is needed, we can only replace argument
9435 pointer with hard frame pointer, or replace frame pointer with stack
9436 pointer. Otherwise, frame pointer elimination is automatically
9437 handled and all other eliminations are valid. */
9439 static bool
9440 ix86_can_eliminate (const int from, const int to)
9442 if (stack_realign_fp)
9443 return ((from == ARG_POINTER_REGNUM
9444 && to == HARD_FRAME_POINTER_REGNUM)
9445 || (from == FRAME_POINTER_REGNUM
9446 && to == STACK_POINTER_REGNUM));
9447 else
9448 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9451 /* Return the offset between two registers, one to be eliminated, and the other
9452 its replacement, at the start of a routine. */
9454 HOST_WIDE_INT
9455 ix86_initial_elimination_offset (int from, int to)
9457 struct ix86_frame frame;
9458 ix86_compute_frame_layout (&frame);
9460 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9461 return frame.hard_frame_pointer_offset;
9462 else if (from == FRAME_POINTER_REGNUM
9463 && to == HARD_FRAME_POINTER_REGNUM)
9464 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9465 else
9467 gcc_assert (to == STACK_POINTER_REGNUM);
9469 if (from == ARG_POINTER_REGNUM)
9470 return frame.stack_pointer_offset;
9472 gcc_assert (from == FRAME_POINTER_REGNUM);
9473 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9477 /* In a dynamically-aligned function, we can't know the offset from
9478 stack pointer to frame pointer, so we must ensure that setjmp
9479 eliminates fp against the hard fp (%ebp) rather than trying to
9480 index from %esp up to the top of the frame across a gap that is
9481 of unknown (at compile-time) size. */
9482 static rtx
9483 ix86_builtin_setjmp_frame_value (void)
9485 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9488 /* When using -fsplit-stack, the allocation routines set a field in
9489 the TCB to the bottom of the stack plus this much space, measured
9490 in bytes. */
9492 #define SPLIT_STACK_AVAILABLE 256
9494 /* Fill in the ix86_frame structure for the frame of the currently compiled function. */
9496 static void
9497 ix86_compute_frame_layout (struct ix86_frame *frame)
9499 unsigned HOST_WIDE_INT stack_alignment_needed;
9500 HOST_WIDE_INT offset;
9501 unsigned HOST_WIDE_INT preferred_alignment;
9502 HOST_WIDE_INT size = get_frame_size ();
9503 HOST_WIDE_INT to_allocate;
9505 frame->nregs = ix86_nsaved_regs ();
9506 frame->nsseregs = ix86_nsaved_sseregs ();
9508 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9509 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9511 /* The 64-bit MS ABI seems to require stack alignment to always be 16, except
9512 for function prologues and leaf functions. */
9513 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
9514 && (!crtl->is_leaf || cfun->calls_alloca != 0
9515 || ix86_current_function_calls_tls_descriptor))
9517 preferred_alignment = 16;
9518 stack_alignment_needed = 16;
9519 crtl->preferred_stack_boundary = 128;
9520 crtl->stack_alignment_needed = 128;
9523 gcc_assert (!size || stack_alignment_needed);
9524 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9525 gcc_assert (preferred_alignment <= stack_alignment_needed);
9527 /* For SEH we have to limit the amount of code movement into the prologue.
9528 At present we do this via a BLOCKAGE, at which point there's very little
9529 scheduling that can be done, which means that there's very little point
9530 in doing anything except PUSHs. */
9531 if (TARGET_SEH)
9532 cfun->machine->use_fast_prologue_epilogue = false;
9534 /* During reload iterations the number of registers saved can change.
9535 Recompute the value as needed. Do not recompute when the number of registers
9536 didn't change, as reload does multiple calls to the function and does not
9537 expect the decision to change within a single iteration. */
9538 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9539 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9541 int count = frame->nregs;
9542 struct cgraph_node *node = cgraph_get_node (current_function_decl);
9544 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9546 /* The fast prologue uses move instead of push to save registers. This
9547 is significantly longer, but also executes faster as modern hardware
9548 can execute the moves in parallel, but can't do that for push/pop.
9550 Be careful about choosing which prologue to emit: when the function takes
9551 many instructions to execute, we may use the slow version, as well as
9552 when the function is known to be outside a hot spot (this is known with
9553 feedback only). Weight the size of the function by the number of registers
9554 to save, as it is cheap to use one or two push instructions but very
9555 slow to use many of them. */
9556 if (count)
9557 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9558 if (node->frequency < NODE_FREQUENCY_NORMAL
9559 || (flag_branch_probabilities
9560 && node->frequency < NODE_FREQUENCY_HOT))
9561 cfun->machine->use_fast_prologue_epilogue = false;
9562 else
9563 cfun->machine->use_fast_prologue_epilogue
9564 = !expensive_function_p (count);
9567 frame->save_regs_using_mov
9568 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9569 /* If static stack checking is enabled and done with probes,
9570 the registers need to be saved before allocating the frame. */
9571 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9573 /* Skip return address. */
9574 offset = UNITS_PER_WORD;
9576 /* Skip pushed static chain. */
9577 if (ix86_static_chain_on_stack)
9578 offset += UNITS_PER_WORD;
9580 /* Skip saved base pointer. */
9581 if (frame_pointer_needed)
9582 offset += UNITS_PER_WORD;
9583 frame->hfp_save_offset = offset;
9585 /* The traditional frame pointer location is at the top of the frame. */
9586 frame->hard_frame_pointer_offset = offset;
9588 /* Register save area */
9589 offset += frame->nregs * UNITS_PER_WORD;
9590 frame->reg_save_offset = offset;
9592 /* On SEH targets, registers are pushed just before the frame pointer
9593 location. */
9594 if (TARGET_SEH)
9595 frame->hard_frame_pointer_offset = offset;
9597 /* Align and set SSE register save area. */
9598 if (frame->nsseregs)
9600 /* The only ABI that has saved SSE registers (Win64) also has a
9601 16-byte aligned default stack, and thus we don't need to be
9602 within the re-aligned local stack frame to save them. */
9603 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9604 offset = (offset + 16 - 1) & -16;
9605 offset += frame->nsseregs * 16;
9607 frame->sse_reg_save_offset = offset;
9609 /* The re-aligned stack starts here. Values before this point are not
9610 directly comparable with values below this point. In order to make
9611 sure that no value happens to be the same before and after, force
9612 the alignment computation below to add a non-zero value. */
9613 if (stack_realign_fp)
9614 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9616 /* Va-arg area. */
9617 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9618 offset += frame->va_arg_size;
9620 /* Align start of frame for local function. */
9621 if (stack_realign_fp
9622 || offset != frame->sse_reg_save_offset
9623 || size != 0
9624 || !crtl->is_leaf
9625 || cfun->calls_alloca
9626 || ix86_current_function_calls_tls_descriptor)
9627 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9629 /* Frame pointer points here. */
9630 frame->frame_pointer_offset = offset;
9632 offset += size;
9634 /* Add the outgoing arguments area. It can be skipped if we eliminated
9635 all the function calls as dead code.
9636 Skipping is, however, impossible when the function calls alloca. The alloca
9637 expander assumes that the last crtl->outgoing_args_size bytes
9638 of the stack frame are unused. */
9639 if (ACCUMULATE_OUTGOING_ARGS
9640 && (!crtl->is_leaf || cfun->calls_alloca
9641 || ix86_current_function_calls_tls_descriptor))
9643 offset += crtl->outgoing_args_size;
9644 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9646 else
9647 frame->outgoing_arguments_size = 0;
9649 /* Align stack boundary. Only needed if we're calling another function
9650 or using alloca. */
9651 if (!crtl->is_leaf || cfun->calls_alloca
9652 || ix86_current_function_calls_tls_descriptor)
9653 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9655 /* We've reached end of stack frame. */
9656 frame->stack_pointer_offset = offset;
9658 /* The size the prologue needs to allocate. */
9659 to_allocate = offset - frame->sse_reg_save_offset;
9661 if ((!to_allocate && frame->nregs <= 1)
9662 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9663 frame->save_regs_using_mov = false;
9665 if (ix86_using_red_zone ()
9666 && crtl->sp_is_unchanging
9667 && crtl->is_leaf
9668 && !ix86_current_function_calls_tls_descriptor)
9670 frame->red_zone_size = to_allocate;
9671 if (frame->save_regs_using_mov)
9672 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9673 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9674 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9676 else
9677 frame->red_zone_size = 0;
9678 frame->stack_pointer_offset -= frame->red_zone_size;
9680 /* The SEH frame pointer location is near the bottom of the frame.
9681 This is enforced by the fact that the difference between the
9682 stack pointer and the frame pointer is limited to 240 bytes in
9683 the unwind data structure. */
9684 if (TARGET_SEH)
9686 HOST_WIDE_INT diff;
9688 /* If we can leave the frame pointer where it is, do so. This also returns
9689 the establisher frame for __builtin_frame_address (0). */
9690 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9691 if (diff <= SEH_MAX_FRAME_SIZE
9692 && (diff > 240 || (diff & 15) != 0)
9693 && !crtl->accesses_prior_frames)
9695 /* Ideally we'd determine what portion of the local stack frame
9696 (within the constraint of the lowest 240) is most heavily used.
9697 But without that complication, simply bias the frame pointer
9698 by 128 bytes so as to maximize the amount of the local stack
9699 frame that is addressable with 8-bit offsets. */
9700 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
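/* A rough sketch of the offsets computed above, all measured from the
   incoming stack pointer at function entry and growing downward:

       return address
       [pushed static chain]               (ix86_static_chain_on_stack)
       [saved frame pointer]     <- hfp_save_offset, hard_frame_pointer_offset
       saved GP registers        <- reg_save_offset
       saved SSE registers       <- sse_reg_save_offset (Win64 only)
       va_arg register save area
       local variables           <- frame_pointer_offset
       outgoing arguments
       end of frame              <- stack_pointer_offset (minus red zone)

   The SEH adjustment just above may instead place
   hard_frame_pointer_offset near the bottom of the frame.  */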
9705 /* This is semi-inlined memory_address_length, but simplified
9706 since we know that we're always dealing with reg+offset, and
9707 to avoid having to create and discard all that rtl. */
9709 static inline int
9710 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9712 int len = 4;
9714 if (offset == 0)
9716 /* EBP and R13 cannot be encoded without an offset. */
9717 len = (regno == BP_REG || regno == R13_REG);
9719 else if (IN_RANGE (offset, -128, 127))
9720 len = 1;
9722 /* ESP and R12 must be encoded with a SIB byte. */
9723 if (regno == SP_REG || regno == R12_REG)
9724 len++;
9726 return len;
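/* A few worked examples of the length computed above (bytes of
   displacement and SIB needed beyond the ModRM byte):

       (%eax)      -> 0        0(%ebp)     -> 1   (disp8 required)
       8(%ebx)     -> 1        8(%esp)     -> 2   (disp8 + SIB byte)
       4096(%ebx)  -> 4        4096(%esp)  -> 5   (disp32 + SIB byte)

   In 64-bit mode R13 behaves like EBP and R12 like ESP here.  */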
9729 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9730 The valid base registers are taken from CFUN->MACHINE->FS. */
9732 static rtx
9733 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9735 const struct machine_function *m = cfun->machine;
9736 rtx base_reg = NULL;
9737 HOST_WIDE_INT base_offset = 0;
9739 if (m->use_fast_prologue_epilogue)
9741 /* Choose the base register most likely to allow the most scheduling
9742 opportunities. Generally FP is valid throughout the function,
9743 while DRAP must be reloaded within the epilogue. But choose either
9744 over the SP due to increased encoding size. */
9746 if (m->fs.fp_valid)
9748 base_reg = hard_frame_pointer_rtx;
9749 base_offset = m->fs.fp_offset - cfa_offset;
9751 else if (m->fs.drap_valid)
9753 base_reg = crtl->drap_reg;
9754 base_offset = 0 - cfa_offset;
9756 else if (m->fs.sp_valid)
9758 base_reg = stack_pointer_rtx;
9759 base_offset = m->fs.sp_offset - cfa_offset;
9762 else
9764 HOST_WIDE_INT toffset;
9765 int len = 16, tlen;
9767 /* Choose the base register with the smallest address encoding.
9768 With a tie, choose FP > DRAP > SP. */
9769 if (m->fs.sp_valid)
9771 base_reg = stack_pointer_rtx;
9772 base_offset = m->fs.sp_offset - cfa_offset;
9773 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9775 if (m->fs.drap_valid)
9777 toffset = 0 - cfa_offset;
9778 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9779 if (tlen <= len)
9781 base_reg = crtl->drap_reg;
9782 base_offset = toffset;
9783 len = tlen;
9786 if (m->fs.fp_valid)
9788 toffset = m->fs.fp_offset - cfa_offset;
9789 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9790 if (tlen <= len)
9792 base_reg = hard_frame_pointer_rtx;
9793 base_offset = toffset;
9794 len = tlen;
9798 gcc_assert (base_reg != NULL);
9800 return plus_constant (Pmode, base_reg, base_offset);
9803 /* Emit code to save registers in the prologue. */
9805 static void
9806 ix86_emit_save_regs (void)
9808 unsigned int regno;
9809 rtx insn;
9811 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9812 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9814 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9815 RTX_FRAME_RELATED_P (insn) = 1;
9819 /* Emit a single register save at CFA - CFA_OFFSET. */
9821 static void
9822 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9823 HOST_WIDE_INT cfa_offset)
9825 struct machine_function *m = cfun->machine;
9826 rtx reg = gen_rtx_REG (mode, regno);
9827 rtx mem, addr, base, insn;
9829 addr = choose_baseaddr (cfa_offset);
9830 mem = gen_frame_mem (mode, addr);
9832 /* For SSE saves, we need to indicate the 128-bit alignment. */
9833 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9835 insn = emit_move_insn (mem, reg);
9836 RTX_FRAME_RELATED_P (insn) = 1;
9838 base = addr;
9839 if (GET_CODE (base) == PLUS)
9840 base = XEXP (base, 0);
9841 gcc_checking_assert (REG_P (base));
9843 /* When saving registers into a re-aligned local stack frame, avoid
9844 any tricky guessing by dwarf2out. */
9845 if (m->fs.realigned)
9847 gcc_checking_assert (stack_realign_drap);
9849 if (regno == REGNO (crtl->drap_reg))
9851 /* A bit of a hack. We force the DRAP register to be saved in
9852 the re-aligned stack frame, which provides us with a copy
9853 of the CFA that will last past the prologue. Install it. */
9854 gcc_checking_assert (cfun->machine->fs.fp_valid);
9855 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9856 cfun->machine->fs.fp_offset - cfa_offset);
9857 mem = gen_rtx_MEM (mode, addr);
9858 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9860 else
9862 /* The frame pointer is a stable reference within the
9863 aligned frame. Use it. */
9864 gcc_checking_assert (cfun->machine->fs.fp_valid);
9865 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9866 cfun->machine->fs.fp_offset - cfa_offset);
9867 mem = gen_rtx_MEM (mode, addr);
9868 add_reg_note (insn, REG_CFA_EXPRESSION,
9869 gen_rtx_SET (VOIDmode, mem, reg));
9873 /* The memory may not be relative to the current CFA register,
9874 which means that we may need to generate a new pattern for
9875 use by the unwind info. */
9876 else if (base != m->fs.cfa_reg)
9878 addr = plus_constant (Pmode, m->fs.cfa_reg,
9879 m->fs.cfa_offset - cfa_offset);
9880 mem = gen_rtx_MEM (mode, addr);
9881 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9885 /* Emit code to save registers using MOV insns.
9886 First register is stored at CFA - CFA_OFFSET. */
9887 static void
9888 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9890 unsigned int regno;
9892 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9893 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9895 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
9896 cfa_offset -= UNITS_PER_WORD;
9900 /* Emit code to save SSE registers using MOV insns.
9901 First register is stored at CFA - CFA_OFFSET. */
9902 static void
9903 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9905 unsigned int regno;
9907 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9908 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9910 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9911 cfa_offset -= 16;
9915 static GTY(()) rtx queued_cfa_restores;
9917 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9918 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9919 Don't add the note if the previously saved value will be left untouched
9920 within the stack red zone until return, as unwinders can find the same value
9921 in the register and on the stack. */
9923 static void
9924 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9926 if (!crtl->shrink_wrapped
9927 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9928 return;
9930 if (insn)
9932 add_reg_note (insn, REG_CFA_RESTORE, reg);
9933 RTX_FRAME_RELATED_P (insn) = 1;
9935 else
9936 queued_cfa_restores
9937 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9940 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9942 static void
9943 ix86_add_queued_cfa_restore_notes (rtx insn)
9945 rtx last;
9946 if (!queued_cfa_restores)
9947 return;
9948 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9950 XEXP (last, 1) = REG_NOTES (insn);
9951 REG_NOTES (insn) = queued_cfa_restores;
9952 queued_cfa_restores = NULL_RTX;
9953 RTX_FRAME_RELATED_P (insn) = 1;
9956 /* Expand prologue or epilogue stack adjustment.
9957 The pattern exists to put a dependency on all ebp-based memory accesses.
9958 STYLE should be negative if instructions should be marked as frame related,
9959 zero if the %r11 register is live and cannot be freely used, and positive
9960 otherwise. */
9962 static void
9963 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9964 int style, bool set_cfa)
9966 struct machine_function *m = cfun->machine;
9967 rtx insn;
9968 bool add_frame_related_expr = false;
9970 if (Pmode == SImode)
9971 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9972 else if (x86_64_immediate_operand (offset, DImode))
9973 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9974 else
9976 rtx tmp;
9977 /* r11 is used by indirect sibcall return as well, set before the
9978 epilogue and used after the epilogue. */
9979 if (style)
9980 tmp = gen_rtx_REG (DImode, R11_REG);
9981 else
9983 gcc_assert (src != hard_frame_pointer_rtx
9984 && dest != hard_frame_pointer_rtx);
9985 tmp = hard_frame_pointer_rtx;
9987 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9988 if (style < 0)
9989 add_frame_related_expr = true;
9991 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9994 insn = emit_insn (insn);
9995 if (style >= 0)
9996 ix86_add_queued_cfa_restore_notes (insn);
9998 if (set_cfa)
10000 rtx r;
10002 gcc_assert (m->fs.cfa_reg == src);
10003 m->fs.cfa_offset += INTVAL (offset);
10004 m->fs.cfa_reg = dest;
10006 r = gen_rtx_PLUS (Pmode, src, offset);
10007 r = gen_rtx_SET (VOIDmode, dest, r);
10008 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10009 RTX_FRAME_RELATED_P (insn) = 1;
10011 else if (style < 0)
10013 RTX_FRAME_RELATED_P (insn) = 1;
10014 if (add_frame_related_expr)
10016 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10017 r = gen_rtx_SET (VOIDmode, dest, r);
10018 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10022 if (dest == stack_pointer_rtx)
10024 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10025 bool valid = m->fs.sp_valid;
10027 if (src == hard_frame_pointer_rtx)
10029 valid = m->fs.fp_valid;
10030 ooffset = m->fs.fp_offset;
10032 else if (src == crtl->drap_reg)
10034 valid = m->fs.drap_valid;
10035 ooffset = 0;
10037 else
10039 /* Else there are two possibilities: SP itself, which we set
10040 up as the default above, or EH_RETURN_STACKADJ_RTX, which we
10041 have taken care of by hand along the eh_return path. */
10042 gcc_checking_assert (src == stack_pointer_rtx
10043 || offset == const0_rtx);
10046 m->fs.sp_offset = ooffset - INTVAL (offset);
10047 m->fs.sp_valid = valid;
10051 /* Find an available register to be used as the dynamic realign argument
10052 pointer register. Such a register will be written in the prologue and
10053 used at the beginning of the body, so it must not be
10054 1. a parameter passing register, or
10055 2. the GOT pointer.
10056 We reuse the static-chain register if it is available. Otherwise, we
10057 use DI for i386 and R13 for x86-64. We chose R13 since it has a
10058 shorter encoding.
10060 Return: the regno of the chosen register. */
10062 static unsigned int
10063 find_drap_reg (void)
10065 tree decl = cfun->decl;
10067 if (TARGET_64BIT)
10069 /* Use R13 for a nested function or a function that needs a static chain.
10070 Since a function with a tail call may use any caller-saved
10071 register in the epilogue, DRAP must not use a caller-saved
10072 register in that case. */
10073 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10074 return R13_REG;
10076 return R10_REG;
10078 else
10080 /* Use DI for a nested function or a function that needs a static chain.
10081 Since a function with a tail call may use any caller-saved
10082 register in the epilogue, DRAP must not use a caller-saved
10083 register in that case. */
10084 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10085 return DI_REG;
10087 /* Reuse static chain register if it isn't used for parameter
10088 passing. */
10089 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10091 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10092 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10093 return CX_REG;
10095 return DI_REG;
10099 /* Return minimum incoming stack alignment. */
10101 static unsigned int
10102 ix86_minimum_incoming_stack_boundary (bool sibcall)
10104 unsigned int incoming_stack_boundary;
10106 /* Prefer the one specified at command line. */
10107 if (ix86_user_incoming_stack_boundary)
10108 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10109 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10110 if -mstackrealign is used, this isn't a sibcall check, and the
10111 estimated stack alignment is 128 bits. */
10112 else if (!sibcall
10113 && !TARGET_64BIT
10114 && ix86_force_align_arg_pointer
10115 && crtl->stack_alignment_estimated == 128)
10116 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10117 else
10118 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10120 /* Incoming stack alignment can be changed on individual functions
10121 via force_align_arg_pointer attribute. We use the smallest
10122 incoming stack boundary. */
10123 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10124 && lookup_attribute (ix86_force_align_arg_pointer_string,
10125 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10126 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10128 /* The incoming stack frame has to be aligned at least at
10129 parm_stack_boundary. */
10130 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10131 incoming_stack_boundary = crtl->parm_stack_boundary;
10133 /* The stack at the entrance of main is aligned by the runtime. We use the
10134 smallest incoming stack boundary. */
10135 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10136 && DECL_NAME (current_function_decl)
10137 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10138 && DECL_FILE_SCOPE_P (current_function_decl))
10139 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10141 return incoming_stack_boundary;
10144 /* Update incoming stack boundary and estimated stack alignment. */
10146 static void
10147 ix86_update_stack_boundary (void)
10149 ix86_incoming_stack_boundary
10150 = ix86_minimum_incoming_stack_boundary (false);
10152 /* x86-64 varargs need 16-byte stack alignment for the register save
10153 area. */
10154 if (TARGET_64BIT
10155 && cfun->stdarg
10156 && crtl->stack_alignment_estimated < 128)
10157 crtl->stack_alignment_estimated = 128;
10160 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10161 needed or an rtx for DRAP otherwise. */
10163 static rtx
10164 ix86_get_drap_rtx (void)
10166 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10167 crtl->need_drap = true;
10169 if (stack_realign_drap)
10171 /* Assign DRAP to vDRAP and return vDRAP. */
10172 unsigned int regno = find_drap_reg ();
10173 rtx drap_vreg;
10174 rtx arg_ptr;
10175 rtx seq, insn;
10177 arg_ptr = gen_rtx_REG (Pmode, regno);
10178 crtl->drap_reg = arg_ptr;
10180 start_sequence ();
10181 drap_vreg = copy_to_reg (arg_ptr);
10182 seq = get_insns ();
10183 end_sequence ();
10185 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10186 if (!optimize)
10188 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10189 RTX_FRAME_RELATED_P (insn) = 1;
10191 return drap_vreg;
10193 else
10194 return NULL;
10197 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10199 static rtx
10200 ix86_internal_arg_pointer (void)
10202 return virtual_incoming_args_rtx;
10205 struct scratch_reg {
10206 rtx reg;
10207 bool saved;
10210 /* Return a short-lived scratch register for use on function entry.
10211 In 32-bit mode, it is valid only after the registers are saved
10212 in the prologue. This register must be released by means of
10213 release_scratch_register_on_entry once it is dead. */
10215 static void
10216 get_scratch_register_on_entry (struct scratch_reg *sr)
10218 int regno;
10220 sr->saved = false;
10222 if (TARGET_64BIT)
10224 /* We always use R11 in 64-bit mode. */
10225 regno = R11_REG;
10227 else
10229 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10230 bool fastcall_p
10231 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10232 bool thiscall_p
10233 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10234 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10235 int regparm = ix86_function_regparm (fntype, decl);
10236 int drap_regno
10237 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10239 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10240 for the static chain register. */
10241 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10242 && drap_regno != AX_REG)
10243 regno = AX_REG;
10244 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10245 for the static chain register. */
10246 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10247 regno = AX_REG;
10248 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10249 regno = DX_REG;
10250 /* ecx is the static chain register. */
10251 else if (regparm < 3 && !fastcall_p && !thiscall_p
10252 && !static_chain_p
10253 && drap_regno != CX_REG)
10254 regno = CX_REG;
10255 else if (ix86_save_reg (BX_REG, true))
10256 regno = BX_REG;
10257 /* esi is the static chain register. */
10258 else if (!(regparm == 3 && static_chain_p)
10259 && ix86_save_reg (SI_REG, true))
10260 regno = SI_REG;
10261 else if (ix86_save_reg (DI_REG, true))
10262 regno = DI_REG;
10263 else
10265 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10266 sr->saved = true;
10270 sr->reg = gen_rtx_REG (Pmode, regno);
10271 if (sr->saved)
10273 rtx insn = emit_insn (gen_push (sr->reg));
10274 RTX_FRAME_RELATED_P (insn) = 1;
10278 /* Release a scratch register obtained from the preceding function. */
10280 static void
10281 release_scratch_register_on_entry (struct scratch_reg *sr)
10283 if (sr->saved)
10285 struct machine_function *m = cfun->machine;
10286 rtx x, insn = emit_insn (gen_pop (sr->reg));
10288 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10289 RTX_FRAME_RELATED_P (insn) = 1;
10290 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10291 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10292 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10293 m->fs.sp_offset -= UNITS_PER_WORD;
10297 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10299 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10301 static void
10302 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10304 /* We skip the probe for the first interval + a small dope of 4 words and
10305 probe that many bytes past the specified size to maintain a protection
10306 area at the bottom of the stack. */
10307 const int dope = 4 * UNITS_PER_WORD;
10308 rtx size_rtx = GEN_INT (size), last;
10310 /* See if we have a constant small number of probes to generate. If so,
10311 that's the easy case. The run-time loop is made up of 11 insns in the
10312 generic case, while the compile-time loop is made up of 3+2*(n-1) insns
10313 for n intervals. */
10314 if (size <= 5 * PROBE_INTERVAL)
10316 HOST_WIDE_INT i, adjust;
10317 bool first_probe = true;
10319 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10320 values of N from 1 until it exceeds SIZE. If only one probe is
10321 needed, this will not generate any code. Then adjust and probe
10322 to PROBE_INTERVAL + SIZE. */
10323 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10325 if (first_probe)
10327 adjust = 2 * PROBE_INTERVAL + dope;
10328 first_probe = false;
10330 else
10331 adjust = PROBE_INTERVAL;
10333 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10334 plus_constant (Pmode, stack_pointer_rtx,
10335 -adjust)));
10336 emit_stack_probe (stack_pointer_rtx);
10339 if (first_probe)
10340 adjust = size + PROBE_INTERVAL + dope;
10341 else
10342 adjust = size + PROBE_INTERVAL - i;
10344 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10345 plus_constant (Pmode, stack_pointer_rtx,
10346 -adjust)));
10347 emit_stack_probe (stack_pointer_rtx);
10349 /* Adjust back to account for the additional first interval. */
10350 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10351 plus_constant (Pmode, stack_pointer_rtx,
10352 PROBE_INTERVAL + dope)));
10355 /* Otherwise, do the same as above, but in a loop. Note that we must be
10356 extra careful with variables wrapping around because we might be at
10357 the very top (or the very bottom) of the address space and we have
10358 to be able to handle this case properly; in particular, we use an
10359 equality test for the loop condition. */
10360 else
10362 HOST_WIDE_INT rounded_size;
10363 struct scratch_reg sr;
10365 get_scratch_register_on_entry (&sr);
10368 /* Step 1: round SIZE to the previous multiple of the interval. */
10370 rounded_size = size & -PROBE_INTERVAL;
10373 /* Step 2: compute initial and final value of the loop counter. */
10375 /* SP = SP_0 + PROBE_INTERVAL. */
10376 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10377 plus_constant (Pmode, stack_pointer_rtx,
10378 - (PROBE_INTERVAL + dope))));
10380 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10381 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10382 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10383 gen_rtx_PLUS (Pmode, sr.reg,
10384 stack_pointer_rtx)));
10387 /* Step 3: the loop
10389 while (SP != LAST_ADDR)
10391 SP = SP + PROBE_INTERVAL
10392 probe at SP
10395 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10396 values of N from 1 until it is equal to ROUNDED_SIZE. */
10398 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10401 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10402 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10404 if (size != rounded_size)
10406 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10407 plus_constant (Pmode, stack_pointer_rtx,
10408 rounded_size - size)));
10409 emit_stack_probe (stack_pointer_rtx);
10412 /* Adjust back to account for the additional first interval. */
10413 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10414 plus_constant (Pmode, stack_pointer_rtx,
10415 PROBE_INTERVAL + dope)));
10417 release_scratch_register_on_entry (&sr);
10420 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10422 /* Even if the stack pointer isn't the CFA register, we need to correctly
10423 describe the adjustments made to it, in particular differentiate the
10424 frame-related ones from the frame-unrelated ones. */
10425 if (size > 0)
10427 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10428 XVECEXP (expr, 0, 0)
10429 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10430 plus_constant (Pmode, stack_pointer_rtx, -size));
10431 XVECEXP (expr, 0, 1)
10432 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10433 plus_constant (Pmode, stack_pointer_rtx,
10434 PROBE_INTERVAL + dope + size));
10435 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10436 RTX_FRAME_RELATED_P (last) = 1;
10438 cfun->machine->fs.sp_offset += size;
10441 /* Make sure nothing is scheduled before we are done. */
10442 emit_insn (gen_blockage ());
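/* For illustration, assuming a 64-bit target with the common
   PROBE_INTERVAL of 4096 bytes (so dope = 32), a constant allocation of
   8192 bytes takes the small-size path above and emits roughly

       sub   $8224, %rsp
       or    $0, (%rsp)
       sub   $4096, %rsp
       or    $0, (%rsp)
       add   $4128, %rsp

   a net adjustment of -8192 with every new page touched.  The constants
   vary with STACK_CHECK_PROBE_INTERVAL_EXP and the word size.  */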
10445 /* Adjust the stack pointer up to REG while probing it. */
10447 const char *
10448 output_adjust_stack_and_probe (rtx reg)
10450 static int labelno = 0;
10451 char loop_lab[32], end_lab[32];
10452 rtx xops[2];
10454 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10455 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10457 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10459 /* Jump to END_LAB if SP == LAST_ADDR. */
10460 xops[0] = stack_pointer_rtx;
10461 xops[1] = reg;
10462 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10463 fputs ("\tje\t", asm_out_file);
10464 assemble_name_raw (asm_out_file, end_lab);
10465 fputc ('\n', asm_out_file);
10467 /* SP = SP + PROBE_INTERVAL. */
10468 xops[1] = GEN_INT (PROBE_INTERVAL);
10469 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10471 /* Probe at SP. */
10472 xops[1] = const0_rtx;
10473 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10475 fprintf (asm_out_file, "\tjmp\t");
10476 assemble_name_raw (asm_out_file, loop_lab);
10477 fputc ('\n', asm_out_file);
10479 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10481 return "";
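/* For illustration, the loop printed above looks roughly like this on a
   64-bit target, assuming PROBE_INTERVAL == 4096 (the scratch register is
   always R11 in 64-bit mode and the labels are generated internally):

     .LPSRL0:
       cmpq  %r11, %rsp
       je    .LPSRE0
       subq  $4096, %rsp
       orq   $0, (%rsp)
       jmp   .LPSRL0
     .LPSRE0:  */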
10484 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10485 inclusive. These are offsets from the current stack pointer. */
10487 static void
10488 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10490 /* See if we have a constant small number of probes to generate. If so,
10491 that's the easy case. The run-time loop is made up of 7 insns in the
10492 generic case while the compile-time loop is made up of n insns
10493 for n intervals. */
10494 if (size <= 7 * PROBE_INTERVAL)
10496 HOST_WIDE_INT i;
10498 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10499 it exceeds SIZE. If only one probe is needed, this will not
10500 generate any code. Then probe at FIRST + SIZE. */
10501 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10502 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10503 -(first + i)));
10505 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10506 -(first + size)));
10509 /* Otherwise, do the same as above, but in a loop. Note that we must be
10510 extra careful with variables wrapping around because we might be at
10511 the very top (or the very bottom) of the address space and we have
10512 to be able to handle this case properly; in particular, we use an
10513 equality test for the loop condition. */
10514 else
10516 HOST_WIDE_INT rounded_size, last;
10517 struct scratch_reg sr;
10519 get_scratch_register_on_entry (&sr);
10522 /* Step 1: round SIZE to the previous multiple of the interval. */
10524 rounded_size = size & -PROBE_INTERVAL;
10527 /* Step 2: compute initial and final value of the loop counter. */
10529 /* TEST_OFFSET = FIRST. */
10530 emit_move_insn (sr.reg, GEN_INT (-first));
10532 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10533 last = first + rounded_size;
10536 /* Step 3: the loop
10538 while (TEST_ADDR != LAST_ADDR)
10540 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10541 probe at TEST_ADDR
10544 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10545 until it is equal to ROUNDED_SIZE. */
10547 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10550 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10551 that SIZE is equal to ROUNDED_SIZE. */
10553 if (size != rounded_size)
10554 emit_stack_probe (plus_constant (Pmode,
10555 gen_rtx_PLUS (Pmode,
10556 stack_pointer_rtx,
10557 sr.reg),
10558 rounded_size - size));
10560 release_scratch_register_on_entry (&sr);
10563 /* Make sure nothing is scheduled before we are done. */
10564 emit_insn (gen_blockage ());
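/* A worked example with illustrative numbers (assuming PROBE_INTERVAL is
   4096):

   - first == 8192, size == 10000: size <= 7 * 4096, so the unrolled path
     probes at 12288 and 16384 bytes below the stack pointer, then emits a
     final probe at first + size == 18192 bytes below it.

   - first == 8192, size == 40000: rounded_size == 40000 & -4096 == 36864,
     so the loop probes at first + 4096, first + 8192, ..., first + 36864
     below the stack pointer, and since size != rounded_size a last probe
     is emitted at first + 40000 below it.  */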
10567 /* Probe a range of stack addresses from REG to END, inclusive. These are
10568 offsets from the current stack pointer. */
10570 const char *
10571 output_probe_stack_range (rtx reg, rtx end)
10573 static int labelno = 0;
10574 char loop_lab[32], end_lab[32];
10575 rtx xops[3];
10577 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10578 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10580 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10582 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10583 xops[0] = reg;
10584 xops[1] = end;
10585 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10586 fputs ("\tje\t", asm_out_file);
10587 assemble_name_raw (asm_out_file, end_lab);
10588 fputc ('\n', asm_out_file);
10590 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10591 xops[1] = GEN_INT (PROBE_INTERVAL);
10592 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10594 /* Probe at TEST_ADDR. */
10595 xops[0] = stack_pointer_rtx;
10596 xops[1] = reg;
10597 xops[2] = const0_rtx;
10598 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10600 fprintf (asm_out_file, "\tjmp\t");
10601 assemble_name_raw (asm_out_file, loop_lab);
10602 fputc ('\n', asm_out_file);
10604 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10606 return "";
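/* Illustrative sketch (assuming 64-bit code, PROBE_INTERVAL == 4096, and
   hypothetical register choices with TEST_ADDR in %r11 and LAST_ADDR in
   %r10): unlike output_adjust_stack_and_probe, the stack pointer itself is
   not moved here; the probes are addressed relative to it:

	.LPSRL1:
		cmpq	%r10, %r11
		je	.LPSRE1
		subq	$4096, %r11
		orq	$0, (%rsp,%r11)
		jmp	.LPSRL1
	.LPSRE1:

   TEST_ADDR holds a negative offset from the stack pointer, so the sub
   walks it further below the stack while each probe goes through
   (%rsp,%r11).  */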
10609 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10610 to be generated in correct form. */
10611 static void
10612 ix86_finalize_stack_realign_flags (void)
10614 /* Check if stack realignment is really needed after reload, and
10615 store the result in cfun */
10616 unsigned int incoming_stack_boundary
10617 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10618 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10619 unsigned int stack_realign = (incoming_stack_boundary
10620 < (crtl->is_leaf
10621 ? crtl->max_used_stack_slot_alignment
10622 : crtl->stack_alignment_needed));
10624 if (crtl->stack_realign_finalized)
10626 /* After stack_realign_needed is finalized, we can no longer
10627 change it. */
10628 gcc_assert (crtl->stack_realign_needed == stack_realign);
10629 return;
10632 /* If the only reason for frame_pointer_needed is that we conservatively
10633 assumed stack realignment might be needed, but in the end nothing that
10634 needed the stack alignment had been spilled, clear frame_pointer_needed
10635 and say we don't need stack realignment. */
10636 if (stack_realign
10637 && frame_pointer_needed
10638 && crtl->is_leaf
10639 && flag_omit_frame_pointer
10640 && crtl->sp_is_unchanging
10641 && !ix86_current_function_calls_tls_descriptor
10642 && !crtl->accesses_prior_frames
10643 && !cfun->calls_alloca
10644 && !crtl->calls_eh_return
10645 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10646 && !ix86_frame_pointer_required ()
10647 && get_frame_size () == 0
10648 && ix86_nsaved_sseregs () == 0
10649 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10651 HARD_REG_SET set_up_by_prologue, prologue_used;
10652 basic_block bb;
10654 CLEAR_HARD_REG_SET (prologue_used);
10655 CLEAR_HARD_REG_SET (set_up_by_prologue);
10656 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10657 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10658 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10659 HARD_FRAME_POINTER_REGNUM);
10660 FOR_EACH_BB_FN (bb, cfun)
10662 rtx insn;
10663 FOR_BB_INSNS (bb, insn)
10664 if (NONDEBUG_INSN_P (insn)
10665 && requires_stack_frame_p (insn, prologue_used,
10666 set_up_by_prologue))
10668 crtl->stack_realign_needed = stack_realign;
10669 crtl->stack_realign_finalized = true;
10670 return;
10674 /* If drap has been set, but it actually isn't live at the start
10675 of the function, there is no reason to set it up. */
10676 if (crtl->drap_reg)
10678 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10679 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10681 crtl->drap_reg = NULL_RTX;
10682 crtl->need_drap = false;
10685 else
10686 cfun->machine->no_drap_save_restore = true;
10688 frame_pointer_needed = false;
10689 stack_realign = false;
10690 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10691 crtl->stack_alignment_needed = incoming_stack_boundary;
10692 crtl->stack_alignment_estimated = incoming_stack_boundary;
10693 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10694 crtl->preferred_stack_boundary = incoming_stack_boundary;
10695 df_finish_pass (true);
10696 df_scan_alloc (NULL);
10697 df_scan_blocks ();
10698 df_compute_regs_ever_live (true);
10699 df_analyze ();
10702 crtl->stack_realign_needed = stack_realign;
10703 crtl->stack_realign_finalized = true;
10706 /* Expand the prologue into a bunch of separate insns. */
10708 void
10709 ix86_expand_prologue (void)
10711 struct machine_function *m = cfun->machine;
10712 rtx insn, t;
10713 bool pic_reg_used;
10714 struct ix86_frame frame;
10715 HOST_WIDE_INT allocate;
10716 bool int_registers_saved;
10717 bool sse_registers_saved;
10719 ix86_finalize_stack_realign_flags ();
10721 /* DRAP should not coexist with stack_realign_fp */
10722 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10724 memset (&m->fs, 0, sizeof (m->fs));
10726 /* Initialize CFA state for before the prologue. */
10727 m->fs.cfa_reg = stack_pointer_rtx;
10728 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10730 /* Track SP offset to the CFA. We continue tracking this after we've
10731 swapped the CFA register away from SP. In the case of re-alignment
10732 this is fudged; we're interested in offsets within the local frame. */
10733 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10734 m->fs.sp_valid = true;
10736 ix86_compute_frame_layout (&frame);
10738 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10740 /* We should have already generated an error for any use of
10741 ms_hook on a nested function. */
10742 gcc_checking_assert (!ix86_static_chain_on_stack);
10744 /* Check if profiling is active and we shall use the profiling before
10745 prologue variant. If so, sorry. */
10746 if (crtl->profile && flag_fentry != 0)
10747 sorry ("ms_hook_prologue attribute isn%'t compatible "
10748 "with -mfentry for 32-bit");
10750 /* In ix86_asm_output_function_label we emitted:
10751 8b ff movl.s %edi,%edi
10752 55 push %ebp
10753 8b ec movl.s %esp,%ebp
10755 This matches the hookable function prologue in Win32 API
10756 functions in Microsoft Windows XP Service Pack 2 and newer.
10757 Wine uses this to enable Windows apps to hook the Win32 API
10758 functions provided by Wine.
10760 What that means is that we've already set up the frame pointer. */
10762 if (frame_pointer_needed
10763 && !(crtl->drap_reg && crtl->stack_realign_needed))
10765 rtx push, mov;
10767 /* We've decided to use the frame pointer already set up.
10768 Describe this to the unwinder by pretending that both
10769 push and mov insns happen right here.
10771 Putting the unwind info here at the end of the ms_hook
10772 is done so that we can make absolutely certain we get
10773 the required byte sequence at the start of the function,
10774 rather than relying on an assembler that can produce
10775 the exact encoding required.
10777 However it does mean (in the unpatched case) that we have
10778 a 1 insn window where the asynchronous unwind info is
10779 incorrect. However, if we placed the unwind info at
10780 its correct location we would have incorrect unwind info
10781 in the patched case. Which is probably all moot since
10782 I don't expect Wine generates dwarf2 unwind info for the
10783 system libraries that use this feature. */
10785 insn = emit_insn (gen_blockage ());
10787 push = gen_push (hard_frame_pointer_rtx);
10788 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10789 stack_pointer_rtx);
10790 RTX_FRAME_RELATED_P (push) = 1;
10791 RTX_FRAME_RELATED_P (mov) = 1;
10793 RTX_FRAME_RELATED_P (insn) = 1;
10794 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10795 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10797 /* Note that gen_push incremented m->fs.cfa_offset, even
10798 though we didn't emit the push insn here. */
10799 m->fs.cfa_reg = hard_frame_pointer_rtx;
10800 m->fs.fp_offset = m->fs.cfa_offset;
10801 m->fs.fp_valid = true;
10803 else
10805 /* The frame pointer is not needed so pop %ebp again.
10806 This leaves us with a pristine state. */
10807 emit_insn (gen_pop (hard_frame_pointer_rtx));
10811 /* The first insn of a function that accepts its static chain on the
10812 stack is to push the register that would be filled in by a direct
10813 call. This insn will be skipped by the trampoline. */
10814 else if (ix86_static_chain_on_stack)
10816 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10817 emit_insn (gen_blockage ());
10819 /* We don't want to interpret this push insn as a register save,
10820 only as a stack adjustment. The real copy of the register as
10821 a save will be done later, if needed. */
10822 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10823 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10824 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10825 RTX_FRAME_RELATED_P (insn) = 1;
10828 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10829 DRAP is needed and stack realignment is really needed after reload */
10830 if (stack_realign_drap)
10832 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10834 /* Only need to push parameter pointer reg if it is caller saved. */
10835 if (!call_used_regs[REGNO (crtl->drap_reg)])
10837 /* Push arg pointer reg */
10838 insn = emit_insn (gen_push (crtl->drap_reg));
10839 RTX_FRAME_RELATED_P (insn) = 1;
10842 /* Grab the argument pointer. */
10843 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10844 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10845 RTX_FRAME_RELATED_P (insn) = 1;
10846 m->fs.cfa_reg = crtl->drap_reg;
10847 m->fs.cfa_offset = 0;
10849 /* Align the stack. */
10850 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10851 stack_pointer_rtx,
10852 GEN_INT (-align_bytes)));
10853 RTX_FRAME_RELATED_P (insn) = 1;
10855 /* Replicate the return address on the stack so that return
10856 address can be reached via (argp - 1) slot. This is needed
10857 to implement macro RETURN_ADDR_RTX and intrinsic function
10858 expand_builtin_return_addr etc. */
10859 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10860 t = gen_frame_mem (word_mode, t);
10861 insn = emit_insn (gen_push (t));
10862 RTX_FRAME_RELATED_P (insn) = 1;
10864 /* For the purposes of frame and register save area addressing,
10865 we've started over with a new frame. */
10866 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10867 m->fs.realigned = true;
10870 int_registers_saved = (frame.nregs == 0);
10871 sse_registers_saved = (frame.nsseregs == 0);
10873 if (frame_pointer_needed && !m->fs.fp_valid)
10875 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10876 slower on all targets. Also sdb doesn't like it. */
10877 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10878 RTX_FRAME_RELATED_P (insn) = 1;
10880 /* Push registers now, before setting the frame pointer
10881 on SEH target. */
10882 if (!int_registers_saved
10883 && TARGET_SEH
10884 && !frame.save_regs_using_mov)
10886 ix86_emit_save_regs ();
10887 int_registers_saved = true;
10888 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10891 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10893 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10894 RTX_FRAME_RELATED_P (insn) = 1;
10896 if (m->fs.cfa_reg == stack_pointer_rtx)
10897 m->fs.cfa_reg = hard_frame_pointer_rtx;
10898 m->fs.fp_offset = m->fs.sp_offset;
10899 m->fs.fp_valid = true;
10903 if (!int_registers_saved)
10905 /* If saving registers via PUSH, do so now. */
10906 if (!frame.save_regs_using_mov)
10908 ix86_emit_save_regs ();
10909 int_registers_saved = true;
10910 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10913 /* When using the red zone we may start register saving before allocating
10914 the stack frame, saving one cycle of the prologue. However, avoid
10915 doing this if we have to probe the stack; at least on x86_64 the
10916 stack probe can turn into a call that clobbers a red zone location. */
10917 else if (ix86_using_red_zone ()
10918 && (! TARGET_STACK_PROBE
10919 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10921 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10922 int_registers_saved = true;
10926 if (stack_realign_fp)
10928 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10929 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10931 /* The computation of the size of the re-aligned stack frame means
10932 that we must allocate the size of the register save area before
10933 performing the actual alignment. Otherwise we cannot guarantee
10934 that there's enough storage above the realignment point. */
10935 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10936 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10937 GEN_INT (m->fs.sp_offset
10938 - frame.sse_reg_save_offset),
10939 -1, false);
10941 /* Align the stack. */
10942 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10943 stack_pointer_rtx,
10944 GEN_INT (-align_bytes)));
10946 /* For the purposes of register save area addressing, the stack
10947 pointer is no longer valid. As for the value of sp_offset,
10948 see ix86_compute_frame_layout, which we need to match in order
10949 to pass verification of stack_pointer_offset at the end. */
10950 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10951 m->fs.sp_valid = false;
10954 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10956 if (flag_stack_usage_info)
10958 /* We start to count from ARG_POINTER. */
10959 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10961 /* If it was realigned, take into account the fake frame. */
10962 if (stack_realign_drap)
10964 if (ix86_static_chain_on_stack)
10965 stack_size += UNITS_PER_WORD;
10967 if (!call_used_regs[REGNO (crtl->drap_reg)])
10968 stack_size += UNITS_PER_WORD;
10970 /* This over-estimates by 1 minimal-stack-alignment-unit but
10971 mitigates that by counting in the new return address slot. */
10972 current_function_dynamic_stack_size
10973 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10976 current_function_static_stack_size = stack_size;
10979 /* On SEH target with very large frame size, allocate an area to save
10980 SSE registers (as the very large allocation won't be described). */
10981 if (TARGET_SEH
10982 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
10983 && !sse_registers_saved)
10985 HOST_WIDE_INT sse_size =
10986 frame.sse_reg_save_offset - frame.reg_save_offset;
10988 gcc_assert (int_registers_saved);
10990 /* No need to do stack checking as the area will be immediately
10991 written. */
10992 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10993 GEN_INT (-sse_size), -1,
10994 m->fs.cfa_reg == stack_pointer_rtx);
10995 allocate -= sse_size;
10996 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10997 sse_registers_saved = true;
11000 /* The stack has already been decremented by the instruction calling us
11001 so probe if the size is non-negative to preserve the protection area. */
11002 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11004 /* We expect the registers to be saved when probes are used. */
11005 gcc_assert (int_registers_saved);
11007 if (STACK_CHECK_MOVING_SP)
11009 if (!(crtl->is_leaf && !cfun->calls_alloca
11010 && allocate <= PROBE_INTERVAL))
11012 ix86_adjust_stack_and_probe (allocate);
11013 allocate = 0;
11016 else
11018 HOST_WIDE_INT size = allocate;
11020 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11021 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11023 if (TARGET_STACK_PROBE)
11025 if (crtl->is_leaf && !cfun->calls_alloca)
11027 if (size > PROBE_INTERVAL)
11028 ix86_emit_probe_stack_range (0, size);
11030 else
11031 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11033 else
11035 if (crtl->is_leaf && !cfun->calls_alloca)
11037 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11038 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11039 size - STACK_CHECK_PROTECT);
11041 else
11042 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11047 if (allocate == 0)
11049 else if (!ix86_target_stack_probe ()
11050 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11052 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11053 GEN_INT (-allocate), -1,
11054 m->fs.cfa_reg == stack_pointer_rtx);
11056 else
11058 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11059 rtx r10 = NULL;
11060 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11061 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11062 bool eax_live = ix86_eax_live_at_start_p ();
11063 bool r10_live = false;
11065 if (TARGET_64BIT)
11066 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11068 if (eax_live)
11070 insn = emit_insn (gen_push (eax));
11071 allocate -= UNITS_PER_WORD;
11072 /* Note that SEH directives need to continue tracking the stack
11073 pointer even after the frame pointer has been set up. */
11074 if (sp_is_cfa_reg || TARGET_SEH)
11076 if (sp_is_cfa_reg)
11077 m->fs.cfa_offset += UNITS_PER_WORD;
11078 RTX_FRAME_RELATED_P (insn) = 1;
11082 if (r10_live)
11084 r10 = gen_rtx_REG (Pmode, R10_REG);
11085 insn = emit_insn (gen_push (r10));
11086 allocate -= UNITS_PER_WORD;
11087 if (sp_is_cfa_reg || TARGET_SEH)
11089 if (sp_is_cfa_reg)
11090 m->fs.cfa_offset += UNITS_PER_WORD;
11091 RTX_FRAME_RELATED_P (insn) = 1;
11095 emit_move_insn (eax, GEN_INT (allocate));
11096 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11098 /* Use the fact that AX still contains ALLOCATE. */
11099 adjust_stack_insn = (Pmode == DImode
11100 ? gen_pro_epilogue_adjust_stack_di_sub
11101 : gen_pro_epilogue_adjust_stack_si_sub);
11103 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11104 stack_pointer_rtx, eax));
11106 if (sp_is_cfa_reg || TARGET_SEH)
11108 if (sp_is_cfa_reg)
11109 m->fs.cfa_offset += allocate;
11110 RTX_FRAME_RELATED_P (insn) = 1;
11111 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11112 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11113 plus_constant (Pmode, stack_pointer_rtx,
11114 -allocate)));
11116 m->fs.sp_offset += allocate;
11118 /* Use stack_pointer_rtx for relative addressing so that code
11119 works for realigned stack, too. */
11120 if (r10_live && eax_live)
11122 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11123 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11124 gen_frame_mem (word_mode, t));
11125 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11126 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11127 gen_frame_mem (word_mode, t));
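      /* For illustration (assuming 64-bit, UNITS_PER_WORD == 8): the two
	 registers pushed above now sit just beyond the fresh allocation,
	 so %r10 is reloaded from (%rsp + %rax) and %rax from 8 bytes above
	 that, %rax still holding the allocation size.  */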
11129 else if (eax_live || r10_live)
11131 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11132 emit_move_insn (gen_rtx_REG (word_mode,
11133 (eax_live ? AX_REG : R10_REG)),
11134 gen_frame_mem (word_mode, t));
11137 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11139 /* If we haven't already set up the frame pointer, do so now. */
11140 if (frame_pointer_needed && !m->fs.fp_valid)
11142 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11143 GEN_INT (frame.stack_pointer_offset
11144 - frame.hard_frame_pointer_offset));
11145 insn = emit_insn (insn);
11146 RTX_FRAME_RELATED_P (insn) = 1;
11147 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11149 if (m->fs.cfa_reg == stack_pointer_rtx)
11150 m->fs.cfa_reg = hard_frame_pointer_rtx;
11151 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11152 m->fs.fp_valid = true;
11155 if (!int_registers_saved)
11156 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11157 if (!sse_registers_saved)
11158 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11160 pic_reg_used = false;
11161 /* We don't use pic-register for pe-coff target. */
11162 if (pic_offset_table_rtx
11163 && !TARGET_PECOFF
11164 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11165 || crtl->profile))
11167 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
11169 if (alt_pic_reg_used != INVALID_REGNUM)
11170 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
11172 pic_reg_used = true;
11175 if (pic_reg_used)
11177 if (TARGET_64BIT)
11179 if (ix86_cmodel == CM_LARGE_PIC)
11181 rtx label, tmp_reg;
11183 gcc_assert (Pmode == DImode);
11184 label = gen_label_rtx ();
11185 emit_label (label);
11186 LABEL_PRESERVE_P (label) = 1;
11187 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
11188 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
11189 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
11190 label));
11191 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
11192 insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
11193 pic_offset_table_rtx, tmp_reg));
11195 else
11196 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
11198 else
11200 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
11201 RTX_FRAME_RELATED_P (insn) = 1;
11202 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11206 /* In the pic_reg_used case, make sure that the got load isn't deleted
11207 when mcount needs it. Blockage to avoid call movement across mcount
11208 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
11209 note. */
11210 if (crtl->profile && !flag_fentry && pic_reg_used)
11211 emit_insn (gen_prologue_use (pic_offset_table_rtx));
11213 if (crtl->drap_reg && !crtl->stack_realign_needed)
11215 /* vDRAP is set up, but after reload it turns out stack realignment
11216 isn't necessary; here we emit the prologue to set up DRAP
11217 without the stack realignment adjustment. */
11218 t = choose_baseaddr (0);
11219 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11222 /* Prevent instructions from being scheduled into register save push
11223 sequence when access to the redzone area is done through frame pointer.
11224 The offset between the frame pointer and the stack pointer is calculated
11225 relative to the value of the stack pointer at the end of the function
11226 prologue, and moving instructions that access redzone area via frame
11227 pointer inside push sequence violates this assumption. */
11228 if (frame_pointer_needed && frame.red_zone_size)
11229 emit_insn (gen_memory_blockage ());
11231 /* Emit cld instruction if stringops are used in the function. */
11232 if (TARGET_CLD && ix86_current_function_needs_cld)
11233 emit_insn (gen_cld ());
11235 /* SEH requires that the prologue end within 256 bytes of the start of
11236 the function. Prevent instruction schedules that would extend that.
11237 Further, prevent alloca modifications to the stack pointer from being
11238 combined with prologue modifications. */
11239 if (TARGET_SEH)
11240 emit_insn (gen_prologue_use (stack_pointer_rtx));
11243 /* Emit code to restore REG using a POP insn. */
11245 static void
11246 ix86_emit_restore_reg_using_pop (rtx reg)
11248 struct machine_function *m = cfun->machine;
11249 rtx insn = emit_insn (gen_pop (reg));
11251 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11252 m->fs.sp_offset -= UNITS_PER_WORD;
11254 if (m->fs.cfa_reg == crtl->drap_reg
11255 && REGNO (reg) == REGNO (crtl->drap_reg))
11257 /* Previously we'd represented the CFA as an expression
11258 like *(%ebp - 8). We've just popped that value from
11259 the stack, which means we need to reset the CFA to
11260 the drap register. This will remain until we restore
11261 the stack pointer. */
11262 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11263 RTX_FRAME_RELATED_P (insn) = 1;
11265 /* This means that the DRAP register is valid for addressing too. */
11266 m->fs.drap_valid = true;
11267 return;
11270 if (m->fs.cfa_reg == stack_pointer_rtx)
11272 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11273 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11274 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11275 RTX_FRAME_RELATED_P (insn) = 1;
11277 m->fs.cfa_offset -= UNITS_PER_WORD;
11280 /* When the frame pointer is the CFA, and we pop it, we are
11281 swapping back to the stack pointer as the CFA. This happens
11282 for stack frames that don't allocate other data, so we assume
11283 the stack pointer is now pointing at the return address, i.e.
11284 the function entry state, which makes the offset be 1 word. */
11285 if (reg == hard_frame_pointer_rtx)
11287 m->fs.fp_valid = false;
11288 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11290 m->fs.cfa_reg = stack_pointer_rtx;
11291 m->fs.cfa_offset -= UNITS_PER_WORD;
11293 add_reg_note (insn, REG_CFA_DEF_CFA,
11294 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11295 GEN_INT (m->fs.cfa_offset)));
11296 RTX_FRAME_RELATED_P (insn) = 1;
11301 /* Emit code to restore saved registers using POP insns. */
11303 static void
11304 ix86_emit_restore_regs_using_pop (void)
11306 unsigned int regno;
11308 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11309 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11310 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11313 /* Emit code and notes for the LEAVE instruction. */
11315 static void
11316 ix86_emit_leave (void)
11318 struct machine_function *m = cfun->machine;
11319 rtx insn = emit_insn (ix86_gen_leave ());
11321 ix86_add_queued_cfa_restore_notes (insn);
11323 gcc_assert (m->fs.fp_valid);
11324 m->fs.sp_valid = true;
11325 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11326 m->fs.fp_valid = false;
11328 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11330 m->fs.cfa_reg = stack_pointer_rtx;
11331 m->fs.cfa_offset = m->fs.sp_offset;
11333 add_reg_note (insn, REG_CFA_DEF_CFA,
11334 plus_constant (Pmode, stack_pointer_rtx,
11335 m->fs.sp_offset));
11336 RTX_FRAME_RELATED_P (insn) = 1;
11338 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11339 m->fs.fp_offset);
11342 /* Emit code to restore saved registers using MOV insns.
11343 First register is restored from CFA - CFA_OFFSET. */
11344 static void
11345 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11346 bool maybe_eh_return)
11348 struct machine_function *m = cfun->machine;
11349 unsigned int regno;
11351 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11352 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11354 rtx reg = gen_rtx_REG (word_mode, regno);
11355 rtx insn, mem;
11357 mem = choose_baseaddr (cfa_offset);
11358 mem = gen_frame_mem (word_mode, mem);
11359 insn = emit_move_insn (reg, mem);
11361 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11363 /* Previously we'd represented the CFA as an expression
11364 like *(%ebp - 8). We've just popped that value from
11365 the stack, which means we need to reset the CFA to
11366 the drap register. This will remain until we restore
11367 the stack pointer. */
11368 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11369 RTX_FRAME_RELATED_P (insn) = 1;
11371 /* This means that the DRAP register is valid for addressing. */
11372 m->fs.drap_valid = true;
11374 else
11375 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11377 cfa_offset -= UNITS_PER_WORD;
11381 /* Emit code to restore saved registers using MOV insns.
11382 First register is restored from CFA - CFA_OFFSET. */
11383 static void
11384 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11385 bool maybe_eh_return)
11387 unsigned int regno;
11389 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11390 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11392 rtx reg = gen_rtx_REG (V4SFmode, regno);
11393 rtx mem;
11395 mem = choose_baseaddr (cfa_offset);
11396 mem = gen_rtx_MEM (V4SFmode, mem);
11397 set_mem_align (mem, 128);
11398 emit_move_insn (reg, mem);
11400 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11402 cfa_offset -= 16;
11406 /* Restore function stack, frame, and registers. */
11408 void
11409 ix86_expand_epilogue (int style)
11411 struct machine_function *m = cfun->machine;
11412 struct machine_frame_state frame_state_save = m->fs;
11413 struct ix86_frame frame;
11414 bool restore_regs_via_mov;
11415 bool using_drap;
11417 ix86_finalize_stack_realign_flags ();
11418 ix86_compute_frame_layout (&frame);
11420 m->fs.sp_valid = (!frame_pointer_needed
11421 || (crtl->sp_is_unchanging
11422 && !stack_realign_fp));
11423 gcc_assert (!m->fs.sp_valid
11424 || m->fs.sp_offset == frame.stack_pointer_offset);
11426 /* The FP must be valid if the frame pointer is present. */
11427 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11428 gcc_assert (!m->fs.fp_valid
11429 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11431 /* We must have *some* valid pointer to the stack frame. */
11432 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11434 /* The DRAP is never valid at this point. */
11435 gcc_assert (!m->fs.drap_valid);
11437 /* See the comment about red zone and frame
11438 pointer usage in ix86_expand_prologue. */
11439 if (frame_pointer_needed && frame.red_zone_size)
11440 emit_insn (gen_memory_blockage ());
11442 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11443 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11445 /* Determine the CFA offset of the end of the red-zone. */
11446 m->fs.red_zone_offset = 0;
11447 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11449 /* The red-zone begins below the return address. */
11450 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
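      /* Illustratively, on x86-64 (RED_ZONE_SIZE == 128, UNITS_PER_WORD == 8)
	 this places the end of the red zone 136 bytes below the CFA.  */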
11452 /* When the register save area is in the aligned portion of
11453 the stack, determine the maximum runtime displacement that
11454 matches up with the aligned frame. */
11455 if (stack_realign_drap)
11456 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11457 + UNITS_PER_WORD);
11460 /* Special care must be taken for the normal return case of a function
11461 using eh_return: the eax and edx registers are marked as saved, but
11462 not restored along this path. Adjust the save location to match. */
11463 if (crtl->calls_eh_return && style != 2)
11464 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11466 /* EH_RETURN requires the use of moves to function properly. */
11467 if (crtl->calls_eh_return)
11468 restore_regs_via_mov = true;
11469 /* SEH requires the use of pops to identify the epilogue. */
11470 else if (TARGET_SEH)
11471 restore_regs_via_mov = false;
11472 /* If we're only restoring one register and sp is not valid, then
11473 use a move instruction to restore the register, since it's
11474 less work than reloading sp and popping the register. */
11475 else if (!m->fs.sp_valid && frame.nregs <= 1)
11476 restore_regs_via_mov = true;
11477 else if (TARGET_EPILOGUE_USING_MOVE
11478 && cfun->machine->use_fast_prologue_epilogue
11479 && (frame.nregs > 1
11480 || m->fs.sp_offset != frame.reg_save_offset))
11481 restore_regs_via_mov = true;
11482 else if (frame_pointer_needed
11483 && !frame.nregs
11484 && m->fs.sp_offset != frame.reg_save_offset)
11485 restore_regs_via_mov = true;
11486 else if (frame_pointer_needed
11487 && TARGET_USE_LEAVE
11488 && cfun->machine->use_fast_prologue_epilogue
11489 && frame.nregs == 1)
11490 restore_regs_via_mov = true;
11491 else
11492 restore_regs_via_mov = false;
11494 if (restore_regs_via_mov || frame.nsseregs)
11496 /* Ensure that the entire register save area is addressable via
11497 the stack pointer, if we will restore via sp. */
11498 if (TARGET_64BIT
11499 && m->fs.sp_offset > 0x7fffffff
11500 && !(m->fs.fp_valid || m->fs.drap_valid)
11501 && (frame.nsseregs + frame.nregs) != 0)
11503 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11504 GEN_INT (m->fs.sp_offset
11505 - frame.sse_reg_save_offset),
11506 style,
11507 m->fs.cfa_reg == stack_pointer_rtx);
11511 /* If there are any SSE registers to restore, then we have to do it
11512 via moves, since there's obviously no pop for SSE regs. */
11513 if (frame.nsseregs)
11514 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11515 style == 2);
11517 if (restore_regs_via_mov)
11519 rtx t;
11521 if (frame.nregs)
11522 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11524 /* eh_return epilogues need %ecx added to the stack pointer. */
11525 if (style == 2)
11527 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11529 /* Stack align doesn't work with eh_return. */
11530 gcc_assert (!stack_realign_drap);
11531 /* Neither do regparm nested functions. */
11532 gcc_assert (!ix86_static_chain_on_stack);
11534 if (frame_pointer_needed)
11536 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11537 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11538 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11540 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11541 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11543 /* Note that we use SA as a temporary CFA, as the return
11544 address is at the proper place relative to it. We
11545 pretend this happens at the FP restore insn because
11546 prior to this insn the FP would be stored at the wrong
11547 offset relative to SA, and after this insn we have no
11548 other reasonable register to use for the CFA. We don't
11549 bother resetting the CFA to the SP for the duration of
11550 the return insn. */
11551 add_reg_note (insn, REG_CFA_DEF_CFA,
11552 plus_constant (Pmode, sa, UNITS_PER_WORD));
11553 ix86_add_queued_cfa_restore_notes (insn);
11554 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11555 RTX_FRAME_RELATED_P (insn) = 1;
11557 m->fs.cfa_reg = sa;
11558 m->fs.cfa_offset = UNITS_PER_WORD;
11559 m->fs.fp_valid = false;
11561 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11562 const0_rtx, style, false);
11564 else
11566 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11567 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11568 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11569 ix86_add_queued_cfa_restore_notes (insn);
11571 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11572 if (m->fs.cfa_offset != UNITS_PER_WORD)
11574 m->fs.cfa_offset = UNITS_PER_WORD;
11575 add_reg_note (insn, REG_CFA_DEF_CFA,
11576 plus_constant (Pmode, stack_pointer_rtx,
11577 UNITS_PER_WORD));
11578 RTX_FRAME_RELATED_P (insn) = 1;
11581 m->fs.sp_offset = UNITS_PER_WORD;
11582 m->fs.sp_valid = true;
11585 else
11587 /* SEH requires that the function end with (1) a stack adjustment
11588 if necessary, (2) a sequence of pops, and (3) a return or
11589 jump instruction. Prevent insns from the function body from
11590 being scheduled into this sequence. */
11591 if (TARGET_SEH)
11593 /* Prevent a catch region from being adjacent to the standard
11594 epilogue sequence. Unfortunately crtl->uses_eh_lsda and
11595 several other flags that would be interesting to test are
11596 not yet set up. */
11597 if (flag_non_call_exceptions)
11598 emit_insn (gen_nops (const1_rtx));
11599 else
11600 emit_insn (gen_blockage ());
11603 /* First step is to deallocate the stack frame so that we can
11604 pop the registers. Also do it on SEH target for very large
11605 frame as the emitted instructions aren't allowed by the ABI in
11606 epilogues. */
11607 if (!m->fs.sp_valid
11608 || (TARGET_SEH
11609 && (m->fs.sp_offset - frame.reg_save_offset
11610 >= SEH_MAX_FRAME_SIZE)))
11612 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11613 GEN_INT (m->fs.fp_offset
11614 - frame.reg_save_offset),
11615 style, false);
11617 else if (m->fs.sp_offset != frame.reg_save_offset)
11619 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11620 GEN_INT (m->fs.sp_offset
11621 - frame.reg_save_offset),
11622 style,
11623 m->fs.cfa_reg == stack_pointer_rtx);
11626 ix86_emit_restore_regs_using_pop ();
11629 /* If we used a frame pointer and haven't already got rid of it,
11630 then do so now. */
11631 if (m->fs.fp_valid)
11633 /* If the stack pointer is valid and pointing at the frame
11634 pointer store address, then we only need a pop. */
11635 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11636 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11637 /* Leave results in shorter dependency chains on CPUs that are
11638 able to grok it fast. */
11639 else if (TARGET_USE_LEAVE
11640 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11641 || !cfun->machine->use_fast_prologue_epilogue)
11642 ix86_emit_leave ();
11643 else
11645 pro_epilogue_adjust_stack (stack_pointer_rtx,
11646 hard_frame_pointer_rtx,
11647 const0_rtx, style, !using_drap);
11648 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11652 if (using_drap)
11654 int param_ptr_offset = UNITS_PER_WORD;
11655 rtx insn;
11657 gcc_assert (stack_realign_drap);
11659 if (ix86_static_chain_on_stack)
11660 param_ptr_offset += UNITS_PER_WORD;
11661 if (!call_used_regs[REGNO (crtl->drap_reg)])
11662 param_ptr_offset += UNITS_PER_WORD;
11664 insn = emit_insn (gen_rtx_SET
11665 (VOIDmode, stack_pointer_rtx,
11666 gen_rtx_PLUS (Pmode,
11667 crtl->drap_reg,
11668 GEN_INT (-param_ptr_offset))));
11669 m->fs.cfa_reg = stack_pointer_rtx;
11670 m->fs.cfa_offset = param_ptr_offset;
11671 m->fs.sp_offset = param_ptr_offset;
11672 m->fs.realigned = false;
11674 add_reg_note (insn, REG_CFA_DEF_CFA,
11675 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11676 GEN_INT (param_ptr_offset)));
11677 RTX_FRAME_RELATED_P (insn) = 1;
11679 if (!call_used_regs[REGNO (crtl->drap_reg)])
11680 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11683 /* At this point the stack pointer must be valid, and we must have
11684 restored all of the registers. We may not have deallocated the
11685 entire stack frame. We've delayed this until now because it may
11686 be possible to merge the local stack deallocation with the
11687 deallocation forced by ix86_static_chain_on_stack. */
11688 gcc_assert (m->fs.sp_valid);
11689 gcc_assert (!m->fs.fp_valid);
11690 gcc_assert (!m->fs.realigned);
11691 if (m->fs.sp_offset != UNITS_PER_WORD)
11693 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11694 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11695 style, true);
11697 else
11698 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11700 /* Sibcall epilogues don't want a return instruction. */
11701 if (style == 0)
11703 m->fs = frame_state_save;
11704 return;
11707 if (crtl->args.pops_args && crtl->args.size)
11709 rtx popc = GEN_INT (crtl->args.pops_args);
11711 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11712 address, do explicit add, and jump indirectly to the caller. */
11714 if (crtl->args.pops_args >= 65536)
11716 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11717 rtx insn;
11719 /* There is no "pascal" calling convention in any 64bit ABI. */
11720 gcc_assert (!TARGET_64BIT);
11722 insn = emit_insn (gen_pop (ecx));
11723 m->fs.cfa_offset -= UNITS_PER_WORD;
11724 m->fs.sp_offset -= UNITS_PER_WORD;
11726 add_reg_note (insn, REG_CFA_ADJUST_CFA,
11727 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
11728 add_reg_note (insn, REG_CFA_REGISTER,
11729 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11730 RTX_FRAME_RELATED_P (insn) = 1;
11732 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11733 popc, -1, true);
11734 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11736 else
11737 emit_jump_insn (gen_simple_return_pop_internal (popc));
11739 else
11740 emit_jump_insn (gen_simple_return_internal ());
11742 /* Restore the state back to the state from the prologue,
11743 so that it's correct for the next epilogue. */
11744 m->fs = frame_state_save;
11747 /* Reset from the function's potential modifications. */
11749 static void
11750 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11751 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11753 if (pic_offset_table_rtx)
11754 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11755 #if TARGET_MACHO
11756 /* Mach-O doesn't support labels at the end of objects, so if
11757 it looks like we might want one, insert a NOP. */
11759 rtx insn = get_last_insn ();
11760 rtx deleted_debug_label = NULL_RTX;
11761 while (insn
11762 && NOTE_P (insn)
11763 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11765 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11766 notes only, instead set their CODE_LABEL_NUMBER to -1,
11767 otherwise there would be code generation differences
11768 between -g and -g0. */
11769 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11770 deleted_debug_label = insn;
11771 insn = PREV_INSN (insn);
11773 if (insn
11774 && (LABEL_P (insn)
11775 || (NOTE_P (insn)
11776 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11777 fputs ("\tnop\n", file);
11778 else if (deleted_debug_label)
11779 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11780 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11781 CODE_LABEL_NUMBER (insn) = -1;
11783 #endif
11787 /* Return a scratch register to use in the split stack prologue. The
11788 split stack prologue is used for -fsplit-stack. It is the first
11789 instructions in the function, even before the regular prologue.
11790 The scratch register can be any caller-saved register which is not
11791 used for parameters or for the static chain. */
11793 static unsigned int
11794 split_stack_prologue_scratch_regno (void)
11796 if (TARGET_64BIT)
11797 return R11_REG;
11798 else
11800 bool is_fastcall, is_thiscall;
11801 int regparm;
11803 is_fastcall = (lookup_attribute ("fastcall",
11804 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11805 != NULL);
11806 is_thiscall = (lookup_attribute ("thiscall",
11807 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11808 != NULL);
11809 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11811 if (is_fastcall)
11813 if (DECL_STATIC_CHAIN (cfun->decl))
11815 sorry ("-fsplit-stack does not support fastcall with "
11816 "nested function");
11817 return INVALID_REGNUM;
11819 return AX_REG;
11821 else if (is_thiscall)
11823 if (!DECL_STATIC_CHAIN (cfun->decl))
11824 return DX_REG;
11825 return AX_REG;
11827 else if (regparm < 3)
11829 if (!DECL_STATIC_CHAIN (cfun->decl))
11830 return CX_REG;
11831 else
11833 if (regparm >= 2)
11835 sorry ("-fsplit-stack does not support 2 register "
11836 " parameters for a nested function");
11837 return INVALID_REGNUM;
11839 return DX_REG;
11842 else
11844 /* FIXME: We could make this work by pushing a register
11845 around the addition and comparison. */
11846 sorry ("-fsplit-stack does not support 3 register parameters");
11847 return INVALID_REGNUM;
11852 /* A SYMBOL_REF for the function which allocates new stackspace for
11853 -fsplit-stack. */
11855 static GTY(()) rtx split_stack_fn;
11857 /* A SYMBOL_REF for the more stack function when using the large
11858 model. */
11860 static GTY(()) rtx split_stack_fn_large;
11862 /* Handle -fsplit-stack. These are the first instructions in the
11863 function, even before the regular prologue. */
11865 void
11866 ix86_expand_split_stack_prologue (void)
11868 struct ix86_frame frame;
11869 HOST_WIDE_INT allocate;
11870 unsigned HOST_WIDE_INT args_size;
11871 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11872 rtx scratch_reg = NULL_RTX;
11873 rtx varargs_label = NULL_RTX;
11874 rtx fn;
11876 gcc_assert (flag_split_stack && reload_completed);
11878 ix86_finalize_stack_realign_flags ();
11879 ix86_compute_frame_layout (&frame);
11880 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11882 /* This is the label we will branch to if we have enough stack
11883 space. We expect the basic block reordering pass to reverse this
11884 branch if optimizing, so that we branch in the unlikely case. */
11885 label = gen_label_rtx ();
11887 /* We need to compare the stack pointer minus the frame size with
11888 the stack boundary in the TCB. The stack boundary always gives
11889 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11890 can compare directly. Otherwise we need to do an addition. */
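/* In rough pseudo-C (an illustrative sketch only; "tcb_stack_limit" stands
   in for the TCB slot addressed through UNSPEC_STACK_CHECK below), the
   large-frame test emitted here amounts to:

	if ((uintptr_t) (sp - allocate) >= (uintptr_t) tcb_stack_limit)
	  goto enough_space;
	else
	  call __morestack and copy the arguments;

   For small frames sp is compared directly, relying on the
   SPLIT_STACK_AVAILABLE slack described above.  */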
11892 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11893 UNSPEC_STACK_CHECK);
11894 limit = gen_rtx_CONST (Pmode, limit);
11895 limit = gen_rtx_MEM (Pmode, limit);
11896 if (allocate < SPLIT_STACK_AVAILABLE)
11897 current = stack_pointer_rtx;
11898 else
11900 unsigned int scratch_regno;
11901 rtx offset;
11903 /* We need a scratch register to hold the stack pointer minus
11904 the required frame size. Since this is the very start of the
11905 function, the scratch register can be any caller-saved
11906 register which is not used for parameters. */
11907 offset = GEN_INT (- allocate);
11908 scratch_regno = split_stack_prologue_scratch_regno ();
11909 if (scratch_regno == INVALID_REGNUM)
11910 return;
11911 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11912 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11914 /* We don't use ix86_gen_add3 in this case because it will
11915 want to split to lea, but when not optimizing the insn
11916 will not be split after this point. */
11917 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11918 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11919 offset)));
11921 else
11923 emit_move_insn (scratch_reg, offset);
11924 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11925 stack_pointer_rtx));
11927 current = scratch_reg;
11930 ix86_expand_branch (GEU, current, limit, label);
11931 jump_insn = get_last_insn ();
11932 JUMP_LABEL (jump_insn) = label;
11934 /* Mark the jump as very likely to be taken. */
11935 add_int_reg_note (jump_insn, REG_BR_PROB,
11936 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
11938 if (split_stack_fn == NULL_RTX)
11939 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11940 fn = split_stack_fn;
11942 /* Get more stack space. We pass in the desired stack space and the
11943 size of the arguments to copy to the new stack. In 32-bit mode
11944 we push the parameters; __morestack will return on a new stack
11945 anyhow. In 64-bit mode we pass the parameters in r10 and
11946 r11. */
11947 allocate_rtx = GEN_INT (allocate);
11948 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11949 call_fusage = NULL_RTX;
11950 if (TARGET_64BIT)
11952 rtx reg10, reg11;
11954 reg10 = gen_rtx_REG (Pmode, R10_REG);
11955 reg11 = gen_rtx_REG (Pmode, R11_REG);
11957 /* If this function uses a static chain, it will be in %r10.
11958 Preserve it across the call to __morestack. */
11959 if (DECL_STATIC_CHAIN (cfun->decl))
11961 rtx rax;
11963 rax = gen_rtx_REG (word_mode, AX_REG);
11964 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
11965 use_reg (&call_fusage, rax);
11968 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11969 && !TARGET_PECOFF)
11971 HOST_WIDE_INT argval;
11973 gcc_assert (Pmode == DImode);
11974 /* When using the large model we need to load the address
11975 into a register, and we've run out of registers. So we
11976 switch to a different calling convention, and we call a
11977 different function: __morestack_large. We pass the
11978 argument size in the upper 32 bits of r10 and pass the
11979 frame size in the lower 32 bits. */
11980 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11981 gcc_assert ((args_size & 0xffffffff) == args_size);
11983 if (split_stack_fn_large == NULL_RTX)
11984 split_stack_fn_large =
11985 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11987 if (ix86_cmodel == CM_LARGE_PIC)
11989 rtx label, x;
11991 label = gen_label_rtx ();
11992 emit_label (label);
11993 LABEL_PRESERVE_P (label) = 1;
11994 emit_insn (gen_set_rip_rex64 (reg10, label));
11995 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11996 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
11997 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11998 UNSPEC_GOT);
11999 x = gen_rtx_CONST (Pmode, x);
12000 emit_move_insn (reg11, x);
12001 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12002 x = gen_const_mem (Pmode, x);
12003 emit_move_insn (reg11, x);
12005 else
12006 emit_move_insn (reg11, split_stack_fn_large);
12008 fn = reg11;
12010 argval = ((args_size << 16) << 16) + allocate;
12011 emit_move_insn (reg10, GEN_INT (argval));
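	  /* For illustration (hypothetical values): with args_size == 24 and
	     allocate == 0x2000, argval becomes 0x0000001800002000, i.e. the
	     argument size sits in bits 32-63 of %r10 and the frame size in
	     bits 0-31.  */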
12013 else
12015 emit_move_insn (reg10, allocate_rtx);
12016 emit_move_insn (reg11, GEN_INT (args_size));
12017 use_reg (&call_fusage, reg11);
12020 use_reg (&call_fusage, reg10);
12022 else
12024 emit_insn (gen_push (GEN_INT (args_size)));
12025 emit_insn (gen_push (allocate_rtx));
12027 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12028 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12029 NULL_RTX, false);
12030 add_function_usage_to (call_insn, call_fusage);
12032 /* In order to make call/return prediction work right, we now need
12033 to execute a return instruction. See
12034 libgcc/config/i386/morestack.S for the details on how this works.
12036 For flow purposes gcc must not see this as a return
12037 instruction--we need control flow to continue at the subsequent
12038 label. Therefore, we use an unspec. */
12039 gcc_assert (crtl->args.pops_args < 65536);
12040 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12042 /* If we are in 64-bit mode and this function uses a static chain,
12043 we saved %r10 in %rax before calling __morestack. */
12044 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12045 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12046 gen_rtx_REG (word_mode, AX_REG));
12048 /* If this function calls va_start, we need to store a pointer to
12049 the arguments on the old stack, because they may not have been
12050 all copied to the new stack. At this point the old stack can be
12051 found at the frame pointer value used by __morestack, because
12052 __morestack has set that up before calling back to us. Here we
12053 store that pointer in a scratch register, and in
12054 ix86_expand_prologue we store the scratch register in a stack
12055 slot. */
12056 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12058 unsigned int scratch_regno;
12059 rtx frame_reg;
12060 int words;
12062 scratch_regno = split_stack_prologue_scratch_regno ();
12063 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12064 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12066 /* 64-bit:
12067 fp -> old fp value
12068 return address within this function
12069 return address of caller of this function
12070 stack arguments
12071 So we add three words to get to the stack arguments.
12073 32-bit:
12074 fp -> old fp value
12075 return address within this function
12076 first argument to __morestack
12077 second argument to __morestack
12078 return address of caller of this function
12079 stack arguments
12080 So we add five words to get to the stack arguments.
12082 words = TARGET_64BIT ? 3 : 5;
12083 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12084 gen_rtx_PLUS (Pmode, frame_reg,
12085 GEN_INT (words * UNITS_PER_WORD))));
12087 varargs_label = gen_label_rtx ();
12088 emit_jump_insn (gen_jump (varargs_label));
12089 JUMP_LABEL (get_last_insn ()) = varargs_label;
12091 emit_barrier ();
12094 emit_label (label);
12095 LABEL_NUSES (label) = 1;
12097 /* If this function calls va_start, we now have to set the scratch
12098 register for the case where we do not call __morestack. In this
12099 case we need to set it based on the stack pointer. */
12100 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12102 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12103 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12104 GEN_INT (UNITS_PER_WORD))));
12106 emit_label (varargs_label);
12107 LABEL_NUSES (varargs_label) = 1;
12111 /* We may have to tell the dataflow pass that the split stack prologue
12112 is initializing a scratch register. */
12114 static void
12115 ix86_live_on_entry (bitmap regs)
12117 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12119 gcc_assert (flag_split_stack);
12120 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12124 /* Extract the parts of an RTL expression that is a valid memory address
12125 for an instruction. Return 0 if the structure of the address is
12126 grossly off. Return -1 if the address contains ASHIFT, so it is not
12127 strictly valid, but still used for computing length of lea instruction. */
12130 ix86_decompose_address (rtx addr, struct ix86_address *out)
12132 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12133 rtx base_reg, index_reg;
12134 HOST_WIDE_INT scale = 1;
12135 rtx scale_rtx = NULL_RTX;
12136 rtx tmp;
12137 int retval = 1;
12138 enum ix86_address_seg seg = SEG_DEFAULT;
12140 /* Allow zero-extended SImode addresses,
12141 they will be emitted with addr32 prefix. */
12142 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12144 if (GET_CODE (addr) == ZERO_EXTEND
12145 && GET_MODE (XEXP (addr, 0)) == SImode)
12147 addr = XEXP (addr, 0);
12148 if (CONST_INT_P (addr))
12149 return 0;
12151 else if (GET_CODE (addr) == AND
12152 && const_32bit_mask (XEXP (addr, 1), DImode))
12154 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12155 if (addr == NULL_RTX)
12156 return 0;
12158 if (CONST_INT_P (addr))
12159 return 0;
12163 /* Allow SImode subregs of DImode addresses,
12164 they will be emitted with addr32 prefix. */
12165 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12167 if (GET_CODE (addr) == SUBREG
12168 && GET_MODE (SUBREG_REG (addr)) == DImode)
12170 addr = SUBREG_REG (addr);
12171 if (CONST_INT_P (addr))
12172 return 0;
12176 if (REG_P (addr))
12177 base = addr;
12178 else if (GET_CODE (addr) == SUBREG)
12180 if (REG_P (SUBREG_REG (addr)))
12181 base = addr;
12182 else
12183 return 0;
12185 else if (GET_CODE (addr) == PLUS)
12187 rtx addends[4], op;
12188 int n = 0, i;
12190 op = addr;
12193 if (n >= 4)
12194 return 0;
12195 addends[n++] = XEXP (op, 1);
12196 op = XEXP (op, 0);
12198 while (GET_CODE (op) == PLUS);
12199 if (n >= 4)
12200 return 0;
12201 addends[n] = op;
12203 for (i = n; i >= 0; --i)
12205 op = addends[i];
12206 switch (GET_CODE (op))
12208 case MULT:
12209 if (index)
12210 return 0;
12211 index = XEXP (op, 0);
12212 scale_rtx = XEXP (op, 1);
12213 break;
12215 case ASHIFT:
12216 if (index)
12217 return 0;
12218 index = XEXP (op, 0);
12219 tmp = XEXP (op, 1);
12220 if (!CONST_INT_P (tmp))
12221 return 0;
12222 scale = INTVAL (tmp);
12223 if ((unsigned HOST_WIDE_INT) scale > 3)
12224 return 0;
12225 scale = 1 << scale;
12226 break;
12228 case ZERO_EXTEND:
12229 op = XEXP (op, 0);
12230 if (GET_CODE (op) != UNSPEC)
12231 return 0;
12232 /* FALLTHRU */
12234 case UNSPEC:
12235 if (XINT (op, 1) == UNSPEC_TP
12236 && TARGET_TLS_DIRECT_SEG_REFS
12237 && seg == SEG_DEFAULT)
12238 seg = DEFAULT_TLS_SEG_REG;
12239 else
12240 return 0;
12241 break;
12243 case SUBREG:
12244 if (!REG_P (SUBREG_REG (op)))
12245 return 0;
12246 /* FALLTHRU */
12248 case REG:
12249 if (!base)
12250 base = op;
12251 else if (!index)
12252 index = op;
12253 else
12254 return 0;
12255 break;
12257 case CONST:
12258 case CONST_INT:
12259 case SYMBOL_REF:
12260 case LABEL_REF:
12261 if (disp)
12262 return 0;
12263 disp = op;
12264 break;
12266 default:
12267 return 0;
12271 else if (GET_CODE (addr) == MULT)
12273 index = XEXP (addr, 0); /* index*scale */
12274 scale_rtx = XEXP (addr, 1);
12276 else if (GET_CODE (addr) == ASHIFT)
12278 /* We're called for lea too, which implements ashift on occasion. */
12279 index = XEXP (addr, 0);
12280 tmp = XEXP (addr, 1);
12281 if (!CONST_INT_P (tmp))
12282 return 0;
12283 scale = INTVAL (tmp);
12284 if ((unsigned HOST_WIDE_INT) scale > 3)
12285 return 0;
12286 scale = 1 << scale;
12287 retval = -1;
12289 else
12290 disp = addr; /* displacement */
12292 if (index)
12294 if (REG_P (index))
12296 else if (GET_CODE (index) == SUBREG
12297 && REG_P (SUBREG_REG (index)))
12299 else
12300 return 0;
12303 /* Extract the integral value of scale. */
12304 if (scale_rtx)
12306 if (!CONST_INT_P (scale_rtx))
12307 return 0;
12308 scale = INTVAL (scale_rtx);
12311 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12312 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12314 /* Avoid useless 0 displacement. */
12315 if (disp == const0_rtx && (base || index))
12316 disp = NULL_RTX;
12318 /* Allow arg pointer and stack pointer as index if there is no scaling. */
12319 if (base_reg && index_reg && scale == 1
12320 && (index_reg == arg_pointer_rtx
12321 || index_reg == frame_pointer_rtx
12322 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12324 rtx tmp;
12325 tmp = base, base = index, index = tmp;
12326 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12329 /* Special case: %ebp cannot be encoded as a base without a displacement.
12330 Similarly %r13. */
12331 if (!disp
12332 && base_reg
12333 && (base_reg == hard_frame_pointer_rtx
12334 || base_reg == frame_pointer_rtx
12335 || base_reg == arg_pointer_rtx
12336 || (REG_P (base_reg)
12337 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12338 || REGNO (base_reg) == R13_REG))))
12339 disp = const0_rtx;
12341 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
12342 Avoid this by transforming it to [%esi+0].
12343 Reload calls address legitimization without cfun defined, so we need
12344 to test cfun for being non-NULL. */
12345 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12346 && base_reg && !index_reg && !disp
12347 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12348 disp = const0_rtx;
12350 /* Special case: encode reg+reg instead of reg*2. */
12351 if (!base && index && scale == 2)
12352 base = index, base_reg = index_reg, scale = 1;
12354 /* Special case: scaling cannot be encoded without base or displacement. */
12355 if (!base && !disp && index && scale != 1)
12356 disp = const0_rtx;
12358 out->base = base;
12359 out->index = index;
12360 out->disp = disp;
12361 out->scale = scale;
12362 out->seg = seg;
12364 return retval;
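/* A worked example of the decomposition above (register names are
   illustrative): the canonical address RTX
     (plus:SI (plus:SI (mult:SI (reg:SI %ebx) (const_int 4))
                       (reg:SI %eax))
              (const_int 12))
   i.e. the operand 12(%eax,%ebx,4), yields base = %eax, index = %ebx,
   scale = 4, disp = (const_int 12), seg = SEG_DEFAULT and a return value
   of 1.  If the scaled index is instead written as an ASHIFT by 2, the
   decomposition is the same but the return value is -1, since such an
   address is only used for computing lea lengths.  */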
12367 /* Return the cost of the memory address X.
12368 For i386, it is better to use a complex address than to let gcc copy
12369 the address into a reg and make a new pseudo. But not if the address
12370 requires two regs - that would mean more pseudos with longer
12371 lifetimes. */
12372 static int
12373 ix86_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
12374 addr_space_t as ATTRIBUTE_UNUSED,
12375 bool speed ATTRIBUTE_UNUSED)
12377 struct ix86_address parts;
12378 int cost = 1;
12379 int ok = ix86_decompose_address (x, &parts);
12381 gcc_assert (ok);
12383 if (parts.base && GET_CODE (parts.base) == SUBREG)
12384 parts.base = SUBREG_REG (parts.base);
12385 if (parts.index && GET_CODE (parts.index) == SUBREG)
12386 parts.index = SUBREG_REG (parts.index);
12388 /* Attempt to minimize number of registers in the address. */
12389 if ((parts.base
12390 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12391 || (parts.index
12392 && (!REG_P (parts.index)
12393 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12394 cost++;
12396 if (parts.base
12397 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12398 && parts.index
12399 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12400 && parts.base != parts.index)
12401 cost++;
12403 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12404 since its predecode logic can't detect the length of such instructions
12405 and they degenerate to vector decoding. Increase the cost of such
12406 addresses here. The penalty is at least 2 cycles. It may be worthwhile
12407 to split such addresses or even refuse them altogether.
12409 The following addressing modes are affected:
12410 [base+scale*index]
12411 [scale*index+disp]
12412 [base+index]
12414 The first and last cases may be avoidable by explicitly coding the zero
12415 into the memory address, but I don't have an AMD K6 machine handy to
12416 check this theory. */
12418 if (TARGET_K6
12419 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12420 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12421 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12422 cost += 10;
12424 return cost;
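/* For example, with the rules above an address whose base is a single
   hard register, such as (%esp), has cost 1; an address that uses one
   pseudo register has cost 2; and an address that uses two distinct
   pseudo registers as base and index has cost 3, plus the K6 penalty
   when it applies.  */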
12427 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12428 this is used to form addresses to local data when -fPIC is in
12429 use. */
12431 static bool
12432 darwin_local_data_pic (rtx disp)
12434 return (GET_CODE (disp) == UNSPEC
12435 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12438 /* Determine if a given RTX is a valid constant. We already know this
12439 satisfies CONSTANT_P. */
12441 static bool
12442 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
12444 switch (GET_CODE (x))
12446 case CONST:
12447 x = XEXP (x, 0);
12449 if (GET_CODE (x) == PLUS)
12451 if (!CONST_INT_P (XEXP (x, 1)))
12452 return false;
12453 x = XEXP (x, 0);
12456 if (TARGET_MACHO && darwin_local_data_pic (x))
12457 return true;
12459 /* Only some unspecs are valid as "constants". */
12460 if (GET_CODE (x) == UNSPEC)
12461 switch (XINT (x, 1))
12463 case UNSPEC_GOT:
12464 case UNSPEC_GOTOFF:
12465 case UNSPEC_PLTOFF:
12466 return TARGET_64BIT;
12467 case UNSPEC_TPOFF:
12468 case UNSPEC_NTPOFF:
12469 x = XVECEXP (x, 0, 0);
12470 return (GET_CODE (x) == SYMBOL_REF
12471 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12472 case UNSPEC_DTPOFF:
12473 x = XVECEXP (x, 0, 0);
12474 return (GET_CODE (x) == SYMBOL_REF
12475 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12476 default:
12477 return false;
12480 /* We must have drilled down to a symbol. */
12481 if (GET_CODE (x) == LABEL_REF)
12482 return true;
12483 if (GET_CODE (x) != SYMBOL_REF)
12484 return false;
12485 /* FALLTHRU */
12487 case SYMBOL_REF:
12488 /* TLS symbols are never valid. */
12489 if (SYMBOL_REF_TLS_MODEL (x))
12490 return false;
12492 /* DLLIMPORT symbols are never valid. */
12493 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12494 && SYMBOL_REF_DLLIMPORT_P (x))
12495 return false;
12497 #if TARGET_MACHO
12498 /* mdynamic-no-pic */
12499 if (MACHO_DYNAMIC_NO_PIC_P)
12500 return machopic_symbol_defined_p (x);
12501 #endif
12502 break;
12504 case CONST_DOUBLE:
12505 if (GET_MODE (x) == TImode
12506 && x != CONST0_RTX (TImode)
12507 && !TARGET_64BIT)
12508 return false;
12509 break;
12511 case CONST_VECTOR:
12512 if (!standard_sse_constant_p (x))
12513 return false;
12515 default:
12516 break;
12519 /* Otherwise we handle everything else in the move patterns. */
12520 return true;
12523 /* Determine if it's legal to put X into the constant pool. This
12524 is not possible for the addresses of thread-local symbols, which
12525 are checked above. */
12527 static bool
12528 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12530 /* We can always put integral constants and vectors in memory. */
12531 switch (GET_CODE (x))
12533 case CONST_INT:
12534 case CONST_DOUBLE:
12535 case CONST_VECTOR:
12536 return false;
12538 default:
12539 break;
12541 return !ix86_legitimate_constant_p (mode, x);
12544 /* Return true if the symbol is marked as dllimport or as a stub variable,
12545 false otherwise. */
12547 static bool
12548 is_imported_p (rtx x)
12550 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12551 || GET_CODE (x) != SYMBOL_REF)
12552 return false;
12554 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12558 /* Nonzero if the constant value X is a legitimate general operand
12559 when generating PIC code. It is given that flag_pic is on and
12560 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12562 bool
12563 legitimate_pic_operand_p (rtx x)
12565 rtx inner;
12567 switch (GET_CODE (x))
12569 case CONST:
12570 inner = XEXP (x, 0);
12571 if (GET_CODE (inner) == PLUS
12572 && CONST_INT_P (XEXP (inner, 1)))
12573 inner = XEXP (inner, 0);
12575 /* Only some unspecs are valid as "constants". */
12576 if (GET_CODE (inner) == UNSPEC)
12577 switch (XINT (inner, 1))
12579 case UNSPEC_GOT:
12580 case UNSPEC_GOTOFF:
12581 case UNSPEC_PLTOFF:
12582 return TARGET_64BIT;
12583 case UNSPEC_TPOFF:
12584 x = XVECEXP (inner, 0, 0);
12585 return (GET_CODE (x) == SYMBOL_REF
12586 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12587 case UNSPEC_MACHOPIC_OFFSET:
12588 return legitimate_pic_address_disp_p (x);
12589 default:
12590 return false;
12592 /* FALLTHRU */
12594 case SYMBOL_REF:
12595 case LABEL_REF:
12596 return legitimate_pic_address_disp_p (x);
12598 default:
12599 return true;
12603 /* Determine if a given CONST RTX is a valid memory displacement
12604 in PIC mode. */
12606 bool
12607 legitimate_pic_address_disp_p (rtx disp)
12609 bool saw_plus;
12611 /* In 64bit mode we can allow direct addresses of symbols and labels
12612 when they are not dynamic symbols. */
12613 if (TARGET_64BIT)
12615 rtx op0 = disp, op1;
12617 switch (GET_CODE (disp))
12619 case LABEL_REF:
12620 return true;
12622 case CONST:
12623 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12624 break;
12625 op0 = XEXP (XEXP (disp, 0), 0);
12626 op1 = XEXP (XEXP (disp, 0), 1);
12627 if (!CONST_INT_P (op1)
12628 || INTVAL (op1) >= 16*1024*1024
12629 || INTVAL (op1) < -16*1024*1024)
12630 break;
12631 if (GET_CODE (op0) == LABEL_REF)
12632 return true;
12633 if (GET_CODE (op0) == CONST
12634 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12635 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12636 return true;
12637 if (GET_CODE (op0) == UNSPEC
12638 && XINT (op0, 1) == UNSPEC_PCREL)
12639 return true;
12640 if (GET_CODE (op0) != SYMBOL_REF)
12641 break;
12642 /* FALLTHRU */
12644 case SYMBOL_REF:
12645 /* TLS references should always be enclosed in an UNSPEC.
12646 A dllimported symbol always needs to be resolved. */
12647 if (SYMBOL_REF_TLS_MODEL (op0)
12648 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12649 return false;
12651 if (TARGET_PECOFF)
12653 if (is_imported_p (op0))
12654 return true;
12656 if (SYMBOL_REF_FAR_ADDR_P (op0)
12657 || !SYMBOL_REF_LOCAL_P (op0))
12658 break;
12660 /* Function symbols need to be resolved only for
12661 the large model.
12662 For the small model we don't need to resolve anything
12663 here. */
12664 if ((ix86_cmodel != CM_LARGE_PIC
12665 && SYMBOL_REF_FUNCTION_P (op0))
12666 || ix86_cmodel == CM_SMALL_PIC)
12667 return true;
12668 /* Non-external symbols don't need to be resolved for
12669 the large and medium models. */
12670 if ((ix86_cmodel == CM_LARGE_PIC
12671 || ix86_cmodel == CM_MEDIUM_PIC)
12672 && !SYMBOL_REF_EXTERNAL_P (op0))
12673 return true;
12675 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12676 && SYMBOL_REF_LOCAL_P (op0)
12677 && ix86_cmodel != CM_LARGE_PIC)
12678 return true;
12679 break;
12681 default:
12682 break;
12685 if (GET_CODE (disp) != CONST)
12686 return false;
12687 disp = XEXP (disp, 0);
12689 if (TARGET_64BIT)
12691 /* It is unsafe to allow PLUS expressions here; the allowed distance of
12692 GOT table entries is limited. We should not need these anyway. */
12693 if (GET_CODE (disp) != UNSPEC
12694 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12695 && XINT (disp, 1) != UNSPEC_GOTOFF
12696 && XINT (disp, 1) != UNSPEC_PCREL
12697 && XINT (disp, 1) != UNSPEC_PLTOFF))
12698 return false;
12700 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12701 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12702 return false;
12703 return true;
12706 saw_plus = false;
12707 if (GET_CODE (disp) == PLUS)
12709 if (!CONST_INT_P (XEXP (disp, 1)))
12710 return false;
12711 disp = XEXP (disp, 0);
12712 saw_plus = true;
12715 if (TARGET_MACHO && darwin_local_data_pic (disp))
12716 return true;
12718 if (GET_CODE (disp) != UNSPEC)
12719 return false;
12721 switch (XINT (disp, 1))
12723 case UNSPEC_GOT:
12724 if (saw_plus)
12725 return false;
12726 /* We need to check for both symbols and labels because VxWorks loads
12727 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12728 details. */
12729 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12730 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12731 case UNSPEC_GOTOFF:
12732 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12733 While the ABI also specifies a 32bit relocation, we don't produce it in
12734 the small PIC model at all. */
12735 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12736 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12737 && !TARGET_64BIT)
12738 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12739 return false;
12740 case UNSPEC_GOTTPOFF:
12741 case UNSPEC_GOTNTPOFF:
12742 case UNSPEC_INDNTPOFF:
12743 if (saw_plus)
12744 return false;
12745 disp = XVECEXP (disp, 0, 0);
12746 return (GET_CODE (disp) == SYMBOL_REF
12747 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12748 case UNSPEC_NTPOFF:
12749 disp = XVECEXP (disp, 0, 0);
12750 return (GET_CODE (disp) == SYMBOL_REF
12751 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12752 case UNSPEC_DTPOFF:
12753 disp = XVECEXP (disp, 0, 0);
12754 return (GET_CODE (disp) == SYMBOL_REF
12755 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12758 return false;
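/* For illustration: in 32-bit PIC code a typical legitimate displacement
   is (const:SI (unspec:SI [(symbol_ref:SI "foo")] UNSPEC_GOTOFF)), which
   prints as foo@GOTOFF, while in 64-bit code the usual form is
   (const:DI (unspec:DI [(symbol_ref:DI "foo")] UNSPEC_GOTPCREL)), i.e.
   foo@GOTPCREL.  The symbol name "foo" is of course arbitrary.  */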
12761 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns true if the
12762 invalid part of the address X was reloaded here, in which case the
12763 calling macro should goto WIN; returns false if no replacement is
12764 called for. */
12766 bool
12767 ix86_legitimize_reload_address (rtx x,
12768 enum machine_mode mode ATTRIBUTE_UNUSED,
12769 int opnum, int type,
12770 int ind_levels ATTRIBUTE_UNUSED)
12772 /* Reload can generate:
12774 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12775 (reg:DI 97))
12776 (reg:DI 2 cx))
12778 This RTX is rejected by ix86_legitimate_address_p because
12779 base register 97 is not a strict hard register. Following this rejection,
12780 reload pushes all three components into separate registers,
12781 creating an invalid memory address RTX.
12783 The following code reloads only the invalid part of the
12784 memory address RTX. */
12786 if (GET_CODE (x) == PLUS
12787 && REG_P (XEXP (x, 1))
12788 && GET_CODE (XEXP (x, 0)) == PLUS
12789 && REG_P (XEXP (XEXP (x, 0), 1)))
12791 rtx base, index;
12792 bool something_reloaded = false;
12794 base = XEXP (XEXP (x, 0), 1);
12795 if (!REG_OK_FOR_BASE_STRICT_P (base))
12797 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12798 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12799 opnum, (enum reload_type) type);
12800 something_reloaded = true;
12803 index = XEXP (x, 1);
12804 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12806 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12807 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12808 opnum, (enum reload_type) type);
12809 something_reloaded = true;
12812 gcc_assert (something_reloaded);
12813 return true;
12816 return false;
12819 /* Determine if op is suitable RTX for an address register.
12820 Return naked register if a register or a register subreg is
12821 found, otherwise return NULL_RTX. */
12823 static rtx
12824 ix86_validate_address_register (rtx op)
12826 enum machine_mode mode = GET_MODE (op);
12828 /* Only SImode or DImode registers can form the address. */
12829 if (mode != SImode && mode != DImode)
12830 return NULL_RTX;
12832 if (REG_P (op))
12833 return op;
12834 else if (GET_CODE (op) == SUBREG)
12836 rtx reg = SUBREG_REG (op);
12838 if (!REG_P (reg))
12839 return NULL_RTX;
12841 mode = GET_MODE (reg);
12843 /* Don't allow SUBREGs that span more than a word. It can
12844 lead to spill failures when the register is one word out
12845 of a two word structure. */
12846 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12847 return NULL_RTX;
12849 /* Allow only SUBREGs of non-eliminable hard registers. */
12850 if (register_no_elim_operand (reg, mode))
12851 return reg;
12854 /* Op is not a register. */
12855 return NULL_RTX;
12858 /* Recognizes RTL expressions that are valid memory addresses for an
12859 instruction. The MODE argument is the machine mode for the MEM
12860 expression that wants to use this address.
12862 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12863 convert common non-canonical forms to canonical form so that they will
12864 be recognized. */
12866 static bool
12867 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12868 rtx addr, bool strict)
12870 struct ix86_address parts;
12871 rtx base, index, disp;
12872 HOST_WIDE_INT scale;
12873 enum ix86_address_seg seg;
12875 if (ix86_decompose_address (addr, &parts) <= 0)
12876 /* Decomposition failed. */
12877 return false;
12879 base = parts.base;
12880 index = parts.index;
12881 disp = parts.disp;
12882 scale = parts.scale;
12883 seg = parts.seg;
12885 /* Validate base register. */
12886 if (base)
12888 rtx reg = ix86_validate_address_register (base);
12890 if (reg == NULL_RTX)
12891 return false;
12893 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12894 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12895 /* Base is not valid. */
12896 return false;
12899 /* Validate index register. */
12900 if (index)
12902 rtx reg = ix86_validate_address_register (index);
12904 if (reg == NULL_RTX)
12905 return false;
12907 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12908 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12909 /* Index is not valid. */
12910 return false;
12913 /* Index and base should have the same mode. */
12914 if (base && index
12915 && GET_MODE (base) != GET_MODE (index))
12916 return false;
12918 /* Address override works only on the (%reg) part of %fs:(%reg). */
12919 if (seg != SEG_DEFAULT
12920 && ((base && GET_MODE (base) != word_mode)
12921 || (index && GET_MODE (index) != word_mode)))
12922 return false;
12924 /* Validate scale factor. */
12925 if (scale != 1)
12927 if (!index)
12928 /* Scale without index. */
12929 return false;
12931 if (scale != 2 && scale != 4 && scale != 8)
12932 /* Scale is not a valid multiplier. */
12933 return false;
12936 /* Validate displacement. */
12937 if (disp)
12939 if (GET_CODE (disp) == CONST
12940 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12941 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12942 switch (XINT (XEXP (disp, 0), 1))
12944 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit when
12945 used. While the ABI also specifies 32bit relocations, we don't produce
12946 them at all and use IP-relative addressing instead. */
12947 case UNSPEC_GOT:
12948 case UNSPEC_GOTOFF:
12949 gcc_assert (flag_pic);
12950 if (!TARGET_64BIT)
12951 goto is_legitimate_pic;
12953 /* 64bit address unspec. */
12954 return false;
12956 case UNSPEC_GOTPCREL:
12957 case UNSPEC_PCREL:
12958 gcc_assert (flag_pic);
12959 goto is_legitimate_pic;
12961 case UNSPEC_GOTTPOFF:
12962 case UNSPEC_GOTNTPOFF:
12963 case UNSPEC_INDNTPOFF:
12964 case UNSPEC_NTPOFF:
12965 case UNSPEC_DTPOFF:
12966 break;
12968 case UNSPEC_STACK_CHECK:
12969 gcc_assert (flag_split_stack);
12970 break;
12972 default:
12973 /* Invalid address unspec. */
12974 return false;
12977 else if (SYMBOLIC_CONST (disp)
12978 && (flag_pic
12979 || (TARGET_MACHO
12980 #if TARGET_MACHO
12981 && MACHOPIC_INDIRECT
12982 && !machopic_operand_p (disp)
12983 #endif
12987 is_legitimate_pic:
12988 if (TARGET_64BIT && (index || base))
12990 /* foo@dtpoff(%rX) is ok. */
12991 if (GET_CODE (disp) != CONST
12992 || GET_CODE (XEXP (disp, 0)) != PLUS
12993 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12994 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12995 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12996 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12997 /* Non-constant pic memory reference. */
12998 return false;
13000 else if ((!TARGET_MACHO || flag_pic)
13001 && ! legitimate_pic_address_disp_p (disp))
13002 /* Displacement is an invalid pic construct. */
13003 return false;
13004 #if TARGET_MACHO
13005 else if (MACHO_DYNAMIC_NO_PIC_P
13006 && !ix86_legitimate_constant_p (Pmode, disp))
13007 /* Displacement must be referenced via a non-lazy pointer. */
13008 return false;
13009 #endif
13011 /* This code used to verify that a symbolic pic displacement
13012 includes the pic_offset_table_rtx register.
13014 While this is a good idea, unfortunately these constructs may
13015 be created by the "adds using lea" optimization for incorrect
13016 code like:
13018 int a;
13019 int foo(int i)
13021 return *(&a+i);
13024 This code is nonsensical, but results in addressing the
13025 GOT table with pic_offset_table_rtx as the base. We can't
13026 just refuse it easily, since it gets matched by the
13027 "addsi3" pattern, which later gets split to an lea when the
13028 output register differs from the input. While this
13029 could be handled by a separate addsi pattern for this case
13030 that never results in an lea, disabling this test seems to be
13031 the easier and correct fix for the crash. */
13033 else if (GET_CODE (disp) != LABEL_REF
13034 && !CONST_INT_P (disp)
13035 && (GET_CODE (disp) != CONST
13036 || !ix86_legitimate_constant_p (Pmode, disp))
13037 && (GET_CODE (disp) != SYMBOL_REF
13038 || !ix86_legitimate_constant_p (Pmode, disp)))
13039 /* Displacement is not constant. */
13040 return false;
13041 else if (TARGET_64BIT
13042 && !x86_64_immediate_operand (disp, VOIDmode))
13043 /* Displacement is out of range. */
13044 return false;
13045 /* In x32 mode, constant addresses are sign extended to 64 bits, so
13046 we have to reject addresses from 0x80000000 to 0xffffffff. */
13047 else if (TARGET_X32 && !(index || base)
13048 && CONST_INT_P (disp)
13049 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13050 return false;
13053 /* Everything looks valid. */
13054 return true;
13057 /* Determine if a given RTX is a valid constant address. */
13059 bool
13060 constant_address_p (rtx x)
13062 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13065 /* Return a unique alias set for the GOT. */
13067 static alias_set_type
13068 ix86_GOT_alias_set (void)
13070 static alias_set_type set = -1;
13071 if (set == -1)
13072 set = new_alias_set ();
13073 return set;
13076 /* Return a legitimate reference for ORIG (an address) using the
13077 register REG. If REG is 0, a new pseudo is generated.
13079 There are two types of references that must be handled:
13081 1. Global data references must load the address from the GOT, via
13082 the PIC reg. An insn is emitted to do this load, and the reg is
13083 returned.
13085 2. Static data references, constant pool addresses, and code labels
13086 compute the address as an offset from the GOT, whose base is in
13087 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13088 differentiate them from global data objects. The returned
13089 address is the PIC reg + an unspec constant.
13091 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13092 reg also appears in the address. */
13094 static rtx
13095 legitimize_pic_address (rtx orig, rtx reg)
13097 rtx addr = orig;
13098 rtx new_rtx = orig;
13100 #if TARGET_MACHO
13101 if (TARGET_MACHO && !TARGET_64BIT)
13103 if (reg == 0)
13104 reg = gen_reg_rtx (Pmode);
13105 /* Use the generic Mach-O PIC machinery. */
13106 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13108 #endif
13110 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13112 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13113 if (tmp)
13114 return tmp;
13117 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13118 new_rtx = addr;
13119 else if (TARGET_64BIT && !TARGET_PECOFF
13120 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13122 rtx tmpreg;
13123 /* This symbol may be referenced via a displacement from the PIC
13124 base address (@GOTOFF). */
13126 if (reload_in_progress)
13127 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13128 if (GET_CODE (addr) == CONST)
13129 addr = XEXP (addr, 0);
13130 if (GET_CODE (addr) == PLUS)
13132 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13133 UNSPEC_GOTOFF);
13134 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13136 else
13137 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13138 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13139 if (!reg)
13140 tmpreg = gen_reg_rtx (Pmode);
13141 else
13142 tmpreg = reg;
13143 emit_move_insn (tmpreg, new_rtx);
13145 if (reg != 0)
13147 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13148 tmpreg, 1, OPTAB_DIRECT);
13149 new_rtx = reg;
13151 else
13152 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13154 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13156 /* This symbol may be referenced via a displacement from the PIC
13157 base address (@GOTOFF). */
13159 if (reload_in_progress)
13160 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13161 if (GET_CODE (addr) == CONST)
13162 addr = XEXP (addr, 0);
13163 if (GET_CODE (addr) == PLUS)
13165 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13166 UNSPEC_GOTOFF);
13167 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13169 else
13170 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13171 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13172 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13174 if (reg != 0)
13176 emit_move_insn (reg, new_rtx);
13177 new_rtx = reg;
13180 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13181 /* We can't use @GOTOFF for text labels on VxWorks;
13182 see gotoff_operand. */
13183 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13185 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13186 if (tmp)
13187 return tmp;
13189 /* For x64 PE-COFF there is no GOT table, so we use the address
13190 directly. */
13191 if (TARGET_64BIT && TARGET_PECOFF)
13193 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13194 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13196 if (reg == 0)
13197 reg = gen_reg_rtx (Pmode);
13198 emit_move_insn (reg, new_rtx);
13199 new_rtx = reg;
13201 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13203 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13204 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13205 new_rtx = gen_const_mem (Pmode, new_rtx);
13206 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13208 if (reg == 0)
13209 reg = gen_reg_rtx (Pmode);
13210 /* Use gen_movsi directly; otherwise the address is loaded
13211 into a register for CSE. We don't want to CSE these addresses;
13212 instead we CSE addresses from the GOT table, so skip this. */
13213 emit_insn (gen_movsi (reg, new_rtx));
13214 new_rtx = reg;
13216 else
13218 /* This symbol must be referenced via a load from the
13219 Global Offset Table (@GOT). */
13221 if (reload_in_progress)
13222 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13223 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13224 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13225 if (TARGET_64BIT)
13226 new_rtx = force_reg (Pmode, new_rtx);
13227 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13228 new_rtx = gen_const_mem (Pmode, new_rtx);
13229 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13231 if (reg == 0)
13232 reg = gen_reg_rtx (Pmode);
13233 emit_move_insn (reg, new_rtx);
13234 new_rtx = reg;
13237 else
13239 if (CONST_INT_P (addr)
13240 && !x86_64_immediate_operand (addr, VOIDmode))
13242 if (reg)
13244 emit_move_insn (reg, addr);
13245 new_rtx = reg;
13247 else
13248 new_rtx = force_reg (Pmode, addr);
13250 else if (GET_CODE (addr) == CONST)
13252 addr = XEXP (addr, 0);
13254 /* We must match stuff we generated before. Assume the only
13255 unspecs that can get here are ours. Not that we could do
13256 anything with them anyway.... */
13257 if (GET_CODE (addr) == UNSPEC
13258 || (GET_CODE (addr) == PLUS
13259 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13260 return orig;
13261 gcc_assert (GET_CODE (addr) == PLUS);
13263 if (GET_CODE (addr) == PLUS)
13265 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13267 /* Check first to see if this is a constant offset from a @GOTOFF
13268 symbol reference. */
13269 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13270 && CONST_INT_P (op1))
13272 if (!TARGET_64BIT)
13274 if (reload_in_progress)
13275 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13276 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13277 UNSPEC_GOTOFF);
13278 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13279 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13280 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13282 if (reg != 0)
13284 emit_move_insn (reg, new_rtx);
13285 new_rtx = reg;
13288 else
13290 if (INTVAL (op1) < -16*1024*1024
13291 || INTVAL (op1) >= 16*1024*1024)
13293 if (!x86_64_immediate_operand (op1, Pmode))
13294 op1 = force_reg (Pmode, op1);
13295 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13299 else
13301 rtx base = legitimize_pic_address (op0, reg);
13302 enum machine_mode mode = GET_MODE (base);
13303 new_rtx
13304 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13306 if (CONST_INT_P (new_rtx))
13308 if (INTVAL (new_rtx) < -16*1024*1024
13309 || INTVAL (new_rtx) >= 16*1024*1024)
13311 if (!x86_64_immediate_operand (new_rtx, mode))
13312 new_rtx = force_reg (mode, new_rtx);
13313 new_rtx
13314 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13316 else
13317 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13319 else
13321 if (GET_CODE (new_rtx) == PLUS
13322 && CONSTANT_P (XEXP (new_rtx, 1)))
13324 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13325 new_rtx = XEXP (new_rtx, 1);
13327 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13332 return new_rtx;
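/* Roughly, the code above produces the following for a SYMBOL_REF "foo"
   (the name is illustrative):

     32-bit PIC, local symbol:   pic_offset_table_rtx + foo@GOTOFF
     32-bit PIC, global symbol:  (mem (pic_offset_table_rtx + foo@GOT)),
                                 i.e. movl foo@GOT(%ebx), %reg
     64-bit small PIC, local:    the symbol itself, addressed %rip-relative
     64-bit small PIC, global:   (mem foo@GOTPCREL(%rip)),
                                 i.e. movq foo@GOTPCREL(%rip), %reg

   with the result copied into REG when one is supplied.  */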
13335 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13337 static rtx
13338 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13340 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13342 if (GET_MODE (tp) != tp_mode)
13344 gcc_assert (GET_MODE (tp) == SImode);
13345 gcc_assert (tp_mode == DImode);
13347 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13350 if (to_reg)
13351 tp = copy_to_mode_reg (tp_mode, tp);
13353 return tp;
13356 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13358 static GTY(()) rtx ix86_tls_symbol;
13360 static rtx
13361 ix86_tls_get_addr (void)
13363 if (!ix86_tls_symbol)
13365 const char *sym
13366 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13367 ? "___tls_get_addr" : "__tls_get_addr");
13369 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13372 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13374 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13375 UNSPEC_PLTOFF);
13376 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13377 gen_rtx_CONST (Pmode, unspec));
13380 return ix86_tls_symbol;
13383 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13385 static GTY(()) rtx ix86_tls_module_base_symbol;
13388 ix86_tls_module_base (void)
13390 if (!ix86_tls_module_base_symbol)
13392 ix86_tls_module_base_symbol
13393 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13395 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13396 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13399 return ix86_tls_module_base_symbol;
13402 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13403 false if we expect this to be used for a memory address and true if
13404 we expect to load the address into a register. */
13406 static rtx
13407 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13409 rtx dest, base, off;
13410 rtx pic = NULL_RTX, tp = NULL_RTX;
13411 enum machine_mode tp_mode = Pmode;
13412 int type;
13414 /* Fall back to the global dynamic model if the toolchain cannot support
13415 local dynamic. */
13416 if (TARGET_SUN_TLS && !TARGET_64BIT
13417 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13418 && model == TLS_MODEL_LOCAL_DYNAMIC)
13419 model = TLS_MODEL_GLOBAL_DYNAMIC;
13421 switch (model)
13423 case TLS_MODEL_GLOBAL_DYNAMIC:
13424 dest = gen_reg_rtx (Pmode);
13426 if (!TARGET_64BIT)
13428 if (flag_pic && !TARGET_PECOFF)
13429 pic = pic_offset_table_rtx;
13430 else
13432 pic = gen_reg_rtx (Pmode);
13433 emit_insn (gen_set_got (pic));
13437 if (TARGET_GNU2_TLS)
13439 if (TARGET_64BIT)
13440 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13441 else
13442 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13444 tp = get_thread_pointer (Pmode, true);
13445 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13447 if (GET_MODE (x) != Pmode)
13448 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13450 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13452 else
13454 rtx caddr = ix86_tls_get_addr ();
13456 if (TARGET_64BIT)
13458 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13459 rtx insns;
13461 start_sequence ();
13462 emit_call_insn
13463 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13464 insns = get_insns ();
13465 end_sequence ();
13467 if (GET_MODE (x) != Pmode)
13468 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13470 RTL_CONST_CALL_P (insns) = 1;
13471 emit_libcall_block (insns, dest, rax, x);
13473 else
13474 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13476 break;
13478 case TLS_MODEL_LOCAL_DYNAMIC:
13479 base = gen_reg_rtx (Pmode);
13481 if (!TARGET_64BIT)
13483 if (flag_pic)
13484 pic = pic_offset_table_rtx;
13485 else
13487 pic = gen_reg_rtx (Pmode);
13488 emit_insn (gen_set_got (pic));
13492 if (TARGET_GNU2_TLS)
13494 rtx tmp = ix86_tls_module_base ();
13496 if (TARGET_64BIT)
13497 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13498 else
13499 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13501 tp = get_thread_pointer (Pmode, true);
13502 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13503 gen_rtx_MINUS (Pmode, tmp, tp));
13505 else
13507 rtx caddr = ix86_tls_get_addr ();
13509 if (TARGET_64BIT)
13511 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13512 rtx insns, eqv;
13514 start_sequence ();
13515 emit_call_insn
13516 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13517 insns = get_insns ();
13518 end_sequence ();
13520 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13521 share the LD_BASE result with other LD model accesses. */
13522 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13523 UNSPEC_TLS_LD_BASE);
13525 RTL_CONST_CALL_P (insns) = 1;
13526 emit_libcall_block (insns, base, rax, eqv);
13528 else
13529 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13532 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13533 off = gen_rtx_CONST (Pmode, off);
13535 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13537 if (TARGET_GNU2_TLS)
13539 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13541 if (GET_MODE (x) != Pmode)
13542 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13544 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13546 break;
13548 case TLS_MODEL_INITIAL_EXEC:
13549 if (TARGET_64BIT)
13551 if (TARGET_SUN_TLS && !TARGET_X32)
13553 /* The Sun linker took the AMD64 TLS spec literally
13554 and can only handle %rax as destination of the
13555 initial executable code sequence. */
13557 dest = gen_reg_rtx (DImode);
13558 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13559 return dest;
13562 /* Generate DImode references to avoid %fs:(%reg32)
13563 problems and the linker IE->LE relaxation bug. */
13564 tp_mode = DImode;
13565 pic = NULL;
13566 type = UNSPEC_GOTNTPOFF;
13568 else if (flag_pic)
13570 if (reload_in_progress)
13571 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13572 pic = pic_offset_table_rtx;
13573 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13575 else if (!TARGET_ANY_GNU_TLS)
13577 pic = gen_reg_rtx (Pmode);
13578 emit_insn (gen_set_got (pic));
13579 type = UNSPEC_GOTTPOFF;
13581 else
13583 pic = NULL;
13584 type = UNSPEC_INDNTPOFF;
13587 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13588 off = gen_rtx_CONST (tp_mode, off);
13589 if (pic)
13590 off = gen_rtx_PLUS (tp_mode, pic, off);
13591 off = gen_const_mem (tp_mode, off);
13592 set_mem_alias_set (off, ix86_GOT_alias_set ());
13594 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13596 base = get_thread_pointer (tp_mode,
13597 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13598 off = force_reg (tp_mode, off);
13599 return gen_rtx_PLUS (tp_mode, base, off);
13601 else
13603 base = get_thread_pointer (Pmode, true);
13604 dest = gen_reg_rtx (Pmode);
13605 emit_insn (ix86_gen_sub3 (dest, base, off));
13607 break;
13609 case TLS_MODEL_LOCAL_EXEC:
13610 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13611 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13612 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13613 off = gen_rtx_CONST (Pmode, off);
13615 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13617 base = get_thread_pointer (Pmode,
13618 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13619 return gen_rtx_PLUS (Pmode, base, off);
13621 else
13623 base = get_thread_pointer (Pmode, true);
13624 dest = gen_reg_rtx (Pmode);
13625 emit_insn (ix86_gen_sub3 (dest, base, off));
13627 break;
13629 default:
13630 gcc_unreachable ();
13633 return dest;
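/* As a rough guide to the sequences generated above for a TLS symbol "x"
   (GNU TLS, name illustrative):

     local exec, 32-bit:     movl %gs:0, %reg; the address is reg + x@ntpoff
     initial exec, 64-bit:   movq x@gottpoff(%rip), %reg; the address is
                             the thread pointer plus reg
     global dynamic, 64-bit: a call to __tls_get_addr with x@tlsgd as the
                             argument, result returned in %rax

   The exact insns depend on TARGET_GNU2_TLS, -fpic and the
   TARGET_TLS_DIRECT_SEG_REFS setting checked above.  */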
13636 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13637 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13638 unique refptr-DECL symbol corresponding to symbol DECL. */
13640 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13641 htab_t dllimport_map;
13643 static tree
13644 get_dllimport_decl (tree decl, bool beimport)
13646 struct tree_map *h, in;
13647 void **loc;
13648 const char *name;
13649 const char *prefix;
13650 size_t namelen, prefixlen;
13651 char *imp_name;
13652 tree to;
13653 rtx rtl;
13655 if (!dllimport_map)
13656 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13658 in.hash = htab_hash_pointer (decl);
13659 in.base.from = decl;
13660 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13661 h = (struct tree_map *) *loc;
13662 if (h)
13663 return h->to;
13665 *loc = h = ggc_alloc_tree_map ();
13666 h->hash = in.hash;
13667 h->base.from = decl;
13668 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13669 VAR_DECL, NULL, ptr_type_node);
13670 DECL_ARTIFICIAL (to) = 1;
13671 DECL_IGNORED_P (to) = 1;
13672 DECL_EXTERNAL (to) = 1;
13673 TREE_READONLY (to) = 1;
13675 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13676 name = targetm.strip_name_encoding (name);
13677 if (beimport)
13678 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13679 ? "*__imp_" : "*__imp__";
13680 else
13681 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13682 namelen = strlen (name);
13683 prefixlen = strlen (prefix);
13684 imp_name = (char *) alloca (namelen + prefixlen + 1);
13685 memcpy (imp_name, prefix, prefixlen);
13686 memcpy (imp_name + prefixlen, name, namelen + 1);
13688 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13689 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13690 SET_SYMBOL_REF_DECL (rtl, to);
13691 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13692 if (!beimport)
13694 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13695 #ifdef SUB_TARGET_RECORD_STUB
13696 SUB_TARGET_RECORD_STUB (name);
13697 #endif
13700 rtl = gen_const_mem (Pmode, rtl);
13701 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13703 SET_DECL_RTL (to, rtl);
13704 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13706 return to;
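/* For example, for a dllimport declaration of "foo" (an illustrative name)
   this builds a decl whose RTL is (mem (symbol_ref "*__imp__foo")), or
   "*__imp_foo" when there is no user label prefix or the name carries the
   fastcall prefix, so references to foo go through the import-table
   pointer.  The refptr variant uses the "*refptr." or "*.refptr." prefix
   instead.  */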
13709 /* Expand SYMBOL into its corresponding far-address symbol.
13710 WANT_REG is true if we require the result to be a register. */
13712 static rtx
13713 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13715 tree imp_decl;
13716 rtx x;
13718 gcc_assert (SYMBOL_REF_DECL (symbol));
13719 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13721 x = DECL_RTL (imp_decl);
13722 if (want_reg)
13723 x = force_reg (Pmode, x);
13724 return x;
13727 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13728 true if we require the result to be a register. */
13730 static rtx
13731 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13733 tree imp_decl;
13734 rtx x;
13736 gcc_assert (SYMBOL_REF_DECL (symbol));
13737 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13739 x = DECL_RTL (imp_decl);
13740 if (want_reg)
13741 x = force_reg (Pmode, x);
13742 return x;
13745 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
13746 is true if we require the result to be a register. */
13748 static rtx
13749 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13751 if (!TARGET_PECOFF)
13752 return NULL_RTX;
13754 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13756 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13757 return legitimize_dllimport_symbol (addr, inreg);
13758 if (GET_CODE (addr) == CONST
13759 && GET_CODE (XEXP (addr, 0)) == PLUS
13760 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13761 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13763 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13764 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13768 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13769 return NULL_RTX;
13770 if (GET_CODE (addr) == SYMBOL_REF
13771 && !is_imported_p (addr)
13772 && SYMBOL_REF_EXTERNAL_P (addr)
13773 && SYMBOL_REF_DECL (addr))
13774 return legitimize_pe_coff_extern_decl (addr, inreg);
13776 if (GET_CODE (addr) == CONST
13777 && GET_CODE (XEXP (addr, 0)) == PLUS
13778 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13779 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13780 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13781 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13783 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13784 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13786 return NULL_RTX;
13789 /* Try machine-dependent ways of modifying an illegitimate address
13790 to be legitimate. If we find one, return the new, valid address.
13791 This macro is used in only one place: `memory_address' in explow.c.
13793 OLDX is the address as it was before break_out_memory_refs was called.
13794 In some cases it is useful to look at this to decide what needs to be done.
13796 It is always safe for this macro to do nothing. It exists to recognize
13797 opportunities to optimize the output.
13799 For the 80386, we handle X+REG by loading X into a register R and
13800 using R+REG. R will go in a general reg and indexing will be used.
13801 However, if REG is a broken-out memory address or multiplication,
13802 nothing needs to be done because REG can certainly go in a general reg.
13804 When -fpic is used, special handling is needed for symbolic references.
13805 See comments by legitimize_pic_address in i386.c for details. */
13807 static rtx
13808 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
13809 enum machine_mode mode)
13811 int changed = 0;
13812 unsigned log;
13814 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13815 if (log)
13816 return legitimize_tls_address (x, (enum tls_model) log, false);
13817 if (GET_CODE (x) == CONST
13818 && GET_CODE (XEXP (x, 0)) == PLUS
13819 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13820 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13822 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13823 (enum tls_model) log, false);
13824 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13827 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13829 rtx tmp = legitimize_pe_coff_symbol (x, true);
13830 if (tmp)
13831 return tmp;
13834 if (flag_pic && SYMBOLIC_CONST (x))
13835 return legitimize_pic_address (x, 0);
13837 #if TARGET_MACHO
13838 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13839 return machopic_indirect_data_reference (x, 0);
13840 #endif
13842 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13843 if (GET_CODE (x) == ASHIFT
13844 && CONST_INT_P (XEXP (x, 1))
13845 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13847 changed = 1;
13848 log = INTVAL (XEXP (x, 1));
13849 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13850 GEN_INT (1 << log));
13853 if (GET_CODE (x) == PLUS)
13855 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13857 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13858 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13859 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13861 changed = 1;
13862 log = INTVAL (XEXP (XEXP (x, 0), 1));
13863 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13864 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13865 GEN_INT (1 << log));
13868 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13869 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13870 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13872 changed = 1;
13873 log = INTVAL (XEXP (XEXP (x, 1), 1));
13874 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13875 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13876 GEN_INT (1 << log));
13879 /* Put multiply first if it isn't already. */
13880 if (GET_CODE (XEXP (x, 1)) == MULT)
13882 rtx tmp = XEXP (x, 0);
13883 XEXP (x, 0) = XEXP (x, 1);
13884 XEXP (x, 1) = tmp;
13885 changed = 1;
13888 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13889 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13890 created by virtual register instantiation, register elimination, and
13891 similar optimizations. */
13892 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13894 changed = 1;
13895 x = gen_rtx_PLUS (Pmode,
13896 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13897 XEXP (XEXP (x, 1), 0)),
13898 XEXP (XEXP (x, 1), 1));
13901 /* Canonicalize
13902 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13903 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13904 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13905 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13906 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13907 && CONSTANT_P (XEXP (x, 1)))
13909 rtx constant;
13910 rtx other = NULL_RTX;
13912 if (CONST_INT_P (XEXP (x, 1)))
13914 constant = XEXP (x, 1);
13915 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13917 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13919 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13920 other = XEXP (x, 1);
13922 else
13923 constant = 0;
13925 if (constant)
13927 changed = 1;
13928 x = gen_rtx_PLUS (Pmode,
13929 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13930 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13931 plus_constant (Pmode, other,
13932 INTVAL (constant)));
13936 if (changed && ix86_legitimate_address_p (mode, x, false))
13937 return x;
13939 if (GET_CODE (XEXP (x, 0)) == MULT)
13941 changed = 1;
13942 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
13945 if (GET_CODE (XEXP (x, 1)) == MULT)
13947 changed = 1;
13948 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
13951 if (changed
13952 && REG_P (XEXP (x, 1))
13953 && REG_P (XEXP (x, 0)))
13954 return x;
13956 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13958 changed = 1;
13959 x = legitimize_pic_address (x, 0);
13962 if (changed && ix86_legitimate_address_p (mode, x, false))
13963 return x;
13965 if (REG_P (XEXP (x, 0)))
13967 rtx temp = gen_reg_rtx (Pmode);
13968 rtx val = force_operand (XEXP (x, 1), temp);
13969 if (val != temp)
13971 val = convert_to_mode (Pmode, val, 1);
13972 emit_move_insn (temp, val);
13975 XEXP (x, 1) = temp;
13976 return x;
13979 else if (REG_P (XEXP (x, 1)))
13981 rtx temp = gen_reg_rtx (Pmode);
13982 rtx val = force_operand (XEXP (x, 0), temp);
13983 if (val != temp)
13985 val = convert_to_mode (Pmode, val, 1);
13986 emit_move_insn (temp, val);
13989 XEXP (x, 0) = temp;
13990 return x;
13994 return x;
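/* An example of the canonicalization performed above (register numbers are
   illustrative): the address
     (plus:SI (reg:SI 100) (ashift:SI (reg:SI 101) (const_int 2)))
   is rewritten with the shift turned into a multiply and the multiply
   moved to the first operand,
     (plus:SI (mult:SI (reg:SI 101) (const_int 4)) (reg:SI 100))
   which ix86_legitimate_address_p then accepts as a base plus a
   scale-by-4 index.  */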
13997 /* Print an integer constant expression in assembler syntax. Addition
13998 and subtraction are the only arithmetic that may appear in these
13999 expressions. FILE is the stdio stream to write to, X is the rtx, and
14000 CODE is the operand print code from the output string. */
14002 static void
14003 output_pic_addr_const (FILE *file, rtx x, int code)
14005 char buf[256];
14007 switch (GET_CODE (x))
14009 case PC:
14010 gcc_assert (flag_pic);
14011 putc ('.', file);
14012 break;
14014 case SYMBOL_REF:
14015 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14016 output_addr_const (file, x);
14017 else
14019 const char *name = XSTR (x, 0);
14021 /* Mark the decl as referenced so that cgraph will
14022 output the function. */
14023 if (SYMBOL_REF_DECL (x))
14024 mark_decl_referenced (SYMBOL_REF_DECL (x));
14026 #if TARGET_MACHO
14027 if (MACHOPIC_INDIRECT
14028 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14029 name = machopic_indirection_name (x, /*stub_p=*/true);
14030 #endif
14031 assemble_name (file, name);
14033 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14034 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14035 fputs ("@PLT", file);
14036 break;
14038 case LABEL_REF:
14039 x = XEXP (x, 0);
14040 /* FALLTHRU */
14041 case CODE_LABEL:
14042 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14043 assemble_name (asm_out_file, buf);
14044 break;
14046 case CONST_INT:
14047 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14048 break;
14050 case CONST:
14051 /* This used to output parentheses around the expression,
14052 but that does not work on the 386 (either ATT or BSD assembler). */
14053 output_pic_addr_const (file, XEXP (x, 0), code);
14054 break;
14056 case CONST_DOUBLE:
14057 if (GET_MODE (x) == VOIDmode)
14059 /* We can use %d if the number is <32 bits and positive. */
14060 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14061 fprintf (file, "0x%lx%08lx",
14062 (unsigned long) CONST_DOUBLE_HIGH (x),
14063 (unsigned long) CONST_DOUBLE_LOW (x));
14064 else
14065 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14067 else
14068 /* We can't handle floating point constants;
14069 TARGET_PRINT_OPERAND must handle them. */
14070 output_operand_lossage ("floating constant misused");
14071 break;
14073 case PLUS:
14074 /* Some assemblers need integer constants to appear first. */
14075 if (CONST_INT_P (XEXP (x, 0)))
14077 output_pic_addr_const (file, XEXP (x, 0), code);
14078 putc ('+', file);
14079 output_pic_addr_const (file, XEXP (x, 1), code);
14081 else
14083 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14084 output_pic_addr_const (file, XEXP (x, 1), code);
14085 putc ('+', file);
14086 output_pic_addr_const (file, XEXP (x, 0), code);
14088 break;
14090 case MINUS:
14091 if (!TARGET_MACHO)
14092 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14093 output_pic_addr_const (file, XEXP (x, 0), code);
14094 putc ('-', file);
14095 output_pic_addr_const (file, XEXP (x, 1), code);
14096 if (!TARGET_MACHO)
14097 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14098 break;
14100 case UNSPEC:
14101 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14103 bool f = i386_asm_output_addr_const_extra (file, x);
14104 gcc_assert (f);
14105 break;
14108 gcc_assert (XVECLEN (x, 0) == 1);
14109 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14110 switch (XINT (x, 1))
14112 case UNSPEC_GOT:
14113 fputs ("@GOT", file);
14114 break;
14115 case UNSPEC_GOTOFF:
14116 fputs ("@GOTOFF", file);
14117 break;
14118 case UNSPEC_PLTOFF:
14119 fputs ("@PLTOFF", file);
14120 break;
14121 case UNSPEC_PCREL:
14122 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14123 "(%rip)" : "[rip]", file);
14124 break;
14125 case UNSPEC_GOTPCREL:
14126 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14127 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14128 break;
14129 case UNSPEC_GOTTPOFF:
14130 /* FIXME: This might be @TPOFF in Sun ld too. */
14131 fputs ("@gottpoff", file);
14132 break;
14133 case UNSPEC_TPOFF:
14134 fputs ("@tpoff", file);
14135 break;
14136 case UNSPEC_NTPOFF:
14137 if (TARGET_64BIT)
14138 fputs ("@tpoff", file);
14139 else
14140 fputs ("@ntpoff", file);
14141 break;
14142 case UNSPEC_DTPOFF:
14143 fputs ("@dtpoff", file);
14144 break;
14145 case UNSPEC_GOTNTPOFF:
14146 if (TARGET_64BIT)
14147 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14148 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14149 else
14150 fputs ("@gotntpoff", file);
14151 break;
14152 case UNSPEC_INDNTPOFF:
14153 fputs ("@indntpoff", file);
14154 break;
14155 #if TARGET_MACHO
14156 case UNSPEC_MACHOPIC_OFFSET:
14157 putc ('-', file);
14158 machopic_output_function_base_name (file);
14159 break;
14160 #endif
14161 default:
14162 output_operand_lossage ("invalid UNSPEC as operand");
14163 break;
14165 break;
14167 default:
14168 output_operand_lossage ("invalid expression as operand");
14172 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14173 We need to emit DTP-relative relocations. */
14175 static void ATTRIBUTE_UNUSED
14176 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14178 fputs (ASM_LONG, file);
14179 output_addr_const (file, x);
14180 fputs ("@dtpoff", file);
14181 switch (size)
14183 case 4:
14184 break;
14185 case 8:
14186 fputs (", 0", file);
14187 break;
14188 default:
14189 gcc_unreachable ();
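/* For example, for a thread-local symbol "foo" (an illustrative name) this
   emits ".long foo@dtpoff" when SIZE is 4 and ".long foo@dtpoff, 0" when
   SIZE is 8, ASM_LONG being "\t.long\t" on x86 targets.  */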
14193 /* Return true if X is a representation of the PIC register. This copes
14194 with calls from ix86_find_base_term, where the register might have
14195 been replaced by a cselib value. */
14197 static bool
14198 ix86_pic_register_p (rtx x)
14200 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14201 return (pic_offset_table_rtx
14202 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14203 else
14204 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14207 /* Helper function for ix86_delegitimize_address.
14208 Attempt to delegitimize TLS local-exec accesses. */
14210 static rtx
14211 ix86_delegitimize_tls_address (rtx orig_x)
14213 rtx x = orig_x, unspec;
14214 struct ix86_address addr;
14216 if (!TARGET_TLS_DIRECT_SEG_REFS)
14217 return orig_x;
14218 if (MEM_P (x))
14219 x = XEXP (x, 0);
14220 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14221 return orig_x;
14222 if (ix86_decompose_address (x, &addr) == 0
14223 || addr.seg != DEFAULT_TLS_SEG_REG
14224 || addr.disp == NULL_RTX
14225 || GET_CODE (addr.disp) != CONST)
14226 return orig_x;
14227 unspec = XEXP (addr.disp, 0);
14228 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14229 unspec = XEXP (unspec, 0);
14230 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14231 return orig_x;
14232 x = XVECEXP (unspec, 0, 0);
14233 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14234 if (unspec != XEXP (addr.disp, 0))
14235 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14236 if (addr.index)
14238 rtx idx = addr.index;
14239 if (addr.scale != 1)
14240 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14241 x = gen_rtx_PLUS (Pmode, idx, x);
14243 if (addr.base)
14244 x = gen_rtx_PLUS (Pmode, addr.base, x);
14245 if (MEM_P (orig_x))
14246 x = replace_equiv_address_nv (orig_x, x);
14247 return x;
14250 /* In the name of slightly smaller debug output, and to cater to
14251 general assembler lossage, recognize PIC+GOTOFF and turn it back
14252 into a direct symbol reference.
14254 On Darwin, this is necessary to avoid a crash, because Darwin
14255 has a different PIC label for each routine but the DWARF debugging
14256 information is not associated with any particular routine, so it's
14257 necessary to remove references to the PIC label from RTL stored by
14258 the DWARF output code. */
14260 static rtx
14261 ix86_delegitimize_address (rtx x)
14263 rtx orig_x = delegitimize_mem_from_attrs (x);
14264 /* addend is NULL or some rtx if x is something+GOTOFF where
14265 something doesn't include the PIC register. */
14266 rtx addend = NULL_RTX;
14267 /* reg_addend is NULL or a multiple of some register. */
14268 rtx reg_addend = NULL_RTX;
14269 /* const_addend is NULL or a const_int. */
14270 rtx const_addend = NULL_RTX;
14271 /* This is the result, or NULL. */
14272 rtx result = NULL_RTX;
14274 x = orig_x;
14276 if (MEM_P (x))
14277 x = XEXP (x, 0);
14279 if (TARGET_64BIT)
14281 if (GET_CODE (x) == CONST
14282 && GET_CODE (XEXP (x, 0)) == PLUS
14283 && GET_MODE (XEXP (x, 0)) == Pmode
14284 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14285 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14286 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14288 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14289 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14290 if (MEM_P (orig_x))
14291 x = replace_equiv_address_nv (orig_x, x);
14292 return x;
14295 if (GET_CODE (x) == CONST
14296 && GET_CODE (XEXP (x, 0)) == UNSPEC
14297 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14298 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14299 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14301 x = XVECEXP (XEXP (x, 0), 0, 0);
14302 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14304 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14305 GET_MODE (x), 0);
14306 if (x == NULL_RTX)
14307 return orig_x;
14309 return x;
14312 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14313 return ix86_delegitimize_tls_address (orig_x);
14315 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14316 and -mcmodel=medium -fpic. */
14319 if (GET_CODE (x) != PLUS
14320 || GET_CODE (XEXP (x, 1)) != CONST)
14321 return ix86_delegitimize_tls_address (orig_x);
14323 if (ix86_pic_register_p (XEXP (x, 0)))
14324 /* %ebx + GOT/GOTOFF */
14326 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14328 /* %ebx + %reg * scale + GOT/GOTOFF */
14329 reg_addend = XEXP (x, 0);
14330 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14331 reg_addend = XEXP (reg_addend, 1);
14332 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14333 reg_addend = XEXP (reg_addend, 0);
14334 else
14336 reg_addend = NULL_RTX;
14337 addend = XEXP (x, 0);
14340 else
14341 addend = XEXP (x, 0);
14343 x = XEXP (XEXP (x, 1), 0);
14344 if (GET_CODE (x) == PLUS
14345 && CONST_INT_P (XEXP (x, 1)))
14347 const_addend = XEXP (x, 1);
14348 x = XEXP (x, 0);
14351 if (GET_CODE (x) == UNSPEC
14352 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14353 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14354 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14355 && !MEM_P (orig_x) && !addend)))
14356 result = XVECEXP (x, 0, 0);
14358 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14359 && !MEM_P (orig_x))
14360 result = XVECEXP (x, 0, 0);
14362 if (! result)
14363 return ix86_delegitimize_tls_address (orig_x);
14365 if (const_addend)
14366 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14367 if (reg_addend)
14368 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14369 if (addend)
14371 /* If the rest of the original X doesn't involve the PIC register, add
14372 addend and subtract pic_offset_table_rtx. This can happen e.g.
14373 for code like:
14374 leal (%ebx, %ecx, 4), %ecx
14376 movl foo@GOTOFF(%ecx), %edx
14377 in which case we return (%ecx - %ebx) + foo. */
14378 if (pic_offset_table_rtx)
14379 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14380 pic_offset_table_rtx),
14381 result);
14382 else
14383 return orig_x;
14385 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14387 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14388 if (result == NULL_RTX)
14389 return orig_x;
14391 return result;
14394 /* If X is a machine specific address (i.e. a symbol or label being
14395 referenced as a displacement from the GOT implemented using an
14396 UNSPEC), then return the base term. Otherwise return X. */
14399 ix86_find_base_term (rtx x)
14401 rtx term;
14403 if (TARGET_64BIT)
14405 if (GET_CODE (x) != CONST)
14406 return x;
14407 term = XEXP (x, 0);
14408 if (GET_CODE (term) == PLUS
14409 && (CONST_INT_P (XEXP (term, 1))
14410 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14411 term = XEXP (term, 0);
14412 if (GET_CODE (term) != UNSPEC
14413 || (XINT (term, 1) != UNSPEC_GOTPCREL
14414 && XINT (term, 1) != UNSPEC_PCREL))
14415 return x;
14417 return XVECEXP (term, 0, 0);
14420 return ix86_delegitimize_address (x);
14423 static void
14424 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14425 bool fp, FILE *file)
14427 const char *suffix;
14429 if (mode == CCFPmode || mode == CCFPUmode)
14431 code = ix86_fp_compare_code_to_integer (code);
14432 mode = CCmode;
14434 if (reverse)
14435 code = reverse_condition (code);
14437 switch (code)
14439 case EQ:
14440 switch (mode)
14442 case CCAmode:
14443 suffix = "a";
14444 break;
14446 case CCCmode:
14447 suffix = "c";
14448 break;
14450 case CCOmode:
14451 suffix = "o";
14452 break;
14454 case CCSmode:
14455 suffix = "s";
14456 break;
14458 default:
14459 suffix = "e";
14461 break;
14462 case NE:
14463 switch (mode)
14465 case CCAmode:
14466 suffix = "na";
14467 break;
14469 case CCCmode:
14470 suffix = "nc";
14471 break;
14473 case CCOmode:
14474 suffix = "no";
14475 break;
14477 case CCSmode:
14478 suffix = "ns";
14479 break;
14481 default:
14482 suffix = "ne";
14484 break;
14485 case GT:
14486 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14487 suffix = "g";
14488 break;
14489 case GTU:
14490 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14491 Those same assemblers have the same but opposite lossage on cmov. */
14492 if (mode == CCmode)
14493 suffix = fp ? "nbe" : "a";
14494 else
14495 gcc_unreachable ();
14496 break;
14497 case LT:
14498 switch (mode)
14500 case CCNOmode:
14501 case CCGOCmode:
14502 suffix = "s";
14503 break;
14505 case CCmode:
14506 case CCGCmode:
14507 suffix = "l";
14508 break;
14510 default:
14511 gcc_unreachable ();
14513 break;
14514 case LTU:
14515 if (mode == CCmode)
14516 suffix = "b";
14517 else if (mode == CCCmode)
14518 suffix = "c";
14519 else
14520 gcc_unreachable ();
14521 break;
14522 case GE:
14523 switch (mode)
14525 case CCNOmode:
14526 case CCGOCmode:
14527 suffix = "ns";
14528 break;
14530 case CCmode:
14531 case CCGCmode:
14532 suffix = "ge";
14533 break;
14535 default:
14536 gcc_unreachable ();
14538 break;
14539 case GEU:
14540 if (mode == CCmode)
14541 suffix = fp ? "nb" : "ae";
14542 else if (mode == CCCmode)
14543 suffix = "nc";
14544 else
14545 gcc_unreachable ();
14546 break;
14547 case LE:
14548 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14549 suffix = "le";
14550 break;
14551 case LEU:
14552 if (mode == CCmode)
14553 suffix = "be";
14554 else
14555 gcc_unreachable ();
14556 break;
14557 case UNORDERED:
14558 suffix = fp ? "u" : "p";
14559 break;
14560 case ORDERED:
14561 suffix = fp ? "nu" : "np";
14562 break;
14563 default:
14564 gcc_unreachable ();
14566 fputs (suffix, file);
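/* Worked examples for the mapping above (a sketch, not exhaustive):
   (GE, CCGCmode) prints "ge"; (LTU, CCmode) prints "b"; (GTU, CCmode)
   with FP set prints "nbe"; and with REVERSE set, (EQ, CCmode) is
   first turned into NE and therefore prints "ne".  */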
14569 /* Print the name of register X to FILE based on its machine mode and number.
14570 If CODE is 'w', pretend the mode is HImode.
14571 If CODE is 'b', pretend the mode is QImode.
14572 If CODE is 'k', pretend the mode is SImode.
14573 If CODE is 'q', pretend the mode is DImode.
14574 If CODE is 'x', pretend the mode is V4SFmode.
14575 If CODE is 't', pretend the mode is V8SFmode.
14576 If CODE is 'g', pretend the mode is V16SFmode.
14577 If CODE is 'h', pretend the reg is the 'high' byte register.
14578 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
14579 If CODE is 'd', duplicate the operand for AVX instruction.
14582 void
14583 print_reg (rtx x, int code, FILE *file)
14585 const char *reg;
14586 unsigned int regno;
14587 bool duplicated = code == 'd' && TARGET_AVX;
14589 if (ASSEMBLER_DIALECT == ASM_ATT)
14590 putc ('%', file);
14592 if (x == pc_rtx)
14594 gcc_assert (TARGET_64BIT);
14595 fputs ("rip", file);
14596 return;
14599 regno = true_regnum (x);
14600 gcc_assert (regno != ARG_POINTER_REGNUM
14601 && regno != FRAME_POINTER_REGNUM
14602 && regno != FLAGS_REG
14603 && regno != FPSR_REG
14604 && regno != FPCR_REG);
14606 if (code == 'w' || MMX_REG_P (x))
14607 code = 2;
14608 else if (code == 'b')
14609 code = 1;
14610 else if (code == 'k')
14611 code = 4;
14612 else if (code == 'q')
14613 code = 8;
14614 else if (code == 'y')
14615 code = 3;
14616 else if (code == 'h')
14617 code = 0;
14618 else if (code == 'x')
14619 code = 16;
14620 else if (code == 't')
14621 code = 32;
14622 else if (code == 'g')
14623 code = 64;
14624 else
14625 code = GET_MODE_SIZE (GET_MODE (x));
14627 /* Irritatingly, AMD extended registers use a different naming convention
14628 from the normal registers: "r%d[bwd]".  */
14629 if (REX_INT_REGNO_P (regno))
14631 gcc_assert (TARGET_64BIT);
14632 putc ('r', file);
14633 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14634 switch (code)
14636 case 0:
14637 error ("extended registers have no high halves");
14638 break;
14639 case 1:
14640 putc ('b', file);
14641 break;
14642 case 2:
14643 putc ('w', file);
14644 break;
14645 case 4:
14646 putc ('d', file);
14647 break;
14648 case 8:
14649 /* no suffix */
14650 break;
14651 default:
14652 error ("unsupported operand size for extended register");
14653 break;
14655 return;
14658 reg = NULL;
14659 switch (code)
14661 case 3:
14662 if (STACK_TOP_P (x))
14664 reg = "st(0)";
14665 break;
14667 /* FALLTHRU */
14668 case 8:
14669 case 4:
14670 case 12:
14671 if (! ANY_FP_REG_P (x))
14672 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14673 /* FALLTHRU */
14674 case 16:
14675 case 2:
14676 normal:
14677 reg = hi_reg_name[regno];
14678 break;
14679 case 1:
14680 if (regno >= ARRAY_SIZE (qi_reg_name))
14681 goto normal;
14682 reg = qi_reg_name[regno];
14683 break;
14684 case 0:
14685 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14686 goto normal;
14687 reg = qi_high_reg_name[regno];
14688 break;
14689 case 32:
14690 if (SSE_REG_P (x))
14692 gcc_assert (!duplicated);
14693 putc ('y', file);
14694 fputs (hi_reg_name[regno] + 1, file);
14695 return;
14697 case 64:
14698 if (SSE_REG_P (x))
14700 gcc_assert (!duplicated);
14701 putc ('z', file);
14702 fputs (hi_reg_name[REGNO (x)] + 1, file);
14703 return;
14705 break;
14706 default:
14707 gcc_unreachable ();
14710 fputs (reg, file);
14711 if (duplicated)
14713 if (ASSEMBLER_DIALECT == ASM_ATT)
14714 fprintf (file, ", %%%s", reg);
14715 else
14716 fprintf (file, ", %s", reg);
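/* Illustrative examples for the size codes handled above (a sketch),
   using AT&T syntax and operand 0 in register ax:

       %k0 -> %eax    %w0 -> %ax    %b0 -> %al    %h0 -> %ah
       %q0 -> %rax    (64-bit only)

   For the REX registers the AMD naming is used instead, so the same
   codes print e.g. r8d, r8w, r8b and r8.  */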
14720 /* Locate some local-dynamic symbol still in use by this function
14721 so that we can print its name in some tls_local_dynamic_base
14722 pattern. */
14724 static int
14725 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
14727 rtx x = *px;
14729 if (GET_CODE (x) == SYMBOL_REF
14730 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
14732 cfun->machine->some_ld_name = XSTR (x, 0);
14733 return 1;
14736 return 0;
14739 static const char *
14740 get_some_local_dynamic_name (void)
14742 rtx insn;
14744 if (cfun->machine->some_ld_name)
14745 return cfun->machine->some_ld_name;
14747 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
14748 if (NONDEBUG_INSN_P (insn)
14749 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
14750 return cfun->machine->some_ld_name;
14752 return NULL;
14755 /* Meaning of CODE:
14756 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14757 C -- print opcode suffix for set/cmov insn.
14758 c -- like C, but print reversed condition
14759 F,f -- likewise, but for floating-point.
14760 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14761 otherwise nothing
14762 R -- print embedded rounding and sae.
14763 r -- print only sae.
14764 z -- print the opcode suffix for the size of the current operand.
14765 Z -- likewise, with special suffixes for x87 instructions.
14766 * -- print a star (in certain assembler syntax)
14767 A -- print an absolute memory reference.
14768 E -- print address with DImode register names if TARGET_64BIT.
14769 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14770 s -- print a shift double count, followed by the assembler's argument
14771 delimiter.
14772 b -- print the QImode name of the register for the indicated operand.
14773 %b0 would print %al if operands[0] is reg 0.
14774 w -- likewise, print the HImode name of the register.
14775 k -- likewise, print the SImode name of the register.
14776 q -- likewise, print the DImode name of the register.
14777 x -- likewise, print the V4SFmode name of the register.
14778 t -- likewise, print the V8SFmode name of the register.
14779 g -- likewise, print the V16SFmode name of the register.
14780 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14781 y -- print "st(0)" instead of "st" as a register.
14782 d -- print duplicated register operand for AVX instruction.
14783 D -- print condition for SSE cmp instruction.
14784 P -- if PIC, print an @PLT suffix.
14785 p -- print raw symbol name.
14786 X -- don't print any sort of PIC '@' suffix for a symbol.
14787 & -- print some in-use local-dynamic symbol name.
14788 H -- print a memory address offset by 8; used for sse high-parts
14789 Y -- print condition for XOP pcom* instruction.
14790 + -- print a branch hint as 'cs' or 'ds' prefix
14791 ; -- print a semicolon (after prefixes due to bug in older gas).
14792 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14793 @ -- print a segment register of thread base pointer load
14794 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14797 void
14798 ix86_print_operand (FILE *file, rtx x, int code)
14800 if (code)
14802 switch (code)
14804 case 'A':
14805 switch (ASSEMBLER_DIALECT)
14807 case ASM_ATT:
14808 putc ('*', file);
14809 break;
14811 case ASM_INTEL:
14812 /* Intel syntax. For absolute addresses, registers should not
14813 be surrounded by brackets. */
14814 if (!REG_P (x))
14816 putc ('[', file);
14817 ix86_print_operand (file, x, 0);
14818 putc (']', file);
14819 return;
14821 break;
14823 default:
14824 gcc_unreachable ();
14827 ix86_print_operand (file, x, 0);
14828 return;
14830 case 'E':
14831 /* Wrap address in an UNSPEC to declare special handling. */
14832 if (TARGET_64BIT)
14833 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14835 output_address (x);
14836 return;
14838 case 'L':
14839 if (ASSEMBLER_DIALECT == ASM_ATT)
14840 putc ('l', file);
14841 return;
14843 case 'W':
14844 if (ASSEMBLER_DIALECT == ASM_ATT)
14845 putc ('w', file);
14846 return;
14848 case 'B':
14849 if (ASSEMBLER_DIALECT == ASM_ATT)
14850 putc ('b', file);
14851 return;
14853 case 'Q':
14854 if (ASSEMBLER_DIALECT == ASM_ATT)
14855 putc ('l', file);
14856 return;
14858 case 'S':
14859 if (ASSEMBLER_DIALECT == ASM_ATT)
14860 putc ('s', file);
14861 return;
14863 case 'T':
14864 if (ASSEMBLER_DIALECT == ASM_ATT)
14865 putc ('t', file);
14866 return;
14868 case 'O':
14869 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14870 if (ASSEMBLER_DIALECT != ASM_ATT)
14871 return;
14873 switch (GET_MODE_SIZE (GET_MODE (x)))
14875 case 2:
14876 putc ('w', file);
14877 break;
14879 case 4:
14880 putc ('l', file);
14881 break;
14883 case 8:
14884 putc ('q', file);
14885 break;
14887 default:
14888 output_operand_lossage
14889 ("invalid operand size for operand code 'O'");
14890 return;
14893 putc ('.', file);
14894 #endif
14895 return;
14897 case 'z':
14898 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14900 /* Opcodes don't get size suffixes when using Intel syntax. */
14901 if (ASSEMBLER_DIALECT == ASM_INTEL)
14902 return;
14904 switch (GET_MODE_SIZE (GET_MODE (x)))
14906 case 1:
14907 putc ('b', file);
14908 return;
14910 case 2:
14911 putc ('w', file);
14912 return;
14914 case 4:
14915 putc ('l', file);
14916 return;
14918 case 8:
14919 putc ('q', file);
14920 return;
14922 default:
14923 output_operand_lossage
14924 ("invalid operand size for operand code 'z'");
14925 return;
14929 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14930 warning
14931 (0, "non-integer operand used with operand code 'z'");
14932 /* FALLTHRU */
14934 case 'Z':
14935 /* 387 opcodes don't get size suffixes when using Intel syntax. */
14936 if (ASSEMBLER_DIALECT == ASM_INTEL)
14937 return;
14939 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14941 switch (GET_MODE_SIZE (GET_MODE (x)))
14943 case 2:
14944 #ifdef HAVE_AS_IX86_FILDS
14945 putc ('s', file);
14946 #endif
14947 return;
14949 case 4:
14950 putc ('l', file);
14951 return;
14953 case 8:
14954 #ifdef HAVE_AS_IX86_FILDQ
14955 putc ('q', file);
14956 #else
14957 fputs ("ll", file);
14958 #endif
14959 return;
14961 default:
14962 break;
14965 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14967 /* 387 opcodes don't get size suffixes
14968 if the operands are registers. */
14969 if (STACK_REG_P (x))
14970 return;
14972 switch (GET_MODE_SIZE (GET_MODE (x)))
14974 case 4:
14975 putc ('s', file);
14976 return;
14978 case 8:
14979 putc ('l', file);
14980 return;
14982 case 12:
14983 case 16:
14984 putc ('t', file);
14985 return;
14987 default:
14988 break;
14991 else
14993 output_operand_lossage
14994 ("invalid operand type used with operand code 'Z'");
14995 return;
14998 output_operand_lossage
14999 ("invalid operand size for operand code 'Z'");
15000 return;
15002 case 'd':
15003 case 'b':
15004 case 'w':
15005 case 'k':
15006 case 'q':
15007 case 'h':
15008 case 't':
15009 case 'g':
15010 case 'y':
15011 case 'x':
15012 case 'X':
15013 case 'P':
15014 case 'p':
15015 break;
15017 case 's':
15018 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15020 ix86_print_operand (file, x, 0);
15021 fputs (", ", file);
15023 return;
15025 case 'Y':
15026 switch (GET_CODE (x))
15028 case NE:
15029 fputs ("neq", file);
15030 break;
15031 case EQ:
15032 fputs ("eq", file);
15033 break;
15034 case GE:
15035 case GEU:
15036 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15037 break;
15038 case GT:
15039 case GTU:
15040 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15041 break;
15042 case LE:
15043 case LEU:
15044 fputs ("le", file);
15045 break;
15046 case LT:
15047 case LTU:
15048 fputs ("lt", file);
15049 break;
15050 case UNORDERED:
15051 fputs ("unord", file);
15052 break;
15053 case ORDERED:
15054 fputs ("ord", file);
15055 break;
15056 case UNEQ:
15057 fputs ("ueq", file);
15058 break;
15059 case UNGE:
15060 fputs ("nlt", file);
15061 break;
15062 case UNGT:
15063 fputs ("nle", file);
15064 break;
15065 case UNLE:
15066 fputs ("ule", file);
15067 break;
15068 case UNLT:
15069 fputs ("ult", file);
15070 break;
15071 case LTGT:
15072 fputs ("une", file);
15073 break;
15074 default:
15075 output_operand_lossage ("operand is not a condition code, "
15076 "invalid operand code 'Y'");
15077 return;
15079 return;
15081 case 'D':
15082 /* Little bit of braindamage here. The SSE compare instructions
15083 use completely different names for the comparisons than the
15084 fp conditional moves do. */
15085 switch (GET_CODE (x))
15087 case UNEQ:
15088 if (TARGET_AVX)
15090 fputs ("eq_us", file);
15091 break;
15093 case EQ:
15094 fputs ("eq", file);
15095 break;
15096 case UNLT:
15097 if (TARGET_AVX)
15099 fputs ("nge", file);
15100 break;
15102 case LT:
15103 fputs ("lt", file);
15104 break;
15105 case UNLE:
15106 if (TARGET_AVX)
15108 fputs ("ngt", file);
15109 break;
15111 case LE:
15112 fputs ("le", file);
15113 break;
15114 case UNORDERED:
15115 fputs ("unord", file);
15116 break;
15117 case LTGT:
15118 if (TARGET_AVX)
15120 fputs ("neq_oq", file);
15121 break;
15123 case NE:
15124 fputs ("neq", file);
15125 break;
15126 case GE:
15127 if (TARGET_AVX)
15129 fputs ("ge", file);
15130 break;
15132 case UNGE:
15133 fputs ("nlt", file);
15134 break;
15135 case GT:
15136 if (TARGET_AVX)
15138 fputs ("gt", file);
15139 break;
15141 case UNGT:
15142 fputs ("nle", file);
15143 break;
15144 case ORDERED:
15145 fputs ("ord", file);
15146 break;
15147 default:
15148 output_operand_lossage ("operand is not a condition code, "
15149 "invalid operand code 'D'");
15150 return;
15152 return;
15154 case 'F':
15155 case 'f':
15156 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15157 if (ASSEMBLER_DIALECT == ASM_ATT)
15158 putc ('.', file);
15159 #endif
15161 case 'C':
15162 case 'c':
15163 if (!COMPARISON_P (x))
15165 output_operand_lossage ("operand is not a condition code, "
15166 "invalid operand code '%c'", code);
15167 return;
15169 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15170 code == 'c' || code == 'f',
15171 code == 'F' || code == 'f',
15172 file);
15173 return;
15175 case 'H':
15176 if (!offsettable_memref_p (x))
15178 output_operand_lossage ("operand is not an offsettable memory "
15179 "reference, invalid operand code 'H'");
15180 return;
15182 /* It doesn't actually matter what mode we use here, as we're
15183 only going to use this for printing. */
15184 x = adjust_address_nv (x, DImode, 8);
15185 /* Output 'qword ptr' for intel assembler dialect. */
15186 if (ASSEMBLER_DIALECT == ASM_INTEL)
15187 code = 'q';
15188 break;
15190 case 'K':
15191 gcc_assert (CONST_INT_P (x));
15193 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15194 #ifdef HAVE_AS_IX86_HLE
15195 fputs ("xacquire ", file);
15196 #else
15197 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15198 #endif
15199 else if (INTVAL (x) & IX86_HLE_RELEASE)
15200 #ifdef HAVE_AS_IX86_HLE
15201 fputs ("xrelease ", file);
15202 #else
15203 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15204 #endif
15205 /* We do not want to print the value of the operand. */
15206 return;
15208 case 'N':
15209 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15210 fputs ("{z}", file);
15211 return;
15213 case 'r':
15214 gcc_assert (CONST_INT_P (x));
15215 gcc_assert (INTVAL (x) == ROUND_SAE);
15217 if (ASSEMBLER_DIALECT == ASM_INTEL)
15218 fputs (", ", file);
15220 fputs ("{sae}", file);
15222 if (ASSEMBLER_DIALECT == ASM_ATT)
15223 fputs (", ", file);
15225 return;
15227 case 'R':
15228 gcc_assert (CONST_INT_P (x));
15230 if (ASSEMBLER_DIALECT == ASM_INTEL)
15231 fputs (", ", file);
15233 switch (INTVAL (x))
15235 case ROUND_NEAREST_INT | ROUND_SAE:
15236 fputs ("{rn-sae}", file);
15237 break;
15238 case ROUND_NEG_INF | ROUND_SAE:
15239 fputs ("{rd-sae}", file);
15240 break;
15241 case ROUND_POS_INF | ROUND_SAE:
15242 fputs ("{ru-sae}", file);
15243 break;
15244 case ROUND_ZERO | ROUND_SAE:
15245 fputs ("{rz-sae}", file);
15246 break;
15247 default:
15248 gcc_unreachable ();
15251 if (ASSEMBLER_DIALECT == ASM_ATT)
15252 fputs (", ", file);
15254 return;
15256 case '*':
15257 if (ASSEMBLER_DIALECT == ASM_ATT)
15258 putc ('*', file);
15259 return;
15261 case '&':
15263 const char *name = get_some_local_dynamic_name ();
15264 if (name == NULL)
15265 output_operand_lossage ("'%%&' used without any "
15266 "local dynamic TLS references");
15267 else
15268 assemble_name (file, name);
15269 return;
15272 case '+':
15274 rtx x;
15276 if (!optimize
15277 || optimize_function_for_size_p (cfun)
15278 || !TARGET_BRANCH_PREDICTION_HINTS)
15279 return;
15281 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15282 if (x)
15284 int pred_val = XINT (x, 0);
15286 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15287 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15289 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15290 bool cputaken
15291 = final_forward_branch_p (current_output_insn) == 0;
15293 /* Emit hints only in the case where the default branch prediction
15294 heuristics would fail. */
15295 if (taken != cputaken)
15297 /* We use 3e (DS) prefix for taken branches and
15298 2e (CS) prefix for not taken branches. */
15299 if (taken)
15300 fputs ("ds ; ", file);
15301 else
15302 fputs ("cs ; ", file);
15306 return;
15309 case ';':
15310 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15311 putc (';', file);
15312 #endif
15313 return;
15315 case '@':
15316 if (ASSEMBLER_DIALECT == ASM_ATT)
15317 putc ('%', file);
15319 /* The kernel uses a different segment register for performance
15320 reasons; a system call would not have to trash the userspace
15321 segment register, which would be expensive. */
15322 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15323 fputs ("fs", file);
15324 else
15325 fputs ("gs", file);
15326 return;
15328 case '~':
15329 putc (TARGET_AVX2 ? 'i' : 'f', file);
15330 return;
15332 case '^':
15333 if (TARGET_64BIT && Pmode != word_mode)
15334 fputs ("addr32 ", file);
15335 return;
15337 default:
15338 output_operand_lossage ("invalid operand code '%c'", code);
15342 if (REG_P (x))
15343 print_reg (x, code, file);
15345 else if (MEM_P (x))
15347 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15348 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15349 && GET_MODE (x) != BLKmode)
15351 const char * size;
15352 switch (GET_MODE_SIZE (GET_MODE (x)))
15354 case 1: size = "BYTE"; break;
15355 case 2: size = "WORD"; break;
15356 case 4: size = "DWORD"; break;
15357 case 8: size = "QWORD"; break;
15358 case 12: size = "TBYTE"; break;
15359 case 16:
15360 if (GET_MODE (x) == XFmode)
15361 size = "TBYTE";
15362 else
15363 size = "XMMWORD";
15364 break;
15365 case 32: size = "YMMWORD"; break;
15366 case 64: size = "ZMMWORD"; break;
15367 default:
15368 gcc_unreachable ();
15371 /* Check for explicit size override (codes 'b', 'w', 'k',
15372 'q' and 'x') */
15373 if (code == 'b')
15374 size = "BYTE";
15375 else if (code == 'w')
15376 size = "WORD";
15377 else if (code == 'k')
15378 size = "DWORD";
15379 else if (code == 'q')
15380 size = "QWORD";
15381 else if (code == 'x')
15382 size = "XMMWORD";
15384 fputs (size, file);
15385 fputs (" PTR ", file);
15388 x = XEXP (x, 0);
15389 /* Avoid (%rip) for call operands. */
15390 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15391 && !CONST_INT_P (x))
15392 output_addr_const (file, x);
15393 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15394 output_operand_lossage ("invalid constraints for operand");
15395 else
15396 output_address (x);
15399 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15401 REAL_VALUE_TYPE r;
15402 long l;
15404 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15405 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15407 if (ASSEMBLER_DIALECT == ASM_ATT)
15408 putc ('$', file);
15409 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15410 if (code == 'q')
15411 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15412 (unsigned long long) (int) l);
15413 else
15414 fprintf (file, "0x%08x", (unsigned int) l);
15417 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15419 REAL_VALUE_TYPE r;
15420 long l[2];
15422 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15423 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15425 if (ASSEMBLER_DIALECT == ASM_ATT)
15426 putc ('$', file);
15427 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15430 /* These float cases don't actually occur as immediate operands. */
15431 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15433 char dstr[30];
15435 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15436 fputs (dstr, file);
15439 else
15441 /* We have patterns that allow zero sets of memory, for instance.
15442 In 64-bit mode, we should probably support all 8-byte vectors,
15443 since we can in fact encode that into an immediate. */
15444 if (GET_CODE (x) == CONST_VECTOR)
15446 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15447 x = const0_rtx;
15450 if (code != 'P' && code != 'p')
15452 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15454 if (ASSEMBLER_DIALECT == ASM_ATT)
15455 putc ('$', file);
15457 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15458 || GET_CODE (x) == LABEL_REF)
15460 if (ASSEMBLER_DIALECT == ASM_ATT)
15461 putc ('$', file);
15462 else
15463 fputs ("OFFSET FLAT:", file);
15466 if (CONST_INT_P (x))
15467 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15468 else if (flag_pic || MACHOPIC_INDIRECT)
15469 output_pic_addr_const (file, x, code);
15470 else
15471 output_addr_const (file, x);
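/* Sketch of how the codes above are used in insn output templates
   (hypothetical template, not taken from i386.md):

       "add%z0\t{%1, %0|%0, %1}"

   With operand 0 = %edx (SImode) and operand 1 = %eax this prints
   "addl  %eax, %edx" in AT&T syntax; in Intel syntax the %z suffix is
   suppressed and the {att|intel} alternative swaps the operand order,
   giving "add  edx, eax".  */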
15475 static bool
15476 ix86_print_operand_punct_valid_p (unsigned char code)
15478 return (code == '@' || code == '*' || code == '+' || code == '&'
15479 || code == ';' || code == '~' || code == '^');
15482 /* Print a memory operand whose address is ADDR. */
15484 static void
15485 ix86_print_operand_address (FILE *file, rtx addr)
15487 struct ix86_address parts;
15488 rtx base, index, disp;
15489 int scale;
15490 int ok;
15491 bool vsib = false;
15492 int code = 0;
15494 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15496 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15497 gcc_assert (parts.index == NULL_RTX);
15498 parts.index = XVECEXP (addr, 0, 1);
15499 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15500 addr = XVECEXP (addr, 0, 0);
15501 vsib = true;
15503 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15505 gcc_assert (TARGET_64BIT);
15506 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15507 code = 'q';
15509 else
15510 ok = ix86_decompose_address (addr, &parts);
15512 gcc_assert (ok);
15514 base = parts.base;
15515 index = parts.index;
15516 disp = parts.disp;
15517 scale = parts.scale;
15519 switch (parts.seg)
15521 case SEG_DEFAULT:
15522 break;
15523 case SEG_FS:
15524 case SEG_GS:
15525 if (ASSEMBLER_DIALECT == ASM_ATT)
15526 putc ('%', file);
15527 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15528 break;
15529 default:
15530 gcc_unreachable ();
15533 /* Use one byte shorter RIP relative addressing for 64bit mode. */
15534 if (TARGET_64BIT && !base && !index)
15536 rtx symbol = disp;
15538 if (GET_CODE (disp) == CONST
15539 && GET_CODE (XEXP (disp, 0)) == PLUS
15540 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15541 symbol = XEXP (XEXP (disp, 0), 0);
15543 if (GET_CODE (symbol) == LABEL_REF
15544 || (GET_CODE (symbol) == SYMBOL_REF
15545 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15546 base = pc_rtx;
15548 if (!base && !index)
15550 /* A displacement-only address requires special attention. */
15552 if (CONST_INT_P (disp))
15554 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15555 fputs ("ds:", file);
15556 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15558 else if (flag_pic)
15559 output_pic_addr_const (file, disp, 0);
15560 else
15561 output_addr_const (file, disp);
15563 else
15565 /* Print SImode register names to force addr32 prefix. */
15566 if (SImode_address_operand (addr, VOIDmode))
15568 #ifdef ENABLE_CHECKING
15569 gcc_assert (TARGET_64BIT);
15570 switch (GET_CODE (addr))
15572 case SUBREG:
15573 gcc_assert (GET_MODE (addr) == SImode);
15574 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15575 break;
15576 case ZERO_EXTEND:
15577 case AND:
15578 gcc_assert (GET_MODE (addr) == DImode);
15579 break;
15580 default:
15581 gcc_unreachable ();
15583 #endif
15584 gcc_assert (!code);
15585 code = 'k';
15587 else if (code == 0
15588 && TARGET_X32
15589 && disp
15590 && CONST_INT_P (disp)
15591 && INTVAL (disp) < -16*1024*1024)
15593 /* X32 runs in 64-bit mode, where displacement, DISP, in
15594 address DISP(%r64), is encoded as 32-bit immediate sign-
15595 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15596 address is %r64 + 0xffffffffbffffd00. When %r64 <
15597 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15598 which is invalid for x32. The correct address is %r64
15599 - 0x40000300 == 0xf7ffdd64. To properly encode
15600 -0x40000300(%r64) for x32, we zero-extend negative
15601 displacement by forcing addr32 prefix which truncates
15602 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15603 zero-extend all negative displacements, including -1(%rsp).
15604 However, for small negative displacements, sign-extension
15605 won't cause overflow. We only zero-extend negative
15606 displacements if they are < -16*1024*1024, which is the same bound
15607 used to check legitimate address displacements for PIC. */
15608 code = 'k';
15611 if (ASSEMBLER_DIALECT == ASM_ATT)
15613 if (disp)
15615 if (flag_pic)
15616 output_pic_addr_const (file, disp, 0);
15617 else if (GET_CODE (disp) == LABEL_REF)
15618 output_asm_label (disp);
15619 else
15620 output_addr_const (file, disp);
15623 putc ('(', file);
15624 if (base)
15625 print_reg (base, code, file);
15626 if (index)
15628 putc (',', file);
15629 print_reg (index, vsib ? 0 : code, file);
15630 if (scale != 1 || vsib)
15631 fprintf (file, ",%d", scale);
15633 putc (')', file);
15635 else
15637 rtx offset = NULL_RTX;
15639 if (disp)
15641 /* Pull out the offset of a symbol; print any symbol itself. */
15642 if (GET_CODE (disp) == CONST
15643 && GET_CODE (XEXP (disp, 0)) == PLUS
15644 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15646 offset = XEXP (XEXP (disp, 0), 1);
15647 disp = gen_rtx_CONST (VOIDmode,
15648 XEXP (XEXP (disp, 0), 0));
15651 if (flag_pic)
15652 output_pic_addr_const (file, disp, 0);
15653 else if (GET_CODE (disp) == LABEL_REF)
15654 output_asm_label (disp);
15655 else if (CONST_INT_P (disp))
15656 offset = disp;
15657 else
15658 output_addr_const (file, disp);
15661 putc ('[', file);
15662 if (base)
15664 print_reg (base, code, file);
15665 if (offset)
15667 if (INTVAL (offset) >= 0)
15668 putc ('+', file);
15669 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15672 else if (offset)
15673 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15674 else
15675 putc ('0', file);
15677 if (index)
15679 putc ('+', file);
15680 print_reg (index, vsib ? 0 : code, file);
15681 if (scale != 1 || vsib)
15682 fprintf (file, "*%d", scale);
15684 putc (']', file);
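/* Sketch of the two dialects produced above: an address with base
   %rbx, index %rcx, scale 4 and displacement 16 prints as

       16(%rbx,%rcx,4)       AT&T
       [rbx+16+rcx*4]        Intel

   and a 64-bit access to a non-TLS symbol with no base or index picks
   up pc_rtx as the base above and prints as "sym(%rip)".  */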
15689 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15691 static bool
15692 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15694 rtx op;
15696 if (GET_CODE (x) != UNSPEC)
15697 return false;
15699 op = XVECEXP (x, 0, 0);
15700 switch (XINT (x, 1))
15702 case UNSPEC_GOTTPOFF:
15703 output_addr_const (file, op);
15704 /* FIXME: This might be @TPOFF in Sun ld. */
15705 fputs ("@gottpoff", file);
15706 break;
15707 case UNSPEC_TPOFF:
15708 output_addr_const (file, op);
15709 fputs ("@tpoff", file);
15710 break;
15711 case UNSPEC_NTPOFF:
15712 output_addr_const (file, op);
15713 if (TARGET_64BIT)
15714 fputs ("@tpoff", file);
15715 else
15716 fputs ("@ntpoff", file);
15717 break;
15718 case UNSPEC_DTPOFF:
15719 output_addr_const (file, op);
15720 fputs ("@dtpoff", file);
15721 break;
15722 case UNSPEC_GOTNTPOFF:
15723 output_addr_const (file, op);
15724 if (TARGET_64BIT)
15725 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15726 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15727 else
15728 fputs ("@gotntpoff", file);
15729 break;
15730 case UNSPEC_INDNTPOFF:
15731 output_addr_const (file, op);
15732 fputs ("@indntpoff", file);
15733 break;
15734 #if TARGET_MACHO
15735 case UNSPEC_MACHOPIC_OFFSET:
15736 output_addr_const (file, op);
15737 putc ('-', file);
15738 machopic_output_function_base_name (file);
15739 break;
15740 #endif
15742 case UNSPEC_STACK_CHECK:
15744 int offset;
15746 gcc_assert (flag_split_stack);
15748 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15749 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15750 #else
15751 gcc_unreachable ();
15752 #endif
15754 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15756 break;
15758 default:
15759 return false;
15762 return true;
15765 /* Split one or more double-mode RTL references into pairs of half-mode
15766 references. The RTL can be REG, offsettable MEM, integer constant, or
15767 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15768 split and "num" is its length. lo_half and hi_half are output arrays
15769 that parallel "operands". */
15771 void
15772 split_double_mode (enum machine_mode mode, rtx operands[],
15773 int num, rtx lo_half[], rtx hi_half[])
15775 enum machine_mode half_mode;
15776 unsigned int byte;
15778 switch (mode)
15780 case TImode:
15781 half_mode = DImode;
15782 break;
15783 case DImode:
15784 half_mode = SImode;
15785 break;
15786 default:
15787 gcc_unreachable ();
15790 byte = GET_MODE_SIZE (half_mode);
15792 while (num--)
15794 rtx op = operands[num];
15796 /* simplify_subreg refuses to split volatile memory addresses,
15797 but we still have to handle them. */
15798 if (MEM_P (op))
15800 lo_half[num] = adjust_address (op, half_mode, 0);
15801 hi_half[num] = adjust_address (op, half_mode, byte);
15803 else
15805 lo_half[num] = simplify_gen_subreg (half_mode, op,
15806 GET_MODE (op) == VOIDmode
15807 ? mode : GET_MODE (op), 0);
15808 hi_half[num] = simplify_gen_subreg (half_mode, op,
15809 GET_MODE (op) == VOIDmode
15810 ? mode : GET_MODE (op), byte);
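/* A minimal usage sketch (hypothetical caller, not from this file):

       rtx lo[2], hi[2];

       split_double_mode (DImode, operands, 2, lo, hi);
       emit_move_insn (lo[0], lo[1]);
       emit_move_insn (hi[0], hi[1]);

   splits two DImode operands into SImode halves so that a 64-bit move
   can be emitted as two 32-bit moves on 32-bit targets.  */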
15815 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15816 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15817 is the expression of the binary operation. The output may either be
15818 emitted here, or returned to the caller, like all output_* functions.
15820 There is no guarantee that the operands are the same mode, as they
15821 might be within FLOAT or FLOAT_EXTEND expressions. */
15823 #ifndef SYSV386_COMPAT
15824 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15825 wants to fix the assemblers because that causes incompatibility
15826 with gcc. No-one wants to fix gcc because that causes
15827 incompatibility with assemblers... You can use the option of
15828 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15829 #define SYSV386_COMPAT 1
15830 #endif
15832 const char *
15833 output_387_binary_op (rtx insn, rtx *operands)
15835 static char buf[40];
15836 const char *p;
15837 const char *ssep;
15838 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15840 #ifdef ENABLE_CHECKING
15841 /* Even if we do not want to check the inputs, this documents the input
15842 constraints, which helps in understanding the following code. */
15843 if (STACK_REG_P (operands[0])
15844 && ((REG_P (operands[1])
15845 && REGNO (operands[0]) == REGNO (operands[1])
15846 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15847 || (REG_P (operands[2])
15848 && REGNO (operands[0]) == REGNO (operands[2])
15849 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15850 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15851 ; /* ok */
15852 else
15853 gcc_assert (is_sse);
15854 #endif
15856 switch (GET_CODE (operands[3]))
15858 case PLUS:
15859 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15860 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15861 p = "fiadd";
15862 else
15863 p = "fadd";
15864 ssep = "vadd";
15865 break;
15867 case MINUS:
15868 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15869 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15870 p = "fisub";
15871 else
15872 p = "fsub";
15873 ssep = "vsub";
15874 break;
15876 case MULT:
15877 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15878 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15879 p = "fimul";
15880 else
15881 p = "fmul";
15882 ssep = "vmul";
15883 break;
15885 case DIV:
15886 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15887 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15888 p = "fidiv";
15889 else
15890 p = "fdiv";
15891 ssep = "vdiv";
15892 break;
15894 default:
15895 gcc_unreachable ();
15898 if (is_sse)
15900 if (TARGET_AVX)
15902 strcpy (buf, ssep);
15903 if (GET_MODE (operands[0]) == SFmode)
15904 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15905 else
15906 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15908 else
15910 strcpy (buf, ssep + 1);
15911 if (GET_MODE (operands[0]) == SFmode)
15912 strcat (buf, "ss\t{%2, %0|%0, %2}");
15913 else
15914 strcat (buf, "sd\t{%2, %0|%0, %2}");
15916 return buf;
15918 strcpy (buf, p);
15920 switch (GET_CODE (operands[3]))
15922 case MULT:
15923 case PLUS:
15924 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15926 rtx temp = operands[2];
15927 operands[2] = operands[1];
15928 operands[1] = temp;
15931 /* We know operands[0] == operands[1]. */
15933 if (MEM_P (operands[2]))
15935 p = "%Z2\t%2";
15936 break;
15939 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15941 if (STACK_TOP_P (operands[0]))
15942 /* How is it that we are storing to a dead operand[2]?
15943 Well, presumably operands[1] is dead too. We can't
15944 store the result to st(0) as st(0) gets popped on this
15945 instruction. Instead store to operands[2] (which I
15946 think has to be st(1)). st(1) will be popped later.
15947 gcc <= 2.8.1 didn't have this check and generated
15948 assembly code that the Unixware assembler rejected. */
15949 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15950 else
15951 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15952 break;
15955 if (STACK_TOP_P (operands[0]))
15956 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15957 else
15958 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15959 break;
15961 case MINUS:
15962 case DIV:
15963 if (MEM_P (operands[1]))
15965 p = "r%Z1\t%1";
15966 break;
15969 if (MEM_P (operands[2]))
15971 p = "%Z2\t%2";
15972 break;
15975 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15977 #if SYSV386_COMPAT
15978 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15979 derived assemblers, confusingly reverse the direction of
15980 the operation for fsub{r} and fdiv{r} when the
15981 destination register is not st(0). The Intel assembler
15982 doesn't have this brain damage. Read !SYSV386_COMPAT to
15983 figure out what the hardware really does. */
15984 if (STACK_TOP_P (operands[0]))
15985 p = "{p\t%0, %2|rp\t%2, %0}";
15986 else
15987 p = "{rp\t%2, %0|p\t%0, %2}";
15988 #else
15989 if (STACK_TOP_P (operands[0]))
15990 /* As above for fmul/fadd, we can't store to st(0). */
15991 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15992 else
15993 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15994 #endif
15995 break;
15998 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16000 #if SYSV386_COMPAT
16001 if (STACK_TOP_P (operands[0]))
16002 p = "{rp\t%0, %1|p\t%1, %0}";
16003 else
16004 p = "{p\t%1, %0|rp\t%0, %1}";
16005 #else
16006 if (STACK_TOP_P (operands[0]))
16007 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16008 else
16009 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16010 #endif
16011 break;
16014 if (STACK_TOP_P (operands[0]))
16016 if (STACK_TOP_P (operands[1]))
16017 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16018 else
16019 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16020 break;
16022 else if (STACK_TOP_P (operands[1]))
16024 #if SYSV386_COMPAT
16025 p = "{\t%1, %0|r\t%0, %1}";
16026 #else
16027 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16028 #endif
16030 else
16032 #if SYSV386_COMPAT
16033 p = "{r\t%2, %0|\t%0, %2}";
16034 #else
16035 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16036 #endif
16038 break;
16040 default:
16041 gcc_unreachable ();
16044 strcat (buf, p);
16045 return buf;
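/* Sketch of the strings produced above for SFmode operands: an SSE
   PLUS yields "vaddss\t{%2, %1, %0|%0, %1, %2}" with AVX enabled and
   "addss\t{%2, %0|%0, %2}" without it; in the x87 case a
   register-register PLUS whose second input dies commonly yields
   "faddp\t{%2, %0|%0, %2}".  */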
16048 /* Check if a 256bit AVX register is referenced inside of EXP. */
16050 static int
16051 ix86_check_avx256_register (rtx *pexp, void *data ATTRIBUTE_UNUSED)
16053 rtx exp = *pexp;
16055 if (GET_CODE (exp) == SUBREG)
16056 exp = SUBREG_REG (exp);
16058 if (REG_P (exp)
16059 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
16060 return 1;
16062 return 0;
16065 /* Return needed mode for entity in optimize_mode_switching pass. */
16067 static int
16068 ix86_avx_u128_mode_needed (rtx insn)
16070 if (CALL_P (insn))
16072 rtx link;
16074 /* Needed mode is set to AVX_U128_CLEAN if there are
16075 no 256bit modes used in function arguments. */
16076 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16077 link;
16078 link = XEXP (link, 1))
16080 if (GET_CODE (XEXP (link, 0)) == USE)
16082 rtx arg = XEXP (XEXP (link, 0), 0);
16084 if (ix86_check_avx256_register (&arg, NULL))
16085 return AVX_U128_DIRTY;
16089 return AVX_U128_CLEAN;
16092 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16093 changes state only when a 256bit register is written to, but we need
16094 to prevent the compiler from moving the optimal insertion point above
16095 an eventual read from a 256bit register. */
16096 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
16097 return AVX_U128_DIRTY;
16099 return AVX_U128_ANY;
16102 /* Return mode that i387 must be switched into
16103 prior to the execution of insn. */
16105 static int
16106 ix86_i387_mode_needed (int entity, rtx insn)
16108 enum attr_i387_cw mode;
16110 /* The mode UNINITIALIZED is used to store the control word after a
16111 function call or ASM pattern. The mode ANY specifies that the function
16112 has no requirements on the control word and makes no changes in the
16113 bits we are interested in. */
16115 if (CALL_P (insn)
16116 || (NONJUMP_INSN_P (insn)
16117 && (asm_noperands (PATTERN (insn)) >= 0
16118 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16119 return I387_CW_UNINITIALIZED;
16121 if (recog_memoized (insn) < 0)
16122 return I387_CW_ANY;
16124 mode = get_attr_i387_cw (insn);
16126 switch (entity)
16128 case I387_TRUNC:
16129 if (mode == I387_CW_TRUNC)
16130 return mode;
16131 break;
16133 case I387_FLOOR:
16134 if (mode == I387_CW_FLOOR)
16135 return mode;
16136 break;
16138 case I387_CEIL:
16139 if (mode == I387_CW_CEIL)
16140 return mode;
16141 break;
16143 case I387_MASK_PM:
16144 if (mode == I387_CW_MASK_PM)
16145 return mode;
16146 break;
16148 default:
16149 gcc_unreachable ();
16152 return I387_CW_ANY;
16155 /* Return mode that entity must be switched into
16156 prior to the execution of insn. */
16159 ix86_mode_needed (int entity, rtx insn)
16161 switch (entity)
16163 case AVX_U128:
16164 return ix86_avx_u128_mode_needed (insn);
16165 case I387_TRUNC:
16166 case I387_FLOOR:
16167 case I387_CEIL:
16168 case I387_MASK_PM:
16169 return ix86_i387_mode_needed (entity, insn);
16170 default:
16171 gcc_unreachable ();
16173 return 0;
16176 /* Check if a 256bit AVX register is referenced in stores. */
16178 static void
16179 ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data)
16181 if (ix86_check_avx256_register (&dest, NULL))
16183 bool *used = (bool *) data;
16184 *used = true;
16188 /* Calculate mode of upper 128bit AVX registers after the insn. */
16190 static int
16191 ix86_avx_u128_mode_after (int mode, rtx insn)
16193 rtx pat = PATTERN (insn);
16195 if (vzeroupper_operation (pat, VOIDmode)
16196 || vzeroall_operation (pat, VOIDmode))
16197 return AVX_U128_CLEAN;
16199 /* We know that the state is clean after a CALL insn if no
16200 256bit register is used in the function return value. */
16201 if (CALL_P (insn))
16203 bool avx_reg256_found = false;
16204 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16206 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16209 /* Otherwise, return the current mode. Remember that if the insn
16210 references AVX 256bit registers, the mode was already changed
16211 to DIRTY by MODE_NEEDED. */
16212 return mode;
16215 /* Return the mode that an insn results in. */
16218 ix86_mode_after (int entity, int mode, rtx insn)
16220 switch (entity)
16222 case AVX_U128:
16223 return ix86_avx_u128_mode_after (mode, insn);
16224 case I387_TRUNC:
16225 case I387_FLOOR:
16226 case I387_CEIL:
16227 case I387_MASK_PM:
16228 return mode;
16229 default:
16230 gcc_unreachable ();
16234 static int
16235 ix86_avx_u128_mode_entry (void)
16237 tree arg;
16239 /* Entry mode is set to AVX_U128_DIRTY if there are
16240 256bit modes used in function arguments. */
16241 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16242 arg = TREE_CHAIN (arg))
16244 rtx incoming = DECL_INCOMING_RTL (arg);
16246 if (incoming && ix86_check_avx256_register (&incoming, NULL))
16247 return AVX_U128_DIRTY;
16250 return AVX_U128_CLEAN;
16253 /* Return a mode that ENTITY is assumed to be
16254 switched to at function entry. */
16257 ix86_mode_entry (int entity)
16259 switch (entity)
16261 case AVX_U128:
16262 return ix86_avx_u128_mode_entry ();
16263 case I387_TRUNC:
16264 case I387_FLOOR:
16265 case I387_CEIL:
16266 case I387_MASK_PM:
16267 return I387_CW_ANY;
16268 default:
16269 gcc_unreachable ();
16273 static int
16274 ix86_avx_u128_mode_exit (void)
16276 rtx reg = crtl->return_rtx;
16278 /* Exit mode is set to AVX_U128_DIRTY if there are
16279 256bit modes used in the function return register. */
16280 if (reg && ix86_check_avx256_register (&reg, NULL))
16281 return AVX_U128_DIRTY;
16283 return AVX_U128_CLEAN;
16286 /* Return a mode that ENTITY is assumed to be
16287 switched to at function exit. */
16290 ix86_mode_exit (int entity)
16292 switch (entity)
16294 case AVX_U128:
16295 return ix86_avx_u128_mode_exit ();
16296 case I387_TRUNC:
16297 case I387_FLOOR:
16298 case I387_CEIL:
16299 case I387_MASK_PM:
16300 return I387_CW_ANY;
16301 default:
16302 gcc_unreachable ();
16306 /* Output code to initialize control word copies used by trunc?f?i and
16307 rounding patterns. CURRENT_MODE is set to the current control word,
16308 while NEW_MODE is set to the new control word. */
16310 static void
16311 emit_i387_cw_initialization (int mode)
16313 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16314 rtx new_mode;
16316 enum ix86_stack_slot slot;
16318 rtx reg = gen_reg_rtx (HImode);
16320 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16321 emit_move_insn (reg, copy_rtx (stored_mode));
16323 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16324 || optimize_insn_for_size_p ())
16326 switch (mode)
16328 case I387_CW_TRUNC:
16329 /* round toward zero (truncate) */
16330 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16331 slot = SLOT_CW_TRUNC;
16332 break;
16334 case I387_CW_FLOOR:
16335 /* round down toward -oo */
16336 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16337 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16338 slot = SLOT_CW_FLOOR;
16339 break;
16341 case I387_CW_CEIL:
16342 /* round up toward +oo */
16343 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16344 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16345 slot = SLOT_CW_CEIL;
16346 break;
16348 case I387_CW_MASK_PM:
16349 /* mask precision exception for nearbyint() */
16350 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16351 slot = SLOT_CW_MASK_PM;
16352 break;
16354 default:
16355 gcc_unreachable ();
16358 else
16360 switch (mode)
16362 case I387_CW_TRUNC:
16363 /* round toward zero (truncate) */
16364 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16365 slot = SLOT_CW_TRUNC;
16366 break;
16368 case I387_CW_FLOOR:
16369 /* round down toward -oo */
16370 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16371 slot = SLOT_CW_FLOOR;
16372 break;
16374 case I387_CW_CEIL:
16375 /* round up toward +oo */
16376 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16377 slot = SLOT_CW_CEIL;
16378 break;
16380 case I387_CW_MASK_PM:
16381 /* mask precision exception for nearbyint() */
16382 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16383 slot = SLOT_CW_MASK_PM;
16384 break;
16386 default:
16387 gcc_unreachable ();
16391 gcc_assert (slot < MAX_386_STACK_LOCALS);
16393 new_mode = assign_386_stack_local (HImode, slot);
16394 emit_move_insn (new_mode, reg);
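/* For reference, the rounding-control field written above occupies
   bits 10-11 of the x87 control word (a sketch of the values used):

       0x0000   round to nearest (hardware default)
       0x0400   round down toward -inf   (I387_CW_FLOOR)
       0x0800   round up toward +inf     (I387_CW_CEIL)
       0x0c00   round toward zero        (I387_CW_TRUNC)

   and bit 5 (0x0020) masks the precision exception (I387_CW_MASK_PM).  */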
16397 /* Emit vzeroupper. */
16399 void
16400 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16402 int i;
16404 /* Cancel automatic vzeroupper insertion if there are
16405 live call-saved SSE registers at the insertion point. */
16407 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16408 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16409 return;
16411 if (TARGET_64BIT)
16412 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16413 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16414 return;
16416 emit_insn (gen_avx_vzeroupper ());
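/* Illustrative sketch of the AVX_U128 machinery: in a function that
   computes in 256-bit ymm registers and then calls a routine that only
   uses legacy SSE, the mode-switching pass requests AVX_U128_CLEAN at
   the call and the hook above emits

       vzeroupper

   just before it, avoiding the AVX/SSE transition penalty.  The
   insertion is cancelled when a live call-saved SSE register would
   have its upper bits cleared.  */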
16419 /* Generate one or more insns to set ENTITY to MODE. */
16421 void
16422 ix86_emit_mode_set (int entity, int mode, HARD_REG_SET regs_live)
16424 switch (entity)
16426 case AVX_U128:
16427 if (mode == AVX_U128_CLEAN)
16428 ix86_avx_emit_vzeroupper (regs_live);
16429 break;
16430 case I387_TRUNC:
16431 case I387_FLOOR:
16432 case I387_CEIL:
16433 case I387_MASK_PM:
16434 if (mode != I387_CW_ANY
16435 && mode != I387_CW_UNINITIALIZED)
16436 emit_i387_cw_initialization (mode);
16437 break;
16438 default:
16439 gcc_unreachable ();
16443 /* Output code for INSN to convert a float to a signed int. OPERANDS
16444 are the insn operands. The output may be [HSD]Imode and the input
16445 operand may be [SDX]Fmode. */
16447 const char *
16448 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
16450 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16451 int dimode_p = GET_MODE (operands[0]) == DImode;
16452 int round_mode = get_attr_i387_cw (insn);
16454 /* Jump through a hoop or two for DImode, since the hardware has no
16455 non-popping instruction. We used to do this a different way, but
16456 that was somewhat fragile and broke with post-reload splitters. */
16457 if ((dimode_p || fisttp) && !stack_top_dies)
16458 output_asm_insn ("fld\t%y1", operands);
16460 gcc_assert (STACK_TOP_P (operands[1]));
16461 gcc_assert (MEM_P (operands[0]));
16462 gcc_assert (GET_MODE (operands[1]) != TFmode);
16464 if (fisttp)
16465 output_asm_insn ("fisttp%Z0\t%0", operands);
16466 else
16468 if (round_mode != I387_CW_ANY)
16469 output_asm_insn ("fldcw\t%3", operands);
16470 if (stack_top_dies || dimode_p)
16471 output_asm_insn ("fistp%Z0\t%0", operands);
16472 else
16473 output_asm_insn ("fist%Z0\t%0", operands);
16474 if (round_mode != I387_CW_ANY)
16475 output_asm_insn ("fldcw\t%2", operands);
16478 return "";
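/* Sketch of the sequence emitted above for a non-fisttp truncation
   whose top-of-stack operand dies (operand numbers as in the insn
   pattern):

       fldcw  %3        # switch to the truncating control word
       fistp%Z0  %0     # convert, store and pop
       fldcw  %2        # restore the original control word

   with an extra leading "fld %y1" when the value has to survive a
   popping store (the DImode / fisttp case handled first).  */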
16481 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16482 have the values zero or one, indicates the ffreep insn's operand
16483 from the OPERANDS array. */
16485 static const char *
16486 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16488 if (TARGET_USE_FFREEP)
16489 #ifdef HAVE_AS_IX86_FFREEP
16490 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16491 #else
16493 static char retval[32];
16494 int regno = REGNO (operands[opno]);
16496 gcc_assert (STACK_REGNO_P (regno));
16498 regno -= FIRST_STACK_REG;
16500 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16501 return retval;
16503 #endif
16505 return opno ? "fstp\t%y1" : "fstp\t%y0";
16509 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16510 should be used. UNORDERED_P is true when fucom should be used. */
16512 const char *
16513 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16515 int stack_top_dies;
16516 rtx cmp_op0, cmp_op1;
16517 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16519 if (eflags_p)
16521 cmp_op0 = operands[0];
16522 cmp_op1 = operands[1];
16524 else
16526 cmp_op0 = operands[1];
16527 cmp_op1 = operands[2];
16530 if (is_sse)
16532 if (GET_MODE (operands[0]) == SFmode)
16533 if (unordered_p)
16534 return "%vucomiss\t{%1, %0|%0, %1}";
16535 else
16536 return "%vcomiss\t{%1, %0|%0, %1}";
16537 else
16538 if (unordered_p)
16539 return "%vucomisd\t{%1, %0|%0, %1}";
16540 else
16541 return "%vcomisd\t{%1, %0|%0, %1}";
16544 gcc_assert (STACK_TOP_P (cmp_op0));
16546 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16548 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16550 if (stack_top_dies)
16552 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16553 return output_387_ffreep (operands, 1);
16555 else
16556 return "ftst\n\tfnstsw\t%0";
16559 if (STACK_REG_P (cmp_op1)
16560 && stack_top_dies
16561 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16562 && REGNO (cmp_op1) != FIRST_STACK_REG)
16564 /* If the top of the 387 stack dies, and the other operand
16565 is also a stack register that dies, then this must be a
16566 `fcompp' float compare. */
16568 if (eflags_p)
16570 /* There is no double popping fcomi variant. Fortunately,
16571 eflags is immune from the fstp's cc clobbering. */
16572 if (unordered_p)
16573 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16574 else
16575 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16576 return output_387_ffreep (operands, 0);
16578 else
16580 if (unordered_p)
16581 return "fucompp\n\tfnstsw\t%0";
16582 else
16583 return "fcompp\n\tfnstsw\t%0";
16586 else
16588 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16590 static const char * const alt[16] =
16592 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16593 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16594 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16595 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16597 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16598 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16599 NULL,
16600 NULL,
16602 "fcomi\t{%y1, %0|%0, %y1}",
16603 "fcomip\t{%y1, %0|%0, %y1}",
16604 "fucomi\t{%y1, %0|%0, %y1}",
16605 "fucomip\t{%y1, %0|%0, %y1}",
16607 NULL,
16608 NULL,
16609 NULL,
16610 NULL
16613 int mask;
16614 const char *ret;
16616 mask = eflags_p << 3;
16617 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16618 mask |= unordered_p << 1;
16619 mask |= stack_top_dies;
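 /* For example, a register-register unordered comparison that sets eflags
    and pops the dying stack top (eflags_p=1, integer operand=0,
    unordered_p=1, stack_top_dies=1) yields mask 0b1011 = 11, selecting
    "fucomip" from the table above.  */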
16621 gcc_assert (mask < 16);
16622 ret = alt[mask];
16623 gcc_assert (ret);
16625 return ret;
16629 void
16630 ix86_output_addr_vec_elt (FILE *file, int value)
16632 const char *directive = ASM_LONG;
16634 #ifdef ASM_QUAD
16635 if (TARGET_LP64)
16636 directive = ASM_QUAD;
16637 #else
16638 gcc_assert (!TARGET_64BIT);
16639 #endif
16641 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16644 void
16645 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16647 const char *directive = ASM_LONG;
16649 #ifdef ASM_QUAD
16650 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16651 directive = ASM_QUAD;
16652 #else
16653 gcc_assert (!TARGET_64BIT);
16654 #endif
16655 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16656 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16657 fprintf (file, "%s%s%d-%s%d\n",
16658 directive, LPREFIX, value, LPREFIX, rel);
16659 else if (HAVE_AS_GOTOFF_IN_DATA)
16660 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16661 #if TARGET_MACHO
16662 else if (TARGET_MACHO)
16664 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16665 machopic_output_function_base_name (file);
16666 putc ('\n', file);
16668 #endif
16669 else
16670 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16671 GOT_SYMBOL_NAME, LPREFIX, value);
16674 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16675 for the target. */
16677 void
16678 ix86_expand_clear (rtx dest)
16680 rtx tmp;
16682 /* We play register width games, which are only valid after reload. */
16683 gcc_assert (reload_completed);
16685 /* Avoid HImode and its attendant prefix byte. */
16686 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16687 dest = gen_rtx_REG (SImode, REGNO (dest));
16688 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16690 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
16691 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
16693 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16694 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16697 emit_insn (tmp);
16700 /* X is an unchanging MEM. If it is a constant pool reference, return
16701 the constant pool rtx, else NULL. */
16704 maybe_get_pool_constant (rtx x)
16706 x = ix86_delegitimize_address (XEXP (x, 0));
16708 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16709 return get_pool_constant (x);
16711 return NULL_RTX;
16714 void
16715 ix86_expand_move (enum machine_mode mode, rtx operands[])
16717 rtx op0, op1;
16718 enum tls_model model;
16720 op0 = operands[0];
16721 op1 = operands[1];
16723 if (GET_CODE (op1) == SYMBOL_REF)
16725 rtx tmp;
16727 model = SYMBOL_REF_TLS_MODEL (op1);
16728 if (model)
16730 op1 = legitimize_tls_address (op1, model, true);
16731 op1 = force_operand (op1, op0);
16732 if (op1 == op0)
16733 return;
16734 op1 = convert_to_mode (mode, op1, 1);
16736 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16737 op1 = tmp;
16739 else if (GET_CODE (op1) == CONST
16740 && GET_CODE (XEXP (op1, 0)) == PLUS
16741 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16743 rtx addend = XEXP (XEXP (op1, 0), 1);
16744 rtx symbol = XEXP (XEXP (op1, 0), 0);
16745 rtx tmp;
16747 model = SYMBOL_REF_TLS_MODEL (symbol);
16748 if (model)
16749 tmp = legitimize_tls_address (symbol, model, true);
16750 else
16751 tmp = legitimize_pe_coff_symbol (symbol, true);
16753 if (tmp)
16755 tmp = force_operand (tmp, NULL);
16756 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16757 op0, 1, OPTAB_DIRECT);
16758 if (tmp == op0)
16759 return;
16760 op1 = convert_to_mode (mode, tmp, 1);
16764 if ((flag_pic || MACHOPIC_INDIRECT)
16765 && symbolic_operand (op1, mode))
16767 if (TARGET_MACHO && !TARGET_64BIT)
16769 #if TARGET_MACHO
16770 /* dynamic-no-pic */
16771 if (MACHOPIC_INDIRECT)
16773 rtx temp = ((reload_in_progress
16774 || ((op0 && REG_P (op0))
16775 && mode == Pmode))
16776 ? op0 : gen_reg_rtx (Pmode));
16777 op1 = machopic_indirect_data_reference (op1, temp);
16778 if (MACHOPIC_PURE)
16779 op1 = machopic_legitimize_pic_address (op1, mode,
16780 temp == op1 ? 0 : temp);
16782 if (op0 != op1 && GET_CODE (op0) != MEM)
16784 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16785 emit_insn (insn);
16786 return;
16788 if (GET_CODE (op0) == MEM)
16789 op1 = force_reg (Pmode, op1);
16790 else
16792 rtx temp = op0;
16793 if (GET_CODE (temp) != REG)
16794 temp = gen_reg_rtx (Pmode);
16795 temp = legitimize_pic_address (op1, temp);
16796 if (temp == op0)
16797 return;
16798 op1 = temp;
16800 /* dynamic-no-pic */
16801 #endif
16803 else
16805 if (MEM_P (op0))
16806 op1 = force_reg (mode, op1);
16807 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16809 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16810 op1 = legitimize_pic_address (op1, reg);
16811 if (op0 == op1)
16812 return;
16813 op1 = convert_to_mode (mode, op1, 1);
16817 else
16819 if (MEM_P (op0)
16820 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16821 || !push_operand (op0, mode))
16822 && MEM_P (op1))
16823 op1 = force_reg (mode, op1);
16825 if (push_operand (op0, mode)
16826 && ! general_no_elim_operand (op1, mode))
16827 op1 = copy_to_mode_reg (mode, op1);
16829 /* Force large constants in 64-bit compilation into a register
16830 to get them CSEed. */
16831 if (can_create_pseudo_p ()
16832 && (mode == DImode) && TARGET_64BIT
16833 && immediate_operand (op1, mode)
16834 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16835 && !register_operand (op0, mode)
16836 && optimize)
16837 op1 = copy_to_mode_reg (mode, op1);
16839 if (can_create_pseudo_p ()
16840 && FLOAT_MODE_P (mode)
16841 && GET_CODE (op1) == CONST_DOUBLE)
16843 /* If we are loading a floating point constant to a register,
16844 force the value to memory now, since we'll get better code
16845 out the back end. */
16847 op1 = validize_mem (force_const_mem (mode, op1));
16848 if (!register_operand (op0, mode))
16850 rtx temp = gen_reg_rtx (mode);
16851 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16852 emit_move_insn (op0, temp);
16853 return;
16858 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16861 void
16862 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16864 rtx op0 = operands[0], op1 = operands[1];
16865 unsigned int align = GET_MODE_ALIGNMENT (mode);
16867 if (push_operand (op0, VOIDmode))
16868 op0 = emit_move_resolve_push (mode, op0);
16870 /* Force constants other than zero into memory. We do not know how
16871 the instructions used to build constants modify the upper 64 bits
16872 of the register; once we have that information we may be able
16873 to handle some of them more efficiently. */
16874 if (can_create_pseudo_p ()
16875 && register_operand (op0, mode)
16876 && (CONSTANT_P (op1)
16877 || (GET_CODE (op1) == SUBREG
16878 && CONSTANT_P (SUBREG_REG (op1))))
16879 && !standard_sse_constant_p (op1))
16880 op1 = validize_mem (force_const_mem (mode, op1));
16882 /* We need to check memory alignment for SSE mode since attribute
16883 can make operands unaligned. */
16884 if (can_create_pseudo_p ()
16885 && SSE_REG_MODE_P (mode)
16886 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16887 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16889 rtx tmp[2];
16891 /* ix86_expand_vector_move_misalign() does not like constants ... */
16892 if (CONSTANT_P (op1)
16893 || (GET_CODE (op1) == SUBREG
16894 && CONSTANT_P (SUBREG_REG (op1))))
16895 op1 = validize_mem (force_const_mem (mode, op1));
16897 /* ... nor both arguments in memory. */
16898 if (!register_operand (op0, mode)
16899 && !register_operand (op1, mode))
16900 op1 = force_reg (mode, op1);
16902 tmp[0] = op0; tmp[1] = op1;
16903 ix86_expand_vector_move_misalign (mode, tmp);
16904 return;
16907 /* Make operand1 a register if it isn't already. */
16908 if (can_create_pseudo_p ()
16909 && !register_operand (op0, mode)
16910 && !register_operand (op1, mode))
16912 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16913 return;
16916 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16919 /* Split 32-byte AVX unaligned load and store if needed. */
16921 static void
16922 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
16924 rtx m;
16925 rtx (*extract) (rtx, rtx, rtx);
16926 rtx (*load_unaligned) (rtx, rtx);
16927 rtx (*store_unaligned) (rtx, rtx);
16928 enum machine_mode mode;
16930 switch (GET_MODE (op0))
16932 default:
16933 gcc_unreachable ();
16934 case V32QImode:
16935 extract = gen_avx_vextractf128v32qi;
16936 load_unaligned = gen_avx_loaddquv32qi;
16937 store_unaligned = gen_avx_storedquv32qi;
16938 mode = V16QImode;
16939 break;
16940 case V8SFmode:
16941 extract = gen_avx_vextractf128v8sf;
16942 load_unaligned = gen_avx_loadups256;
16943 store_unaligned = gen_avx_storeups256;
16944 mode = V4SFmode;
16945 break;
16946 case V4DFmode:
16947 extract = gen_avx_vextractf128v4df;
16948 load_unaligned = gen_avx_loadupd256;
16949 store_unaligned = gen_avx_storeupd256;
16950 mode = V2DFmode;
16951 break;
16954 if (MEM_P (op1))
16956 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
16958 rtx r = gen_reg_rtx (mode);
16959 m = adjust_address (op1, mode, 0);
16960 emit_move_insn (r, m);
16961 m = adjust_address (op1, mode, 16);
16962 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
16963 emit_move_insn (op0, r);
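 /* The 256-bit load is thus performed as two 128-bit halves: the low half
    is loaded into a register, and the VEC_CONCAT with the high half above
    is presumably matched by a vinsertf128-style pattern.  */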
16965 /* Normal *mov<mode>_internal pattern will handle
16966 unaligned loads just fine if misaligned_operand
16967 is true, and without the UNSPEC it can be combined
16968 with arithmetic instructions. */
16969 else if (misaligned_operand (op1, GET_MODE (op1)))
16970 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16971 else
16972 emit_insn (load_unaligned (op0, op1));
16974 else if (MEM_P (op0))
16976 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
16978 m = adjust_address (op0, mode, 0);
16979 emit_insn (extract (m, op1, const0_rtx));
16980 m = adjust_address (op0, mode, 16);
16981 emit_insn (extract (m, op1, const1_rtx));
16983 else
16984 emit_insn (store_unaligned (op0, op1));
16986 else
16987 gcc_unreachable ();
16990 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
16991 straight to ix86_expand_vector_move. */
16992 /* Code generation for scalar reg-reg moves of single and double precision data:
16993 if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
16994 movaps reg, reg
16995 else
16996 movss reg, reg
16997 if (x86_sse_partial_reg_dependency == true)
16998 movapd reg, reg
16999 else
17000 movsd reg, reg
17002 Code generation for scalar loads of double precision data:
17003 if (x86_sse_split_regs == true)
17004 movlpd mem, reg (gas syntax)
17005 else
17006 movsd mem, reg
17008 Code generation for unaligned packed loads of single precision data
17009 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17010 if (x86_sse_unaligned_move_optimal)
17011 movups mem, reg
17013 if (x86_sse_partial_reg_dependency == true)
17015 xorps reg, reg
17016 movlps mem, reg
17017 movhps mem+8, reg
17019 else
17021 movlps mem, reg
17022 movhps mem+8, reg
17025 Code generation for unaligned packed loads of double precision data
17026 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17027 if (x86_sse_unaligned_move_optimal)
17028 movupd mem, reg
17030 if (x86_sse_split_regs == true)
17032 movlpd mem, reg
17033 movhpd mem+8, reg
17035 else
17037 movsd mem, reg
17038 movhpd mem+8, reg
17042 void
17043 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17045 rtx op0, op1, orig_op0 = NULL_RTX, m;
17046 rtx (*load_unaligned) (rtx, rtx);
17047 rtx (*store_unaligned) (rtx, rtx);
17049 op0 = operands[0];
17050 op1 = operands[1];
17052 if (GET_MODE_SIZE (mode) == 64)
17054 switch (GET_MODE_CLASS (mode))
17056 case MODE_VECTOR_INT:
17057 case MODE_INT:
17058 if (GET_MODE (op0) != V16SImode)
17060 if (!MEM_P (op0))
17062 orig_op0 = op0;
17063 op0 = gen_reg_rtx (V16SImode);
17065 else
17066 op0 = gen_lowpart (V16SImode, op0);
17068 op1 = gen_lowpart (V16SImode, op1);
17069 /* FALLTHRU */
17071 case MODE_VECTOR_FLOAT:
17072 switch (GET_MODE (op0))
17074 default:
17075 gcc_unreachable ();
17076 case V16SImode:
17077 load_unaligned = gen_avx512f_loaddquv16si;
17078 store_unaligned = gen_avx512f_storedquv16si;
17079 break;
17080 case V16SFmode:
17081 load_unaligned = gen_avx512f_loadups512;
17082 store_unaligned = gen_avx512f_storeups512;
17083 break;
17084 case V8DFmode:
17085 load_unaligned = gen_avx512f_loadupd512;
17086 store_unaligned = gen_avx512f_storeupd512;
17087 break;
17090 if (MEM_P (op1))
17091 emit_insn (load_unaligned (op0, op1));
17092 else if (MEM_P (op0))
17093 emit_insn (store_unaligned (op0, op1));
17094 else
17095 gcc_unreachable ();
17096 if (orig_op0)
17097 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17098 break;
17100 default:
17101 gcc_unreachable ();
17104 return;
17107 if (TARGET_AVX
17108 && GET_MODE_SIZE (mode) == 32)
17110 switch (GET_MODE_CLASS (mode))
17112 case MODE_VECTOR_INT:
17113 case MODE_INT:
17114 if (GET_MODE (op0) != V32QImode)
17116 if (!MEM_P (op0))
17118 orig_op0 = op0;
17119 op0 = gen_reg_rtx (V32QImode);
17121 else
17122 op0 = gen_lowpart (V32QImode, op0);
17124 op1 = gen_lowpart (V32QImode, op1);
17125 /* FALLTHRU */
17127 case MODE_VECTOR_FLOAT:
17128 ix86_avx256_split_vector_move_misalign (op0, op1);
17129 if (orig_op0)
17130 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17131 break;
17133 default:
17134 gcc_unreachable ();
17137 return;
17140 if (MEM_P (op1))
17142 /* Normal *mov<mode>_internal pattern will handle
17143 unaligned loads just fine if misaligned_operand
17144 is true, and without the UNSPEC it can be combined
17145 with arithmetic instructions. */
17146 if (TARGET_AVX
17147 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17148 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17149 && misaligned_operand (op1, GET_MODE (op1)))
17150 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17151 /* ??? If we have typed data, then it would appear that using
17152 movdqu is the only way to get unaligned data loaded with
17153 integer type. */
17154 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17156 if (GET_MODE (op0) != V16QImode)
17158 orig_op0 = op0;
17159 op0 = gen_reg_rtx (V16QImode);
17161 op1 = gen_lowpart (V16QImode, op1);
17162 /* We will eventually emit movups based on insn attributes. */
17163 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17164 if (orig_op0)
17165 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17167 else if (TARGET_SSE2 && mode == V2DFmode)
17169 rtx zero;
17171 if (TARGET_AVX
17172 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17173 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17174 || optimize_insn_for_size_p ())
17176 /* We will eventually emit movups based on insn attributes. */
17177 emit_insn (gen_sse2_loadupd (op0, op1));
17178 return;
17181 /* When SSE registers are split into halves, we can avoid
17182 writing to the top half twice. */
17183 if (TARGET_SSE_SPLIT_REGS)
17185 emit_clobber (op0);
17186 zero = op0;
17188 else
17190 /* ??? Not sure about the best option for the Intel chips.
17191 The following would seem to satisfy; the register is
17192 entirely cleared, breaking the dependency chain. We
17193 then store to the upper half, with a dependency depth
17194 of one. A rumor has it that Intel recommends two movsd
17195 followed by an unpacklpd, but this is unconfirmed. And
17196 given that the dependency depth of the unpacklpd would
17197 still be one, I'm not sure why this would be better. */
17198 zero = CONST0_RTX (V2DFmode);
17201 m = adjust_address (op1, DFmode, 0);
17202 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17203 m = adjust_address (op1, DFmode, 8);
17204 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17206 else
17208 rtx t;
17210 if (TARGET_AVX
17211 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17212 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17213 || optimize_insn_for_size_p ())
17215 if (GET_MODE (op0) != V4SFmode)
17217 orig_op0 = op0;
17218 op0 = gen_reg_rtx (V4SFmode);
17220 op1 = gen_lowpart (V4SFmode, op1);
17221 emit_insn (gen_sse_loadups (op0, op1));
17222 if (orig_op0)
17223 emit_move_insn (orig_op0,
17224 gen_lowpart (GET_MODE (orig_op0), op0));
17225 return;
17228 if (mode != V4SFmode)
17229 t = gen_reg_rtx (V4SFmode);
17230 else
17231 t = op0;
17233 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17234 emit_move_insn (t, CONST0_RTX (V4SFmode));
17235 else
17236 emit_clobber (t);
17238 m = adjust_address (op1, V2SFmode, 0);
17239 emit_insn (gen_sse_loadlps (t, t, m));
17240 m = adjust_address (op1, V2SFmode, 8);
17241 emit_insn (gen_sse_loadhps (t, t, m));
17242 if (mode != V4SFmode)
17243 emit_move_insn (op0, gen_lowpart (mode, t));
17246 else if (MEM_P (op0))
17248 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17250 op0 = gen_lowpart (V16QImode, op0);
17251 op1 = gen_lowpart (V16QImode, op1);
17252 /* We will eventually emit movups based on insn attributes. */
17253 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17255 else if (TARGET_SSE2 && mode == V2DFmode)
17257 if (TARGET_AVX
17258 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17259 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17260 || optimize_insn_for_size_p ())
17261 /* We will eventually emit movups based on insn attributes. */
17262 emit_insn (gen_sse2_storeupd (op0, op1));
17263 else
17265 m = adjust_address (op0, DFmode, 0);
17266 emit_insn (gen_sse2_storelpd (m, op1));
17267 m = adjust_address (op0, DFmode, 8);
17268 emit_insn (gen_sse2_storehpd (m, op1));
17271 else
17273 if (mode != V4SFmode)
17274 op1 = gen_lowpart (V4SFmode, op1);
17276 if (TARGET_AVX
17277 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17278 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17279 || optimize_insn_for_size_p ())
17281 op0 = gen_lowpart (V4SFmode, op0);
17282 emit_insn (gen_sse_storeups (op0, op1));
17284 else
17286 m = adjust_address (op0, V2SFmode, 0);
17287 emit_insn (gen_sse_storelps (m, op1));
17288 m = adjust_address (op0, V2SFmode, 8);
17289 emit_insn (gen_sse_storehps (m, op1));
17293 else
17294 gcc_unreachable ();
17297 /* Helper function of ix86_fixup_binary_operands to canonicalize
17298 operand order. Returns true if the operands should be swapped. */
17300 static bool
17301 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17302 rtx operands[])
17304 rtx dst = operands[0];
17305 rtx src1 = operands[1];
17306 rtx src2 = operands[2];
17308 /* If the operation is not commutative, we can't do anything. */
17309 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17310 return false;
17312 /* Highest priority is that src1 should match dst. */
17313 if (rtx_equal_p (dst, src1))
17314 return false;
17315 if (rtx_equal_p (dst, src2))
17316 return true;
17318 /* Next highest priority is that immediate constants come second. */
17319 if (immediate_operand (src2, mode))
17320 return false;
17321 if (immediate_operand (src1, mode))
17322 return true;
17324 /* Lowest priority is that memory references should come second. */
17325 if (MEM_P (src2))
17326 return false;
17327 if (MEM_P (src1))
17328 return true;
17330 return false;
17334 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17335 destination to use for the operation. If different from the true
17336 destination in operands[0], a copy operation will be required. */
17339 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17340 rtx operands[])
17342 rtx dst = operands[0];
17343 rtx src1 = operands[1];
17344 rtx src2 = operands[2];
17346 /* Canonicalize operand order. */
17347 if (ix86_swap_binary_operands_p (code, mode, operands))
17349 rtx temp;
17351 /* It is invalid to swap operands of different modes. */
17352 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17354 temp = src1;
17355 src1 = src2;
17356 src2 = temp;
17359 /* Both source operands cannot be in memory. */
17360 if (MEM_P (src1) && MEM_P (src2))
17362 /* Optimization: Only read from memory once. */
17363 if (rtx_equal_p (src1, src2))
17365 src2 = force_reg (mode, src2);
17366 src1 = src2;
17368 else if (rtx_equal_p (dst, src1))
17369 src2 = force_reg (mode, src2);
17370 else
17371 src1 = force_reg (mode, src1);
17374 /* If the destination is memory, and we do not have matching source
17375 operands, do things in registers. */
17376 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17377 dst = gen_reg_rtx (mode);
17379 /* Source 1 cannot be a constant. */
17380 if (CONSTANT_P (src1))
17381 src1 = force_reg (mode, src1);
17383 /* Source 1 cannot be a non-matching memory. */
17384 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17385 src1 = force_reg (mode, src1);
17387 /* Improve address combine. */
17388 if (code == PLUS
17389 && GET_MODE_CLASS (mode) == MODE_INT
17390 && MEM_P (src2))
17391 src2 = force_reg (mode, src2);
17393 operands[1] = src1;
17394 operands[2] = src2;
17395 return dst;
17398 /* Similarly, but assume that the destination has already been
17399 set up properly. */
17401 void
17402 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17403 enum machine_mode mode, rtx operands[])
17405 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17406 gcc_assert (dst == operands[0]);
17409 /* Attempt to expand a binary operator. Make the expansion closer to the
17410 actual machine, than just general_operand, which will allow 3 separate
17411 memory references (one output, two inputs) in a single insn. */
17413 void
17414 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17415 rtx operands[])
17417 rtx src1, src2, dst, op, clob;
17419 dst = ix86_fixup_binary_operands (code, mode, operands);
17420 src1 = operands[1];
17421 src2 = operands[2];
17423 /* Emit the instruction. */
17425 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17426 if (reload_in_progress)
17428 /* Reload doesn't know about the flags register, and doesn't know that
17429 it doesn't want to clobber it. We can only do this with PLUS. */
17430 gcc_assert (code == PLUS);
17431 emit_insn (op);
17433 else if (reload_completed
17434 && code == PLUS
17435 && !rtx_equal_p (dst, src1))
17437 /* This is going to be an LEA; avoid splitting it later. */
17438 emit_insn (op);
17440 else
17442 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17443 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17446 /* Fix up the destination if needed. */
17447 if (dst != operands[0])
17448 emit_move_insn (operands[0], dst);
17451 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17452 the given OPERANDS. */
17454 void
17455 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17456 rtx operands[])
17458 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17459 if (GET_CODE (operands[1]) == SUBREG)
17461 op1 = operands[1];
17462 op2 = operands[2];
17464 else if (GET_CODE (operands[2]) == SUBREG)
17466 op1 = operands[2];
17467 op2 = operands[1];
17469 /* Optimize (__m128i) d | (__m128i) e and similar code
17470 when d and e are float vectors into float vector logical
17471 insn. In C/C++ without using intrinsics there is no other way
17472 to express vector logical operation on float vectors than
17473 to cast them temporarily to integer vectors. */
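 /* For example, "(__m128i) a | (__m128i) b" on __m128 values a and b can
    then be emitted as a V4SF IOR (orps) instead of an integer-domain por;
    the exact insn chosen depends on the mode handled below.  */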
17474 if (op1
17475 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17476 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17477 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17478 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17479 && SUBREG_BYTE (op1) == 0
17480 && (GET_CODE (op2) == CONST_VECTOR
17481 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17482 && SUBREG_BYTE (op2) == 0))
17483 && can_create_pseudo_p ())
17485 rtx dst;
17486 switch (GET_MODE (SUBREG_REG (op1)))
17488 case V4SFmode:
17489 case V8SFmode:
17490 case V2DFmode:
17491 case V4DFmode:
17492 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17493 if (GET_CODE (op2) == CONST_VECTOR)
17495 op2 = gen_lowpart (GET_MODE (dst), op2);
17496 op2 = force_reg (GET_MODE (dst), op2);
17498 else
17500 op1 = operands[1];
17501 op2 = SUBREG_REG (operands[2]);
17502 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17503 op2 = force_reg (GET_MODE (dst), op2);
17505 op1 = SUBREG_REG (op1);
17506 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17507 op1 = force_reg (GET_MODE (dst), op1);
17508 emit_insn (gen_rtx_SET (VOIDmode, dst,
17509 gen_rtx_fmt_ee (code, GET_MODE (dst),
17510 op1, op2)));
17511 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17512 return;
17513 default:
17514 break;
17517 if (!nonimmediate_operand (operands[1], mode))
17518 operands[1] = force_reg (mode, operands[1]);
17519 if (!nonimmediate_operand (operands[2], mode))
17520 operands[2] = force_reg (mode, operands[2]);
17521 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17522 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17523 gen_rtx_fmt_ee (code, mode, operands[1],
17524 operands[2])));
17527 /* Return TRUE or FALSE depending on whether the binary operator meets the
17528 appropriate constraints. */
17530 bool
17531 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17532 rtx operands[3])
17534 rtx dst = operands[0];
17535 rtx src1 = operands[1];
17536 rtx src2 = operands[2];
17538 /* Both source operands cannot be in memory. */
17539 if (MEM_P (src1) && MEM_P (src2))
17540 return false;
17542 /* Canonicalize operand order for commutative operators. */
17543 if (ix86_swap_binary_operands_p (code, mode, operands))
17545 rtx temp = src1;
17546 src1 = src2;
17547 src2 = temp;
17550 /* If the destination is memory, we must have a matching source operand. */
17551 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17552 return false;
17554 /* Source 1 cannot be a constant. */
17555 if (CONSTANT_P (src1))
17556 return false;
17558 /* Source 1 cannot be a non-matching memory. */
17559 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17560 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17561 return (code == AND
17562 && (mode == HImode
17563 || mode == SImode
17564 || (TARGET_64BIT && mode == DImode))
17565 && satisfies_constraint_L (src2));
17567 return true;
17570 /* Attempt to expand a unary operator. Make the expansion closer to the
17571 actual machine, than just general_operand, which will allow 2 separate
17572 memory references (one output, one input) in a single insn. */
17574 void
17575 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17576 rtx operands[])
17578 int matching_memory;
17579 rtx src, dst, op, clob;
17581 dst = operands[0];
17582 src = operands[1];
17584 /* If the destination is memory, and we do not have matching source
17585 operands, do things in registers. */
17586 matching_memory = 0;
17587 if (MEM_P (dst))
17589 if (rtx_equal_p (dst, src))
17590 matching_memory = 1;
17591 else
17592 dst = gen_reg_rtx (mode);
17595 /* When source operand is memory, destination must match. */
17596 if (MEM_P (src) && !matching_memory)
17597 src = force_reg (mode, src);
17599 /* Emit the instruction. */
17601 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17602 if (reload_in_progress || code == NOT)
17604 /* Reload doesn't know about the flags register, and doesn't know that
17605 it doesn't want to clobber it. */
17606 gcc_assert (code == NOT);
17607 emit_insn (op);
17609 else
17611 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17612 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17615 /* Fix up the destination if needed. */
17616 if (dst != operands[0])
17617 emit_move_insn (operands[0], dst);
17620 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
17621 divisor are within the range [0-255]. */
17623 void
17624 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17625 bool signed_p)
17627 rtx end_label, qimode_label;
17628 rtx insn, div, mod;
17629 rtx scratch, tmp0, tmp1, tmp2;
17630 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17631 rtx (*gen_zero_extend) (rtx, rtx);
17632 rtx (*gen_test_ccno_1) (rtx, rtx);
17634 switch (mode)
17636 case SImode:
17637 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17638 gen_test_ccno_1 = gen_testsi_ccno_1;
17639 gen_zero_extend = gen_zero_extendqisi2;
17640 break;
17641 case DImode:
17642 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17643 gen_test_ccno_1 = gen_testdi_ccno_1;
17644 gen_zero_extend = gen_zero_extendqidi2;
17645 break;
17646 default:
17647 gcc_unreachable ();
17650 end_label = gen_label_rtx ();
17651 qimode_label = gen_label_rtx ();
17653 scratch = gen_reg_rtx (mode);
17655 /* Use 8bit unsigned divmod if dividend and divisor are within
17656 the range [0-255]. */
17657 emit_move_insn (scratch, operands[2]);
17658 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17659 scratch, 1, OPTAB_DIRECT);
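 /* Testing against -0x100 (~0xff) sets ZF only when (dividend | divisor)
    has no bits above bit 7, i.e. both values fit in 8 bits.  */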
17660 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17661 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17662 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17663 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17664 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17665 pc_rtx);
17666 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17667 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17668 JUMP_LABEL (insn) = qimode_label;
17670 /* Generate original signed/unsigned divmod. */
17671 div = gen_divmod4_1 (operands[0], operands[1],
17672 operands[2], operands[3]);
17673 emit_insn (div);
17675 /* Branch to the end. */
17676 emit_jump_insn (gen_jump (end_label));
17677 emit_barrier ();
17679 /* Generate 8bit unsigned divide. */
17680 emit_label (qimode_label);
17681 /* Don't use operands[0] for result of 8bit divide since not all
17682 registers support QImode ZERO_EXTRACT. */
17683 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17684 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17685 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17686 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17688 if (signed_p)
17690 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17691 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17693 else
17695 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17696 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17699 /* Extract remainder from AH. */
17700 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17701 if (REG_P (operands[1]))
17702 insn = emit_move_insn (operands[1], tmp1);
17703 else
17705 /* Need a new scratch register since the old one has result
17706 of 8bit divide. */
17707 scratch = gen_reg_rtx (mode);
17708 emit_move_insn (scratch, tmp1);
17709 insn = emit_move_insn (operands[1], scratch);
17711 set_unique_reg_note (insn, REG_EQUAL, mod);
17713 /* Zero extend quotient from AL. */
17714 tmp1 = gen_lowpart (QImode, tmp0);
17715 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17716 set_unique_reg_note (insn, REG_EQUAL, div);
17718 emit_label (end_label);
17721 /* Whether it is OK to emit CFI directives when emitting asm code. */
17723 bool
17724 ix86_emit_cfi ()
17726 return dwarf2out_do_cfi_asm ();
17729 #define LEA_MAX_STALL (3)
17730 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17732 /* Increase given DISTANCE in half-cycles according to
17733 dependencies between PREV and NEXT instructions.
17734 Add 1 half-cycle if there is no dependency and
17735 go to the next cycle if there is some dependency. */
17737 static unsigned int
17738 increase_distance (rtx prev, rtx next, unsigned int distance)
17740 df_ref *use_rec;
17741 df_ref *def_rec;
17743 if (!prev || !next)
17744 return distance + (distance & 1) + 2;
17746 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17747 return distance + 1;
17749 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
17750 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
17751 if (!DF_REF_IS_ARTIFICIAL (*def_rec)
17752 && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
17753 return distance + (distance & 1) + 2;
17755 return distance + 1;
17758 /* Function checks if instruction INSN defines register number
17759 REGNO1 or REGNO2. */
17761 static bool
17762 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17763 rtx insn)
17765 df_ref *def_rec;
17767 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
17768 if (DF_REF_REG_DEF_P (*def_rec)
17769 && !DF_REF_IS_ARTIFICIAL (*def_rec)
17770 && (regno1 == DF_REF_REGNO (*def_rec)
17771 || regno2 == DF_REF_REGNO (*def_rec)))
17773 return true;
17776 return false;
17779 /* Function checks if instruction INSN uses register number
17780 REGNO as a part of address expression. */
17782 static bool
17783 insn_uses_reg_mem (unsigned int regno, rtx insn)
17785 df_ref *use_rec;
17787 for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
17788 if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
17789 return true;
17791 return false;
17794 /* Search backward for non-agu definition of register number REGNO1
17795 or register number REGNO2 in basic block starting from instruction
17796 START up to head of basic block or instruction INSN.
17798 Function puts true value into *FOUND var if definition was found
17799 and false otherwise.
17801 Distance in half-cycles between START and found instruction or head
17802 of BB is added to DISTANCE and returned. */
17804 static int
17805 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17806 rtx insn, int distance,
17807 rtx start, bool *found)
17809 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17810 rtx prev = start;
17811 rtx next = NULL;
17813 *found = false;
17815 while (prev
17816 && prev != insn
17817 && distance < LEA_SEARCH_THRESHOLD)
17819 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17821 distance = increase_distance (prev, next, distance);
17822 if (insn_defines_reg (regno1, regno2, prev))
17824 if (recog_memoized (prev) < 0
17825 || get_attr_type (prev) != TYPE_LEA)
17827 *found = true;
17828 return distance;
17832 next = prev;
17834 if (prev == BB_HEAD (bb))
17835 break;
17837 prev = PREV_INSN (prev);
17840 return distance;
17843 /* Search backward for non-agu definition of register number REGNO1
17844 or register number REGNO2 in INSN's basic block until
17845 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17846 2. Reach neighbour BBs boundary, or
17847 3. Reach agu definition.
17848 Returns the distance between the non-agu definition point and INSN.
17849 If no definition point, returns -1. */
17851 static int
17852 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17853 rtx insn)
17855 basic_block bb = BLOCK_FOR_INSN (insn);
17856 int distance = 0;
17857 bool found = false;
17859 if (insn != BB_HEAD (bb))
17860 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17861 distance, PREV_INSN (insn),
17862 &found);
17864 if (!found && distance < LEA_SEARCH_THRESHOLD)
17866 edge e;
17867 edge_iterator ei;
17868 bool simple_loop = false;
17870 FOR_EACH_EDGE (e, ei, bb->preds)
17871 if (e->src == bb)
17873 simple_loop = true;
17874 break;
17877 if (simple_loop)
17878 distance = distance_non_agu_define_in_bb (regno1, regno2,
17879 insn, distance,
17880 BB_END (bb), &found);
17881 else
17883 int shortest_dist = -1;
17884 bool found_in_bb = false;
17886 FOR_EACH_EDGE (e, ei, bb->preds)
17888 int bb_dist
17889 = distance_non_agu_define_in_bb (regno1, regno2,
17890 insn, distance,
17891 BB_END (e->src),
17892 &found_in_bb);
17893 if (found_in_bb)
17895 if (shortest_dist < 0)
17896 shortest_dist = bb_dist;
17897 else if (bb_dist > 0)
17898 shortest_dist = MIN (bb_dist, shortest_dist);
17900 found = true;
17904 distance = shortest_dist;
17908 /* get_attr_type may modify recog data. We want to make sure
17909 that recog data is valid for instruction INSN, on which
17910 distance_non_agu_define is called. INSN is unchanged here. */
17911 extract_insn_cached (insn);
17913 if (!found)
17914 return -1;
17916 return distance >> 1;
17919 /* Return the distance in half-cycles between INSN and the next
17920 insn that uses register number REGNO in memory address added
17921 to DISTANCE. Return -1 if REGNO0 is set.
17923 Put true value into *FOUND if register usage was found and
17924 false otherwise.
17925 Put true value into *REDEFINED if register redefinition was
17926 found and false otherwise. */
17928 static int
17929 distance_agu_use_in_bb (unsigned int regno,
17930 rtx insn, int distance, rtx start,
17931 bool *found, bool *redefined)
17933 basic_block bb = NULL;
17934 rtx next = start;
17935 rtx prev = NULL;
17937 *found = false;
17938 *redefined = false;
17940 if (start != NULL_RTX)
17942 bb = BLOCK_FOR_INSN (start);
17943 if (start != BB_HEAD (bb))
17944 /* If insn and start belong to the same bb, set prev to insn,
17945 so the call to increase_distance will increase the distance
17946 between insns by 1. */
17947 prev = insn;
17950 while (next
17951 && next != insn
17952 && distance < LEA_SEARCH_THRESHOLD)
17954 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
17956 distance = increase_distance(prev, next, distance);
17957 if (insn_uses_reg_mem (regno, next))
17959 /* Return DISTANCE if OP0 is used in memory
17960 address in NEXT. */
17961 *found = true;
17962 return distance;
17965 if (insn_defines_reg (regno, INVALID_REGNUM, next))
17967 /* Return -1 if OP0 is set in NEXT. */
17968 *redefined = true;
17969 return -1;
17972 prev = next;
17975 if (next == BB_END (bb))
17976 break;
17978 next = NEXT_INSN (next);
17981 return distance;
17984 /* Return the distance between INSN and the next insn that uses
17985 register number REGNO0 in a memory address. Return -1 if no such
17986 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
17988 static int
17989 distance_agu_use (unsigned int regno0, rtx insn)
17991 basic_block bb = BLOCK_FOR_INSN (insn);
17992 int distance = 0;
17993 bool found = false;
17994 bool redefined = false;
17996 if (insn != BB_END (bb))
17997 distance = distance_agu_use_in_bb (regno0, insn, distance,
17998 NEXT_INSN (insn),
17999 &found, &redefined);
18001 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18003 edge e;
18004 edge_iterator ei;
18005 bool simple_loop = false;
18007 FOR_EACH_EDGE (e, ei, bb->succs)
18008 if (e->dest == bb)
18010 simple_loop = true;
18011 break;
18014 if (simple_loop)
18015 distance = distance_agu_use_in_bb (regno0, insn,
18016 distance, BB_HEAD (bb),
18017 &found, &redefined);
18018 else
18020 int shortest_dist = -1;
18021 bool found_in_bb = false;
18022 bool redefined_in_bb = false;
18024 FOR_EACH_EDGE (e, ei, bb->succs)
18026 int bb_dist
18027 = distance_agu_use_in_bb (regno0, insn,
18028 distance, BB_HEAD (e->dest),
18029 &found_in_bb, &redefined_in_bb);
18030 if (found_in_bb)
18032 if (shortest_dist < 0)
18033 shortest_dist = bb_dist;
18034 else if (bb_dist > 0)
18035 shortest_dist = MIN (bb_dist, shortest_dist);
18037 found = true;
18041 distance = shortest_dist;
18045 if (!found || redefined)
18046 return -1;
18048 return distance >> 1;
18051 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18052 there is a dilemma of choosing LEA or ADD.
18053 Negative value: ADD is preferred over LEA
18054 Zero: Neutral
18055 Positive value: LEA is preferred over ADD. */
18056 #define IX86_LEA_PRIORITY 0
18058 /* Return true if usage of lea INSN has performance advantage
18059 over a sequence of instructions. Instructions sequence has
18060 SPLIT_COST cycles higher latency than lea latency. */
18062 static bool
18063 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
18064 unsigned int regno2, int split_cost, bool has_scale)
18066 int dist_define, dist_use;
18068 /* For Silvermont if using a 2-source or 3-source LEA for
18069 non-destructive destination purposes, or due to wanting
18070 ability to use SCALE, the use of LEA is justified. */
18071 if (TARGET_SILVERMONT || TARGET_INTEL)
18073 if (has_scale)
18074 return true;
18075 if (split_cost < 1)
18076 return false;
18077 if (regno0 == regno1 || regno0 == regno2)
18078 return false;
18079 return true;
18082 dist_define = distance_non_agu_define (regno1, regno2, insn);
18083 dist_use = distance_agu_use (regno0, insn);
18085 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18087 /* If there is no non-AGU operand definition, no AGU
18088 operand usage and the split cost is 0, then both the lea
18089 and non-lea variants have the same priority. Currently
18090 we prefer lea for 64-bit code and non-lea for 32-bit
18091 code. */
18092 if (dist_use < 0 && split_cost == 0)
18093 return TARGET_64BIT || IX86_LEA_PRIORITY;
18094 else
18095 return true;
18098 /* With longer definitions distance lea is more preferable.
18099 Here we change it to take into account splitting cost and
18100 lea priority. */
18101 dist_define += split_cost + IX86_LEA_PRIORITY;
18103 /* If there is no use in a memory address then we just check
18104 that split cost exceeds AGU stall. */
18105 if (dist_use < 0)
18106 return dist_define > LEA_MAX_STALL;
18108 /* If this insn has both backward non-agu dependence and forward
18109 agu dependence, the one with short distance takes effect. */
18110 return dist_define >= dist_use;
18113 /* Return true if it is legal to clobber flags by INSN and
18114 false otherwise. */
18116 static bool
18117 ix86_ok_to_clobber_flags (rtx insn)
18119 basic_block bb = BLOCK_FOR_INSN (insn);
18120 df_ref *use;
18121 bitmap live;
18123 while (insn)
18125 if (NONDEBUG_INSN_P (insn))
18127 for (use = DF_INSN_USES (insn); *use; use++)
18128 if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
18129 return false;
18131 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18132 return true;
18135 if (insn == BB_END (bb))
18136 break;
18138 insn = NEXT_INSN (insn);
18141 live = df_get_live_out(bb);
18142 return !REGNO_REG_SET_P (live, FLAGS_REG);
18145 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18146 move and add to avoid AGU stalls. */
18148 bool
18149 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
18151 unsigned int regno0, regno1, regno2;
18153 /* Check if we need to optimize. */
18154 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18155 return false;
18157 /* Check it is correct to split here. */
18158 if (!ix86_ok_to_clobber_flags(insn))
18159 return false;
18161 regno0 = true_regnum (operands[0]);
18162 regno1 = true_regnum (operands[1]);
18163 regno2 = true_regnum (operands[2]);
18165 /* We need to split only adds with a non-destructive
18166 destination operand. */
18167 if (regno0 == regno1 || regno0 == regno2)
18168 return false;
18169 else
18170 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18173 /* Return true if we should emit lea instruction instead of mov
18174 instruction. */
18176 bool
18177 ix86_use_lea_for_mov (rtx insn, rtx operands[])
18179 unsigned int regno0, regno1;
18181 /* Check if we need to optimize. */
18182 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18183 return false;
18185 /* Use lea for reg to reg moves only. */
18186 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18187 return false;
18189 regno0 = true_regnum (operands[0]);
18190 regno1 = true_regnum (operands[1]);
18192 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18195 /* Return true if we need to split lea into a sequence of
18196 instructions to avoid AGU stalls. */
18198 bool
18199 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
18201 unsigned int regno0, regno1, regno2;
18202 int split_cost;
18203 struct ix86_address parts;
18204 int ok;
18206 /* Check we need to optimize. */
18207 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18208 return false;
18210 /* The "at least two components" test below might not catch simple
18211 move or zero extension insns if parts.base is non-NULL and parts.disp
18212 is const0_rtx as the only components in the address, e.g. if the
18213 register is %rbp or %r13. As this test is much cheaper and moves or
18214 zero extensions are the common case, do this check first. */
18215 if (REG_P (operands[1])
18216 || (SImode_address_operand (operands[1], VOIDmode)
18217 && REG_P (XEXP (operands[1], 0))))
18218 return false;
18220 /* Check if it is OK to split here. */
18221 if (!ix86_ok_to_clobber_flags (insn))
18222 return false;
18224 ok = ix86_decompose_address (operands[1], &parts);
18225 gcc_assert (ok);
18227 /* There should be at least two components in the address. */
18228 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18229 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18230 return false;
18232 /* We should not split into add if a non-legitimate PIC
18233 operand is used as displacement. */
18234 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18235 return false;
18237 regno0 = true_regnum (operands[0]) ;
18238 regno1 = INVALID_REGNUM;
18239 regno2 = INVALID_REGNUM;
18241 if (parts.base)
18242 regno1 = true_regnum (parts.base);
18243 if (parts.index)
18244 regno2 = true_regnum (parts.index);
18246 split_cost = 0;
18248 /* Compute how many cycles we will add to execution time
18249 if we split the lea into a sequence of instructions. */
18250 if (parts.base || parts.index)
18252 /* Have to use a mov instruction if the non-destructive
18253 destination form is used. */
18254 if (regno1 != regno0 && regno2 != regno0)
18255 split_cost += 1;
18257 /* Have to add index to base if both exist. */
18258 if (parts.base && parts.index)
18259 split_cost += 1;
18261 /* Have to use shift and adds if scale is 2 or greater. */
18262 if (parts.scale > 1)
18264 if (regno0 != regno1)
18265 split_cost += 1;
18266 else if (regno2 == regno0)
18267 split_cost += 4;
18268 else
18269 split_cost += parts.scale;
18272 /* Have to use add instruction with immediate if
18273 disp is non zero. */
18274 if (parts.disp && parts.disp != const0_rtx)
18275 split_cost += 1;
18277 /* Subtract the price of lea. */
18278 split_cost -= 1;
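 /* For illustration: splitting "lea 0x4(%rbx,%rcx,2), %rax" requires a mov
    (non-destructive destination), a shift for the scale, an add of the base
    and an add of the displacement; after subtracting the lea itself, the
    computation above yields a split_cost of 3.  */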
18281 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18282 parts.scale > 1);
18285 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18286 matches destination. RTX includes clobber of FLAGS_REG. */
18288 static void
18289 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18290 rtx dst, rtx src)
18292 rtx op, clob;
18294 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18295 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18297 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18300 /* Return true if regno1 def is nearest to the insn. */
18302 static bool
18303 find_nearest_reg_def (rtx insn, int regno1, int regno2)
18305 rtx prev = insn;
18306 rtx start = BB_HEAD (BLOCK_FOR_INSN (insn));
18308 if (insn == start)
18309 return false;
18310 while (prev && prev != start)
18312 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18314 prev = PREV_INSN (prev);
18315 continue;
18317 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18318 return true;
18319 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18320 return false;
18321 prev = PREV_INSN (prev);
18324 /* None of the regs is defined in the bb. */
18325 return false;
18328 /* Split lea instructions into a sequence of instructions
18329 which are executed on ALU to avoid AGU stalls.
18330 It is assumed that it is allowed to clobber flags register
18331 at lea position. */
18333 void
18334 ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode)
18336 unsigned int regno0, regno1, regno2;
18337 struct ix86_address parts;
18338 rtx target, tmp;
18339 int ok, adds;
18341 ok = ix86_decompose_address (operands[1], &parts);
18342 gcc_assert (ok);
18344 target = gen_lowpart (mode, operands[0]);
18346 regno0 = true_regnum (target);
18347 regno1 = INVALID_REGNUM;
18348 regno2 = INVALID_REGNUM;
18350 if (parts.base)
18352 parts.base = gen_lowpart (mode, parts.base);
18353 regno1 = true_regnum (parts.base);
18356 if (parts.index)
18358 parts.index = gen_lowpart (mode, parts.index);
18359 regno2 = true_regnum (parts.index);
18362 if (parts.disp)
18363 parts.disp = gen_lowpart (mode, parts.disp);
18365 if (parts.scale > 1)
18367 /* Case r1 = r1 + ... */
18368 if (regno1 == regno0)
18370 /* If we have a case r1 = r1 + C * r2 then we
18371 should use multiplication which is very
18372 expensive. Assume cost model is wrong if we
18373 have such case here. */
18374 gcc_assert (regno2 != regno0);
18376 for (adds = parts.scale; adds > 0; adds--)
18377 ix86_emit_binop (PLUS, mode, target, parts.index);
18379 else
18381 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18382 if (regno0 != regno2)
18383 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18385 /* Use shift for scaling. */
18386 ix86_emit_binop (ASHIFT, mode, target,
18387 GEN_INT (exact_log2 (parts.scale)));
18389 if (parts.base)
18390 ix86_emit_binop (PLUS, mode, target, parts.base);
18392 if (parts.disp && parts.disp != const0_rtx)
18393 ix86_emit_binop (PLUS, mode, target, parts.disp);
18396 else if (!parts.base && !parts.index)
18398 gcc_assert(parts.disp);
18399 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18401 else
18403 if (!parts.base)
18405 if (regno0 != regno2)
18406 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18408 else if (!parts.index)
18410 if (regno0 != regno1)
18411 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18413 else
18415 if (regno0 == regno1)
18416 tmp = parts.index;
18417 else if (regno0 == regno2)
18418 tmp = parts.base;
18419 else
18421 rtx tmp1;
18423 /* Find better operand for SET instruction, depending
18424 on which definition is farther from the insn. */
18425 if (find_nearest_reg_def (insn, regno1, regno2))
18426 tmp = parts.index, tmp1 = parts.base;
18427 else
18428 tmp = parts.base, tmp1 = parts.index;
18430 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18432 if (parts.disp && parts.disp != const0_rtx)
18433 ix86_emit_binop (PLUS, mode, target, parts.disp);
18435 ix86_emit_binop (PLUS, mode, target, tmp1);
18436 return;
18439 ix86_emit_binop (PLUS, mode, target, tmp);
18442 if (parts.disp && parts.disp != const0_rtx)
18443 ix86_emit_binop (PLUS, mode, target, parts.disp);
18447 /* Return true if it is ok to optimize an ADD operation to LEA
18448 operation to avoid flag register consumption. For most processors,
18449 ADD is faster than LEA. For processors like BONNELL, if the
18450 destination register of LEA holds an actual address which will be
18451 used soon, LEA is better and otherwise ADD is better. */
18453 bool
18454 ix86_lea_for_add_ok (rtx insn, rtx operands[])
18456 unsigned int regno0 = true_regnum (operands[0]);
18457 unsigned int regno1 = true_regnum (operands[1]);
18458 unsigned int regno2 = true_regnum (operands[2]);
18460 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18461 if (regno0 != regno1 && regno0 != regno2)
18462 return true;
18464 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18465 return false;
18467 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18470 /* Return true if destination reg of SET_BODY is shift count of
18471 USE_BODY. */
18473 static bool
18474 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18476 rtx set_dest;
18477 rtx shift_rtx;
18478 int i;
18480 /* Retrieve destination of SET_BODY. */
18481 switch (GET_CODE (set_body))
18483 case SET:
18484 set_dest = SET_DEST (set_body);
18485 if (!set_dest || !REG_P (set_dest))
18486 return false;
18487 break;
18488 case PARALLEL:
18489 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18490 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18491 use_body))
18492 return true;
18493 default:
18494 return false;
18495 break;
18498 /* Retrieve shift count of USE_BODY. */
18499 switch (GET_CODE (use_body))
18501 case SET:
18502 shift_rtx = XEXP (use_body, 1);
18503 break;
18504 case PARALLEL:
18505 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18506 if (ix86_dep_by_shift_count_body (set_body,
18507 XVECEXP (use_body, 0, i)))
18508 return true;
18509 default:
18510 return false;
18511 break;
18514 if (shift_rtx
18515 && (GET_CODE (shift_rtx) == ASHIFT
18516 || GET_CODE (shift_rtx) == LSHIFTRT
18517 || GET_CODE (shift_rtx) == ASHIFTRT
18518 || GET_CODE (shift_rtx) == ROTATE
18519 || GET_CODE (shift_rtx) == ROTATERT))
18521 rtx shift_count = XEXP (shift_rtx, 1);
18523 /* Return true if shift count is dest of SET_BODY. */
18524 if (REG_P (shift_count))
18526 /* Add check since it can be invoked before register
18527 allocation in pre-reload schedule. */
18528 if (reload_completed
18529 && true_regnum (set_dest) == true_regnum (shift_count))
18530 return true;
18531 else if (REGNO(set_dest) == REGNO(shift_count))
18532 return true;
18536 return false;
18539 /* Return true if destination reg of SET_INSN is shift count of
18540 USE_INSN. */
18542 bool
18543 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18545 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18546 PATTERN (use_insn));
18549 /* Return TRUE or FALSE depending on whether the unary operator meets the
18550 appropriate constraints. */
18552 bool
18553 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
18554 enum machine_mode mode ATTRIBUTE_UNUSED,
18555 rtx operands[2])
18557 /* If one of operands is memory, source and destination must match. */
18558 if ((MEM_P (operands[0])
18559 || MEM_P (operands[1]))
18560 && ! rtx_equal_p (operands[0], operands[1]))
18561 return false;
18562 return true;
18565 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18566 are ok, keeping in mind the possible movddup alternative. */
18568 bool
18569 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18571 if (MEM_P (operands[0]))
18572 return rtx_equal_p (operands[0], operands[1 + high]);
18573 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18574 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18575 return true;
18578 /* Post-reload splitter for converting an SF or DFmode value in an
18579 SSE register into an unsigned SImode. */
18581 void
18582 ix86_split_convert_uns_si_sse (rtx operands[])
18584 enum machine_mode vecmode;
18585 rtx value, large, zero_or_two31, input, two31, x;
18587 large = operands[1];
18588 zero_or_two31 = operands[2];
18589 input = operands[3];
18590 two31 = operands[4];
18591 vecmode = GET_MODE (large);
18592 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18594 /* Load up the value into the low element. We must ensure that the other
18595 elements are valid floats -- zero is the easiest such value. */
18596 if (MEM_P (input))
18598 if (vecmode == V4SFmode)
18599 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18600 else
18601 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18603 else
18605 input = gen_rtx_REG (vecmode, REGNO (input));
18606 emit_move_insn (value, CONST0_RTX (vecmode));
18607 if (vecmode == V4SFmode)
18608 emit_insn (gen_sse_movss (value, value, input));
18609 else
18610 emit_insn (gen_sse2_movsd (value, value, input));
18613 emit_move_insn (large, two31);
18614 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
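 /* The sequence below builds in LARGE an all-ones mask for lanes whose
    value is >= 2^31; ZERO_OR_TWO31 then holds 2^31 in exactly those lanes
    and is subtracted before the signed truncation.  Shifting the mask left
    by 31 leaves 0x80000000 in the adjusted lanes, and the final xor adds
    that bit back into the integer result.  */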
18616 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18617 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18619 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18620 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18622 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18623 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18625 large = gen_rtx_REG (V4SImode, REGNO (large));
18626 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18628 x = gen_rtx_REG (V4SImode, REGNO (value));
18629 if (vecmode == V4SFmode)
18630 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18631 else
18632 emit_insn (gen_sse2_cvttpd2dq (x, value));
18633 value = x;
18635 emit_insn (gen_xorv4si3 (value, value, large));
18638 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18639 Expects the 64-bit DImode to be supplied in a pair of integral
18640 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18641 -mfpmath=sse, !optimize_size only. */
18643 void
18644 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18646 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18647 rtx int_xmm, fp_xmm;
18648 rtx biases, exponents;
18649 rtx x;
18651 int_xmm = gen_reg_rtx (V4SImode);
18652 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18653 emit_insn (gen_movdi_to_sse (int_xmm, input));
18654 else if (TARGET_SSE_SPLIT_REGS)
18656 emit_clobber (int_xmm);
18657 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18659 else
18661 x = gen_reg_rtx (V2DImode);
18662 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18663 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18666 x = gen_rtx_CONST_VECTOR (V4SImode,
18667 gen_rtvec (4, GEN_INT (0x43300000UL),
18668 GEN_INT (0x45300000UL),
18669 const0_rtx, const0_rtx));
18670 exponents = validize_mem (force_const_mem (V4SImode, x));
18672 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18673 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18675 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_lo_xmm)
18676 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18677 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18678 (0x1.0p84 + double(fp_value_hi_xmm)).
18679 Note these exponents differ by 32. */
18681 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18683 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18684 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18685 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18686 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18687 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18688 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18689 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18690 biases = validize_mem (force_const_mem (V2DFmode, biases));
18691 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18693 /* Add the upper and lower DFmode values together. */
18694 if (TARGET_SSE3)
18695 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18696 else
18698 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18699 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18700 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18703 ix86_expand_vector_extract (false, target, fp_xmm, 0);
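/* For reference, the bias trick above is equivalent to this scalar sketch
   (illustrative only; lo and hi are the two 32-bit halves of the input and
   the final addition rounds once, just like the vector code):

     union { unsigned long long i; double d; } dlo, dhi;
     dlo.i = (0x43300000ULL << 32) | lo;        -- equals 0x1.0p52 + lo
     dhi.i = (0x45300000ULL << 32) | hi;        -- equals 0x1.0p84 + hi * 2**32
     result = (dhi.d - 0x1.0p84) + (dlo.d - 0x1.0p52);   -- hi * 2**32 + lo  */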
18706 /* Not used, but eases macroization of patterns. */
18707 void
18708 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
18709 rtx input ATTRIBUTE_UNUSED)
18711 gcc_unreachable ();
18714 /* Convert an unsigned SImode value into a DFmode. Only currently used
18715 for SSE, but applicable anywhere. */
18717 void
18718 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18720 REAL_VALUE_TYPE TWO31r;
18721 rtx x, fp;
18723 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18724 NULL, 1, OPTAB_DIRECT);
18726 fp = gen_reg_rtx (DFmode);
18727 emit_insn (gen_floatsidf2 (fp, x));
18729 real_ldexp (&TWO31r, &dconst1, 31);
18730 x = const_double_from_real_value (TWO31r, DFmode);
18732 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18733 if (x != target)
18734 emit_move_insn (target, x);
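/* The sequence above amounts to the following scalar sketch (names are
   illustrative): re-bias the unsigned input into signed range, use the
   ordinary signed conversion, then add the bias back in as a double:

     double u32_to_double (unsigned int x)
     {
       int biased = (int) (x + 0x80000000u);   -- x - 2**31, two's-complement wrap
       return (double) biased + 0x1.0p31;      -- exact, since x < 2**53
     }  */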
18737 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18738 32-bit mode; otherwise we have a direct convert instruction. */
18740 void
18741 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18743 REAL_VALUE_TYPE TWO32r;
18744 rtx fp_lo, fp_hi, x;
18746 fp_lo = gen_reg_rtx (DFmode);
18747 fp_hi = gen_reg_rtx (DFmode);
18749 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18751 real_ldexp (&TWO32r, &dconst1, 32);
18752 x = const_double_from_real_value (TWO32r, DFmode);
18753 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18755 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18757 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18758 0, OPTAB_DIRECT);
18759 if (x != target)
18760 emit_move_insn (target, x);
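/* Scalar sketch of the above (illustrative names; hi is the signed high
   word, lo the unsigned low word of the DImode input):

     result = (double) hi * 0x1.0p32 + (double) lo;

   Both the conversion of hi and the multiply by 2**32 are exact, so the
   single rounding happens in the final addition.  */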
18763 /* Convert an unsigned SImode value into an SFmode, using only SSE.
18764 For x86_32, -mfpmath=sse, !optimize_size only. */
18765 void
18766 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18768 REAL_VALUE_TYPE ONE16r;
18769 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18771 real_ldexp (&ONE16r, &dconst1, 16);
18772 x = const_double_from_real_value (ONE16r, SFmode);
18773 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
18774 NULL, 0, OPTAB_DIRECT);
18775 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
18776 NULL, 0, OPTAB_DIRECT);
18777 fp_hi = gen_reg_rtx (SFmode);
18778 fp_lo = gen_reg_rtx (SFmode);
18779 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18780 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18781 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18782 0, OPTAB_DIRECT);
18783 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18784 0, OPTAB_DIRECT);
18785 if (!rtx_equal_p (target, fp_hi))
18786 emit_move_insn (target, fp_hi);
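/* Equivalent scalar computation (a sketch; both 16-bit halves fit exactly
   in SFmode, so the only rounding is the final addition):

     float u32_to_float (unsigned int x)
     {
       float hi = (float) (int) (x >> 16);
       float lo = (float) (int) (x & 0xffff);
       return hi * 0x1.0p16f + lo;
     }  */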
18789 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18790 a vector of unsigned ints VAL to vector of floats TARGET. */
18792 void
18793 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18795 rtx tmp[8];
18796 REAL_VALUE_TYPE TWO16r;
18797 enum machine_mode intmode = GET_MODE (val);
18798 enum machine_mode fltmode = GET_MODE (target);
18799 rtx (*cvt) (rtx, rtx);
18801 if (intmode == V4SImode)
18802 cvt = gen_floatv4siv4sf2;
18803 else
18804 cvt = gen_floatv8siv8sf2;
18805 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18806 tmp[0] = force_reg (intmode, tmp[0]);
18807 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18808 OPTAB_DIRECT);
18809 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18810 NULL_RTX, 1, OPTAB_DIRECT);
18811 tmp[3] = gen_reg_rtx (fltmode);
18812 emit_insn (cvt (tmp[3], tmp[1]));
18813 tmp[4] = gen_reg_rtx (fltmode);
18814 emit_insn (cvt (tmp[4], tmp[2]));
18815 real_ldexp (&TWO16r, &dconst1, 16);
18816 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18817 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18818 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18819 OPTAB_DIRECT);
18820 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18821 OPTAB_DIRECT);
18822 if (tmp[7] != target)
18823 emit_move_insn (target, tmp[7]);
18826 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18827 patterns can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18828 This is done by doing just a signed conversion if < 0x1p31, and otherwise by
18829 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
18832 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18834 REAL_VALUE_TYPE TWO31r;
18835 rtx two31r, tmp[4];
18836 enum machine_mode mode = GET_MODE (val);
18837 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18838 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18839 rtx (*cmp) (rtx, rtx, rtx, rtx);
18840 int i;
18842 for (i = 0; i < 3; i++)
18843 tmp[i] = gen_reg_rtx (mode);
18844 real_ldexp (&TWO31r, &dconst1, 31);
18845 two31r = const_double_from_real_value (TWO31r, scalarmode);
18846 two31r = ix86_build_const_vector (mode, 1, two31r);
18847 two31r = force_reg (mode, two31r);
18848 switch (mode)
18850 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18851 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18852 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18853 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18854 default: gcc_unreachable ();
18856 tmp[3] = gen_rtx_LE (mode, two31r, val);
18857 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18858 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18859 0, OPTAB_DIRECT);
18860 if (intmode == V4SImode || TARGET_AVX2)
18861 *xorp = expand_simple_binop (intmode, ASHIFT,
18862 gen_lowpart (intmode, tmp[0]),
18863 GEN_INT (31), NULL_RTX, 0,
18864 OPTAB_DIRECT);
18865 else
18867 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18868 two31 = ix86_build_const_vector (intmode, 1, two31);
18869 *xorp = expand_simple_binop (intmode, AND,
18870 gen_lowpart (intmode, tmp[0]),
18871 two31, NULL_RTX, 0,
18872 OPTAB_DIRECT);
18874 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18875 0, OPTAB_DIRECT);
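/* Per element, the value returned above and the mask stored in *XORP
   satisfy (a sketch, with MASK the all-ones/all-zeros result of the
   2**31 <= VAL comparison):

     ret   = val - (mask ? 0x1.0p31 : 0.0);
     *xorp = mask ? 0x80000000 : 0;

   so that ((int) ret) ^ *xorp reproduces (unsigned int) val for inputs
   in [0, 2**32).  */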
18878 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18879 then replicate the value for all elements of the vector
18880 register. */
18883 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18885 int i, n_elt;
18886 rtvec v;
18887 enum machine_mode scalar_mode;
18889 switch (mode)
18891 case V64QImode:
18892 case V32QImode:
18893 case V16QImode:
18894 case V32HImode:
18895 case V16HImode:
18896 case V8HImode:
18897 case V16SImode:
18898 case V8SImode:
18899 case V4SImode:
18900 case V8DImode:
18901 case V4DImode:
18902 case V2DImode:
18903 gcc_assert (vect);
18904 case V16SFmode:
18905 case V8SFmode:
18906 case V4SFmode:
18907 case V8DFmode:
18908 case V4DFmode:
18909 case V2DFmode:
18910 n_elt = GET_MODE_NUNITS (mode);
18911 v = rtvec_alloc (n_elt);
18912 scalar_mode = GET_MODE_INNER (mode);
18914 RTVEC_ELT (v, 0) = value;
18916 for (i = 1; i < n_elt; ++i)
18917 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18919 return gen_rtx_CONST_VECTOR (mode, v);
18921 default:
18922 gcc_unreachable ();
18926 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18927 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18928 for an SSE register. If VECT is true, then replicate the mask for
18929 all elements of the vector register. If INVERT is true, then create
18930 a mask excluding the sign bit. */
18933 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
18935 enum machine_mode vec_mode, imode;
18936 HOST_WIDE_INT hi, lo;
18937 int shift = 63;
18938 rtx v;
18939 rtx mask;
18941 /* Find the sign bit, sign extended to 2*HWI. */
18942 switch (mode)
18944 case V16SImode:
18945 case V16SFmode:
18946 case V8SImode:
18947 case V4SImode:
18948 case V8SFmode:
18949 case V4SFmode:
18950 vec_mode = mode;
18951 mode = GET_MODE_INNER (mode);
18952 imode = SImode;
18953 lo = 0x80000000, hi = lo < 0;
18954 break;
18956 case V8DImode:
18957 case V4DImode:
18958 case V2DImode:
18959 case V8DFmode:
18960 case V4DFmode:
18961 case V2DFmode:
18962 vec_mode = mode;
18963 mode = GET_MODE_INNER (mode);
18964 imode = DImode;
18965 if (HOST_BITS_PER_WIDE_INT >= 64)
18966 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
18967 else
18968 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18969 break;
18971 case TImode:
18972 case TFmode:
18973 vec_mode = VOIDmode;
18974 if (HOST_BITS_PER_WIDE_INT >= 64)
18976 imode = TImode;
18977 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
18979 else
18981 rtvec vec;
18983 imode = DImode;
18984 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18986 if (invert)
18988 lo = ~lo, hi = ~hi;
18989 v = constm1_rtx;
18991 else
18992 v = const0_rtx;
18994 mask = immed_double_const (lo, hi, imode);
18996 vec = gen_rtvec (2, v, mask);
18997 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
18998 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19000 return v;
19002 break;
19004 default:
19005 gcc_unreachable ();
19008 if (invert)
19009 lo = ~lo, hi = ~hi;
19011 /* Force this value into the low part of a fp vector constant. */
19012 mask = immed_double_const (lo, hi, imode);
19013 mask = gen_lowpart (mode, mask);
19015 if (vec_mode == VOIDmode)
19016 return force_reg (mode, mask);
19018 v = ix86_build_const_vector (vec_mode, vect, mask);
19019 return force_reg (vec_mode, v);
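/* For example (illustrative call): ix86_build_signbit_mask (V2DFmode, true,
   false) yields a register holding 0x8000000000000000 in each DFmode lane,
   i.e. { -0.0, -0.0 }, while INVERT == true gives the complementary
   0x7fffffffffffffff pattern used to clear the sign bit.  */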
19022 /* Generate code for floating point ABS or NEG. */
19024 void
19025 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
19026 rtx operands[])
19028 rtx mask, set, dst, src;
19029 bool use_sse = false;
19030 bool vector_mode = VECTOR_MODE_P (mode);
19031 enum machine_mode vmode = mode;
19033 if (vector_mode)
19034 use_sse = true;
19035 else if (mode == TFmode)
19036 use_sse = true;
19037 else if (TARGET_SSE_MATH)
19039 use_sse = SSE_FLOAT_MODE_P (mode);
19040 if (mode == SFmode)
19041 vmode = V4SFmode;
19042 else if (mode == DFmode)
19043 vmode = V2DFmode;
19046 /* NEG and ABS performed with SSE use bitwise mask operations.
19047 Create the appropriate mask now. */
19048 if (use_sse)
19049 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19050 else
19051 mask = NULL_RTX;
19053 dst = operands[0];
19054 src = operands[1];
19056 set = gen_rtx_fmt_e (code, mode, src);
19057 set = gen_rtx_SET (VOIDmode, dst, set);
19059 if (mask)
19061 rtx use, clob;
19062 rtvec par;
19064 use = gen_rtx_USE (VOIDmode, mask);
19065 if (vector_mode)
19066 par = gen_rtvec (2, set, use);
19067 else
19069 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19070 par = gen_rtvec (3, set, use, clob);
19072 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19074 else
19075 emit_insn (set);
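/* With the mask above, the expanded patterns reduce to plain bit
   operations on the value's representation; for DFmode, roughly:

     neg (x): bits (x) ^ 0x8000000000000000   -- flip the sign bit
     abs (x): bits (x) & 0x7fffffffffffffff   -- clear the sign bit

   (a sketch; the vector forms apply the same mask lane-wise).  */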
19078 /* Expand a copysign operation. Special case operand 0 being a constant. */
19080 void
19081 ix86_expand_copysign (rtx operands[])
19083 enum machine_mode mode, vmode;
19084 rtx dest, op0, op1, mask, nmask;
19086 dest = operands[0];
19087 op0 = operands[1];
19088 op1 = operands[2];
19090 mode = GET_MODE (dest);
19092 if (mode == SFmode)
19093 vmode = V4SFmode;
19094 else if (mode == DFmode)
19095 vmode = V2DFmode;
19096 else
19097 vmode = mode;
19099 if (GET_CODE (op0) == CONST_DOUBLE)
19101 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19103 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19104 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19106 if (mode == SFmode || mode == DFmode)
19108 if (op0 == CONST0_RTX (mode))
19109 op0 = CONST0_RTX (vmode);
19110 else
19112 rtx v = ix86_build_const_vector (vmode, false, op0);
19114 op0 = force_reg (vmode, v);
19117 else if (op0 != CONST0_RTX (mode))
19118 op0 = force_reg (mode, op0);
19120 mask = ix86_build_signbit_mask (vmode, 0, 0);
19122 if (mode == SFmode)
19123 copysign_insn = gen_copysignsf3_const;
19124 else if (mode == DFmode)
19125 copysign_insn = gen_copysigndf3_const;
19126 else
19127 copysign_insn = gen_copysigntf3_const;
19129 emit_insn (copysign_insn (dest, op0, op1, mask));
19131 else
19133 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19135 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19136 mask = ix86_build_signbit_mask (vmode, 0, 0);
19138 if (mode == SFmode)
19139 copysign_insn = gen_copysignsf3_var;
19140 else if (mode == DFmode)
19141 copysign_insn = gen_copysigndf3_var;
19142 else
19143 copysign_insn = gen_copysigntf3_var;
19145 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19149 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19150 be a constant, and so has already been expanded into a vector constant. */
19152 void
19153 ix86_split_copysign_const (rtx operands[])
19155 enum machine_mode mode, vmode;
19156 rtx dest, op0, mask, x;
19158 dest = operands[0];
19159 op0 = operands[1];
19160 mask = operands[3];
19162 mode = GET_MODE (dest);
19163 vmode = GET_MODE (mask);
19165 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19166 x = gen_rtx_AND (vmode, dest, mask);
19167 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19169 if (op0 != CONST0_RTX (vmode))
19171 x = gen_rtx_IOR (vmode, dest, op0);
19172 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19176 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19177 so we have to do two masks. */
19179 void
19180 ix86_split_copysign_var (rtx operands[])
19182 enum machine_mode mode, vmode;
19183 rtx dest, scratch, op0, op1, mask, nmask, x;
19185 dest = operands[0];
19186 scratch = operands[1];
19187 op0 = operands[2];
19188 op1 = operands[3];
19189 nmask = operands[4];
19190 mask = operands[5];
19192 mode = GET_MODE (dest);
19193 vmode = GET_MODE (mask);
19195 if (rtx_equal_p (op0, op1))
19197 /* Shouldn't happen often (it's useless, obviously), but when it does
19198 we'd generate incorrect code if we continue below. */
19199 emit_move_insn (dest, op0);
19200 return;
19203 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19205 gcc_assert (REGNO (op1) == REGNO (scratch));
19207 x = gen_rtx_AND (vmode, scratch, mask);
19208 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19210 dest = mask;
19211 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19212 x = gen_rtx_NOT (vmode, dest);
19213 x = gen_rtx_AND (vmode, x, op0);
19214 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19216 else
19218 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19220 x = gen_rtx_AND (vmode, scratch, mask);
19222 else /* alternative 2,4 */
19224 gcc_assert (REGNO (mask) == REGNO (scratch));
19225 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19226 x = gen_rtx_AND (vmode, scratch, op1);
19228 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19230 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19232 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19233 x = gen_rtx_AND (vmode, dest, nmask);
19235 else /* alternative 3,4 */
19237 gcc_assert (REGNO (nmask) == REGNO (dest));
19238 dest = nmask;
19239 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19240 x = gen_rtx_AND (vmode, dest, op0);
19242 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19245 x = gen_rtx_IOR (vmode, dest, scratch);
19246 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
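/* Both the constant and the variable copysign splitters implement the
   classic bit formula (scalar sketch, with MASK the sign bit and NMASK its
   complement; bits()/bits_to_fp() are illustrative reinterpretations):

     copysign (x, y) == bits_to_fp ((bits (x) & NMASK) | (bits (y) & MASK))

   The register shuffling above only exists to satisfy the alternatives of
   the matching insn patterns.  */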
19249 /* Return TRUE or FALSE depending on whether the first SET in INSN
19250 has source and destination with matching CC modes, and whether the
19251 CC mode is at least as constrained as REQ_MODE. */
19253 bool
19254 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19256 rtx set;
19257 enum machine_mode set_mode;
19259 set = PATTERN (insn);
19260 if (GET_CODE (set) == PARALLEL)
19261 set = XVECEXP (set, 0, 0);
19262 gcc_assert (GET_CODE (set) == SET);
19263 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19265 set_mode = GET_MODE (SET_DEST (set));
19266 switch (set_mode)
19268 case CCNOmode:
19269 if (req_mode != CCNOmode
19270 && (req_mode != CCmode
19271 || XEXP (SET_SRC (set), 1) != const0_rtx))
19272 return false;
19273 break;
19274 case CCmode:
19275 if (req_mode == CCGCmode)
19276 return false;
19277 /* FALLTHRU */
19278 case CCGCmode:
19279 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19280 return false;
19281 /* FALLTHRU */
19282 case CCGOCmode:
19283 if (req_mode == CCZmode)
19284 return false;
19285 /* FALLTHRU */
19286 case CCZmode:
19287 break;
19289 case CCAmode:
19290 case CCCmode:
19291 case CCOmode:
19292 case CCSmode:
19293 if (set_mode != req_mode)
19294 return false;
19295 break;
19297 default:
19298 gcc_unreachable ();
19301 return GET_MODE (SET_SRC (set)) == set_mode;
19304 /* Generate insn patterns to do an integer compare of OPERANDS. */
19306 static rtx
19307 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19309 enum machine_mode cmpmode;
19310 rtx tmp, flags;
19312 cmpmode = SELECT_CC_MODE (code, op0, op1);
19313 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19315 /* This is very simple, but making the interface the same as in the
19316 FP case makes the rest of the code easier. */
19317 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19318 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19320 /* Return the test that should be put into the flags user, i.e.
19321 the bcc, scc, or cmov instruction. */
19322 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19325 /* Figure out whether to use ordered or unordered fp comparisons.
19326 Return the appropriate mode to use. */
19328 enum machine_mode
19329 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
19331 /* ??? In order to make all comparisons reversible, we do all comparisons
19332 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19333 all forms of trapping and nontrapping comparisons, we can make inequality
19334 comparisons trapping again, since it results in better code when using
19335 FCOM based compares. */
19336 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19339 enum machine_mode
19340 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19342 enum machine_mode mode = GET_MODE (op0);
19344 if (SCALAR_FLOAT_MODE_P (mode))
19346 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19347 return ix86_fp_compare_mode (code);
19350 switch (code)
19352 /* Only zero flag is needed. */
19353 case EQ: /* ZF=0 */
19354 case NE: /* ZF!=0 */
19355 return CCZmode;
19356 /* Codes needing carry flag. */
19357 case GEU: /* CF=0 */
19358 case LTU: /* CF=1 */
19359 /* Detect overflow checks. They need just the carry flag. */
19360 if (GET_CODE (op0) == PLUS
19361 && rtx_equal_p (op1, XEXP (op0, 0)))
19362 return CCCmode;
19363 else
19364 return CCmode;
19365 case GTU: /* CF=0 & ZF=0 */
19366 case LEU: /* CF=1 | ZF=1 */
19367 return CCmode;
19368 /* Codes possibly doable only with the sign flag when
19369 comparing against zero. */
19370 case GE: /* SF=OF or SF=0 */
19371 case LT: /* SF<>OF or SF=1 */
19372 if (op1 == const0_rtx)
19373 return CCGOCmode;
19374 else
19375 /* For other cases the Carry flag is not required. */
19376 return CCGCmode;
19377 /* Codes doable only with the sign flag when comparing
19378 against zero, but we lack a jump instruction for it,
19379 so we need to use relational tests against overflow,
19380 which thus needs to be zero. */
19381 case GT: /* ZF=0 & SF=OF */
19382 case LE: /* ZF=1 | SF<>OF */
19383 if (op1 == const0_rtx)
19384 return CCNOmode;
19385 else
19386 return CCGCmode;
19387 /* The strcmp pattern does a (use flags), and combine may ask us
19388 for the proper mode. */
19389 case USE:
19390 return CCmode;
19391 default:
19392 gcc_unreachable ();
19396 /* Return the fixed registers used for condition codes. */
19398 static bool
19399 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19401 *p1 = FLAGS_REG;
19402 *p2 = FPSR_REG;
19403 return true;
19406 /* If two condition code modes are compatible, return a condition code
19407 mode which is compatible with both. Otherwise, return
19408 VOIDmode. */
19410 static enum machine_mode
19411 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19413 if (m1 == m2)
19414 return m1;
19416 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19417 return VOIDmode;
19419 if ((m1 == CCGCmode && m2 == CCGOCmode)
19420 || (m1 == CCGOCmode && m2 == CCGCmode))
19421 return CCGCmode;
19423 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19424 return m2;
19425 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19426 return m1;
19428 switch (m1)
19430 default:
19431 gcc_unreachable ();
19433 case CCmode:
19434 case CCGCmode:
19435 case CCGOCmode:
19436 case CCNOmode:
19437 case CCAmode:
19438 case CCCmode:
19439 case CCOmode:
19440 case CCSmode:
19441 case CCZmode:
19442 switch (m2)
19444 default:
19445 return VOIDmode;
19447 case CCmode:
19448 case CCGCmode:
19449 case CCGOCmode:
19450 case CCNOmode:
19451 case CCAmode:
19452 case CCCmode:
19453 case CCOmode:
19454 case CCSmode:
19455 case CCZmode:
19456 return CCmode;
19459 case CCFPmode:
19460 case CCFPUmode:
19461 /* These are only compatible with themselves, which we already
19462 checked above. */
19463 return VOIDmode;
19468 /* Return a comparison we can do that is equivalent to
19469 swap_condition (code), apart possibly from orderedness.
19470 But never change orderedness if TARGET_IEEE_FP, returning
19471 UNKNOWN in that case if necessary. */
19473 static enum rtx_code
19474 ix86_fp_swap_condition (enum rtx_code code)
19476 switch (code)
19478 case GT: /* GTU - CF=0 & ZF=0 */
19479 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19480 case GE: /* GEU - CF=0 */
19481 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19482 case UNLT: /* LTU - CF=1 */
19483 return TARGET_IEEE_FP ? UNKNOWN : GT;
19484 case UNLE: /* LEU - CF=1 | ZF=1 */
19485 return TARGET_IEEE_FP ? UNKNOWN : GE;
19486 default:
19487 return swap_condition (code);
19491 /* Return the cost of comparison CODE using the best strategy for performance.
19492 All of the following functions use the number of instructions as a cost metric.
19493 In the future this should be tweaked to compute bytes for optimize_size and
19494 to take into account the performance of various instructions on various CPUs. */
19496 static int
19497 ix86_fp_comparison_cost (enum rtx_code code)
19499 int arith_cost;
19501 /* The cost of code using bit-twiddling on %ah. */
19502 switch (code)
19504 case UNLE:
19505 case UNLT:
19506 case LTGT:
19507 case GT:
19508 case GE:
19509 case UNORDERED:
19510 case ORDERED:
19511 case UNEQ:
19512 arith_cost = 4;
19513 break;
19514 case LT:
19515 case NE:
19516 case EQ:
19517 case UNGE:
19518 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19519 break;
19520 case LE:
19521 case UNGT:
19522 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19523 break;
19524 default:
19525 gcc_unreachable ();
19528 switch (ix86_fp_comparison_strategy (code))
19530 case IX86_FPCMP_COMI:
19531 return arith_cost > 4 ? 3 : 2;
19532 case IX86_FPCMP_SAHF:
19533 return arith_cost > 4 ? 4 : 3;
19534 default:
19535 return arith_cost;
19539 /* Return the strategy to use for floating-point comparisons. We assume that fcomi is always
19540 preferable where available, since that is also true when looking at size
19541 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19543 enum ix86_fpcmp_strategy
19544 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
19546 /* Do fcomi/sahf based test when profitable. */
19548 if (TARGET_CMOVE)
19549 return IX86_FPCMP_COMI;
19551 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19552 return IX86_FPCMP_SAHF;
19554 return IX86_FPCMP_ARITH;
19557 /* Swap, force into registers, or otherwise massage the two operands
19558 to a fp comparison. The operands are updated in place; the new
19559 comparison code is returned. */
19561 static enum rtx_code
19562 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19564 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19565 rtx op0 = *pop0, op1 = *pop1;
19566 enum machine_mode op_mode = GET_MODE (op0);
19567 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19569 /* All of the unordered compare instructions only work on registers.
19570 The same is true of the fcomi compare instructions. The XFmode
19571 compare instructions require registers except when comparing
19572 against zero or when converting operand 1 from fixed point to
19573 floating point. */
19575 if (!is_sse
19576 && (fpcmp_mode == CCFPUmode
19577 || (op_mode == XFmode
19578 && ! (standard_80387_constant_p (op0) == 1
19579 || standard_80387_constant_p (op1) == 1)
19580 && GET_CODE (op1) != FLOAT)
19581 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19583 op0 = force_reg (op_mode, op0);
19584 op1 = force_reg (op_mode, op1);
19586 else
19588 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19589 things around if they appear profitable, otherwise force op0
19590 into a register. */
19592 if (standard_80387_constant_p (op0) == 0
19593 || (MEM_P (op0)
19594 && ! (standard_80387_constant_p (op1) == 0
19595 || MEM_P (op1))))
19597 enum rtx_code new_code = ix86_fp_swap_condition (code);
19598 if (new_code != UNKNOWN)
19600 rtx tmp;
19601 tmp = op0, op0 = op1, op1 = tmp;
19602 code = new_code;
19606 if (!REG_P (op0))
19607 op0 = force_reg (op_mode, op0);
19609 if (CONSTANT_P (op1))
19611 int tmp = standard_80387_constant_p (op1);
19612 if (tmp == 0)
19613 op1 = validize_mem (force_const_mem (op_mode, op1));
19614 else if (tmp == 1)
19616 if (TARGET_CMOVE)
19617 op1 = force_reg (op_mode, op1);
19619 else
19620 op1 = force_reg (op_mode, op1);
19624 /* Try to rearrange the comparison to make it cheaper. */
19625 if (ix86_fp_comparison_cost (code)
19626 > ix86_fp_comparison_cost (swap_condition (code))
19627 && (REG_P (op1) || can_create_pseudo_p ()))
19629 rtx tmp;
19630 tmp = op0, op0 = op1, op1 = tmp;
19631 code = swap_condition (code);
19632 if (!REG_P (op0))
19633 op0 = force_reg (op_mode, op0);
19636 *pop0 = op0;
19637 *pop1 = op1;
19638 return code;
19641 /* Convert comparison codes we use to represent FP comparison to integer
19642 code that will result in proper branch. Return UNKNOWN if no such code
19643 is available. */
19645 enum rtx_code
19646 ix86_fp_compare_code_to_integer (enum rtx_code code)
19648 switch (code)
19650 case GT:
19651 return GTU;
19652 case GE:
19653 return GEU;
19654 case ORDERED:
19655 case UNORDERED:
19656 return code;
19657 break;
19658 case UNEQ:
19659 return EQ;
19660 break;
19661 case UNLT:
19662 return LTU;
19663 break;
19664 case UNLE:
19665 return LEU;
19666 break;
19667 case LTGT:
19668 return NE;
19669 break;
19670 default:
19671 return UNKNOWN;
19675 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19677 static rtx
19678 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19680 enum machine_mode fpcmp_mode, intcmp_mode;
19681 rtx tmp, tmp2;
19683 fpcmp_mode = ix86_fp_compare_mode (code);
19684 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19686 /* Do fcomi/sahf based test when profitable. */
19687 switch (ix86_fp_comparison_strategy (code))
19689 case IX86_FPCMP_COMI:
19690 intcmp_mode = fpcmp_mode;
19691 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19692 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19693 tmp);
19694 emit_insn (tmp);
19695 break;
19697 case IX86_FPCMP_SAHF:
19698 intcmp_mode = fpcmp_mode;
19699 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19700 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19701 tmp);
19703 if (!scratch)
19704 scratch = gen_reg_rtx (HImode);
19705 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19706 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19707 break;
19709 case IX86_FPCMP_ARITH:
19710 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19711 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19712 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19713 if (!scratch)
19714 scratch = gen_reg_rtx (HImode);
19715 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19717 /* In the unordered case, we have to check C2 for NaN's, which
19718 doesn't happen to work out to anything nice combination-wise.
19719 So do some bit twiddling on the value we've got in AH to come
19720 up with an appropriate set of condition codes. */
19722 intcmp_mode = CCNOmode;
19723 switch (code)
19725 case GT:
19726 case UNGT:
19727 if (code == GT || !TARGET_IEEE_FP)
19729 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19730 code = EQ;
19732 else
19734 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19735 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19736 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19737 intcmp_mode = CCmode;
19738 code = GEU;
19740 break;
19741 case LT:
19742 case UNLT:
19743 if (code == LT && TARGET_IEEE_FP)
19745 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19746 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19747 intcmp_mode = CCmode;
19748 code = EQ;
19750 else
19752 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19753 code = NE;
19755 break;
19756 case GE:
19757 case UNGE:
19758 if (code == GE || !TARGET_IEEE_FP)
19760 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19761 code = EQ;
19763 else
19765 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19766 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19767 code = NE;
19769 break;
19770 case LE:
19771 case UNLE:
19772 if (code == LE && TARGET_IEEE_FP)
19774 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19775 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19776 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19777 intcmp_mode = CCmode;
19778 code = LTU;
19780 else
19782 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19783 code = NE;
19785 break;
19786 case EQ:
19787 case UNEQ:
19788 if (code == EQ && TARGET_IEEE_FP)
19790 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19791 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19792 intcmp_mode = CCmode;
19793 code = EQ;
19795 else
19797 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19798 code = NE;
19800 break;
19801 case NE:
19802 case LTGT:
19803 if (code == NE && TARGET_IEEE_FP)
19805 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19806 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19807 GEN_INT (0x40)));
19808 code = NE;
19810 else
19812 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19813 code = EQ;
19815 break;
19817 case UNORDERED:
19818 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19819 code = NE;
19820 break;
19821 case ORDERED:
19822 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19823 code = EQ;
19824 break;
19826 default:
19827 gcc_unreachable ();
19829 break;
19831 default:
19832 gcc_unreachable();
19835 /* Return the test that should be put into the flags user, i.e.
19836 the bcc, scc, or cmov instruction. */
19837 return gen_rtx_fmt_ee (code, VOIDmode,
19838 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19839 const0_rtx);
19842 static rtx
19843 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19845 rtx ret;
19847 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19848 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19850 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19852 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19853 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19855 else
19856 ret = ix86_expand_int_compare (code, op0, op1);
19858 return ret;
19861 void
19862 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19864 enum machine_mode mode = GET_MODE (op0);
19865 rtx tmp;
19867 switch (mode)
19869 case SFmode:
19870 case DFmode:
19871 case XFmode:
19872 case QImode:
19873 case HImode:
19874 case SImode:
19875 simple:
19876 tmp = ix86_expand_compare (code, op0, op1);
19877 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19878 gen_rtx_LABEL_REF (VOIDmode, label),
19879 pc_rtx);
19880 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19881 return;
19883 case DImode:
19884 if (TARGET_64BIT)
19885 goto simple;
19886 case TImode:
19887 /* Expand a double-word branch into multiple compare+branch. */
19889 rtx lo[2], hi[2], label2;
19890 enum rtx_code code1, code2, code3;
19891 enum machine_mode submode;
19893 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19895 tmp = op0, op0 = op1, op1 = tmp;
19896 code = swap_condition (code);
19899 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19900 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19902 submode = mode == DImode ? SImode : DImode;
19904 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19905 avoid two branches. This costs one extra insn, so disable when
19906 optimizing for size. */
19908 if ((code == EQ || code == NE)
19909 && (!optimize_insn_for_size_p ()
19910 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19912 rtx xor0, xor1;
19914 xor1 = hi[0];
19915 if (hi[1] != const0_rtx)
19916 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19917 NULL_RTX, 0, OPTAB_WIDEN);
19919 xor0 = lo[0];
19920 if (lo[1] != const0_rtx)
19921 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19922 NULL_RTX, 0, OPTAB_WIDEN);
19924 tmp = expand_binop (submode, ior_optab, xor1, xor0,
19925 NULL_RTX, 0, OPTAB_WIDEN);
19927 ix86_expand_branch (code, tmp, const0_rtx, label);
19928 return;
19931 /* Otherwise, if we are doing a less-than or greater-than-or-equal
19932 comparison, op1 is a constant, and the low word is zero, then we can
19933 just examine the high word. Similarly for a low word of -1 and a
19934 less-than-or-equal or greater-than comparison. */
19936 if (CONST_INT_P (hi[1]))
19937 switch (code)
19939 case LT: case LTU: case GE: case GEU:
19940 if (lo[1] == const0_rtx)
19942 ix86_expand_branch (code, hi[0], hi[1], label);
19943 return;
19945 break;
19946 case LE: case LEU: case GT: case GTU:
19947 if (lo[1] == constm1_rtx)
19949 ix86_expand_branch (code, hi[0], hi[1], label);
19950 return;
19952 break;
19953 default:
19954 break;
19957 /* Otherwise, we need two or three jumps. */
19959 label2 = gen_label_rtx ();
19961 code1 = code;
19962 code2 = swap_condition (code);
19963 code3 = unsigned_condition (code);
19965 switch (code)
19967 case LT: case GT: case LTU: case GTU:
19968 break;
19970 case LE: code1 = LT; code2 = GT; break;
19971 case GE: code1 = GT; code2 = LT; break;
19972 case LEU: code1 = LTU; code2 = GTU; break;
19973 case GEU: code1 = GTU; code2 = LTU; break;
19975 case EQ: code1 = UNKNOWN; code2 = NE; break;
19976 case NE: code2 = UNKNOWN; break;
19978 default:
19979 gcc_unreachable ();
19983 * a < b =>
19984 * if (hi(a) < hi(b)) goto true;
19985 * if (hi(a) > hi(b)) goto false;
19986 * if (lo(a) < lo(b)) goto true;
19987 * false:
19990 if (code1 != UNKNOWN)
19991 ix86_expand_branch (code1, hi[0], hi[1], label);
19992 if (code2 != UNKNOWN)
19993 ix86_expand_branch (code2, hi[0], hi[1], label2);
19995 ix86_expand_branch (code3, lo[0], lo[1], label);
19997 if (code2 != UNKNOWN)
19998 emit_label (label2);
19999 return;
20002 default:
20003 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20004 goto simple;
20008 /* Split branch based on floating point condition. */
20009 void
20010 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20011 rtx target1, rtx target2, rtx tmp, rtx pushed)
20013 rtx condition;
20014 rtx i;
20016 if (target2 != pc_rtx)
20018 rtx tmp = target2;
20019 code = reverse_condition_maybe_unordered (code);
20020 target2 = target1;
20021 target1 = tmp;
20024 condition = ix86_expand_fp_compare (code, op1, op2,
20025 tmp);
20027 /* Remove pushed operand from stack. */
20028 if (pushed)
20029 ix86_free_from_memory (GET_MODE (pushed));
20031 i = emit_jump_insn (gen_rtx_SET
20032 (VOIDmode, pc_rtx,
20033 gen_rtx_IF_THEN_ELSE (VOIDmode,
20034 condition, target1, target2)));
20035 if (split_branch_probability >= 0)
20036 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20039 void
20040 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20042 rtx ret;
20044 gcc_assert (GET_MODE (dest) == QImode);
20046 ret = ix86_expand_compare (code, op0, op1);
20047 PUT_MODE (ret, QImode);
20048 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20051 /* Expand a comparison setting or clearing the carry flag. Return true when
20052 successful and set *POP to the comparison operation. */
20053 static bool
20054 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20056 enum machine_mode mode =
20057 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20059 /* Do not handle double-mode compares that go through a special path. */
20060 if (mode == (TARGET_64BIT ? TImode : DImode))
20061 return false;
20063 if (SCALAR_FLOAT_MODE_P (mode))
20065 rtx compare_op, compare_seq;
20067 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20069 /* Shortcut: the following common codes never translate
20070 into carry flag compares. */
20071 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20072 || code == ORDERED || code == UNORDERED)
20073 return false;
20075 /* These comparisons require zero flag; swap operands so they won't. */
20076 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20077 && !TARGET_IEEE_FP)
20079 rtx tmp = op0;
20080 op0 = op1;
20081 op1 = tmp;
20082 code = swap_condition (code);
20085 /* Try to expand the comparison and verify that we end up with
20086 a carry flag based comparison. This fails to be true only when
20087 we decide to expand the comparison using arithmetic, which is
20088 not a common scenario. */
20089 start_sequence ();
20090 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20091 compare_seq = get_insns ();
20092 end_sequence ();
20094 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20095 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20096 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20097 else
20098 code = GET_CODE (compare_op);
20100 if (code != LTU && code != GEU)
20101 return false;
20103 emit_insn (compare_seq);
20104 *pop = compare_op;
20105 return true;
20108 if (!INTEGRAL_MODE_P (mode))
20109 return false;
20111 switch (code)
20113 case LTU:
20114 case GEU:
20115 break;
20117 /* Convert a==0 into (unsigned)a<1. */
20118 case EQ:
20119 case NE:
20120 if (op1 != const0_rtx)
20121 return false;
20122 op1 = const1_rtx;
20123 code = (code == EQ ? LTU : GEU);
20124 break;
20126 /* Convert a>b into b<a or a>=b+1. */
20127 case GTU:
20128 case LEU:
20129 if (CONST_INT_P (op1))
20131 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20132 /* Bail out on overflow. We can still swap the operands, but that
20133 would force loading of the constant into a register. */
20134 if (op1 == const0_rtx
20135 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20136 return false;
20137 code = (code == GTU ? GEU : LTU);
20139 else
20141 rtx tmp = op1;
20142 op1 = op0;
20143 op0 = tmp;
20144 code = (code == GTU ? LTU : GEU);
20146 break;
20148 /* Convert a>=0 into (unsigned)a<0x80000000. */
20149 case LT:
20150 case GE:
20151 if (mode == DImode || op1 != const0_rtx)
20152 return false;
20153 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20154 code = (code == LT ? GEU : LTU);
20155 break;
20156 case LE:
20157 case GT:
20158 if (mode == DImode || op1 != constm1_rtx)
20159 return false;
20160 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20161 code = (code == LE ? GEU : LTU);
20162 break;
20164 default:
20165 return false;
20167 /* Swapping operands may cause a constant to appear as the first operand. */
20168 if (!nonimmediate_operand (op0, VOIDmode))
20170 if (!can_create_pseudo_p ())
20171 return false;
20172 op0 = force_reg (mode, op0);
20174 *pop = ix86_expand_compare (code, op0, op1);
20175 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20176 return true;
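/* For instance (illustrative values): "x == 0" is rewritten above into the
   unsigned test "x < 1", and "x > 42" (unsigned) into "x >= 43"; in both
   cases the result lives entirely in the carry flag, so the caller can
   consume it with sbb/adc-style sequences.  */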
20179 bool
20180 ix86_expand_int_movcc (rtx operands[])
20182 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20183 rtx compare_seq, compare_op;
20184 enum machine_mode mode = GET_MODE (operands[0]);
20185 bool sign_bit_compare_p = false;
20186 rtx op0 = XEXP (operands[1], 0);
20187 rtx op1 = XEXP (operands[1], 1);
20189 if (GET_MODE (op0) == TImode
20190 || (GET_MODE (op0) == DImode
20191 && !TARGET_64BIT))
20192 return false;
20194 start_sequence ();
20195 compare_op = ix86_expand_compare (code, op0, op1);
20196 compare_seq = get_insns ();
20197 end_sequence ();
20199 compare_code = GET_CODE (compare_op);
20201 if ((op1 == const0_rtx && (code == GE || code == LT))
20202 || (op1 == constm1_rtx && (code == GT || code == LE)))
20203 sign_bit_compare_p = true;
20205 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20206 HImode insns, we'd be swallowed in word prefix ops. */
20208 if ((mode != HImode || TARGET_FAST_PREFIX)
20209 && (mode != (TARGET_64BIT ? TImode : DImode))
20210 && CONST_INT_P (operands[2])
20211 && CONST_INT_P (operands[3]))
20213 rtx out = operands[0];
20214 HOST_WIDE_INT ct = INTVAL (operands[2]);
20215 HOST_WIDE_INT cf = INTVAL (operands[3]);
20216 HOST_WIDE_INT diff;
20218 diff = ct - cf;
20219 /* Sign bit compares are better done using shifts than by using
20220 sbb. */
20221 if (sign_bit_compare_p
20222 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20224 /* Detect overlap between destination and compare sources. */
20225 rtx tmp = out;
20227 if (!sign_bit_compare_p)
20229 rtx flags;
20230 bool fpcmp = false;
20232 compare_code = GET_CODE (compare_op);
20234 flags = XEXP (compare_op, 0);
20236 if (GET_MODE (flags) == CCFPmode
20237 || GET_MODE (flags) == CCFPUmode)
20239 fpcmp = true;
20240 compare_code
20241 = ix86_fp_compare_code_to_integer (compare_code);
20244 /* To simplify the rest of the code, restrict to the GEU case. */
20245 if (compare_code == LTU)
20247 HOST_WIDE_INT tmp = ct;
20248 ct = cf;
20249 cf = tmp;
20250 compare_code = reverse_condition (compare_code);
20251 code = reverse_condition (code);
20253 else
20255 if (fpcmp)
20256 PUT_CODE (compare_op,
20257 reverse_condition_maybe_unordered
20258 (GET_CODE (compare_op)));
20259 else
20260 PUT_CODE (compare_op,
20261 reverse_condition (GET_CODE (compare_op)));
20263 diff = ct - cf;
20265 if (reg_overlap_mentioned_p (out, op0)
20266 || reg_overlap_mentioned_p (out, op1))
20267 tmp = gen_reg_rtx (mode);
20269 if (mode == DImode)
20270 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20271 else
20272 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20273 flags, compare_op));
20275 else
20277 if (code == GT || code == GE)
20278 code = reverse_condition (code);
20279 else
20281 HOST_WIDE_INT tmp = ct;
20282 ct = cf;
20283 cf = tmp;
20284 diff = ct - cf;
20286 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20289 if (diff == 1)
20292 * cmpl op0,op1
20293 * sbbl dest,dest
20294 * [addl dest, ct]
20296 * Size 5 - 8.
20298 if (ct)
20299 tmp = expand_simple_binop (mode, PLUS,
20300 tmp, GEN_INT (ct),
20301 copy_rtx (tmp), 1, OPTAB_DIRECT);
20303 else if (cf == -1)
20306 * cmpl op0,op1
20307 * sbbl dest,dest
20308 * orl $ct, dest
20310 * Size 8.
20312 tmp = expand_simple_binop (mode, IOR,
20313 tmp, GEN_INT (ct),
20314 copy_rtx (tmp), 1, OPTAB_DIRECT);
20316 else if (diff == -1 && ct)
20319 * cmpl op0,op1
20320 * sbbl dest,dest
20321 * notl dest
20322 * [addl dest, cf]
20324 * Size 8 - 11.
20326 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20327 if (cf)
20328 tmp = expand_simple_binop (mode, PLUS,
20329 copy_rtx (tmp), GEN_INT (cf),
20330 copy_rtx (tmp), 1, OPTAB_DIRECT);
20332 else
20335 * cmpl op0,op1
20336 * sbbl dest,dest
20337 * [notl dest]
20338 * andl cf - ct, dest
20339 * [addl dest, ct]
20341 * Size 8 - 11.
20344 if (cf == 0)
20346 cf = ct;
20347 ct = 0;
20348 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20351 tmp = expand_simple_binop (mode, AND,
20352 copy_rtx (tmp),
20353 gen_int_mode (cf - ct, mode),
20354 copy_rtx (tmp), 1, OPTAB_DIRECT);
20355 if (ct)
20356 tmp = expand_simple_binop (mode, PLUS,
20357 copy_rtx (tmp), GEN_INT (ct),
20358 copy_rtx (tmp), 1, OPTAB_DIRECT);
20361 if (!rtx_equal_p (tmp, out))
20362 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20364 return true;
20367 if (diff < 0)
20369 enum machine_mode cmp_mode = GET_MODE (op0);
20371 HOST_WIDE_INT tmp;
20372 tmp = ct, ct = cf, cf = tmp;
20373 diff = -diff;
20375 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20377 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20379 /* We may be reversing an unordered compare to a normal compare, which
20380 is not valid in general (we may convert a non-trapping condition
20381 to a trapping one); however, on i386 we currently emit all
20382 comparisons unordered. */
20383 compare_code = reverse_condition_maybe_unordered (compare_code);
20384 code = reverse_condition_maybe_unordered (code);
20386 else
20388 compare_code = reverse_condition (compare_code);
20389 code = reverse_condition (code);
20393 compare_code = UNKNOWN;
20394 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20395 && CONST_INT_P (op1))
20397 if (op1 == const0_rtx
20398 && (code == LT || code == GE))
20399 compare_code = code;
20400 else if (op1 == constm1_rtx)
20402 if (code == LE)
20403 compare_code = LT;
20404 else if (code == GT)
20405 compare_code = GE;
20409 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20410 if (compare_code != UNKNOWN
20411 && GET_MODE (op0) == GET_MODE (out)
20412 && (cf == -1 || ct == -1))
20414 /* If lea code below could be used, only optimize
20415 if it results in a 2 insn sequence. */
20417 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20418 || diff == 3 || diff == 5 || diff == 9)
20419 || (compare_code == LT && ct == -1)
20420 || (compare_code == GE && cf == -1))
20423 * notl op1 (if necessary)
20424 * sarl $31, op1
20425 * orl cf, op1
20427 if (ct != -1)
20429 cf = ct;
20430 ct = -1;
20431 code = reverse_condition (code);
20434 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20436 out = expand_simple_binop (mode, IOR,
20437 out, GEN_INT (cf),
20438 out, 1, OPTAB_DIRECT);
20439 if (out != operands[0])
20440 emit_move_insn (operands[0], out);
20442 return true;
20447 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20448 || diff == 3 || diff == 5 || diff == 9)
20449 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20450 && (mode != DImode
20451 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20454 * xorl dest,dest
20455 * cmpl op1,op2
20456 * setcc dest
20457 * lea cf(dest*(ct-cf)),dest
20459 * Size 14.
20461 * This also catches the degenerate setcc-only case.
20464 rtx tmp;
20465 int nops;
20467 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20469 nops = 0;
20470 /* On x86_64 the lea instruction operates on Pmode, so we need
20471 to get the arithmetic done in the proper mode to match. */
20472 if (diff == 1)
20473 tmp = copy_rtx (out);
20474 else
20476 rtx out1;
20477 out1 = copy_rtx (out);
20478 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20479 nops++;
20480 if (diff & 1)
20482 tmp = gen_rtx_PLUS (mode, tmp, out1);
20483 nops++;
20486 if (cf != 0)
20488 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20489 nops++;
20491 if (!rtx_equal_p (tmp, out))
20493 if (nops == 1)
20494 out = force_operand (tmp, copy_rtx (out));
20495 else
20496 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20498 if (!rtx_equal_p (out, operands[0]))
20499 emit_move_insn (operands[0], copy_rtx (out));
20501 return true;
20505 * General case: Jumpful:
20506 * xorl dest,dest cmpl op1, op2
20507 * cmpl op1, op2 movl ct, dest
20508 * setcc dest jcc 1f
20509 * decl dest movl cf, dest
20510 * andl (cf-ct),dest 1:
20511 * addl ct,dest
20513 * Size 20. Size 14.
20515 * This is reasonably steep, but branch mispredict costs are
20516 * high on modern cpus, so consider failing only if optimizing
20517 * for space.
20520 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20521 && BRANCH_COST (optimize_insn_for_speed_p (),
20522 false) >= 2)
20524 if (cf == 0)
20526 enum machine_mode cmp_mode = GET_MODE (op0);
20528 cf = ct;
20529 ct = 0;
20531 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20533 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20535 /* We may be reversing an unordered compare to a normal compare,
20536 which is not valid in general (we may convert a non-trapping
20537 condition to a trapping one); however, on i386 we currently
20538 emit all comparisons unordered. */
20539 code = reverse_condition_maybe_unordered (code);
20541 else
20543 code = reverse_condition (code);
20544 if (compare_code != UNKNOWN)
20545 compare_code = reverse_condition (compare_code);
20549 if (compare_code != UNKNOWN)
20551 /* notl op1 (if needed)
20552 sarl $31, op1
20553 andl (cf-ct), op1
20554 addl ct, op1
20556 For x < 0 (resp. x <= -1) there will be no notl,
20557 so if possible swap the constants to get rid of the
20558 complement.
20559 True/false will be -1/0 while code below (store flag
20560 followed by decrement) is 0/-1, so the constants need
20561 to be exchanged once more. */
20563 if (compare_code == GE || !cf)
20565 code = reverse_condition (code);
20566 compare_code = LT;
20568 else
20570 HOST_WIDE_INT tmp = cf;
20571 cf = ct;
20572 ct = tmp;
20575 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20577 else
20579 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20581 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20582 constm1_rtx,
20583 copy_rtx (out), 1, OPTAB_DIRECT);
20586 out = expand_simple_binop (mode, AND, copy_rtx (out),
20587 gen_int_mode (cf - ct, mode),
20588 copy_rtx (out), 1, OPTAB_DIRECT);
20589 if (ct)
20590 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20591 copy_rtx (out), 1, OPTAB_DIRECT);
20592 if (!rtx_equal_p (out, operands[0]))
20593 emit_move_insn (operands[0], copy_rtx (out));
20595 return true;
20599 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20601 /* Try a few more things with specific constants and a variable. */
20603 optab op;
20604 rtx var, orig_out, out, tmp;
20606 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20607 return false;
20609 /* If one of the two operands is an interesting constant, load a
20610 constant with the above and mask it in with a logical operation. */
20612 if (CONST_INT_P (operands[2]))
20614 var = operands[3];
20615 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20616 operands[3] = constm1_rtx, op = and_optab;
20617 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20618 operands[3] = const0_rtx, op = ior_optab;
20619 else
20620 return false;
20622 else if (CONST_INT_P (operands[3]))
20624 var = operands[2];
20625 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20626 operands[2] = constm1_rtx, op = and_optab;
20627 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
20628 operands[2] = const0_rtx, op = ior_optab;
20629 else
20630 return false;
20632 else
20633 return false;
20635 orig_out = operands[0];
20636 tmp = gen_reg_rtx (mode);
20637 operands[0] = tmp;
20639 /* Recurse to get the constant loaded. */
20640 if (ix86_expand_int_movcc (operands) == 0)
20641 return false;
20643 /* Mask in the interesting variable. */
20644 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20645 OPTAB_WIDEN);
20646 if (!rtx_equal_p (out, orig_out))
20647 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20649 return true;
20653 * For comparison with above,
20655 * movl cf,dest
20656 * movl ct,tmp
20657 * cmpl op1,op2
20658 * cmovcc tmp,dest
20660 * Size 15.
20663 if (! nonimmediate_operand (operands[2], mode))
20664 operands[2] = force_reg (mode, operands[2]);
20665 if (! nonimmediate_operand (operands[3], mode))
20666 operands[3] = force_reg (mode, operands[3]);
20668 if (! register_operand (operands[2], VOIDmode)
20669 && (mode == QImode
20670 || ! register_operand (operands[3], VOIDmode)))
20671 operands[2] = force_reg (mode, operands[2]);
20673 if (mode == QImode
20674 && ! register_operand (operands[3], VOIDmode))
20675 operands[3] = force_reg (mode, operands[3]);
20677 emit_insn (compare_seq);
20678 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20679 gen_rtx_IF_THEN_ELSE (mode,
20680 compare_op, operands[2],
20681 operands[3])));
20682 return true;
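/* A concrete instance of the jumpless general case documented above, with
   the illustrative constants ct = 5, cf = 2 (so cf - ct = -3) and arbitrary
   operand registers:

     xorl  %eax, %eax
     cmpl  %edx, %ecx
     setcc %al            ; 1 if the condition holds, else 0
     decl  %eax           ; 0 if it holds, else -1
     andl  $-3, %eax      ; 0 or -3
     addl  $5, %eax       ; 5 if the condition holds, else 2

   avoiding a conditional branch entirely.  */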
20685 /* Swap, force into registers, or otherwise massage the two operands
20686 to an sse comparison with a mask result. Thus we differ a bit from
20687 ix86_prepare_fp_compare_args which expects to produce a flags result.
20689 The DEST operand exists to help determine whether to commute commutative
20690 operators. The POP0/POP1 operands are updated in place. The new
20691 comparison code is returned, or UNKNOWN if not implementable. */
20693 static enum rtx_code
20694 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20695 rtx *pop0, rtx *pop1)
20697 rtx tmp;
20699 switch (code)
20701 case LTGT:
20702 case UNEQ:
20703 /* AVX supports all the needed comparisons. */
20704 if (TARGET_AVX)
20705 break;
20706 /* We have no LTGT as an operator. We could implement it with
20707 NE & ORDERED, but this requires an extra temporary. It's
20708 not clear that it's worth it. */
20709 return UNKNOWN;
20711 case LT:
20712 case LE:
20713 case UNGT:
20714 case UNGE:
20715 /* These are supported directly. */
20716 break;
20718 case EQ:
20719 case NE:
20720 case UNORDERED:
20721 case ORDERED:
20722 /* AVX has 3 operand comparisons, no need to swap anything. */
20723 if (TARGET_AVX)
20724 break;
20725 /* For commutative operators, try to canonicalize the destination
20726 operand to be first in the comparison - this helps reload to
20727 avoid extra moves. */
20728 if (!dest || !rtx_equal_p (dest, *pop1))
20729 break;
20730 /* FALLTHRU */
20732 case GE:
20733 case GT:
20734 case UNLE:
20735 case UNLT:
20736 /* These are not supported directly before AVX, and furthermore
20737 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20738 comparison operands to transform into something that is
20739 supported. */
20740 tmp = *pop0;
20741 *pop0 = *pop1;
20742 *pop1 = tmp;
20743 code = swap_condition (code);
20744 break;
20746 default:
20747 gcc_unreachable ();
20750 return code;
20753 /* Detect conditional moves that exactly match min/max operational
20754 semantics. Note that this is IEEE safe, as long as we don't
20755 interchange the operands.
20757 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20758 and TRUE if the operation is successful and instructions are emitted. */
20760 static bool
20761 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20762 rtx cmp_op1, rtx if_true, rtx if_false)
20764 enum machine_mode mode;
20765 bool is_min;
20766 rtx tmp;
20768 if (code == LT)
20770 else if (code == UNGE)
20772 tmp = if_true;
20773 if_true = if_false;
20774 if_false = tmp;
20776 else
20777 return false;
20779 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20780 is_min = true;
20781 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20782 is_min = false;
20783 else
20784 return false;
20786 mode = GET_MODE (dest);
20788 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20789 but MODE may be a vector mode and thus not appropriate. */
20790 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20792 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20793 rtvec v;
20795 if_true = force_reg (mode, if_true);
20796 v = gen_rtvec (2, if_true, if_false);
20797 tmp = gen_rtx_UNSPEC (mode, v, u);
20799 else
20801 code = is_min ? SMIN : SMAX;
20802 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20805 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20806 return true;
20809 /* Expand an sse vector comparison. Return the register with the result. */
20811 static rtx
20812 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20813 rtx op_true, rtx op_false)
20815 enum machine_mode mode = GET_MODE (dest);
20816 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
20818 /* In the general case the result of the comparison can differ from the operands' type. */
20819 enum machine_mode cmp_mode;
20821 /* In AVX512F the result of comparison is an integer mask. */
20822 bool maskcmp = false;
20823 rtx x;
20825 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
20827 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
20828 gcc_assert (cmp_mode != BLKmode);
20830 maskcmp = true;
20832 else
20833 cmp_mode = cmp_ops_mode;
20836 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
20837 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
20838 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
20840 if (optimize
20841 || reg_overlap_mentioned_p (dest, op_true)
20842 || reg_overlap_mentioned_p (dest, op_false))
20843 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
20845 /* Compare patterns for int modes are unspec in AVX512F only. */
20846 if (maskcmp && (code == GT || code == EQ))
20848 rtx (*gen)(rtx, rtx, rtx);
20850 switch (cmp_ops_mode)
20852 case V16SImode:
20853 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
20854 break;
20855 case V8DImode:
20856 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
20857 break;
20858 default:
20859 gen = NULL;
20862 if (gen)
20864 emit_insn (gen (dest, cmp_op0, cmp_op1));
20865 return dest;
20868 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20870 if (cmp_mode != mode && !maskcmp)
20872 x = force_reg (cmp_ops_mode, x);
20873 convert_move (dest, x, false);
20875 else
20876 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20878 return dest;
20881 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20882 operations. This is used for both scalar and vector conditional moves. */
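/* In the general fallback below this amounts to computing
   dest = (cmp & op_true) | (~cmp & op_false),
   where CMP is a per-element all-ones/all-zeros mask.  */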
20884 static void
20885 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20887 enum machine_mode mode = GET_MODE (dest);
20888 enum machine_mode cmpmode = GET_MODE (cmp);
20890 /* In AVX512F the result of comparison is an integer mask. */
20891 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
20893 rtx t2, t3, x;
20895 if (vector_all_ones_operand (op_true, mode)
20896 && rtx_equal_p (op_false, CONST0_RTX (mode))
20897 && !maskcmp)
20899 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20901 else if (op_false == CONST0_RTX (mode)
20902 && !maskcmp)
20904 op_true = force_reg (mode, op_true);
20905 x = gen_rtx_AND (mode, cmp, op_true);
20906 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20908 else if (op_true == CONST0_RTX (mode)
20909 && !maskcmp)
20911 op_false = force_reg (mode, op_false);
20912 x = gen_rtx_NOT (mode, cmp);
20913 x = gen_rtx_AND (mode, x, op_false);
20914 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20916 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
20917 && !maskcmp)
20919 op_false = force_reg (mode, op_false);
20920 x = gen_rtx_IOR (mode, cmp, op_false);
20921 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20923 else if (TARGET_XOP
20924 && !maskcmp)
20926 op_true = force_reg (mode, op_true);
20928 if (!nonimmediate_operand (op_false, mode))
20929 op_false = force_reg (mode, op_false);
20931 emit_insn (gen_rtx_SET (mode, dest,
20932 gen_rtx_IF_THEN_ELSE (mode, cmp,
20933 op_true,
20934 op_false)));
20936 else
20938 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
20939 rtx d = dest;
20941 if (!nonimmediate_operand (op_true, mode))
20942 op_true = force_reg (mode, op_true);
20944 op_false = force_reg (mode, op_false);
20946 switch (mode)
20948 case V4SFmode:
20949 if (TARGET_SSE4_1)
20950 gen = gen_sse4_1_blendvps;
20951 break;
20952 case V2DFmode:
20953 if (TARGET_SSE4_1)
20954 gen = gen_sse4_1_blendvpd;
20955 break;
20956 case V16QImode:
20957 case V8HImode:
20958 case V4SImode:
20959 case V2DImode:
20960 if (TARGET_SSE4_1)
20962 gen = gen_sse4_1_pblendvb;
20963 if (mode != V16QImode)
20964 d = gen_reg_rtx (V16QImode);
20965 op_false = gen_lowpart (V16QImode, op_false);
20966 op_true = gen_lowpart (V16QImode, op_true);
20967 cmp = gen_lowpart (V16QImode, cmp);
20969 break;
20970 case V8SFmode:
20971 if (TARGET_AVX)
20972 gen = gen_avx_blendvps256;
20973 break;
20974 case V4DFmode:
20975 if (TARGET_AVX)
20976 gen = gen_avx_blendvpd256;
20977 break;
20978 case V32QImode:
20979 case V16HImode:
20980 case V8SImode:
20981 case V4DImode:
20982 if (TARGET_AVX2)
20984 gen = gen_avx2_pblendvb;
20985 if (mode != V32QImode)
20986 d = gen_reg_rtx (V32QImode);
20987 op_false = gen_lowpart (V32QImode, op_false);
20988 op_true = gen_lowpart (V32QImode, op_true);
20989 cmp = gen_lowpart (V32QImode, cmp);
20991 break;
20993 case V16SImode:
20994 gen = gen_avx512f_blendmv16si;
20995 break;
20996 case V8DImode:
20997 gen = gen_avx512f_blendmv8di;
20998 break;
20999 case V8DFmode:
21000 gen = gen_avx512f_blendmv8df;
21001 break;
21002 case V16SFmode:
21003 gen = gen_avx512f_blendmv16sf;
21004 break;
21006 default:
21007 break;
21010 if (gen != NULL)
21012 emit_insn (gen (d, op_false, op_true, cmp));
21013 if (d != dest)
21014 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21016 else
21018 op_true = force_reg (mode, op_true);
21020 t2 = gen_reg_rtx (mode);
21021 if (optimize)
21022 t3 = gen_reg_rtx (mode);
21023 else
21024 t3 = dest;
21026 x = gen_rtx_AND (mode, op_true, cmp);
21027 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21029 x = gen_rtx_NOT (mode, cmp);
21030 x = gen_rtx_AND (mode, x, op_false);
21031 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21033 x = gen_rtx_IOR (mode, t3, t2);
21034 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21039 /* Expand a floating-point conditional move. Return true if successful. */
21041 bool
21042 ix86_expand_fp_movcc (rtx operands[])
21044 enum machine_mode mode = GET_MODE (operands[0]);
21045 enum rtx_code code = GET_CODE (operands[1]);
21046 rtx tmp, compare_op;
21047 rtx op0 = XEXP (operands[1], 0);
21048 rtx op1 = XEXP (operands[1], 1);
21050 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21052 enum machine_mode cmode;
21054 /* Since we've no cmove for sse registers, don't force bad register
21055 allocation just to gain access to it. Deny movcc when the
21056 comparison mode doesn't match the move mode. */
21057 cmode = GET_MODE (op0);
21058 if (cmode == VOIDmode)
21059 cmode = GET_MODE (op1);
21060 if (cmode != mode)
21061 return false;
21063 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21064 if (code == UNKNOWN)
21065 return false;
21067 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21068 operands[2], operands[3]))
21069 return true;
21071 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21072 operands[2], operands[3]);
21073 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21074 return true;
21077 if (GET_MODE (op0) == TImode
21078 || (GET_MODE (op0) == DImode
21079 && !TARGET_64BIT))
21080 return false;
21082 /* The floating point conditional move instructions don't directly
21083 support conditions resulting from a signed integer comparison. */
21085 compare_op = ix86_expand_compare (code, op0, op1);
21086 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21088 tmp = gen_reg_rtx (QImode);
21089 ix86_expand_setcc (tmp, code, op0, op1);
21091 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21094 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21095 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21096 operands[2], operands[3])));
21098 return true;
21101 /* Expand a floating-point vector conditional move; a vcond operation
21102 rather than a movcc operation. */
21104 bool
21105 ix86_expand_fp_vcond (rtx operands[])
21107 enum rtx_code code = GET_CODE (operands[3]);
21108 rtx cmp;
21110 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21111 &operands[4], &operands[5]);
21112 if (code == UNKNOWN)
21114 rtx temp;
21115 switch (GET_CODE (operands[3]))
21117 case LTGT:
21118 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21119 operands[5], operands[0], operands[0]);
21120 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21121 operands[5], operands[1], operands[2]);
21122 code = AND;
21123 break;
21124 case UNEQ:
21125 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21126 operands[5], operands[0], operands[0]);
21127 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21128 operands[5], operands[1], operands[2]);
21129 code = IOR;
21130 break;
21131 default:
21132 gcc_unreachable ();
21134 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21135 OPTAB_DIRECT);
21136 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21137 return true;
21140 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21141 operands[5], operands[1], operands[2]))
21142 return true;
21144 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21145 operands[1], operands[2]);
21146 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21147 return true;
21150 /* Expand a signed/unsigned integral vector conditional move. */
21152 bool
21153 ix86_expand_int_vcond (rtx operands[])
21155 enum machine_mode data_mode = GET_MODE (operands[0]);
21156 enum machine_mode mode = GET_MODE (operands[4]);
21157 enum rtx_code code = GET_CODE (operands[3]);
21158 bool negate = false;
21159 rtx x, cop0, cop1;
21161 cop0 = operands[4];
21162 cop1 = operands[5];
21164 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21165 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
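/* E.g. for V4SImode elements, the -1 : 0 form becomes an arithmetic
   shift right by 31 (psrad) and the 1 : 0 form a logical shift right
   by 31 (psrld).  */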
21166 if ((code == LT || code == GE)
21167 && data_mode == mode
21168 && cop1 == CONST0_RTX (mode)
21169 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21170 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21171 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21172 && (GET_MODE_SIZE (data_mode) == 16
21173 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21175 rtx negop = operands[2 - (code == LT)];
21176 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21177 if (negop == CONST1_RTX (data_mode))
21179 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21180 operands[0], 1, OPTAB_DIRECT);
21181 if (res != operands[0])
21182 emit_move_insn (operands[0], res);
21183 return true;
21185 else if (GET_MODE_INNER (data_mode) != DImode
21186 && vector_all_ones_operand (negop, data_mode))
21188 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21189 operands[0], 0, OPTAB_DIRECT);
21190 if (res != operands[0])
21191 emit_move_insn (operands[0], res);
21192 return true;
21196 if (!nonimmediate_operand (cop1, mode))
21197 cop1 = force_reg (mode, cop1);
21198 if (!general_operand (operands[1], data_mode))
21199 operands[1] = force_reg (data_mode, operands[1]);
21200 if (!general_operand (operands[2], data_mode))
21201 operands[2] = force_reg (data_mode, operands[2]);
21203 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21204 if (TARGET_XOP
21205 && (mode == V16QImode || mode == V8HImode
21206 || mode == V4SImode || mode == V2DImode))
21208 else
21210 /* Canonicalize the comparison to EQ, GT, GTU. */
21211 switch (code)
21213 case EQ:
21214 case GT:
21215 case GTU:
21216 break;
21218 case NE:
21219 case LE:
21220 case LEU:
21221 code = reverse_condition (code);
21222 negate = true;
21223 break;
21225 case GE:
21226 case GEU:
21227 code = reverse_condition (code);
21228 negate = true;
21229 /* FALLTHRU */
21231 case LT:
21232 case LTU:
21233 code = swap_condition (code);
21234 x = cop0, cop0 = cop1, cop1 = x;
21235 break;
21237 default:
21238 gcc_unreachable ();
21241 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21242 if (mode == V2DImode)
21244 switch (code)
21246 case EQ:
21247 /* SSE4.1 supports EQ. */
21248 if (!TARGET_SSE4_1)
21249 return false;
21250 break;
21252 case GT:
21253 case GTU:
21254 /* SSE4.2 supports GT/GTU. */
21255 if (!TARGET_SSE4_2)
21256 return false;
21257 break;
21259 default:
21260 gcc_unreachable ();
21264 /* Unsigned parallel compare is not supported by the hardware.
21265 Play some tricks to turn this into a signed comparison
21266 against 0. */
21267 if (code == GTU)
21269 cop0 = force_reg (mode, cop0);
21271 switch (mode)
21273 case V16SImode:
21274 case V8DImode:
21275 case V8SImode:
21276 case V4DImode:
21277 case V4SImode:
21278 case V2DImode:
21280 rtx t1, t2, mask;
21281 rtx (*gen_sub3) (rtx, rtx, rtx);
21283 switch (mode)
21285 case V16SImode: gen_sub3 = gen_subv16si3; break;
21286 case V8DImode: gen_sub3 = gen_subv8di3; break;
21287 case V8SImode: gen_sub3 = gen_subv8si3; break;
21288 case V4DImode: gen_sub3 = gen_subv4di3; break;
21289 case V4SImode: gen_sub3 = gen_subv4si3; break;
21290 case V2DImode: gen_sub3 = gen_subv2di3; break;
21291 default:
21292 gcc_unreachable ();
21294 /* Subtract (-(INT MAX) - 1) from both operands to make
21295 them signed. */
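/* E.g. with 32-bit elements, a >u b iff
   (a - 0x80000000) >s (b - 0x80000000); for a = 0xffffffff and b = 1
   the biased values are 0x7fffffff and 0x80000001, and the signed
   compare is true as required.  */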
21296 mask = ix86_build_signbit_mask (mode, true, false);
21297 t1 = gen_reg_rtx (mode);
21298 emit_insn (gen_sub3 (t1, cop0, mask));
21300 t2 = gen_reg_rtx (mode);
21301 emit_insn (gen_sub3 (t2, cop1, mask));
21303 cop0 = t1;
21304 cop1 = t2;
21305 code = GT;
21307 break;
21309 case V32QImode:
21310 case V16HImode:
21311 case V16QImode:
21312 case V8HImode:
21313 /* Perform a parallel unsigned saturating subtraction. */
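/* E.g. for bytes, a >u b iff the saturating difference (a -us b) is
   non-zero, so the code compares that difference against zero with EQ
   and flips NEGATE to invert the final selection.  */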
21314 x = gen_reg_rtx (mode);
21315 emit_insn (gen_rtx_SET (VOIDmode, x,
21316 gen_rtx_US_MINUS (mode, cop0, cop1)));
21318 cop0 = x;
21319 cop1 = CONST0_RTX (mode);
21320 code = EQ;
21321 negate = !negate;
21322 break;
21324 default:
21325 gcc_unreachable ();
21330 /* Allow the comparison to be done in one mode, but the movcc to
21331 happen in another mode. */
21332 if (data_mode == mode)
21334 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21335 operands[1+negate], operands[2-negate]);
21337 else
21339 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21340 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21341 operands[1+negate], operands[2-negate]);
21342 if (GET_MODE (x) == mode)
21343 x = gen_lowpart (data_mode, x);
21346 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21347 operands[2-negate]);
21348 return true;
21351 static bool
21352 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
21354 enum machine_mode mode = GET_MODE (op0);
21355 switch (mode)
21357 case V16SImode:
21358 emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
21359 force_reg (V16SImode, mask),
21360 op1));
21361 return true;
21362 case V16SFmode:
21363 emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
21364 force_reg (V16SImode, mask),
21365 op1));
21366 return true;
21367 case V8DImode:
21368 emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
21369 force_reg (V8DImode, mask), op1));
21370 return true;
21371 case V8DFmode:
21372 emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
21373 force_reg (V8DImode, mask), op1));
21374 return true;
21375 default:
21376 return false;
21380 /* Expand a variable vector permutation. */
21382 void
21383 ix86_expand_vec_perm (rtx operands[])
21385 rtx target = operands[0];
21386 rtx op0 = operands[1];
21387 rtx op1 = operands[2];
21388 rtx mask = operands[3];
21389 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21390 enum machine_mode mode = GET_MODE (op0);
21391 enum machine_mode maskmode = GET_MODE (mask);
21392 int w, e, i;
21393 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21395 /* Number of elements in the vector. */
21396 w = GET_MODE_NUNITS (mode);
21397 e = GET_MODE_UNIT_SIZE (mode);
21398 gcc_assert (w <= 64);
21400 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
21401 return;
21403 if (TARGET_AVX2)
21405 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21407 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21408 a constant shuffle operand. With a tiny bit of effort we can
21409 use VPERMD instead. A re-interpretation stall for V4DFmode is
21410 unfortunate but there's no avoiding it.
21411 Similarly for V16HImode we don't have instructions for variable
21412 shuffling, while for V32QImode, after preparing suitable masks,
21413 we can use vpshufb; vpshufb; vpermq; vpor. */
21415 if (mode == V16HImode)
21417 maskmode = mode = V32QImode;
21418 w = 32;
21419 e = 1;
21421 else
21423 maskmode = mode = V8SImode;
21424 w = 8;
21425 e = 4;
21427 t1 = gen_reg_rtx (maskmode);
21429 /* Replicate the low bits of the V4DImode mask into V8SImode:
21430 mask = { A B C D }
21431 t1 = { A A B B C C D D }. */
21432 for (i = 0; i < w / 2; ++i)
21433 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21434 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21435 vt = force_reg (maskmode, vt);
21436 mask = gen_lowpart (maskmode, mask);
21437 if (maskmode == V8SImode)
21438 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21439 else
21440 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21442 /* Multiply the shuffle indices by two. */
21443 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21444 OPTAB_DIRECT);
21446 /* Add one to the odd shuffle indices:
21447 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21448 for (i = 0; i < w / 2; ++i)
21450 vec[i * 2] = const0_rtx;
21451 vec[i * 2 + 1] = const1_rtx;
21453 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21454 vt = validize_mem (force_const_mem (maskmode, vt));
21455 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21456 OPTAB_DIRECT);
21458 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21459 operands[3] = mask = t1;
21460 target = gen_reg_rtx (mode);
21461 op0 = gen_lowpart (mode, op0);
21462 op1 = gen_lowpart (mode, op1);
21465 switch (mode)
21467 case V8SImode:
21468 /* The VPERMD and VPERMPS instructions already properly ignore
21469 the high bits of the shuffle elements. No need for us to
21470 perform an AND ourselves. */
21471 if (one_operand_shuffle)
21473 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21474 if (target != operands[0])
21475 emit_move_insn (operands[0],
21476 gen_lowpart (GET_MODE (operands[0]), target));
21478 else
21480 t1 = gen_reg_rtx (V8SImode);
21481 t2 = gen_reg_rtx (V8SImode);
21482 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21483 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21484 goto merge_two;
21486 return;
21488 case V8SFmode:
21489 mask = gen_lowpart (V8SImode, mask);
21490 if (one_operand_shuffle)
21491 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21492 else
21494 t1 = gen_reg_rtx (V8SFmode);
21495 t2 = gen_reg_rtx (V8SFmode);
21496 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21497 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21498 goto merge_two;
21500 return;
21502 case V4SImode:
21503 /* By combining the two 128-bit input vectors into one 256-bit
21504 input vector, we can use VPERMD and VPERMPS for the full
21505 two-operand shuffle. */
21506 t1 = gen_reg_rtx (V8SImode);
21507 t2 = gen_reg_rtx (V8SImode);
21508 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21509 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21510 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21511 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21512 return;
21514 case V4SFmode:
21515 t1 = gen_reg_rtx (V8SFmode);
21516 t2 = gen_reg_rtx (V8SImode);
21517 mask = gen_lowpart (V4SImode, mask);
21518 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21519 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21520 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21521 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21522 return;
21524 case V32QImode:
21525 t1 = gen_reg_rtx (V32QImode);
21526 t2 = gen_reg_rtx (V32QImode);
21527 t3 = gen_reg_rtx (V32QImode);
21528 vt2 = GEN_INT (128);
21529 for (i = 0; i < 32; i++)
21530 vec[i] = vt2;
21531 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21532 vt = force_reg (V32QImode, vt);
21533 for (i = 0; i < 32; i++)
21534 vec[i] = i < 16 ? vt2 : const0_rtx;
21535 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21536 vt2 = force_reg (V32QImode, vt2);
21537 /* From mask create two adjusted masks, which contain the same
21538 bits as mask in the low 7 bits of each vector element.
21539 The first mask will have the most significant bit clear
21540 if it requests element from the same 128-bit lane
21541 and MSB set if it requests element from the other 128-bit lane.
21542 The second mask will have the opposite values of the MSB,
21543 and additionally will have its 128-bit lanes swapped.
21544 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21545 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21546 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21547 stands for other 12 bytes. */
21548 /* The bit that tells whether an element comes from the same lane or
21549 from the other lane is bit 4, so shift it up by 3 to the MSB position. */
21550 t5 = gen_reg_rtx (V4DImode);
21551 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21552 GEN_INT (3)));
21553 /* Clear MSB bits from the mask just in case it had them set. */
21554 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21555 /* After this t1 will have MSB set for elements from the other lane. */
21556 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21557 /* Clear bits other than MSB. */
21558 emit_insn (gen_andv32qi3 (t1, t1, vt));
21559 /* Or in the lower bits from mask into t3. */
21560 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21561 /* And invert MSB bits in t1, so MSB is set for elements from the same
21562 lane. */
21563 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21564 /* Swap 128-bit lanes in t3. */
21565 t6 = gen_reg_rtx (V4DImode);
21566 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21567 const2_rtx, GEN_INT (3),
21568 const0_rtx, const1_rtx));
21569 /* And or in the lower bits from mask into t1. */
21570 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21571 if (one_operand_shuffle)
21573 /* Each of these shuffles will put 0s in places where an
21574 element from the other 128-bit lane is needed, otherwise
21575 it will shuffle in the requested value. */
21576 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21577 gen_lowpart (V32QImode, t6)));
21578 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21579 /* For t3 the 128-bit lanes are swapped again. */
21580 t7 = gen_reg_rtx (V4DImode);
21581 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21582 const2_rtx, GEN_INT (3),
21583 const0_rtx, const1_rtx));
21584 /* And oring both together leads to the result. */
21585 emit_insn (gen_iorv32qi3 (target, t1,
21586 gen_lowpart (V32QImode, t7)));
21587 if (target != operands[0])
21588 emit_move_insn (operands[0],
21589 gen_lowpart (GET_MODE (operands[0]), target));
21590 return;
21593 t4 = gen_reg_rtx (V32QImode);
21594 /* Similar to the one_operand_shuffle code above,
21595 just repeated twice, once for each operand. The merge_two:
21596 code below will merge the two results together. */
21597 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21598 gen_lowpart (V32QImode, t6)));
21599 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21600 gen_lowpart (V32QImode, t6)));
21601 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21602 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21603 t7 = gen_reg_rtx (V4DImode);
21604 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21605 const2_rtx, GEN_INT (3),
21606 const0_rtx, const1_rtx));
21607 t8 = gen_reg_rtx (V4DImode);
21608 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21609 const2_rtx, GEN_INT (3),
21610 const0_rtx, const1_rtx));
21611 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21612 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21613 t1 = t4;
21614 t2 = t3;
21615 goto merge_two;
21617 default:
21618 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21619 break;
21623 if (TARGET_XOP)
21625 /* The XOP VPPERM insn supports three inputs. By ignoring the
21626 one_operand_shuffle special case, we avoid creating another
21627 set of constant vectors in memory. */
21628 one_operand_shuffle = false;
21630 /* mask = mask & {2*w-1, ...} */
21631 vt = GEN_INT (2*w - 1);
21633 else
21635 /* mask = mask & {w-1, ...} */
21636 vt = GEN_INT (w - 1);
21639 for (i = 0; i < w; i++)
21640 vec[i] = vt;
21641 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21642 mask = expand_simple_binop (maskmode, AND, mask, vt,
21643 NULL_RTX, 0, OPTAB_DIRECT);
21645 /* For non-QImode operations, convert the word permutation control
21646 into a byte permutation control. */
21647 if (mode != V16QImode)
21649 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21650 GEN_INT (exact_log2 (e)),
21651 NULL_RTX, 0, OPTAB_DIRECT);
21653 /* Convert mask to vector of chars. */
21654 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21656 /* Replicate each of the input bytes into byte positions:
21657 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21658 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21659 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21660 for (i = 0; i < 16; ++i)
21661 vec[i] = GEN_INT (i/e * e);
21662 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21663 vt = validize_mem (force_const_mem (V16QImode, vt));
21664 if (TARGET_XOP)
21665 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21666 else
21667 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21669 /* Convert it into the byte positions by doing
21670 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
21671 for (i = 0; i < 16; ++i)
21672 vec[i] = GEN_INT (i % e);
21673 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21674 vt = validize_mem (force_const_mem (V16QImode, vt));
21675 emit_insn (gen_addv16qi3 (mask, mask, vt));
21678 /* The actual shuffle operations all operate on V16QImode. */
21679 op0 = gen_lowpart (V16QImode, op0);
21680 op1 = gen_lowpart (V16QImode, op1);
21682 if (TARGET_XOP)
21684 if (GET_MODE (target) != V16QImode)
21685 target = gen_reg_rtx (V16QImode);
21686 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21687 if (target != operands[0])
21688 emit_move_insn (operands[0],
21689 gen_lowpart (GET_MODE (operands[0]), target));
21691 else if (one_operand_shuffle)
21693 if (GET_MODE (target) != V16QImode)
21694 target = gen_reg_rtx (V16QImode);
21695 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21696 if (target != operands[0])
21697 emit_move_insn (operands[0],
21698 gen_lowpart (GET_MODE (operands[0]), target));
21700 else
21702 rtx xops[6];
21703 bool ok;
21705 /* Shuffle the two input vectors independently. */
21706 t1 = gen_reg_rtx (V16QImode);
21707 t2 = gen_reg_rtx (V16QImode);
21708 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21709 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21711 merge_two:
21712 /* Then merge them together. The key is whether any given control
21713 element contained a bit set that indicates the second word. */
21714 mask = operands[3];
21715 vt = GEN_INT (w);
21716 if (maskmode == V2DImode && !TARGET_SSE4_1)
21718 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21719 more shuffle to convert the V2DI input mask into a V4SI
21720 input mask, at which point the masking that expand_int_vcond
21721 performs will work as desired. */
21722 rtx t3 = gen_reg_rtx (V4SImode);
21723 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21724 const0_rtx, const0_rtx,
21725 const2_rtx, const2_rtx));
21726 mask = t3;
21727 maskmode = V4SImode;
21728 e = w = 4;
21731 for (i = 0; i < w; i++)
21732 vec[i] = vt;
21733 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21734 vt = force_reg (maskmode, vt);
21735 mask = expand_simple_binop (maskmode, AND, mask, vt,
21736 NULL_RTX, 0, OPTAB_DIRECT);
21738 if (GET_MODE (target) != mode)
21739 target = gen_reg_rtx (mode);
21740 xops[0] = target;
21741 xops[1] = gen_lowpart (mode, t2);
21742 xops[2] = gen_lowpart (mode, t1);
21743 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21744 xops[4] = mask;
21745 xops[5] = vt;
21746 ok = ix86_expand_int_vcond (xops);
21747 gcc_assert (ok);
21748 if (target != operands[0])
21749 emit_move_insn (operands[0],
21750 gen_lowpart (GET_MODE (operands[0]), target));
21754 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
21755 true if we should do zero extension, else sign extension. HIGH_P is
21756 true if we want the N/2 high elements, else the low elements. */
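/* E.g. sign extending the low half of a V8HImode vector to V4SImode
   uses pmovsxwd on SSE4.1; without SSE4.1 the vector is instead
   interleaved with zeros (zero extension) or with a sign mask computed
   by a greater-than comparison against zero (sign extension).  */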
21758 void
21759 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21761 enum machine_mode imode = GET_MODE (src);
21762 rtx tmp;
21764 if (TARGET_SSE4_1)
21766 rtx (*unpack)(rtx, rtx);
21767 rtx (*extract)(rtx, rtx) = NULL;
21768 enum machine_mode halfmode = BLKmode;
21770 switch (imode)
21772 case V32QImode:
21773 if (unsigned_p)
21774 unpack = gen_avx2_zero_extendv16qiv16hi2;
21775 else
21776 unpack = gen_avx2_sign_extendv16qiv16hi2;
21777 halfmode = V16QImode;
21778 extract
21779 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
21780 break;
21781 case V32HImode:
21782 if (unsigned_p)
21783 unpack = gen_avx512f_zero_extendv16hiv16si2;
21784 else
21785 unpack = gen_avx512f_sign_extendv16hiv16si2;
21786 halfmode = V16HImode;
21787 extract
21788 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
21789 break;
21790 case V16HImode:
21791 if (unsigned_p)
21792 unpack = gen_avx2_zero_extendv8hiv8si2;
21793 else
21794 unpack = gen_avx2_sign_extendv8hiv8si2;
21795 halfmode = V8HImode;
21796 extract
21797 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
21798 break;
21799 case V16SImode:
21800 if (unsigned_p)
21801 unpack = gen_avx512f_zero_extendv8siv8di2;
21802 else
21803 unpack = gen_avx512f_sign_extendv8siv8di2;
21804 halfmode = V8SImode;
21805 extract
21806 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
21807 break;
21808 case V8SImode:
21809 if (unsigned_p)
21810 unpack = gen_avx2_zero_extendv4siv4di2;
21811 else
21812 unpack = gen_avx2_sign_extendv4siv4di2;
21813 halfmode = V4SImode;
21814 extract
21815 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
21816 break;
21817 case V16QImode:
21818 if (unsigned_p)
21819 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
21820 else
21821 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
21822 break;
21823 case V8HImode:
21824 if (unsigned_p)
21825 unpack = gen_sse4_1_zero_extendv4hiv4si2;
21826 else
21827 unpack = gen_sse4_1_sign_extendv4hiv4si2;
21828 break;
21829 case V4SImode:
21830 if (unsigned_p)
21831 unpack = gen_sse4_1_zero_extendv2siv2di2;
21832 else
21833 unpack = gen_sse4_1_sign_extendv2siv2di2;
21834 break;
21835 default:
21836 gcc_unreachable ();
21839 if (GET_MODE_SIZE (imode) >= 32)
21841 tmp = gen_reg_rtx (halfmode);
21842 emit_insn (extract (tmp, src));
21844 else if (high_p)
21846 /* Shift higher 8 bytes to lower 8 bytes. */
21847 tmp = gen_reg_rtx (V1TImode);
21848 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
21849 GEN_INT (64)));
21850 tmp = gen_lowpart (imode, tmp);
21852 else
21853 tmp = src;
21855 emit_insn (unpack (dest, tmp));
21857 else
21859 rtx (*unpack)(rtx, rtx, rtx);
21861 switch (imode)
21863 case V16QImode:
21864 if (high_p)
21865 unpack = gen_vec_interleave_highv16qi;
21866 else
21867 unpack = gen_vec_interleave_lowv16qi;
21868 break;
21869 case V8HImode:
21870 if (high_p)
21871 unpack = gen_vec_interleave_highv8hi;
21872 else
21873 unpack = gen_vec_interleave_lowv8hi;
21874 break;
21875 case V4SImode:
21876 if (high_p)
21877 unpack = gen_vec_interleave_highv4si;
21878 else
21879 unpack = gen_vec_interleave_lowv4si;
21880 break;
21881 default:
21882 gcc_unreachable ();
21885 if (unsigned_p)
21886 tmp = force_reg (imode, CONST0_RTX (imode));
21887 else
21888 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
21889 src, pc_rtx, pc_rtx);
21891 rtx tmp2 = gen_reg_rtx (imode);
21892 emit_insn (unpack (tmp2, src, tmp));
21893 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
21897 /* Expand conditional increment or decrement using adc/sbb instructions.
21898 The default case using setcc followed by the conditional move can be
21899 done by generic code. */
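/* E.g. "x += (a < b)" with an unsigned comparison can be emitted as a
   cmp followed by "adc $0, x", letting the carry flag supply the
   conditional increment; the decrement forms use sbb instead.  */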
21900 bool
21901 ix86_expand_int_addcc (rtx operands[])
21903 enum rtx_code code = GET_CODE (operands[1]);
21904 rtx flags;
21905 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
21906 rtx compare_op;
21907 rtx val = const0_rtx;
21908 bool fpcmp = false;
21909 enum machine_mode mode;
21910 rtx op0 = XEXP (operands[1], 0);
21911 rtx op1 = XEXP (operands[1], 1);
21913 if (operands[3] != const1_rtx
21914 && operands[3] != constm1_rtx)
21915 return false;
21916 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
21917 return false;
21918 code = GET_CODE (compare_op);
21920 flags = XEXP (compare_op, 0);
21922 if (GET_MODE (flags) == CCFPmode
21923 || GET_MODE (flags) == CCFPUmode)
21925 fpcmp = true;
21926 code = ix86_fp_compare_code_to_integer (code);
21929 if (code != LTU)
21931 val = constm1_rtx;
21932 if (fpcmp)
21933 PUT_CODE (compare_op,
21934 reverse_condition_maybe_unordered
21935 (GET_CODE (compare_op)));
21936 else
21937 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
21940 mode = GET_MODE (operands[0]);
21942 /* Construct either adc or sbb insn. */
21943 if ((code == LTU) == (operands[3] == constm1_rtx))
21945 switch (mode)
21947 case QImode:
21948 insn = gen_subqi3_carry;
21949 break;
21950 case HImode:
21951 insn = gen_subhi3_carry;
21952 break;
21953 case SImode:
21954 insn = gen_subsi3_carry;
21955 break;
21956 case DImode:
21957 insn = gen_subdi3_carry;
21958 break;
21959 default:
21960 gcc_unreachable ();
21963 else
21965 switch (mode)
21967 case QImode:
21968 insn = gen_addqi3_carry;
21969 break;
21970 case HImode:
21971 insn = gen_addhi3_carry;
21972 break;
21973 case SImode:
21974 insn = gen_addsi3_carry;
21975 break;
21976 case DImode:
21977 insn = gen_adddi3_carry;
21978 break;
21979 default:
21980 gcc_unreachable ();
21983 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
21985 return true;
21989 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
21990 but works for floating point parameters and non-offsettable memories.
21991 For pushes, it returns just stack offsets; the values will be saved
21992 in the right order. Maximally four parts are generated. */
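/* E.g. on a 32-bit target a DFmode operand is returned as two SImode
   parts, an XFmode operand as three and a TFmode operand as four.  */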
21994 static int
21995 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
21997 int size;
21999 if (!TARGET_64BIT)
22000 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22001 else
22002 size = (GET_MODE_SIZE (mode) + 4) / 8;
22004 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22005 gcc_assert (size >= 2 && size <= 4);
22007 /* Optimize constant pool reference to immediates. This is used by fp
22008 moves, that force all constants to memory to allow combining. */
22009 if (MEM_P (operand) && MEM_READONLY_P (operand))
22011 rtx tmp = maybe_get_pool_constant (operand);
22012 if (tmp)
22013 operand = tmp;
22016 if (MEM_P (operand) && !offsettable_memref_p (operand))
22018 /* The only non-offsettable memories we handle are pushes. */
22019 int ok = push_operand (operand, VOIDmode);
22021 gcc_assert (ok);
22023 operand = copy_rtx (operand);
22024 PUT_MODE (operand, word_mode);
22025 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22026 return size;
22029 if (GET_CODE (operand) == CONST_VECTOR)
22031 enum machine_mode imode = int_mode_for_mode (mode);
22032 /* Caution: if we looked through a constant pool memory above,
22033 the operand may actually have a different mode now. That's
22034 ok, since we want to pun this all the way back to an integer. */
22035 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22036 gcc_assert (operand != NULL);
22037 mode = imode;
22040 if (!TARGET_64BIT)
22042 if (mode == DImode)
22043 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22044 else
22046 int i;
22048 if (REG_P (operand))
22050 gcc_assert (reload_completed);
22051 for (i = 0; i < size; i++)
22052 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22054 else if (offsettable_memref_p (operand))
22056 operand = adjust_address (operand, SImode, 0);
22057 parts[0] = operand;
22058 for (i = 1; i < size; i++)
22059 parts[i] = adjust_address (operand, SImode, 4 * i);
22061 else if (GET_CODE (operand) == CONST_DOUBLE)
22063 REAL_VALUE_TYPE r;
22064 long l[4];
22066 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22067 switch (mode)
22069 case TFmode:
22070 real_to_target (l, &r, mode);
22071 parts[3] = gen_int_mode (l[3], SImode);
22072 parts[2] = gen_int_mode (l[2], SImode);
22073 break;
22074 case XFmode:
22075 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22076 long double may not be 80-bit. */
22077 real_to_target (l, &r, mode);
22078 parts[2] = gen_int_mode (l[2], SImode);
22079 break;
22080 case DFmode:
22081 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22082 break;
22083 default:
22084 gcc_unreachable ();
22086 parts[1] = gen_int_mode (l[1], SImode);
22087 parts[0] = gen_int_mode (l[0], SImode);
22089 else
22090 gcc_unreachable ();
22093 else
22095 if (mode == TImode)
22096 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22097 if (mode == XFmode || mode == TFmode)
22099 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22100 if (REG_P (operand))
22102 gcc_assert (reload_completed);
22103 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22104 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22106 else if (offsettable_memref_p (operand))
22108 operand = adjust_address (operand, DImode, 0);
22109 parts[0] = operand;
22110 parts[1] = adjust_address (operand, upper_mode, 8);
22112 else if (GET_CODE (operand) == CONST_DOUBLE)
22114 REAL_VALUE_TYPE r;
22115 long l[4];
22117 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22118 real_to_target (l, &r, mode);
22120 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22121 if (HOST_BITS_PER_WIDE_INT >= 64)
22122 parts[0]
22123 = gen_int_mode
22124 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22125 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22126 DImode);
22127 else
22128 parts[0] = immed_double_const (l[0], l[1], DImode);
22130 if (upper_mode == SImode)
22131 parts[1] = gen_int_mode (l[2], SImode);
22132 else if (HOST_BITS_PER_WIDE_INT >= 64)
22133 parts[1]
22134 = gen_int_mode
22135 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22136 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22137 DImode);
22138 else
22139 parts[1] = immed_double_const (l[2], l[3], DImode);
22141 else
22142 gcc_unreachable ();
22146 return size;
22149 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22150 All required insns are emitted by this function. Operands 2-5
22151 receive the destination parts in the correct order; operands 6-9
22152 receive the source parts. */
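/* E.g. a DImode move on a 32-bit target becomes two SImode moves,
   ordered (or routed through an lea) so that an address register used
   by the source is not clobbered before it has been read.  */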
22154 void
22155 ix86_split_long_move (rtx operands[])
22157 rtx part[2][4];
22158 int nparts, i, j;
22159 int push = 0;
22160 int collisions = 0;
22161 enum machine_mode mode = GET_MODE (operands[0]);
22162 bool collisionparts[4];
22164 /* The DFmode expanders may ask us to move a double.
22165 For a 64-bit target this is a single move. By hiding that fact
22166 here we simplify the i386.md splitters. */
22167 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22169 /* Optimize constant pool reference to immediates. This is used by
22170 fp moves, that force all constants to memory to allow combining. */
22172 if (MEM_P (operands[1])
22173 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22174 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22175 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22176 if (push_operand (operands[0], VOIDmode))
22178 operands[0] = copy_rtx (operands[0]);
22179 PUT_MODE (operands[0], word_mode);
22181 else
22182 operands[0] = gen_lowpart (DImode, operands[0]);
22183 operands[1] = gen_lowpart (DImode, operands[1]);
22184 emit_move_insn (operands[0], operands[1]);
22185 return;
22188 /* The only non-offsettable memory we handle is a push. */
22189 if (push_operand (operands[0], VOIDmode))
22190 push = 1;
22191 else
22192 gcc_assert (!MEM_P (operands[0])
22193 || offsettable_memref_p (operands[0]));
22195 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22196 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22198 /* When emitting push, take care for source operands on the stack. */
22199 if (push && MEM_P (operands[1])
22200 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22202 rtx src_base = XEXP (part[1][nparts - 1], 0);
22204 /* Compensate for the stack decrement by 4. */
22205 if (!TARGET_64BIT && nparts == 3
22206 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22207 src_base = plus_constant (Pmode, src_base, 4);
22209 /* src_base refers to the stack pointer and is
22210 automatically decreased by emitted push. */
22211 for (i = 0; i < nparts; i++)
22212 part[1][i] = change_address (part[1][i],
22213 GET_MODE (part[1][i]), src_base);
22216 /* We need to do the copy in the right order in case an address register
22217 of the source overlaps the destination. */
22218 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22220 rtx tmp;
22222 for (i = 0; i < nparts; i++)
22224 collisionparts[i]
22225 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22226 if (collisionparts[i])
22227 collisions++;
22230 /* Collision in the middle part can be handled by reordering. */
22231 if (collisions == 1 && nparts == 3 && collisionparts [1])
22233 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22234 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22236 else if (collisions == 1
22237 && nparts == 4
22238 && (collisionparts [1] || collisionparts [2]))
22240 if (collisionparts [1])
22242 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22243 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22245 else
22247 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22248 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22252 /* If there are more collisions, we can't handle it by reordering.
22253 Do an lea to the last part and use only one colliding move. */
22254 else if (collisions > 1)
22256 rtx base;
22258 collisions = 1;
22260 base = part[0][nparts - 1];
22262 /* Handle the case when the last part isn't valid for lea.
22263 Happens in 64-bit mode storing the 12-byte XFmode. */
22264 if (GET_MODE (base) != Pmode)
22265 base = gen_rtx_REG (Pmode, REGNO (base));
22267 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22268 part[1][0] = replace_equiv_address (part[1][0], base);
22269 for (i = 1; i < nparts; i++)
22271 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22272 part[1][i] = replace_equiv_address (part[1][i], tmp);
22277 if (push)
22279 if (!TARGET_64BIT)
22281 if (nparts == 3)
22283 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22284 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22285 stack_pointer_rtx, GEN_INT (-4)));
22286 emit_move_insn (part[0][2], part[1][2]);
22288 else if (nparts == 4)
22290 emit_move_insn (part[0][3], part[1][3]);
22291 emit_move_insn (part[0][2], part[1][2]);
22294 else
22296 /* In 64-bit mode we don't have a 32-bit push available. If this is
22297 a register, that is OK - we just use the larger counterpart. We also
22298 retype memory - this comes from an attempt to avoid a REX prefix on
22299 moving the second half of a TFmode value. */
22300 if (GET_MODE (part[1][1]) == SImode)
22302 switch (GET_CODE (part[1][1]))
22304 case MEM:
22305 part[1][1] = adjust_address (part[1][1], DImode, 0);
22306 break;
22308 case REG:
22309 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22310 break;
22312 default:
22313 gcc_unreachable ();
22316 if (GET_MODE (part[1][0]) == SImode)
22317 part[1][0] = part[1][1];
22320 emit_move_insn (part[0][1], part[1][1]);
22321 emit_move_insn (part[0][0], part[1][0]);
22322 return;
22325 /* Choose correct order to not overwrite the source before it is copied. */
22326 if ((REG_P (part[0][0])
22327 && REG_P (part[1][1])
22328 && (REGNO (part[0][0]) == REGNO (part[1][1])
22329 || (nparts == 3
22330 && REGNO (part[0][0]) == REGNO (part[1][2]))
22331 || (nparts == 4
22332 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22333 || (collisions > 0
22334 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22336 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22338 operands[2 + i] = part[0][j];
22339 operands[6 + i] = part[1][j];
22342 else
22344 for (i = 0; i < nparts; i++)
22346 operands[2 + i] = part[0][i];
22347 operands[6 + i] = part[1][i];
22351 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22352 if (optimize_insn_for_size_p ())
22354 for (j = 0; j < nparts - 1; j++)
22355 if (CONST_INT_P (operands[6 + j])
22356 && operands[6 + j] != const0_rtx
22357 && REG_P (operands[2 + j]))
22358 for (i = j; i < nparts - 1; i++)
22359 if (CONST_INT_P (operands[7 + i])
22360 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22361 operands[7 + i] = operands[2 + j];
22364 for (i = 0; i < nparts; i++)
22365 emit_move_insn (operands[2 + i], operands[6 + i]);
22367 return;
22370 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22371 left shift by a constant, either using a single shift or
22372 a sequence of add instructions. */
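/* E.g. a shift left by 1 is emitted as "add reg, reg"; larger counts
   use a chain of adds only when that is no more costly than a single
   shift by a constant and we are not optimizing for size.  */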
22374 static void
22375 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22377 rtx (*insn)(rtx, rtx, rtx);
22379 if (count == 1
22380 || (count * ix86_cost->add <= ix86_cost->shift_const
22381 && !optimize_insn_for_size_p ()))
22383 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22384 while (count-- > 0)
22385 emit_insn (insn (operand, operand, operand));
22387 else
22389 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22390 emit_insn (insn (operand, operand, GEN_INT (count)));
22394 void
22395 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22397 rtx (*gen_ashl3)(rtx, rtx, rtx);
22398 rtx (*gen_shld)(rtx, rtx, rtx);
22399 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22401 rtx low[2], high[2];
22402 int count;
22404 if (CONST_INT_P (operands[2]))
22406 split_double_mode (mode, operands, 2, low, high);
22407 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22409 if (count >= half_width)
22411 emit_move_insn (high[0], low[1]);
22412 emit_move_insn (low[0], const0_rtx);
22414 if (count > half_width)
22415 ix86_expand_ashl_const (high[0], count - half_width, mode);
22417 else
22419 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22421 if (!rtx_equal_p (operands[0], operands[1]))
22422 emit_move_insn (operands[0], operands[1]);
22424 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22425 ix86_expand_ashl_const (low[0], count, mode);
22427 return;
22430 split_double_mode (mode, operands, 1, low, high);
22432 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22434 if (operands[1] == const1_rtx)
22436 /* Assuming we've chosen QImode-capable registers, then 1 << N
22437 can be done with two 32/64-bit shifts, no branches, no cmoves. */
22438 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22440 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22442 ix86_expand_clear (low[0]);
22443 ix86_expand_clear (high[0]);
22444 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22446 d = gen_lowpart (QImode, low[0]);
22447 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22448 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22449 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22451 d = gen_lowpart (QImode, high[0]);
22452 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22453 s = gen_rtx_NE (QImode, flags, const0_rtx);
22454 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22457 /* Otherwise, we can get the same results by manually performing
22458 a bit extract operation on bit 5/6, and then performing the two
22459 shifts. The two methods of getting 0/1 into low/high are exactly
22460 the same size. Avoiding the shift in the bit extract case helps
22461 pentium4 a bit; no one else seems to care much either way. */
22462 else
22464 enum machine_mode half_mode;
22465 rtx (*gen_lshr3)(rtx, rtx, rtx);
22466 rtx (*gen_and3)(rtx, rtx, rtx);
22467 rtx (*gen_xor3)(rtx, rtx, rtx);
22468 HOST_WIDE_INT bits;
22469 rtx x;
22471 if (mode == DImode)
22473 half_mode = SImode;
22474 gen_lshr3 = gen_lshrsi3;
22475 gen_and3 = gen_andsi3;
22476 gen_xor3 = gen_xorsi3;
22477 bits = 5;
22479 else
22481 half_mode = DImode;
22482 gen_lshr3 = gen_lshrdi3;
22483 gen_and3 = gen_anddi3;
22484 gen_xor3 = gen_xordi3;
22485 bits = 6;
22488 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22489 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22490 else
22491 x = gen_lowpart (half_mode, operands[2]);
22492 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22494 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22495 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22496 emit_move_insn (low[0], high[0]);
22497 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22500 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22501 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22502 return;
22505 if (operands[1] == constm1_rtx)
22507 /* For -1 << N, we can avoid the shld instruction, because we
22508 know that we're shifting 0...31/63 ones into a -1. */
22509 emit_move_insn (low[0], constm1_rtx);
22510 if (optimize_insn_for_size_p ())
22511 emit_move_insn (high[0], low[0]);
22512 else
22513 emit_move_insn (high[0], constm1_rtx);
22515 else
22517 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22519 if (!rtx_equal_p (operands[0], operands[1]))
22520 emit_move_insn (operands[0], operands[1]);
22522 split_double_mode (mode, operands, 1, low, high);
22523 emit_insn (gen_shld (high[0], low[0], operands[2]));
22526 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22528 if (TARGET_CMOVE && scratch)
22530 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22531 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22533 ix86_expand_clear (scratch);
22534 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22536 else
22538 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22539 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22541 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
22545 void
22546 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22548 rtx (*gen_ashr3)(rtx, rtx, rtx)
22549 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22550 rtx (*gen_shrd)(rtx, rtx, rtx);
22551 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22553 rtx low[2], high[2];
22554 int count;
22556 if (CONST_INT_P (operands[2]))
22558 split_double_mode (mode, operands, 2, low, high);
22559 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22561 if (count == GET_MODE_BITSIZE (mode) - 1)
22563 emit_move_insn (high[0], high[1]);
22564 emit_insn (gen_ashr3 (high[0], high[0],
22565 GEN_INT (half_width - 1)));
22566 emit_move_insn (low[0], high[0]);
22569 else if (count >= half_width)
22571 emit_move_insn (low[0], high[1]);
22572 emit_move_insn (high[0], low[0]);
22573 emit_insn (gen_ashr3 (high[0], high[0],
22574 GEN_INT (half_width - 1)));
22576 if (count > half_width)
22577 emit_insn (gen_ashr3 (low[0], low[0],
22578 GEN_INT (count - half_width)));
22580 else
22582 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22584 if (!rtx_equal_p (operands[0], operands[1]))
22585 emit_move_insn (operands[0], operands[1]);
22587 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22588 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22591 else
22593 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22595 if (!rtx_equal_p (operands[0], operands[1]))
22596 emit_move_insn (operands[0], operands[1]);
22598 split_double_mode (mode, operands, 1, low, high);
22600 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22601 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22603 if (TARGET_CMOVE && scratch)
22605 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22606 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22608 emit_move_insn (scratch, high[0]);
22609 emit_insn (gen_ashr3 (scratch, scratch,
22610 GEN_INT (half_width - 1)));
22611 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22612 scratch));
22614 else
22616 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22617 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22619 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22624 void
22625 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22627 rtx (*gen_lshr3)(rtx, rtx, rtx)
22628 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22629 rtx (*gen_shrd)(rtx, rtx, rtx);
22630 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22632 rtx low[2], high[2];
22633 int count;
22635 if (CONST_INT_P (operands[2]))
22637 split_double_mode (mode, operands, 2, low, high);
22638 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22640 if (count >= half_width)
22642 emit_move_insn (low[0], high[1]);
22643 ix86_expand_clear (high[0]);
22645 if (count > half_width)
22646 emit_insn (gen_lshr3 (low[0], low[0],
22647 GEN_INT (count - half_width)));
22649 else
22651 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22653 if (!rtx_equal_p (operands[0], operands[1]))
22654 emit_move_insn (operands[0], operands[1]);
22656 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22657 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22660 else
22662 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22664 if (!rtx_equal_p (operands[0], operands[1]))
22665 emit_move_insn (operands[0], operands[1]);
22667 split_double_mode (mode, operands, 1, low, high);
22669 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22670 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22672 if (TARGET_CMOVE && scratch)
22674 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22675 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22677 ix86_expand_clear (scratch);
22678 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22679 scratch));
22681 else
22683 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22684 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22686 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
22691 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
22692 static void
22693 predict_jump (int prob)
22695 rtx insn = get_last_insn ();
22696 gcc_assert (JUMP_P (insn));
22697 add_int_reg_note (insn, REG_BR_PROB, prob);
22700 /* Helper function for the string operations below. Test whether VARIABLE
22701 is aligned to VALUE bytes; if so, the emitted code jumps to the returned label. */
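/* E.g. with VALUE == 3 this tests for 4 byte alignment: the emitted
   code ANDs VARIABLE with 3 and jumps to the returned label when the
   result is zero.  */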
22702 static rtx
22703 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22705 rtx label = gen_label_rtx ();
22706 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22707 if (GET_MODE (variable) == DImode)
22708 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22709 else
22710 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22711 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22712 1, label);
22713 if (epilogue)
22714 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22715 else
22716 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22717 return label;
22720 /* Decrease COUNTREG by VALUE. */
22721 static void
22722 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22724 rtx (*gen_add)(rtx, rtx, rtx)
22725 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22727 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22730 /* Zero extend possibly SImode EXP to Pmode register. */
22732 ix86_zero_extend_to_Pmode (rtx exp)
22734 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22737 /* Divide COUNTREG by SCALE. */
22738 static rtx
22739 scale_counter (rtx countreg, int scale)
22741 rtx sc;
22743 if (scale == 1)
22744 return countreg;
22745 if (CONST_INT_P (countreg))
22746 return GEN_INT (INTVAL (countreg) / scale);
22747 gcc_assert (REG_P (countreg));
22749 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22750 GEN_INT (exact_log2 (scale)),
22751 NULL, 1, OPTAB_DIRECT);
22752 return sc;
22755 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22756 DImode for constant loop counts. */
22758 static enum machine_mode
22759 counter_mode (rtx count_exp)
22761 if (GET_MODE (count_exp) != VOIDmode)
22762 return GET_MODE (count_exp);
22763 if (!CONST_INT_P (count_exp))
22764 return Pmode;
22765 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22766 return DImode;
22767 return SImode;
22770 /* Copy the address to a Pmode register. This is used for x32 to
22771 truncate DImode TLS address to a SImode register. */
22773 static rtx
22774 ix86_copy_addr_to_reg (rtx addr)
22776 if (GET_MODE (addr) == Pmode)
22777 return copy_addr_to_reg (addr);
22778 else
22780 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
22781 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
22785 /* When ISSETMEM is FALSE, output a simple loop moving memory from SRCPTR
22786 to DESTPTR in chunks of MODE, unrolled UNROLL times; the overall size is
22787 COUNT, specified in bytes.  When ISSETMEM is TRUE, output the equivalent
22788 loop to set memory to VALUE (supposed to be in MODE).
22790 The size is rounded down to a whole number of chunks moved at once.
22791 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
22794 static void
22795 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
22796 rtx destptr, rtx srcptr, rtx value,
22797 rtx count, enum machine_mode mode, int unroll,
22798 int expected_size, bool issetmem)
22800 rtx out_label, top_label, iter, tmp;
22801 enum machine_mode iter_mode = counter_mode (count);
22802 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
22803 rtx piece_size = GEN_INT (piece_size_n);
22804 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
22805 rtx size;
22806 int i;
22808 top_label = gen_label_rtx ();
22809 out_label = gen_label_rtx ();
22810 iter = gen_reg_rtx (iter_mode);
22812 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
22813 NULL, 1, OPTAB_DIRECT);
22814 /* Those two should combine. */
22815 if (piece_size == const1_rtx)
22817 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
22818 true, out_label);
22819 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22821 emit_move_insn (iter, const0_rtx);
22823 emit_label (top_label);
22825 tmp = convert_modes (Pmode, iter_mode, iter, true);
22827 /* This assert could be relaxed - in that case we'll need to compute
22828 the smallest power of two containing PIECE_SIZE_N and pass it to
22829 offset_address. */
22830 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
22831 destmem = offset_address (destmem, tmp, piece_size_n);
22832 destmem = adjust_address (destmem, mode, 0);
22834 if (!issetmem)
22836 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
22837 srcmem = adjust_address (srcmem, mode, 0);
22839 /* When unrolling for chips that reorder memory reads and writes,
22840 we can save registers by using a single temporary.
22841 Also, using 4 temporaries is overkill in 32-bit mode. */
22842 if (!TARGET_64BIT && 0)
22844 for (i = 0; i < unroll; i++)
22846 if (i)
22848 destmem =
22849 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22850 srcmem =
22851 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22853 emit_move_insn (destmem, srcmem);
22856 else
22858 rtx tmpreg[4];
22859 gcc_assert (unroll <= 4);
22860 for (i = 0; i < unroll; i++)
22862 tmpreg[i] = gen_reg_rtx (mode);
22863 if (i)
22865 srcmem =
22866 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22868 emit_move_insn (tmpreg[i], srcmem);
22870 for (i = 0; i < unroll; i++)
22872 if (i)
22874 destmem =
22875 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22877 emit_move_insn (destmem, tmpreg[i]);
22881 else
22882 for (i = 0; i < unroll; i++)
22884 if (i)
22885 destmem =
22886 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22887 emit_move_insn (destmem, value);
22890 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
22891 true, OPTAB_LIB_WIDEN);
22892 if (tmp != iter)
22893 emit_move_insn (iter, tmp);
22895 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
22896 true, top_label);
22897 if (expected_size != -1)
22899 expected_size /= GET_MODE_SIZE (mode) * unroll;
22900 if (expected_size == 0)
22901 predict_jump (0);
22902 else if (expected_size > REG_BR_PROB_BASE)
22903 predict_jump (REG_BR_PROB_BASE - 1);
22904 else
22905 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
22907 else
22908 predict_jump (REG_BR_PROB_BASE * 80 / 100);
22909 iter = ix86_zero_extend_to_Pmode (iter);
22910 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
22911 true, OPTAB_LIB_WIDEN);
22912 if (tmp != destptr)
22913 emit_move_insn (destptr, tmp);
22914 if (!issetmem)
22916 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
22917 true, OPTAB_LIB_WIDEN);
22918 if (tmp != srcptr)
22919 emit_move_insn (srcptr, tmp);
22921 emit_label (out_label);
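/* Editorial illustration (not part of i386.c): shape of the code emitted by
   expand_set_or_movmem_via_loop above for ISSETMEM == false, MODE == SImode
   and UNROLL == 1.  The function name is hypothetical; tail bytes below one
   chunk are left to the epilogue, and the emitted loop itself is a do-while,
   so callers guard against a zero-sized body (see need_zero_guard in
   ix86_expand_set_or_movmem below); the size check here is only for
   self-containment.  */
static unsigned long
movmem_via_loop_sketch (char *dest, const char *src, unsigned long count)
{
  unsigned long size = count & ~(sizeof (int) - 1UL);  /* piece_size_mask */
  unsigned long iter = 0;
  if (size != 0)
    do
      {
        *(int *) (dest + iter) = *(const int *) (src + iter);
        iter += sizeof (int);
      }
    while (iter < size);
  /* The real expansion also advances DESTPTR and SRCPTR past the copied
     area so the epilogue can continue from there.  */
  return iter;
}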
22924 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
22925 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
22926 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
22927 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
22928 ORIG_VALUE is the original value passed to memset to fill the memory with.
22929 Other arguments have the same meaning as for the previous function. */
22931 static void
22932 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
22933 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
22934 rtx count,
22935 enum machine_mode mode, bool issetmem)
22937 rtx destexp;
22938 rtx srcexp;
22939 rtx countreg;
22940 HOST_WIDE_INT rounded_count;
22942 /* If possible, it is shorter to use rep movs.
22943 TODO: Maybe it is better to move this logic to decide_alg. */
22944 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
22945 && (!issetmem || orig_value == const0_rtx))
22946 mode = SImode;
22948 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
22949 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
22951 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
22952 GET_MODE_SIZE (mode)));
22953 if (mode != QImode)
22955 destexp = gen_rtx_ASHIFT (Pmode, countreg,
22956 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22957 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
22959 else
22960 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
22961 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
22963 rounded_count = (INTVAL (count)
22964 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22965 destmem = shallow_copy_rtx (destmem);
22966 set_mem_size (destmem, rounded_count);
22968 else if (MEM_SIZE_KNOWN_P (destmem))
22969 clear_mem_size (destmem);
22971 if (issetmem)
22973 value = force_reg (mode, gen_lowpart (mode, value));
22974 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
22976 else
22978 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
22979 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
22980 if (mode != QImode)
22982 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
22983 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22984 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
22986 else
22987 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
22988 if (CONST_INT_P (count))
22990 rounded_count = (INTVAL (count)
22991 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22992 srcmem = shallow_copy_rtx (srcmem);
22993 set_mem_size (srcmem, rounded_count);
22995 else
22997 if (MEM_SIZE_KNOWN_P (srcmem))
22998 clear_mem_size (srcmem);
23000 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23001 destexp, srcexp));
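/* Editorial note (not part of i386.c): for a memcpy with MODE == SImode the
   pattern emitted above corresponds to the classic sequence

       mov  ecx, count/4      ; countreg, already scaled by scale_counter
       rep  movsd             ; esi/edi advance by 4 per iteration

   DESTEXP and SRCEXP describe the final pointer values,
   destptr + (countreg << 2) and srcptr + (countreg << 2), which the rep_mov
   pattern uses to express the side effects on the pointer registers.  */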
23005 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23006 DESTMEM.
23007 SRCMEM is passed by pointer so it can be updated on return.
23008 The return value is the updated DESTMEM. */
23009 static rtx
23010 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23011 HOST_WIDE_INT size_to_move)
23013 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23014 enum insn_code code;
23015 enum machine_mode move_mode;
23016 int piece_size, i;
23018 /* Find the widest mode in which we could perform moves.
23019 Start with the biggest power of 2 less than SIZE_TO_MOVE and halve
23020 it until a move of that size is supported. */
23021 piece_size = 1 << floor_log2 (size_to_move);
23022 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23023 code = optab_handler (mov_optab, move_mode);
23024 while (code == CODE_FOR_nothing && piece_size > 1)
23026 piece_size >>= 1;
23027 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23028 code = optab_handler (mov_optab, move_mode);
23031 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23032 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23033 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23035 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23036 move_mode = mode_for_vector (word_mode, nunits);
23037 code = optab_handler (mov_optab, move_mode);
23038 if (code == CODE_FOR_nothing)
23040 move_mode = word_mode;
23041 piece_size = GET_MODE_SIZE (move_mode);
23042 code = optab_handler (mov_optab, move_mode);
23045 gcc_assert (code != CODE_FOR_nothing);
23047 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23048 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23050 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23051 gcc_assert (size_to_move % piece_size == 0);
23052 adjust = GEN_INT (piece_size);
23053 for (i = 0; i < size_to_move; i += piece_size)
23055 /* We move from memory to memory, so we'll need to do it via
23056 a temporary register. */
23057 tempreg = gen_reg_rtx (move_mode);
23058 emit_insn (GEN_FCN (code) (tempreg, src));
23059 emit_insn (GEN_FCN (code) (dst, tempreg));
23061 emit_move_insn (destptr,
23062 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23063 emit_move_insn (srcptr,
23064 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23066 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23067 piece_size);
23068 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23069 piece_size);
23072 /* Update DST and SRC rtx. */
23073 *srcmem = src;
23074 return dst;
23077 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23078 static void
23079 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23080 rtx destptr, rtx srcptr, rtx count, int max_size)
23082 rtx src, dest;
23083 if (CONST_INT_P (count))
23085 HOST_WIDE_INT countval = INTVAL (count);
23086 HOST_WIDE_INT epilogue_size = countval % max_size;
23087 int i;
23089 /* For now MAX_SIZE should be a power of 2. This assert could be
23090 relaxed, but it'll require a bit more complicated epilogue
23091 expanding. */
23092 gcc_assert ((max_size & (max_size - 1)) == 0);
23093 for (i = max_size; i >= 1; i >>= 1)
23095 if (epilogue_size & i)
23096 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23098 return;
23100 if (max_size > 8)
23102 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23103 count, 1, OPTAB_DIRECT);
23104 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23105 count, QImode, 1, 4, false);
23106 return;
23109 /* When there are stringops, we can cheaply increase dest and src pointers.
23110 Otherwise we save code size by maintaining offset (zero is readily
23111 available from preceding rep operation) and using x86 addressing modes.
23113 if (TARGET_SINGLE_STRINGOP)
23115 if (max_size > 4)
23117 rtx label = ix86_expand_aligntest (count, 4, true);
23118 src = change_address (srcmem, SImode, srcptr);
23119 dest = change_address (destmem, SImode, destptr);
23120 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23121 emit_label (label);
23122 LABEL_NUSES (label) = 1;
23124 if (max_size > 2)
23126 rtx label = ix86_expand_aligntest (count, 2, true);
23127 src = change_address (srcmem, HImode, srcptr);
23128 dest = change_address (destmem, HImode, destptr);
23129 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23130 emit_label (label);
23131 LABEL_NUSES (label) = 1;
23133 if (max_size > 1)
23135 rtx label = ix86_expand_aligntest (count, 1, true);
23136 src = change_address (srcmem, QImode, srcptr);
23137 dest = change_address (destmem, QImode, destptr);
23138 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23139 emit_label (label);
23140 LABEL_NUSES (label) = 1;
23143 else
23145 rtx offset = force_reg (Pmode, const0_rtx);
23146 rtx tmp;
23148 if (max_size > 4)
23150 rtx label = ix86_expand_aligntest (count, 4, true);
23151 src = change_address (srcmem, SImode, srcptr);
23152 dest = change_address (destmem, SImode, destptr);
23153 emit_move_insn (dest, src);
23154 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23155 true, OPTAB_LIB_WIDEN);
23156 if (tmp != offset)
23157 emit_move_insn (offset, tmp);
23158 emit_label (label);
23159 LABEL_NUSES (label) = 1;
23161 if (max_size > 2)
23163 rtx label = ix86_expand_aligntest (count, 2, true);
23164 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23165 src = change_address (srcmem, HImode, tmp);
23166 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23167 dest = change_address (destmem, HImode, tmp);
23168 emit_move_insn (dest, src);
23169 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23170 true, OPTAB_LIB_WIDEN);
23171 if (tmp != offset)
23172 emit_move_insn (offset, tmp);
23173 emit_label (label);
23174 LABEL_NUSES (label) = 1;
23176 if (max_size > 1)
23178 rtx label = ix86_expand_aligntest (count, 1, true);
23179 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23180 src = change_address (srcmem, QImode, tmp);
23181 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23182 dest = change_address (destmem, QImode, tmp);
23183 emit_move_insn (dest, src);
23184 emit_label (label);
23185 LABEL_NUSES (label) = 1;
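/* Editorial example (not part of i386.c): for a constant COUNT the epilogue
   above decomposes the remainder into power-of-two moves.  With
   max_size == 16 and COUNT == 29, epilogue_size = 29 % 16 = 13, so
   emit_memmov is called for 8, 4 and 1 bytes (the bits set in 13).  */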
23190 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23191 with value PROMOTED_VAL.
23192 DESTPTR is advanced as the stores are emitted.
23193 The return value is the updated DESTMEM. */
23194 static rtx
23195 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23196 HOST_WIDE_INT size_to_move)
23198 rtx dst = destmem, adjust;
23199 enum insn_code code;
23200 enum machine_mode move_mode;
23201 int piece_size, i;
23203 /* Find the widest mode in which we could perform moves.
23204 Start with the biggest power of 2 less than SIZE_TO_MOVE and halve
23205 it until a move of that size is supported. */
23206 move_mode = GET_MODE (promoted_val);
23207 if (move_mode == VOIDmode)
23208 move_mode = QImode;
23209 if (size_to_move < GET_MODE_SIZE (move_mode))
23211 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23212 promoted_val = gen_lowpart (move_mode, promoted_val);
23214 piece_size = GET_MODE_SIZE (move_mode);
23215 code = optab_handler (mov_optab, move_mode);
23216 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23218 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23220 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23221 gcc_assert (size_to_move % piece_size == 0);
23222 adjust = GEN_INT (piece_size);
23223 for (i = 0; i < size_to_move; i += piece_size)
23225 if (piece_size <= GET_MODE_SIZE (word_mode))
23227 emit_insn (gen_strset (destptr, dst, promoted_val));
23228 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23229 piece_size);
23230 continue;
23233 emit_insn (GEN_FCN (code) (dst, promoted_val));
23235 emit_move_insn (destptr,
23236 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23238 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23239 piece_size);
23242 /* Update DST rtx. */
23243 return dst;
23245 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23246 static void
23247 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23248 rtx count, int max_size)
23250 count =
23251 expand_simple_binop (counter_mode (count), AND, count,
23252 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23253 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23254 gen_lowpart (QImode, value), count, QImode,
23255 1, max_size / 2, true);
23258 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23259 static void
23260 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23261 rtx count, int max_size)
23263 rtx dest;
23265 if (CONST_INT_P (count))
23267 HOST_WIDE_INT countval = INTVAL (count);
23268 HOST_WIDE_INT epilogue_size = countval % max_size;
23269 int i;
23271 /* For now MAX_SIZE should be a power of 2. This assert could be
23272 relaxed, but it'll require a bit more complicated epilogue
23273 expanding. */
23274 gcc_assert ((max_size & (max_size - 1)) == 0);
23275 for (i = max_size; i >= 1; i >>= 1)
23277 if (epilogue_size & i)
23279 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23280 destmem = emit_memset (destmem, destptr, vec_value, i);
23281 else
23282 destmem = emit_memset (destmem, destptr, value, i);
23285 return;
23287 if (max_size > 32)
23289 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23290 return;
23292 if (max_size > 16)
23294 rtx label = ix86_expand_aligntest (count, 16, true);
23295 if (TARGET_64BIT)
23297 dest = change_address (destmem, DImode, destptr);
23298 emit_insn (gen_strset (destptr, dest, value));
23299 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23300 emit_insn (gen_strset (destptr, dest, value));
23302 else
23304 dest = change_address (destmem, SImode, destptr);
23305 emit_insn (gen_strset (destptr, dest, value));
23306 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23307 emit_insn (gen_strset (destptr, dest, value));
23308 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23309 emit_insn (gen_strset (destptr, dest, value));
23310 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23311 emit_insn (gen_strset (destptr, dest, value));
23313 emit_label (label);
23314 LABEL_NUSES (label) = 1;
23316 if (max_size > 8)
23318 rtx label = ix86_expand_aligntest (count, 8, true);
23319 if (TARGET_64BIT)
23321 dest = change_address (destmem, DImode, destptr);
23322 emit_insn (gen_strset (destptr, dest, value));
23324 else
23326 dest = change_address (destmem, SImode, destptr);
23327 emit_insn (gen_strset (destptr, dest, value));
23328 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23329 emit_insn (gen_strset (destptr, dest, value));
23331 emit_label (label);
23332 LABEL_NUSES (label) = 1;
23334 if (max_size > 4)
23336 rtx label = ix86_expand_aligntest (count, 4, true);
23337 dest = change_address (destmem, SImode, destptr);
23338 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23339 emit_label (label);
23340 LABEL_NUSES (label) = 1;
23342 if (max_size > 2)
23344 rtx label = ix86_expand_aligntest (count, 2, true);
23345 dest = change_address (destmem, HImode, destptr);
23346 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23347 emit_label (label);
23348 LABEL_NUSES (label) = 1;
23350 if (max_size > 1)
23352 rtx label = ix86_expand_aligntest (count, 1, true);
23353 dest = change_address (destmem, QImode, destptr);
23354 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23355 emit_label (label);
23356 LABEL_NUSES (label) = 1;
23360 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough of
23361 DESTMEM to align it to DESIRED_ALIGNMENT.  The original alignment is ALIGN.
23362 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23363 ignored.
23364 Return value is updated DESTMEM. */
23365 static rtx
23366 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23367 rtx destptr, rtx srcptr, rtx value,
23368 rtx vec_value, rtx count, int align,
23369 int desired_alignment, bool issetmem)
23371 int i;
23372 for (i = 1; i < desired_alignment; i <<= 1)
23374 if (align <= i)
23376 rtx label = ix86_expand_aligntest (destptr, i, false);
23377 if (issetmem)
23379 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23380 destmem = emit_memset (destmem, destptr, vec_value, i);
23381 else
23382 destmem = emit_memset (destmem, destptr, value, i);
23384 else
23385 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23386 ix86_adjust_counter (count, i);
23387 emit_label (label);
23388 LABEL_NUSES (label) = 1;
23389 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23392 return destmem;
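/* Editorial example (not part of i386.c): with ALIGN == 1 and
   DESIRED_ALIGNMENT == 8 the prologue above behaves like

     if (dest & 1) { copy/set 1 byte;  count -= 1; }
     if (dest & 2) { copy/set 2 bytes; count -= 2; }
     if (dest & 4) { copy/set 4 bytes; count -= 4; }

   after which DEST is 8-byte aligned.  Each test is emitted by
   ix86_expand_aligntest, which jumps over the copy when the tested bit of
   DESTPTR is clear.  */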
23395 /* Test if COUNT&SIZE is nonzero and if so, expand a movmem
23396 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23397 and jump to DONE_LABEL. */
23398 static void
23399 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23400 rtx destptr, rtx srcptr,
23401 rtx value, rtx vec_value,
23402 rtx count, int size,
23403 rtx done_label, bool issetmem)
23405 rtx label = ix86_expand_aligntest (count, size, false);
23406 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23407 rtx modesize;
23408 int n;
23410 /* If we do not have a vector value to copy, we must reduce the size. */
23411 if (issetmem)
23413 if (!vec_value)
23415 if (GET_MODE (value) == VOIDmode && size > 8)
23416 mode = Pmode;
23417 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23418 mode = GET_MODE (value);
23420 else
23421 mode = GET_MODE (vec_value), value = vec_value;
23423 else
23425 /* Choose appropriate vector mode. */
23426 if (size >= 32)
23427 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23428 else if (size >= 16)
23429 mode = TARGET_SSE ? V16QImode : DImode;
23430 srcmem = change_address (srcmem, mode, srcptr);
23432 destmem = change_address (destmem, mode, destptr);
23433 modesize = GEN_INT (GET_MODE_SIZE (mode));
23434 gcc_assert (GET_MODE_SIZE (mode) <= size);
23435 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23437 if (issetmem)
23438 emit_move_insn (destmem, gen_lowpart (mode, value));
23439 else
23441 emit_move_insn (destmem, srcmem);
23442 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23444 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23447 destmem = offset_address (destmem, count, 1);
23448 destmem = offset_address (destmem, GEN_INT (-2 * size),
23449 GET_MODE_SIZE (mode));
23450 if (!issetmem)
23452 srcmem = offset_address (srcmem, count, 1);
23453 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23454 GET_MODE_SIZE (mode));
23456 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23458 if (issetmem)
23459 emit_move_insn (destmem, gen_lowpart (mode, value));
23460 else
23462 emit_move_insn (destmem, srcmem);
23463 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23465 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23467 emit_jump_insn (gen_jump (done_label));
23468 emit_barrier ();
23470 emit_label (label);
23471 LABEL_NUSES (label) = 1;
23474 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
23475 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
23476 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT so that we can
23477 proceed with a loop copying SIZE bytes at once.  Do moves in MODE.
23478 DONE_LABEL is a label after the whole copying sequence.  The label is created
23479 on demand if *DONE_LABEL is NULL.
23480 MIN_SIZE is the minimal size of the block copied.  This value gets adjusted for new
23481 bounds after the initial copies.
23483 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23484 DESTPTR/SRCPTR are pointers to the block.  DYNAMIC_CHECK indicates whether
23485 we will dispatch to a library call for large blocks.
23487 In pseudocode we do:
23489 if (COUNT < SIZE)
23491 Assume that SIZE is 4. Bigger sizes are handled analogously
23492 if (COUNT & 4)
23494 copy 4 bytes from SRCPTR to DESTPTR
23495 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23496 goto done_label
23498 if (!COUNT)
23499 goto done_label;
23500 copy 1 byte from SRCPTR to DESTPTR
23501 if (COUNT & 2)
23503 copy 2 bytes from SRCPTR to DESTPTR
23504 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23507 else
23509 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23510 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
23512 OLD_DESPTR = DESTPTR;
23513 Align DESTPTR up to DESIRED_ALIGN
23514 SRCPTR += DESTPTR - OLD_DESTPTR
23515 COUNT -= DEST_PTR - OLD_DESTPTR
23516 if (DYNAMIC_CHECK)
23517 Round COUNT down to multiple of SIZE
23518 << optional caller supplied zero size guard is here >>
23519 << optional caller supplied dynamic check is here >>
23520 << caller supplied main copy loop is here >>
23522 done_label:
23524 static void
23525 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23526 rtx *destptr, rtx *srcptr,
23527 enum machine_mode mode,
23528 rtx value, rtx vec_value,
23529 rtx *count,
23530 rtx *done_label,
23531 int size,
23532 int desired_align,
23533 int align,
23534 unsigned HOST_WIDE_INT *min_size,
23535 bool dynamic_check,
23536 bool issetmem)
23538 rtx loop_label = NULL, label;
23539 int n;
23540 rtx modesize;
23541 int prolog_size = 0;
23542 rtx mode_value;
23544 /* Choose the proper value to copy. */
23545 if (issetmem && VECTOR_MODE_P (mode))
23546 mode_value = vec_value;
23547 else
23548 mode_value = value;
23549 gcc_assert (GET_MODE_SIZE (mode) <= size);
23551 /* See if block is big or small, handle small blocks. */
23552 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23554 int size2 = size;
23555 loop_label = gen_label_rtx ();
23557 if (!*done_label)
23558 *done_label = gen_label_rtx ();
23560 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23561 1, loop_label);
23562 size2 >>= 1;
23564 /* Handle sizes > 3. */
23565 for (;size2 > 2; size2 >>= 1)
23566 expand_small_movmem_or_setmem (destmem, srcmem,
23567 *destptr, *srcptr,
23568 value, vec_value,
23569 *count,
23570 size2, *done_label, issetmem);
23571 /* Nothing to copy? Jump to DONE_LABEL if so */
23572 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23573 1, *done_label);
23575 /* Do a byte copy. */
23576 destmem = change_address (destmem, QImode, *destptr);
23577 if (issetmem)
23578 emit_move_insn (destmem, gen_lowpart (QImode, value));
23579 else
23581 srcmem = change_address (srcmem, QImode, *srcptr);
23582 emit_move_insn (destmem, srcmem);
23585 /* Handle sizes 2 and 3. */
23586 label = ix86_expand_aligntest (*count, 2, false);
23587 destmem = change_address (destmem, HImode, *destptr);
23588 destmem = offset_address (destmem, *count, 1);
23589 destmem = offset_address (destmem, GEN_INT (-2), 2);
23590 if (issetmem)
23591 emit_move_insn (destmem, gen_lowpart (HImode, value));
23592 else
23594 srcmem = change_address (srcmem, HImode, *srcptr);
23595 srcmem = offset_address (srcmem, *count, 1);
23596 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23597 emit_move_insn (destmem, srcmem);
23600 emit_label (label);
23601 LABEL_NUSES (label) = 1;
23602 emit_jump_insn (gen_jump (*done_label));
23603 emit_barrier ();
23605 else
23606 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23607 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23609 /* Start memcpy for COUNT >= SIZE. */
23610 if (loop_label)
23612 emit_label (loop_label);
23613 LABEL_NUSES (loop_label) = 1;
23616 /* Copy first desired_align bytes. */
23617 if (!issetmem)
23618 srcmem = change_address (srcmem, mode, *srcptr);
23619 destmem = change_address (destmem, mode, *destptr);
23620 modesize = GEN_INT (GET_MODE_SIZE (mode));
23621 for (n = 0; prolog_size < desired_align - align; n++)
23623 if (issetmem)
23624 emit_move_insn (destmem, mode_value);
23625 else
23627 emit_move_insn (destmem, srcmem);
23628 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23630 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23631 prolog_size += GET_MODE_SIZE (mode);
23635 /* Copy last SIZE bytes. */
23636 destmem = offset_address (destmem, *count, 1);
23637 destmem = offset_address (destmem,
23638 GEN_INT (-size - prolog_size),
23640 if (issetmem)
23641 emit_move_insn (destmem, mode_value);
23642 else
23644 srcmem = offset_address (srcmem, *count, 1);
23645 srcmem = offset_address (srcmem,
23646 GEN_INT (-size - prolog_size),
23648 emit_move_insn (destmem, srcmem);
23650 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23652 destmem = offset_address (destmem, modesize, 1);
23653 if (issetmem)
23654 emit_move_insn (destmem, mode_value);
23655 else
23657 srcmem = offset_address (srcmem, modesize, 1);
23658 emit_move_insn (destmem, srcmem);
23662 /* Align destination. */
23663 if (desired_align > 1 && desired_align > align)
23665 rtx saveddest = *destptr;
23667 gcc_assert (desired_align <= size);
23668 /* Align destptr up, place it to new register. */
23669 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23670 GEN_INT (prolog_size),
23671 NULL_RTX, 1, OPTAB_DIRECT);
23672 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23673 GEN_INT (-desired_align),
23674 *destptr, 1, OPTAB_DIRECT);
23675 /* See how many bytes we skipped. */
23676 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23677 *destptr,
23678 saveddest, 1, OPTAB_DIRECT);
23679 /* Adjust srcptr and count. */
23680 if (!issetmem)
23681 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
23682 *srcptr, 1, OPTAB_DIRECT);
23683 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23684 saveddest, *count, 1, OPTAB_DIRECT);
23685 /* We copied at most size + prolog_size. */
23686 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23687 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23688 else
23689 *min_size = 0;
23691 /* Our loops always round down the block size, but for dispatch to the library
23692 we need the precise value. */
23693 if (dynamic_check)
23694 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23695 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23697 else
23699 gcc_assert (prolog_size == 0);
23700 /* Decrease count, so we won't end up copying last word twice. */
23701 if (!CONST_INT_P (*count))
23702 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23703 constm1_rtx, *count, 1, OPTAB_DIRECT);
23704 else
23705 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23706 if (*min_size)
23707 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
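/* Editorial example (not part of i386.c): the "copy the first SIZE and the
   last SIZE bytes" trick used in the pseudocode above covers any COUNT in
   [SIZE, 2*SIZE-1] with two possibly overlapping moves.  For SIZE == 4 and
   COUNT == 6:

     copy bytes 0..3 from SRCPTR to DESTPTR
     copy bytes 2..5 from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4

   The two ranges overlap in bytes 2..3 but together cover all six bytes,
   so no byte loop is needed for that tail.  */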
23712 /* This function is like the previous one, except here we know how many bytes
23713 need to be copied. That allows us to update alignment not only of DST, which
23714 is returned, but also of SRC, which is passed as a pointer for that
23715 reason. */
23716 static rtx
23717 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23718 rtx srcreg, rtx value, rtx vec_value,
23719 int desired_align, int align_bytes,
23720 bool issetmem)
23722 rtx src = NULL;
23723 rtx orig_dst = dst;
23724 rtx orig_src = NULL;
23725 int piece_size = 1;
23726 int copied_bytes = 0;
23728 if (!issetmem)
23730 gcc_assert (srcp != NULL);
23731 src = *srcp;
23732 orig_src = src;
23735 for (piece_size = 1;
23736 piece_size <= desired_align && copied_bytes < align_bytes;
23737 piece_size <<= 1)
23739 if (align_bytes & piece_size)
23741 if (issetmem)
23743 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23744 dst = emit_memset (dst, destreg, vec_value, piece_size);
23745 else
23746 dst = emit_memset (dst, destreg, value, piece_size);
23748 else
23749 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23750 copied_bytes += piece_size;
23753 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23754 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23755 if (MEM_SIZE_KNOWN_P (orig_dst))
23756 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23758 if (!issetmem)
23760 int src_align_bytes = get_mem_align_offset (src, desired_align
23761 * BITS_PER_UNIT);
23762 if (src_align_bytes >= 0)
23763 src_align_bytes = desired_align - src_align_bytes;
23764 if (src_align_bytes >= 0)
23766 unsigned int src_align;
23767 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23769 if ((src_align_bytes & (src_align - 1))
23770 == (align_bytes & (src_align - 1)))
23771 break;
23773 if (src_align > (unsigned int) desired_align)
23774 src_align = desired_align;
23775 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
23776 set_mem_align (src, src_align * BITS_PER_UNIT);
23778 if (MEM_SIZE_KNOWN_P (orig_src))
23779 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
23780 *srcp = src;
23783 return dst;
23786 /* Return true if ALG can be used in current context.
23787 Assume we expand memset if MEMSET is true. */
23788 static bool
23789 alg_usable_p (enum stringop_alg alg, bool memset)
23791 if (alg == no_stringop)
23792 return false;
23793 if (alg == vector_loop)
23794 return TARGET_SSE || TARGET_AVX;
23795 /* Algorithms using the rep prefix want at least edi and ecx;
23796 additionally, memset wants eax and memcpy wants esi. Don't
23797 consider such algorithms if the user has appropriated those
23798 registers for their own purposes. */
23799 if (alg == rep_prefix_1_byte
23800 || alg == rep_prefix_4_byte
23801 || alg == rep_prefix_8_byte)
23802 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
23803 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
23804 return true;
23807 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
23808 static enum stringop_alg
23809 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
23810 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
23811 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
23813 const struct stringop_algs * algs;
23814 bool optimize_for_speed;
23815 int max = -1;
23816 const struct processor_costs *cost;
23817 int i;
23818 bool any_alg_usable_p = false;
23820 *noalign = false;
23821 *dynamic_check = -1;
23823 /* Even if the string operation call is cold, we still might spend a lot
23824 of time processing large blocks. */
23825 if (optimize_function_for_size_p (cfun)
23826 || (optimize_insn_for_size_p ()
23827 && (max_size < 256
23828 || (expected_size != -1 && expected_size < 256))))
23829 optimize_for_speed = false;
23830 else
23831 optimize_for_speed = true;
23833 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
23834 if (memset)
23835 algs = &cost->memset[TARGET_64BIT != 0];
23836 else
23837 algs = &cost->memcpy[TARGET_64BIT != 0];
23839 /* See maximal size for user defined algorithm. */
23840 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23842 enum stringop_alg candidate = algs->size[i].alg;
23843 bool usable = alg_usable_p (candidate, memset);
23844 any_alg_usable_p |= usable;
23846 if (candidate != libcall && candidate && usable)
23847 max = algs->size[i].max;
23850 /* If expected size is not known but max size is small enough
23851 so inline version is a win, set expected size into
23852 the range. */
23853 if (max > 1 && (unsigned HOST_WIDE_INT) max >= max_size
23854 && expected_size == -1)
23855 expected_size = min_size / 2 + max_size / 2;
23857 /* If the user specified the algorithm, honor it if possible. */
23858 if (ix86_stringop_alg != no_stringop
23859 && alg_usable_p (ix86_stringop_alg, memset))
23860 return ix86_stringop_alg;
23861 /* rep; movq or rep; movl is the smallest variant. */
23862 else if (!optimize_for_speed)
23864 *noalign = true;
23865 if (!count || (count & 3) || (memset && !zero_memset))
23866 return alg_usable_p (rep_prefix_1_byte, memset)
23867 ? rep_prefix_1_byte : loop_1_byte;
23868 else
23869 return alg_usable_p (rep_prefix_4_byte, memset)
23870 ? rep_prefix_4_byte : loop;
23872 /* Very tiny blocks are best handled via the loop; REP is expensive to
23873 set up. */
23874 else if (expected_size != -1 && expected_size < 4)
23875 return loop_1_byte;
23876 else if (expected_size != -1)
23878 enum stringop_alg alg = libcall;
23879 bool alg_noalign = false;
23880 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23882 /* We get here if the algorithms that were not libcall-based
23883 were rep-prefix based and we are unable to use rep prefixes
23884 based on global register usage. Break out of the loop and
23885 use the heuristic below. */
23886 if (algs->size[i].max == 0)
23887 break;
23888 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
23890 enum stringop_alg candidate = algs->size[i].alg;
23892 if (candidate != libcall && alg_usable_p (candidate, memset))
23894 alg = candidate;
23895 alg_noalign = algs->size[i].noalign;
23897 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
23898 last non-libcall inline algorithm. */
23899 if (TARGET_INLINE_ALL_STRINGOPS)
23901 /* When the current size is best to be copied by a libcall,
23902 but we are still forced to inline, run the heuristic below
23903 that will pick code for medium sized blocks. */
23904 if (alg != libcall)
23906 *noalign = alg_noalign;
23907 return alg;
23909 break;
23911 else if (alg_usable_p (candidate, memset))
23913 *noalign = algs->size[i].noalign;
23914 return candidate;
23919 /* When asked to inline the call anyway, try to pick a meaningful choice.
23920 We look for the maximal size of block that is faster to copy by hand and
23921 take blocks of at most that size, guessing that the average size will
23922 be roughly half of the block.
23924 If this turns out to be bad, we might simply specify the preferred
23925 choice in ix86_costs. */
23926 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23927 && (algs->unknown_size == libcall
23928 || !alg_usable_p (algs->unknown_size, memset)))
23930 enum stringop_alg alg;
23932 /* If there aren't any usable algorithms, then recursing on
23933 smaller sizes isn't going to find anything. Just return the
23934 simple byte-at-a-time copy loop. */
23935 if (!any_alg_usable_p)
23937 /* Pick something reasonable. */
23938 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23939 *dynamic_check = 128;
23940 return loop_1_byte;
23942 if (max == -1)
23943 max = 4096;
23944 alg = decide_alg (count, max / 2, min_size, max_size, memset,
23945 zero_memset, dynamic_check, noalign);
23946 gcc_assert (*dynamic_check == -1);
23947 gcc_assert (alg != libcall);
23948 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23949 *dynamic_check = max;
23950 return alg;
23952 return (alg_usable_p (algs->unknown_size, memset)
23953 ? algs->unknown_size : libcall);
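/* Editorial example (assumed table shape, not taken from any tuning table
   in this file): a stringop_algs entry such as

     {libcall, {{256, unrolled_loop, false}, {-1, libcall, false}}}

   makes decide_alg pick unrolled_loop for an expected size of, say, 100
   (the first size bucket whose max covers it), and fall back to the
   unknown_size algorithm (libcall here) for larger or unknown blocks,
   unless -minline-all-stringops forces an inline expansion.  */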
23956 /* Decide on alignment. We know that the operand is already aligned to ALIGN
23957 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
23958 static int
23959 decide_alignment (int align,
23960 enum stringop_alg alg,
23961 int expected_size,
23962 enum machine_mode move_mode)
23964 int desired_align = 0;
23966 gcc_assert (alg != no_stringop);
23968 if (alg == libcall)
23969 return 0;
23970 if (move_mode == VOIDmode)
23971 return 0;
23973 desired_align = GET_MODE_SIZE (move_mode);
23974 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
23975 copying a whole cache line at once. */
23976 if (TARGET_PENTIUMPRO
23977 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
23978 desired_align = 8;
23980 if (optimize_size)
23981 desired_align = 1;
23982 if (desired_align < align)
23983 desired_align = align;
23984 if (expected_size != -1 && expected_size < 4)
23985 desired_align = align;
23987 return desired_align;
23991 /* Helper function for memset.  For a QImode value 0xXY produce
23992 0xXYXYXYXY of the width specified by MODE.  This is essentially
23993 a * 0x01010101, but we can do slightly better than
23994 synth_mult by unwinding the sequence by hand on CPUs with
23995 a slow multiply. */
23996 static rtx
23997 promote_duplicated_reg (enum machine_mode mode, rtx val)
23999 enum machine_mode valmode = GET_MODE (val);
24000 rtx tmp;
24001 int nops = mode == DImode ? 3 : 2;
24003 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24004 if (val == const0_rtx)
24005 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24006 if (CONST_INT_P (val))
24008 HOST_WIDE_INT v = INTVAL (val) & 255;
24010 v |= v << 8;
24011 v |= v << 16;
24012 if (mode == DImode)
24013 v |= (v << 16) << 16;
24014 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24017 if (valmode == VOIDmode)
24018 valmode = QImode;
24019 if (valmode != QImode)
24020 val = gen_lowpart (QImode, val);
24021 if (mode == QImode)
24022 return val;
24023 if (!TARGET_PARTIAL_REG_STALL)
24024 nops--;
24025 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24026 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24027 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24028 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24030 rtx reg = convert_modes (mode, QImode, val, true);
24031 tmp = promote_duplicated_reg (mode, const1_rtx);
24032 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24033 OPTAB_DIRECT);
24035 else
24037 rtx reg = convert_modes (mode, QImode, val, true);
24039 if (!TARGET_PARTIAL_REG_STALL)
24040 if (mode == SImode)
24041 emit_insn (gen_movsi_insv_1 (reg, reg));
24042 else
24043 emit_insn (gen_movdi_insv_1 (reg, reg));
24044 else
24046 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24047 NULL, 1, OPTAB_DIRECT);
24048 reg =
24049 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24051 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24052 NULL, 1, OPTAB_DIRECT);
24053 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24054 if (mode == SImode)
24055 return reg;
24056 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24057 NULL, 1, OPTAB_DIRECT);
24058 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24059 return reg;
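/* Editorial illustration (not part of i386.c): the constant-value path above
   amounts to this pure-C byte broadcast; repl_byte32 is a hypothetical name
   used only for this sketch.  */
static inline unsigned int
repl_byte32 (unsigned char b)
{
  unsigned int v = b;
  v |= v << 8;                  /* 0x000000XY -> 0x0000XYXY */
  v |= v << 16;                 /* 0x0000XYXY -> 0xXYXYXYXY */
  return v;                     /* equals b * 0x01010101U */
}
/* For DImode one more step (v |= v << 32) is added.  The non-constant path
   does the same with emitted shift/ior insns, or multiplies the
   zero-extended byte by the replicated-ones constant when the multiply is
   cheaper according to ix86_cost.  */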
24063 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that will
24064 be needed by the main loop copying SIZE_NEEDED chunks and by the prologue getting
24065 alignment from ALIGN to DESIRED_ALIGN. */
24066 static rtx
24067 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24068 int align)
24070 rtx promoted_val;
24072 if (TARGET_64BIT
24073 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24074 promoted_val = promote_duplicated_reg (DImode, val);
24075 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24076 promoted_val = promote_duplicated_reg (SImode, val);
24077 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24078 promoted_val = promote_duplicated_reg (HImode, val);
24079 else
24080 promoted_val = val;
24082 return promoted_val;
24085 /* Expand string move (memcpy) or store (memset) operation.  Use i386 string
24086 operations when profitable. The code depends upon architecture, block size
24087 and alignment, but always has one of the following overall structures:
24089 Aligned move sequence:
24091 1) Prologue guard: Conditional that jumps up to epilogues for small
24092 blocks that can be handled by epilogue alone. This is faster
24093 but also needed for correctness, since the prologue assumes the block
24094 is larger than the desired alignment.
24096 Optional dynamic check for size and libcall for large
24097 blocks is emitted here too, with -minline-stringops-dynamically.
24099 2) Prologue: copy first few bytes in order to get destination
24100 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24101 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24102 copied. We emit either a jump tree on power of two sized
24103 blocks, or a byte loop.
24105 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24106 with specified algorithm.
24108 4) Epilogue: code copying tail of the block that is too small to be
24109 handled by main body (or up to size guarded by prologue guard).
24111 Misaligned move sequence
24113 1) misaligned move prologue/epilogue containing:
24114 a) Prologue handling small memory blocks and jumping to done_label
24115 (skipped if blocks are known to be large enough)
24116 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24117 needed, by a single possibly misaligned move
24118 (skipped if alignment is not needed)
24119 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24121 2) Zero size guard dispatching to done_label, if needed
24123 3) dispatch to library call, if needed,
24125 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24126 with specified algorithm. */
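/* Editorial sketch (not part of i386.c): for the aligned sequence described
   above, the emitted code has roughly this shape:

     if (count < epilogue_size_needed) goto epilogue;    ... 1) guard
     while (dest not aligned to desired_align)
       copy/set a few bytes, count -= those bytes;       ... 2) prologue
     main loop copying/setting size_needed-byte chunks;  ... 3) main body
   epilogue:
     handle the remaining
       count & (epilogue_size_needed - 1) bytes;         ... 4) epilogue  */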
24127 bool
24128 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24129 rtx align_exp, rtx expected_align_exp,
24130 rtx expected_size_exp, rtx min_size_exp,
24131 rtx max_size_exp, rtx probable_max_size_exp,
24132 bool issetmem)
24134 rtx destreg;
24135 rtx srcreg = NULL;
24136 rtx label = NULL;
24137 rtx tmp;
24138 rtx jump_around_label = NULL;
24139 HOST_WIDE_INT align = 1;
24140 unsigned HOST_WIDE_INT count = 0;
24141 HOST_WIDE_INT expected_size = -1;
24142 int size_needed = 0, epilogue_size_needed;
24143 int desired_align = 0, align_bytes = 0;
24144 enum stringop_alg alg;
24145 rtx promoted_val = NULL;
24146 rtx vec_promoted_val = NULL;
24147 bool force_loopy_epilogue = false;
24148 int dynamic_check;
24149 bool need_zero_guard = false;
24150 bool noalign;
24151 enum machine_mode move_mode = VOIDmode;
24152 int unroll_factor = 1;
24153 /* TODO: Once value ranges are available, fill in proper data. */
24154 unsigned HOST_WIDE_INT min_size = 0;
24155 unsigned HOST_WIDE_INT max_size = -1;
24156 unsigned HOST_WIDE_INT probable_max_size = -1;
24157 bool misaligned_prologue_used = false;
24159 if (CONST_INT_P (align_exp))
24160 align = INTVAL (align_exp);
24161 /* i386 can do misaligned access at a reasonably increased cost. */
24162 if (CONST_INT_P (expected_align_exp)
24163 && INTVAL (expected_align_exp) > align)
24164 align = INTVAL (expected_align_exp);
24165 /* ALIGN is the minimum of destination and source alignment, but we care here
24166 just about destination alignment. */
24167 else if (!issetmem
24168 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24169 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24171 if (CONST_INT_P (count_exp))
24172 min_size = max_size = probable_max_size = count = expected_size
24173 = INTVAL (count_exp);
24174 else
24176 if (min_size_exp)
24177 min_size = INTVAL (min_size_exp);
24178 if (max_size_exp)
24179 max_size = INTVAL (max_size_exp);
24180 if (probable_max_size_exp)
24181 probable_max_size = INTVAL (probable_max_size_exp);
24182 if (CONST_INT_P (expected_size_exp) && count == 0)
24183 expected_size = INTVAL (expected_size_exp);
24186 /* Make sure we don't need to care about overflow later on. */
24187 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24188 return false;
24190 /* Step 0: Decide on preferred algorithm, desired alignment and
24191 size of chunks to be copied by main loop. */
24192 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24193 issetmem,
24194 issetmem && val_exp == const0_rtx,
24195 &dynamic_check, &noalign);
24196 if (alg == libcall)
24197 return false;
24198 gcc_assert (alg != no_stringop);
24200 /* For now the vector version of memset is generated only for memory zeroing, as
24201 creating the promoted vector value is very cheap in this case. */
24202 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24203 alg = unrolled_loop;
24205 if (!count)
24206 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24207 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24208 if (!issetmem)
24209 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24211 unroll_factor = 1;
24212 move_mode = word_mode;
24213 switch (alg)
24215 case libcall:
24216 case no_stringop:
24217 case last_alg:
24218 gcc_unreachable ();
24219 case loop_1_byte:
24220 need_zero_guard = true;
24221 move_mode = QImode;
24222 break;
24223 case loop:
24224 need_zero_guard = true;
24225 break;
24226 case unrolled_loop:
24227 need_zero_guard = true;
24228 unroll_factor = (TARGET_64BIT ? 4 : 2);
24229 break;
24230 case vector_loop:
24231 need_zero_guard = true;
24232 unroll_factor = 4;
24233 /* Find the widest supported mode. */
24234 move_mode = word_mode;
24235 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24236 != CODE_FOR_nothing)
24237 move_mode = GET_MODE_WIDER_MODE (move_mode);
24239 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24240 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24241 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24243 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24244 move_mode = mode_for_vector (word_mode, nunits);
24245 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24246 move_mode = word_mode;
24248 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24249 break;
24250 case rep_prefix_8_byte:
24251 move_mode = DImode;
24252 break;
24253 case rep_prefix_4_byte:
24254 move_mode = SImode;
24255 break;
24256 case rep_prefix_1_byte:
24257 move_mode = QImode;
24258 break;
24260 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24261 epilogue_size_needed = size_needed;
24263 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24264 if (!TARGET_ALIGN_STRINGOPS || noalign)
24265 align = desired_align;
24267 /* Step 1: Prologue guard. */
24269 /* Alignment code needs count to be in register. */
24270 if (CONST_INT_P (count_exp) && desired_align > align)
24272 if (INTVAL (count_exp) > desired_align
24273 && INTVAL (count_exp) > size_needed)
24275 align_bytes
24276 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24277 if (align_bytes <= 0)
24278 align_bytes = 0;
24279 else
24280 align_bytes = desired_align - align_bytes;
24282 if (align_bytes == 0)
24283 count_exp = force_reg (counter_mode (count_exp), count_exp);
24285 gcc_assert (desired_align >= 1 && align >= 1);
24287 /* Misaligned move sequences handle both prologue and epilogue at once.
24288 Default code generation results in smaller code for large alignments
24289 and also avoids redundant work when sizes are known precisely. */
24290 misaligned_prologue_used
24291 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24292 && MAX (desired_align, epilogue_size_needed) <= 32
24293 && desired_align <= epilogue_size_needed
24294 && ((desired_align > align && !align_bytes)
24295 || (!count && epilogue_size_needed > 1)));
24297 /* Do the cheap promotion to allow better CSE across the
24298 main loop and epilogue (i.e. one load of the big constant in
24299 front of all the code).
24300 For now the misaligned move sequences do not have a fast path
24301 without broadcasting. */
24302 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24304 if (alg == vector_loop)
24306 gcc_assert (val_exp == const0_rtx);
24307 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24308 promoted_val = promote_duplicated_reg_to_size (val_exp,
24309 GET_MODE_SIZE (word_mode),
24310 desired_align, align);
24312 else
24314 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24315 desired_align, align);
24318 /* Misaligned move sequences handle both prologues and epilogues at once.
24319 Default code generation results in smaller code for large alignments and
24320 also avoids redundant work when sizes are known precisely. */
24321 if (misaligned_prologue_used)
24323 /* The misaligned move prologue handles small blocks by itself. */
24324 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24325 (dst, src, &destreg, &srcreg,
24326 move_mode, promoted_val, vec_promoted_val,
24327 &count_exp,
24328 &jump_around_label,
24329 desired_align < align
24330 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24331 desired_align, align, &min_size, dynamic_check, issetmem);
24332 if (!issetmem)
24333 src = change_address (src, BLKmode, srcreg);
24334 dst = change_address (dst, BLKmode, destreg);
24335 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24336 epilogue_size_needed = 0;
24337 if (need_zero_guard && !min_size)
24339 /* It is possible that we copied enough so the main loop will not
24340 execute. */
24341 gcc_assert (size_needed > 1);
24342 if (jump_around_label == NULL_RTX)
24343 jump_around_label = gen_label_rtx ();
24344 emit_cmp_and_jump_insns (count_exp,
24345 GEN_INT (size_needed),
24346 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24347 if (expected_size == -1
24348 || expected_size < (desired_align - align) / 2 + size_needed)
24349 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24350 else
24351 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24354 /* Ensure that alignment prologue won't copy past end of block. */
24355 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24357 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24358 /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
24359 Make sure it is a power of 2. */
24360 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24362 /* To improve performance of small blocks, we jump around the VAL
24363 promoting code.  This means that if the promoted VAL is not constant,
24364 we might not use it in the epilogue and have to use the byte
24365 loop variant. */
24366 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24367 force_loopy_epilogue = true;
24368 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24369 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24371 /* If main algorithm works on QImode, no epilogue is needed.
24372 For small sizes just don't align anything. */
24373 if (size_needed == 1)
24374 desired_align = align;
24375 else
24376 goto epilogue;
24378 else if (!count
24379 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24381 label = gen_label_rtx ();
24382 emit_cmp_and_jump_insns (count_exp,
24383 GEN_INT (epilogue_size_needed),
24384 LTU, 0, counter_mode (count_exp), 1, label);
24385 if (expected_size == -1 || expected_size < epilogue_size_needed)
24386 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24387 else
24388 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24392 /* Emit code to decide at runtime whether a library call or inline code should be
24393 used. */
24394 if (dynamic_check != -1)
24396 if (!issetmem && CONST_INT_P (count_exp))
24398 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24400 emit_block_move_via_libcall (dst, src, count_exp, false);
24401 count_exp = const0_rtx;
24402 goto epilogue;
24405 else
24407 rtx hot_label = gen_label_rtx ();
24408 if (jump_around_label == NULL_RTX)
24409 jump_around_label = gen_label_rtx ();
24410 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24411 LEU, 0, GET_MODE (count_exp), 1, hot_label);
24412 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24413 if (issetmem)
24414 set_storage_via_libcall (dst, count_exp, val_exp, false);
24415 else
24416 emit_block_move_via_libcall (dst, src, count_exp, false);
24417 emit_jump (jump_around_label);
24418 emit_label (hot_label);
24422 /* Step 2: Alignment prologue. */
24423 /* Do the expensive promotion once we branched off the small blocks. */
24424 if (issetmem && !promoted_val)
24425 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24426 desired_align, align);
24428 if (desired_align > align && !misaligned_prologue_used)
24430 if (align_bytes == 0)
24432 /* Except for the first move in the prologue, we no longer know
24433 the constant offset in the aliasing info.  It doesn't seem worth
24434 the pain to maintain it for the first move, so throw away
24435 the info early. */
24436 dst = change_address (dst, BLKmode, destreg);
24437 if (!issetmem)
24438 src = change_address (src, BLKmode, srcreg);
24439 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24440 promoted_val, vec_promoted_val,
24441 count_exp, align, desired_align,
24442 issetmem);
24443 /* At most desired_align - align bytes are copied. */
24444 if (min_size < (unsigned)(desired_align - align))
24445 min_size = 0;
24446 else
24447 min_size -= desired_align - align;
24449 else
24451 /* If we know how many bytes need to be stored before dst is
24452 sufficiently aligned, maintain aliasing info accurately. */
24453 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24454 srcreg,
24455 promoted_val,
24456 vec_promoted_val,
24457 desired_align,
24458 align_bytes,
24459 issetmem);
24461 count_exp = plus_constant (counter_mode (count_exp),
24462 count_exp, -align_bytes);
24463 count -= align_bytes;
24464 min_size -= align_bytes;
24465 max_size -= align_bytes;
24467 if (need_zero_guard
24468 && !min_size
24469 && (count < (unsigned HOST_WIDE_INT) size_needed
24470 || (align_bytes == 0
24471 && count < ((unsigned HOST_WIDE_INT) size_needed
24472 + desired_align - align))))
24474 /* It is possible that we copied enough so the main loop will not
24475 execute. */
24476 gcc_assert (size_needed > 1);
24477 if (label == NULL_RTX)
24478 label = gen_label_rtx ();
24479 emit_cmp_and_jump_insns (count_exp,
24480 GEN_INT (size_needed),
24481 LTU, 0, counter_mode (count_exp), 1, label);
24482 if (expected_size == -1
24483 || expected_size < (desired_align - align) / 2 + size_needed)
24484 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24485 else
24486 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24489 if (label && size_needed == 1)
24491 emit_label (label);
24492 LABEL_NUSES (label) = 1;
24493 label = NULL;
24494 epilogue_size_needed = 1;
24495 if (issetmem)
24496 promoted_val = val_exp;
24498 else if (label == NULL_RTX && !misaligned_prologue_used)
24499 epilogue_size_needed = size_needed;
24501 /* Step 3: Main loop. */
24503 switch (alg)
24505 case libcall:
24506 case no_stringop:
24507 case last_alg:
24508 gcc_unreachable ();
24509 case loop_1_byte:
24510 case loop:
24511 case unrolled_loop:
24512 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24513 count_exp, move_mode, unroll_factor,
24514 expected_size, issetmem);
24515 break;
24516 case vector_loop:
24517 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24518 vec_promoted_val, count_exp, move_mode,
24519 unroll_factor, expected_size, issetmem);
24520 break;
24521 case rep_prefix_8_byte:
24522 case rep_prefix_4_byte:
24523 case rep_prefix_1_byte:
24524 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24525 val_exp, count_exp, move_mode, issetmem);
24526 break;
24528 /* Properly adjust the offset of src and dest memory for aliasing. */
24529 if (CONST_INT_P (count_exp))
24531 if (!issetmem)
24532 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24533 (count / size_needed) * size_needed);
24534 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24535 (count / size_needed) * size_needed);
24537 else
24539 if (!issetmem)
24540 src = change_address (src, BLKmode, srcreg);
24541 dst = change_address (dst, BLKmode, destreg);
24544 /* Step 4: Epilogue to copy the remaining bytes. */
24545 epilogue:
24546 if (label)
24548 /* When the main loop is done, COUNT_EXP might hold original count,
24549 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
24550 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
24551 bytes. Compensate if needed. */
24553 if (size_needed < epilogue_size_needed)
24555 tmp =
24556 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24557 GEN_INT (size_needed - 1), count_exp, 1,
24558 OPTAB_DIRECT);
24559 if (tmp != count_exp)
24560 emit_move_insn (count_exp, tmp);
24562 emit_label (label);
24563 LABEL_NUSES (label) = 1;
24566 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24568 if (force_loopy_epilogue)
24569 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24570 epilogue_size_needed);
24571 else
24573 if (issetmem)
24574 expand_setmem_epilogue (dst, destreg, promoted_val,
24575 vec_promoted_val, count_exp,
24576 epilogue_size_needed);
24577 else
24578 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24579 epilogue_size_needed);
24582 if (jump_around_label)
24583 emit_label (jump_around_label);
24584 return true;
24588 /* Expand the appropriate insns for doing strlen if not just doing
24589 repnz; scasb
24591 out = result, initialized with the start address
24592 align_rtx = alignment of the address.
24593 scratch = scratch register, initialized with the start address when
24594 not aligned, otherwise undefined
24596 This is just the body. It needs the initializations mentioned above and
24597 some address computation at the end. These things are done in i386.md. */
24599 static void
24600 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24602 int align;
24603 rtx tmp;
24604 rtx align_2_label = NULL_RTX;
24605 rtx align_3_label = NULL_RTX;
24606 rtx align_4_label = gen_label_rtx ();
24607 rtx end_0_label = gen_label_rtx ();
24608 rtx mem;
24609 rtx tmpreg = gen_reg_rtx (SImode);
24610 rtx scratch = gen_reg_rtx (SImode);
24611 rtx cmp;
24613 align = 0;
24614 if (CONST_INT_P (align_rtx))
24615 align = INTVAL (align_rtx);
24617 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24619 /* Is there a known alignment and is it less than 4? */
24620 if (align < 4)
24622 rtx scratch1 = gen_reg_rtx (Pmode);
24623 emit_move_insn (scratch1, out);
24624 /* Is there a known alignment and is it not 2? */
24625 if (align != 2)
24627 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24628 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24630 /* Leave just the 3 lower bits. */
24631 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24632 NULL_RTX, 0, OPTAB_WIDEN);
24634 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24635 Pmode, 1, align_4_label);
24636 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24637 Pmode, 1, align_2_label);
24638 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24639 Pmode, 1, align_3_label);
24641 else
24643 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24644 check whether it is aligned to 4 bytes. */
24646 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24647 NULL_RTX, 0, OPTAB_WIDEN);
24649 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24650 Pmode, 1, align_4_label);
24653 mem = change_address (src, QImode, out);
24655 /* Now compare the bytes. */
24657 /* Compare the first n unaligned bytes on a byte-per-byte basis. */
24658 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24659 QImode, 1, end_0_label);
24661 /* Increment the address. */
24662 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24664 /* Not needed with an alignment of 2 */
24665 if (align != 2)
24667 emit_label (align_2_label);
24669 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24670 end_0_label);
24672 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24674 emit_label (align_3_label);
24677 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24678 end_0_label);
24680 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24683 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
24684 align this loop: it only makes the program larger and does not help
24685 to speed it up. */
24686 emit_label (align_4_label);
24688 mem = change_address (src, SImode, out);
24689 emit_move_insn (scratch, mem);
24690 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24692 /* This formula yields a nonzero result iff one of the bytes is zero.
24693 This saves three branches inside the loop and many cycles. */
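/* Concretely, the sequence below computes
   (X - 0x01010101) & ~X & 0x80808080,
   which is nonzero iff some byte of X is zero; every zero byte of X has
   its high bit set in the result. */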
24695 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24696 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24697 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24698 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24699 gen_int_mode (0x80808080, SImode)));
24700 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24701 align_4_label);
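/* The loop falls through only when TMPREG is nonzero, i.e. the word just
   loaded contains a zero byte. The code below locates that byte and
   adjusts OUT to point at it, preferring cmov over branches when
   available. */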
24703 if (TARGET_CMOVE)
24705 rtx reg = gen_reg_rtx (SImode);
24706 rtx reg2 = gen_reg_rtx (Pmode);
24707 emit_move_insn (reg, tmpreg);
24708 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24710 /* If zero is not in the first two bytes, move two bytes forward. */
24711 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24712 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24713 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24714 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24715 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24716 reg,
24717 tmpreg)));
24718 /* Emit lea manually to avoid clobbering of flags. */
24719 emit_insn (gen_rtx_SET (SImode, reg2,
24720 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24722 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24723 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24724 emit_insn (gen_rtx_SET (VOIDmode, out,
24725 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24726 reg2,
24727 out)));
24729 else
24731 rtx end_2_label = gen_label_rtx ();
24732 /* Is zero in the first two bytes? */
24734 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24735 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24736 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24737 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24738 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24739 pc_rtx);
24740 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24741 JUMP_LABEL (tmp) = end_2_label;
24743 /* Not in the first two. Move two bytes forward. */
24744 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24745 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24747 emit_label (end_2_label);
24751 /* Avoid a branch when fixing up the final byte. */
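/* Doubling the low byte of TMPREG moves its zero-byte flag (bit 7) into
   the carry flag; subtracting 3 plus that carry leaves OUT pointing at
   the terminating zero byte. */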
24752 tmpreg = gen_lowpart (QImode, tmpreg);
24753 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24754 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24755 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24756 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
24758 emit_label (end_0_label);
24761 /* Expand strlen. */
24763 bool
24764 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24766 rtx addr, scratch1, scratch2, scratch3, scratch4;
24768 /* The generic case of the strlen expander is long. Avoid expanding it
24769 unless TARGET_INLINE_ALL_STRINGOPS. */
24771 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24772 && !TARGET_INLINE_ALL_STRINGOPS
24773 && !optimize_insn_for_size_p ()
24774 && (!CONST_INT_P (align) || INTVAL (align) < 4))
24775 return false;
24777 addr = force_reg (Pmode, XEXP (src, 0));
24778 scratch1 = gen_reg_rtx (Pmode);
24780 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24781 && !optimize_insn_for_size_p ())
24783 /* Well it seems that some optimizer does not combine a call like
24784 foo(strlen(bar), strlen(bar));
24785 when the move and the subtraction are done here. It does calculate
24786 the length just once when these instructions are done inside
24787 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
24788 often used and I use one fewer register for the lifetime of
24789 output_strlen_unroll() this is better. */
24791 emit_move_insn (out, addr);
24793 ix86_expand_strlensi_unroll_1 (out, src, align);
24795 /* strlensi_unroll_1 returns the address of the zero at the end of
24796 the string, like memchr(), so compute the length by subtracting
24797 the start address. */
24798 emit_insn (ix86_gen_sub3 (out, out, addr));
24800 else
24802 rtx unspec;
24804 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24805 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24806 return false;
24808 scratch2 = gen_reg_rtx (Pmode);
24809 scratch3 = gen_reg_rtx (Pmode);
24810 scratch4 = force_reg (Pmode, constm1_rtx);
24812 emit_move_insn (scratch3, addr);
24813 eoschar = force_reg (QImode, eoschar);
24815 src = replace_equiv_address_nv (src, scratch3);
24817 /* If .md starts supporting :P, this can be done in .md. */
24818 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
24819 scratch4), UNSPEC_SCAS);
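/* SCRATCH4 = -1 is the (effectively unbounded) count for the scas loop.
   The scan decrements the count once per byte scanned, including the
   terminating zero, so SCRATCH1 ends up holding -(length + 2) and the
   ~SCRATCH1 - 1 computed below recovers the string length. */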
24820 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
24821 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
24822 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
24824 return true;
24827 /* For a given symbol (function), construct code to compute the address of its
24828 PLT entry in the large x86-64 PIC model. */
24829 static rtx
24830 construct_plt_address (rtx symbol)
24832 rtx tmp, unspec;
24834 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
24835 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
24836 gcc_assert (Pmode == DImode);
24838 tmp = gen_reg_rtx (Pmode);
24839 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
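/* Load the @PLTOFF offset of SYMBOL and add the PIC base register to form
   the absolute address of the PLT entry. */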
24841 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
24842 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
24843 return tmp;
24847 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
24848 rtx callarg2,
24849 rtx pop, bool sibcall)
24851 unsigned int const cregs_size
24852 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
24853 rtx vec[3 + cregs_size];
24854 rtx use = NULL, call;
24855 unsigned int vec_len = 0;
24857 if (pop == const0_rtx)
24858 pop = NULL;
24859 gcc_assert (!TARGET_64BIT || !pop);
24861 if (TARGET_MACHO && !TARGET_64BIT)
24863 #if TARGET_MACHO
24864 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
24865 fnaddr = machopic_indirect_call_target (fnaddr);
24866 #endif
24868 else
24870 /* Static functions and indirect calls don't need the pic register. */
24871 if (flag_pic
24872 && (!TARGET_64BIT
24873 || (ix86_cmodel == CM_LARGE_PIC
24874 && DEFAULT_ABI != MS_ABI))
24875 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24876 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
24877 use_reg (&use, pic_offset_table_rtx);
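/* When CALLARG2 is non-negative it is the number of SSE registers used for
   the arguments of a 64-bit varargs call; the SysV ABI passes that count
   in AL. */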
24880 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
24882 rtx al = gen_rtx_REG (QImode, AX_REG);
24883 emit_move_insn (al, callarg2);
24884 use_reg (&use, al);
24887 if (ix86_cmodel == CM_LARGE_PIC
24888 && !TARGET_PECOFF
24889 && MEM_P (fnaddr)
24890 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24891 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
24892 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
24893 else if (sibcall
24894 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
24895 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
24897 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
24898 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
24901 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
24902 if (retval)
24903 call = gen_rtx_SET (VOIDmode, retval, call);
24904 vec[vec_len++] = call;
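/* POP, when present, is the number of argument bytes the callee pops on
   return; model it as an adjustment of the stack pointer in the call
   PARALLEL. */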
24906 if (pop)
24908 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
24909 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
24910 vec[vec_len++] = pop;
24913 if (TARGET_64BIT_MS_ABI
24914 && (!callarg2 || INTVAL (callarg2) != -2))
24916 unsigned i;
24918 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
24919 UNSPEC_MS_TO_SYSV_CALL);
24921 for (i = 0; i < cregs_size; i++)
24923 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
24924 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
24926 vec[vec_len++]
24927 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno));
24931 if (vec_len > 1)
24932 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
24933 call = emit_call_insn (call);
24934 if (use)
24935 CALL_INSN_FUNCTION_USAGE (call) = use;
24937 return call;
24940 /* Output the assembly for a call instruction. */
24942 const char *
24943 ix86_output_call_insn (rtx insn, rtx call_op)
24945 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
24946 bool seh_nop_p = false;
24947 const char *xasm;
24949 if (SIBLING_CALL_P (insn))
24951 if (direct_p)
24952 xasm = "jmp\t%P0";
24953 /* SEH epilogue detection requires the indirect branch case
24954 to include REX.W. */
24955 else if (TARGET_SEH)
24956 xasm = "rex.W jmp %A0";
24957 else
24958 xasm = "jmp\t%A0";
24960 output_asm_insn (xasm, &call_op);
24961 return "";
24964 /* SEH unwinding can require an extra nop to be emitted in several
24965 circumstances. Determine if we have one of those. */
24966 if (TARGET_SEH)
24968 rtx i;
24970 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
24972 /* If we get to another real insn, we don't need the nop. */
24973 if (INSN_P (i))
24974 break;
24976 /* If we get to the epilogue note, prevent a catch region from
24977 being adjacent to the standard epilogue sequence. If non-call
24978 exceptions are enabled, we'll have done this during epilogue emission. */
24979 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
24980 && !flag_non_call_exceptions
24981 && !can_throw_internal (insn))
24983 seh_nop_p = true;
24984 break;
24988 /* If we didn't find a real insn following the call, prevent the
24989 unwinder from looking into the next function. */
24990 if (i == NULL)
24991 seh_nop_p = true;
24994 if (direct_p)
24995 xasm = "call\t%P0";
24996 else
24997 xasm = "call\t%A0";
24999 output_asm_insn (xasm, &call_op);
25001 if (seh_nop_p)
25002 return "nop";
25004 return "";
25007 /* Clear stack slot assignments remembered from previous functions.
25008 This is called from INIT_EXPANDERS once before RTL is emitted for each
25009 function. */
25011 static struct machine_function *
25012 ix86_init_machine_status (void)
25014 struct machine_function *f;
25016 f = ggc_alloc_cleared_machine_function ();
25017 f->use_fast_prologue_epilogue_nregs = -1;
25018 f->call_abi = ix86_abi;
25020 return f;
25023 /* Return a MEM corresponding to a stack slot with mode MODE.
25024 Allocate a new slot if necessary.
25026 The RTL for a function can have several slots available: N is
25027 which slot to use. */
25030 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
25032 struct stack_local_entry *s;
25034 gcc_assert (n < MAX_386_STACK_LOCALS);
25036 for (s = ix86_stack_locals; s; s = s->next)
25037 if (s->mode == mode && s->n == n)
25038 return validize_mem (copy_rtx (s->rtl));
25040 s = ggc_alloc_stack_local_entry ();
25041 s->n = n;
25042 s->mode = mode;
25043 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25045 s->next = ix86_stack_locals;
25046 ix86_stack_locals = s;
25047 return validize_mem (s->rtl);
25050 static void
25051 ix86_instantiate_decls (void)
25053 struct stack_local_entry *s;
25055 for (s = ix86_stack_locals; s; s = s->next)
25056 if (s->rtl != NULL_RTX)
25057 instantiate_decl_rtl (s->rtl);
25060 /* Check whether x86 address PARTS is a pc-relative address. */
25062 static bool
25063 rip_relative_addr_p (struct ix86_address *parts)
25065 rtx base, index, disp;
25067 base = parts->base;
25068 index = parts->index;
25069 disp = parts->disp;
25071 if (disp && !base && !index)
25073 if (TARGET_64BIT)
25075 rtx symbol = disp;
25077 if (GET_CODE (disp) == CONST)
25078 symbol = XEXP (disp, 0);
25079 if (GET_CODE (symbol) == PLUS
25080 && CONST_INT_P (XEXP (symbol, 1)))
25081 symbol = XEXP (symbol, 0);
25083 if (GET_CODE (symbol) == LABEL_REF
25084 || (GET_CODE (symbol) == SYMBOL_REF
25085 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25086 || (GET_CODE (symbol) == UNSPEC
25087 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25088 || XINT (symbol, 1) == UNSPEC_PCREL
25089 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25090 return true;
25093 return false;
25096 /* Calculate the length of the memory address in the instruction encoding.
25097 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25098 or other prefixes. We never generate addr32 prefix for LEA insn. */
25101 memory_address_length (rtx addr, bool lea)
25103 struct ix86_address parts;
25104 rtx base, index, disp;
25105 int len;
25106 int ok;
25108 if (GET_CODE (addr) == PRE_DEC
25109 || GET_CODE (addr) == POST_INC
25110 || GET_CODE (addr) == PRE_MODIFY
25111 || GET_CODE (addr) == POST_MODIFY)
25112 return 0;
25114 ok = ix86_decompose_address (addr, &parts);
25115 gcc_assert (ok);
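/* A non-default segment requires a one-byte segment override prefix. */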
25117 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25119 /* If this is not LEA instruction, add the length of addr32 prefix. */
25120 if (TARGET_64BIT && !lea
25121 && (SImode_address_operand (addr, VOIDmode)
25122 || (parts.base && GET_MODE (parts.base) == SImode)
25123 || (parts.index && GET_MODE (parts.index) == SImode)))
25124 len++;
25126 base = parts.base;
25127 index = parts.index;
25128 disp = parts.disp;
25130 if (base && GET_CODE (base) == SUBREG)
25131 base = SUBREG_REG (base);
25132 if (index && GET_CODE (index) == SUBREG)
25133 index = SUBREG_REG (index);
25135 gcc_assert (base == NULL_RTX || REG_P (base));
25136 gcc_assert (index == NULL_RTX || REG_P (index));
25138 /* Rule of thumb:
25139 - esp as the base always wants an index,
25140 - ebp as the base always wants a displacement,
25141 - r12 as the base always wants an index,
25142 - r13 as the base always wants a displacement. */
25144 /* Register Indirect. */
25145 if (base && !index && !disp)
25147 /* esp (for its index) and ebp (for its displacement) need
25148 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25149 code. */
25150 if (base == arg_pointer_rtx
25151 || base == frame_pointer_rtx
25152 || REGNO (base) == SP_REG
25153 || REGNO (base) == BP_REG
25154 || REGNO (base) == R12_REG
25155 || REGNO (base) == R13_REG)
25156 len++;
25159 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25160 is not disp32, but disp32(%rip), so for disp32
25161 SIB byte is needed, unless print_operand_address
25162 optimizes it into disp32(%rip) or (%rip) is implied
25163 by UNSPEC. */
25164 else if (disp && !base && !index)
25166 len += 4;
25167 if (rip_relative_addr_p (&parts))
25168 len++;
25170 else
25172 /* Find the length of the displacement constant. */
25173 if (disp)
25175 if (base && satisfies_constraint_K (disp))
25176 len += 1;
25177 else
25178 len += 4;
25180 /* ebp always wants a displacement. Similarly r13. */
25181 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25182 len++;
25184 /* An index requires the two-byte modrm form.... */
25185 if (index
25186 /* ...like esp (or r12), which always wants an index. */
25187 || base == arg_pointer_rtx
25188 || base == frame_pointer_rtx
25189 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25190 len++;
25193 return len;
25196 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25197 is set, expect that the insn has an 8-bit immediate alternative. */
25199 ix86_attr_length_immediate_default (rtx insn, bool shortform)
25201 int len = 0;
25202 int i;
25203 extract_insn_cached (insn);
25204 for (i = recog_data.n_operands - 1; i >= 0; --i)
25205 if (CONSTANT_P (recog_data.operand[i]))
25207 enum attr_mode mode = get_attr_mode (insn);
25209 gcc_assert (!len);
25210 if (shortform && CONST_INT_P (recog_data.operand[i]))
25212 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25213 switch (mode)
25215 case MODE_QI:
25216 len = 1;
25217 continue;
25218 case MODE_HI:
25219 ival = trunc_int_for_mode (ival, HImode);
25220 break;
25221 case MODE_SI:
25222 ival = trunc_int_for_mode (ival, SImode);
25223 break;
25224 default:
25225 break;
25227 if (IN_RANGE (ival, -128, 127))
25229 len = 1;
25230 continue;
25233 switch (mode)
25235 case MODE_QI:
25236 len = 1;
25237 break;
25238 case MODE_HI:
25239 len = 2;
25240 break;
25241 case MODE_SI:
25242 len = 4;
25243 break;
25244 /* Immediates for DImode instructions are encoded
25245 as 32bit sign extended values. */
25246 case MODE_DI:
25247 len = 4;
25248 break;
25249 default:
25250 fatal_insn ("unknown insn mode", insn);
25253 return len;
25256 /* Compute default value for "length_address" attribute. */
25258 ix86_attr_length_address_default (rtx insn)
25260 int i;
25262 if (get_attr_type (insn) == TYPE_LEA)
25264 rtx set = PATTERN (insn), addr;
25266 if (GET_CODE (set) == PARALLEL)
25267 set = XVECEXP (set, 0, 0);
25269 gcc_assert (GET_CODE (set) == SET);
25271 addr = SET_SRC (set);
25273 return memory_address_length (addr, true);
25276 extract_insn_cached (insn);
25277 for (i = recog_data.n_operands - 1; i >= 0; --i)
25278 if (MEM_P (recog_data.operand[i]))
25280 constrain_operands_cached (reload_completed);
25281 if (which_alternative != -1)
25283 const char *constraints = recog_data.constraints[i];
25284 int alt = which_alternative;
25286 while (*constraints == '=' || *constraints == '+')
25287 constraints++;
25288 while (alt-- > 0)
25289 while (*constraints++ != ',')
25291 /* Skip ignored operands. */
25292 if (*constraints == 'X')
25293 continue;
25295 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25297 return 0;
25300 /* Compute default value for "length_vex" attribute. It includes
25301 2 or 3 byte VEX prefix and 1 opcode byte. */
25304 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
25306 int i;
25308 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX W bit
25309 requires the 3-byte VEX prefix. */
25310 if (!has_0f_opcode || has_vex_w)
25311 return 3 + 1;
25313 /* We can always use 2 byte VEX prefix in 32bit. */
25314 if (!TARGET_64BIT)
25315 return 2 + 1;
25317 extract_insn_cached (insn);
25319 for (i = recog_data.n_operands - 1; i >= 0; --i)
25320 if (REG_P (recog_data.operand[i]))
25322 /* REX.W bit uses 3 byte VEX prefix. */
25323 if (GET_MODE (recog_data.operand[i]) == DImode
25324 && GENERAL_REG_P (recog_data.operand[i]))
25325 return 3 + 1;
25327 else
25329 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25330 if (MEM_P (recog_data.operand[i])
25331 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25332 return 3 + 1;
25335 return 2 + 1;
25338 /* Return the maximum number of instructions a cpu can issue. */
25340 static int
25341 ix86_issue_rate (void)
25343 switch (ix86_tune)
25345 case PROCESSOR_PENTIUM:
25346 case PROCESSOR_BONNELL:
25347 case PROCESSOR_SILVERMONT:
25348 case PROCESSOR_INTEL:
25349 case PROCESSOR_K6:
25350 case PROCESSOR_BTVER2:
25351 case PROCESSOR_PENTIUM4:
25352 case PROCESSOR_NOCONA:
25353 return 2;
25355 case PROCESSOR_PENTIUMPRO:
25356 case PROCESSOR_ATHLON:
25357 case PROCESSOR_K8:
25358 case PROCESSOR_AMDFAM10:
25359 case PROCESSOR_GENERIC:
25360 case PROCESSOR_BTVER1:
25361 return 3;
25363 case PROCESSOR_BDVER1:
25364 case PROCESSOR_BDVER2:
25365 case PROCESSOR_BDVER3:
25366 case PROCESSOR_BDVER4:
25367 case PROCESSOR_CORE2:
25368 case PROCESSOR_NEHALEM:
25369 case PROCESSOR_SANDYBRIDGE:
25370 case PROCESSOR_HASWELL:
25371 return 4;
25373 default:
25374 return 1;
25378 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags
25379 set by DEP_INSN and nothing else set by DEP_INSN. */
25381 static bool
25382 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
25384 rtx set, set2;
25386 /* Simplify the test for uninteresting insns. */
25387 if (insn_type != TYPE_SETCC
25388 && insn_type != TYPE_ICMOV
25389 && insn_type != TYPE_FCMOV
25390 && insn_type != TYPE_IBR)
25391 return false;
25393 if ((set = single_set (dep_insn)) != 0)
25395 set = SET_DEST (set);
25396 set2 = NULL_RTX;
25398 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25399 && XVECLEN (PATTERN (dep_insn), 0) == 2
25400 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25401 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25403 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25404 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25406 else
25407 return false;
25409 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25410 return false;
25412 /* This test is true if the dependent insn reads the flags but
25413 not any other potentially set register. */
25414 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25415 return false;
25417 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25418 return false;
25420 return true;
25423 /* Return true iff USE_INSN has a memory address with operands set by
25424 SET_INSN. */
25426 bool
25427 ix86_agi_dependent (rtx set_insn, rtx use_insn)
25429 int i;
25430 extract_insn_cached (use_insn);
25431 for (i = recog_data.n_operands - 1; i >= 0; --i)
25432 if (MEM_P (recog_data.operand[i]))
25434 rtx addr = XEXP (recog_data.operand[i], 0);
25435 return modified_in_p (addr, set_insn) != 0;
25437 return false;
25440 /* Helper function for exact_store_load_dependency.
25441 Return true if addr is found in insn. */
25442 static bool
25443 exact_dependency_1 (rtx addr, rtx insn)
25445 enum rtx_code code;
25446 const char *format_ptr;
25447 int i, j;
25449 code = GET_CODE (insn);
25450 switch (code)
25452 case MEM:
25453 if (rtx_equal_p (addr, insn))
25454 return true;
25455 break;
25456 case REG:
25457 CASE_CONST_ANY:
25458 case SYMBOL_REF:
25459 case CODE_LABEL:
25460 case PC:
25461 case CC0:
25462 case EXPR_LIST:
25463 return false;
25464 default:
25465 break;
25468 format_ptr = GET_RTX_FORMAT (code);
25469 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25471 switch (*format_ptr++)
25473 case 'e':
25474 if (exact_dependency_1 (addr, XEXP (insn, i)))
25475 return true;
25476 break;
25477 case 'E':
25478 for (j = 0; j < XVECLEN (insn, i); j++)
25479 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25480 return true;
25481 break;
25484 return false;
25487 /* Return true if there is an exact dependency between the store and the
25488 load, i.e. the same memory address is used in both. */
25489 static bool
25490 exact_store_load_dependency (rtx store, rtx load)
25492 rtx set1, set2;
25494 set1 = single_set (store);
25495 if (!set1)
25496 return false;
25497 if (!MEM_P (SET_DEST (set1)))
25498 return false;
25499 set2 = single_set (load);
25500 if (!set2)
25501 return false;
25502 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25503 return true;
25504 return false;
25507 static int
25508 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
25510 enum attr_type insn_type, dep_insn_type;
25511 enum attr_memory memory;
25512 rtx set, set2;
25513 int dep_insn_code_number;
25515 /* Anti and output dependencies have zero cost on all CPUs. */
25516 if (REG_NOTE_KIND (link) != 0)
25517 return 0;
25519 dep_insn_code_number = recog_memoized (dep_insn);
25521 /* If we can't recognize the insns, we can't really do anything. */
25522 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25523 return cost;
25525 insn_type = get_attr_type (insn);
25526 dep_insn_type = get_attr_type (dep_insn);
25528 switch (ix86_tune)
25530 case PROCESSOR_PENTIUM:
25531 /* Address Generation Interlock adds a cycle of latency. */
25532 if (insn_type == TYPE_LEA)
25534 rtx addr = PATTERN (insn);
25536 if (GET_CODE (addr) == PARALLEL)
25537 addr = XVECEXP (addr, 0, 0);
25539 gcc_assert (GET_CODE (addr) == SET);
25541 addr = SET_SRC (addr);
25542 if (modified_in_p (addr, dep_insn))
25543 cost += 1;
25545 else if (ix86_agi_dependent (dep_insn, insn))
25546 cost += 1;
25548 /* ??? Compares pair with jump/setcc. */
25549 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25550 cost = 0;
25552 /* Floating point stores require value to be ready one cycle earlier. */
25553 if (insn_type == TYPE_FMOV
25554 && get_attr_memory (insn) == MEMORY_STORE
25555 && !ix86_agi_dependent (dep_insn, insn))
25556 cost += 1;
25557 break;
25559 case PROCESSOR_PENTIUMPRO:
25560 /* INT->FP conversion is expensive. */
25561 if (get_attr_fp_int_src (dep_insn))
25562 cost += 5;
25564 /* There is one cycle extra latency between an FP op and a store. */
25565 if (insn_type == TYPE_FMOV
25566 && (set = single_set (dep_insn)) != NULL_RTX
25567 && (set2 = single_set (insn)) != NULL_RTX
25568 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25569 && MEM_P (SET_DEST (set2)))
25570 cost += 1;
25572 memory = get_attr_memory (insn);
25574 /* Model the ability of the reorder buffer to hide the latency of a load
25575 by executing it in parallel with the previous instruction when the
25576 previous instruction is not needed to compute the address. */
25577 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25578 && !ix86_agi_dependent (dep_insn, insn))
25580 /* Claim moves take one cycle, as the core can issue one load
25581 at a time and the next load can start a cycle later. */
25582 if (dep_insn_type == TYPE_IMOV
25583 || dep_insn_type == TYPE_FMOV)
25584 cost = 1;
25585 else if (cost > 1)
25586 cost--;
25588 break;
25590 case PROCESSOR_K6:
25591 /* The esp dependency is resolved before
25592 the instruction is really finished. */
25593 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25594 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25595 return 1;
25597 /* INT->FP conversion is expensive. */
25598 if (get_attr_fp_int_src (dep_insn))
25599 cost += 5;
25601 memory = get_attr_memory (insn);
25603 /* Model the ability of the reorder buffer to hide the latency of a load
25604 by executing it in parallel with the previous instruction when the
25605 previous instruction is not needed to compute the address. */
25606 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25607 && !ix86_agi_dependent (dep_insn, insn))
25609 /* Claim moves take one cycle, as the core can issue one load
25610 at a time and the next load can start a cycle later. */
25611 if (dep_insn_type == TYPE_IMOV
25612 || dep_insn_type == TYPE_FMOV)
25613 cost = 1;
25614 else if (cost > 2)
25615 cost -= 2;
25616 else
25617 cost = 1;
25619 break;
25621 case PROCESSOR_AMDFAM10:
25622 case PROCESSOR_BDVER1:
25623 case PROCESSOR_BDVER2:
25624 case PROCESSOR_BDVER3:
25625 case PROCESSOR_BDVER4:
25626 case PROCESSOR_BTVER1:
25627 case PROCESSOR_BTVER2:
25628 case PROCESSOR_GENERIC:
25629 /* The stack engine allows push and pop instructions to execute in parallel. */
25630 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25631 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25632 return 0;
25633 /* FALLTHRU */
25635 case PROCESSOR_ATHLON:
25636 case PROCESSOR_K8:
25637 memory = get_attr_memory (insn);
25639 /* Model the ability of the reorder buffer to hide the latency of a load
25640 by executing it in parallel with the previous instruction when the
25641 previous instruction is not needed to compute the address. */
25642 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25643 && !ix86_agi_dependent (dep_insn, insn))
25645 enum attr_unit unit = get_attr_unit (insn);
25646 int loadcost = 3;
25648 /* Because of the difference between the length of integer and
25649 floating unit pipeline preparation stages, the memory operands
25650 for floating point are cheaper.
25652 ??? For Athlon the difference is most probably 2. */
25653 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25654 loadcost = 3;
25655 else
25656 loadcost = TARGET_ATHLON ? 2 : 0;
25658 if (cost >= loadcost)
25659 cost -= loadcost;
25660 else
25661 cost = 0;
25663 break;
25665 case PROCESSOR_CORE2:
25666 case PROCESSOR_NEHALEM:
25667 case PROCESSOR_SANDYBRIDGE:
25668 case PROCESSOR_HASWELL:
25669 /* The stack engine allows push and pop instructions to execute in parallel. */
25670 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25671 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25672 return 0;
25674 memory = get_attr_memory (insn);
25676 /* Model the ability of the reorder buffer to hide the latency of a load
25677 by executing it in parallel with the previous instruction when the
25678 previous instruction is not needed to compute the address. */
25679 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25680 && !ix86_agi_dependent (dep_insn, insn))
25682 if (cost >= 4)
25683 cost -= 4;
25684 else
25685 cost = 0;
25687 break;
25689 case PROCESSOR_SILVERMONT:
25690 case PROCESSOR_INTEL:
25691 if (!reload_completed)
25692 return cost;
25694 /* Increase cost of integer loads. */
25695 memory = get_attr_memory (dep_insn);
25696 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25698 enum attr_unit unit = get_attr_unit (dep_insn);
25699 if (unit == UNIT_INTEGER && cost == 1)
25701 if (memory == MEMORY_LOAD)
25702 cost = 3;
25703 else
25705 /* Increase the cost of ld/st only for short int types,
25706 because of a store-forwarding issue. */
25707 rtx set = single_set (dep_insn);
25708 if (set && (GET_MODE (SET_DEST (set)) == QImode
25709 || GET_MODE (SET_DEST (set)) == HImode))
25711 /* Increase the cost of the store/load pair if an exact
25712 dependence exists and this insn is the load. */
25713 enum attr_memory insn_memory = get_attr_memory (insn);
25714 if (insn_memory == MEMORY_LOAD
25715 && exact_store_load_dependency (dep_insn, insn))
25716 cost = 3;
25722 default:
25723 break;
25726 return cost;
25729 /* How many alternative schedules to try. This should be as wide as the
25730 scheduling freedom in the DFA, but no wider. Making this value too
25731 large results in extra work for the scheduler. */
25733 static int
25734 ia32_multipass_dfa_lookahead (void)
25736 switch (ix86_tune)
25738 case PROCESSOR_PENTIUM:
25739 return 2;
25741 case PROCESSOR_PENTIUMPRO:
25742 case PROCESSOR_K6:
25743 return 1;
25745 case PROCESSOR_BDVER1:
25746 case PROCESSOR_BDVER2:
25747 case PROCESSOR_BDVER3:
25748 case PROCESSOR_BDVER4:
25749 /* We use lookahead value 4 for BD both before and after reload
25750 schedules. The plan is to also use value 8 for -O3. */
25751 return 4;
25753 case PROCESSOR_CORE2:
25754 case PROCESSOR_NEHALEM:
25755 case PROCESSOR_SANDYBRIDGE:
25756 case PROCESSOR_HASWELL:
25757 case PROCESSOR_BONNELL:
25758 case PROCESSOR_SILVERMONT:
25759 case PROCESSOR_INTEL:
25760 /* Generally, we want haifa-sched:max_issue() to look ahead as far as
25761 the number of instructions that can be executed in a cycle, i.e.,
25762 issue_rate. I wonder why tuning for many CPUs does not do this. */
25763 if (reload_completed)
25764 return ix86_issue_rate ();
25765 /* Don't use lookahead for pre-reload schedule to save compile time. */
25766 return 0;
25768 default:
25769 return 0;
25773 /* Return true if target platform supports macro-fusion. */
25775 static bool
25776 ix86_macro_fusion_p ()
25778 return TARGET_FUSE_CMP_AND_BRANCH;
25781 /* Check whether the current microarchitecture supports macro fusion
25782 for insn pair "CONDGEN + CONDJMP". Refer to
25783 "Intel Architectures Optimization Reference Manual". */
25785 static bool
25786 ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
25788 rtx src, dest;
25789 rtx single_set = single_set (condgen);
25790 enum rtx_code ccode;
25791 rtx compare_set = NULL_RTX, test_if, cond;
25792 rtx alu_set = NULL_RTX, addr = NULL_RTX;
25794 if (get_attr_type (condgen) != TYPE_TEST
25795 && get_attr_type (condgen) != TYPE_ICMP
25796 && get_attr_type (condgen) != TYPE_INCDEC
25797 && get_attr_type (condgen) != TYPE_ALU)
25798 return false;
25800 if (single_set == NULL_RTX
25801 && !TARGET_FUSE_ALU_AND_BRANCH)
25802 return false;
25804 if (single_set != NULL_RTX)
25805 compare_set = single_set;
25806 else
25808 int i;
25809 rtx pat = PATTERN (condgen);
25810 for (i = 0; i < XVECLEN (pat, 0); i++)
25811 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25813 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
25814 if (GET_CODE (set_src) == COMPARE)
25815 compare_set = XVECEXP (pat, 0, i);
25816 else
25817 alu_set = XVECEXP (pat, 0, i);
25820 if (compare_set == NULL_RTX)
25821 return false;
25822 src = SET_SRC (compare_set);
25823 if (GET_CODE (src) != COMPARE)
25824 return false;
25826 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
25827 supported. */
25828 if ((MEM_P (XEXP (src, 0))
25829 && CONST_INT_P (XEXP (src, 1)))
25830 || (MEM_P (XEXP (src, 1))
25831 && CONST_INT_P (XEXP (src, 0))))
25832 return false;
25834 /* No fusion for RIP-relative address. */
25835 if (MEM_P (XEXP (src, 0)))
25836 addr = XEXP (XEXP (src, 0), 0);
25837 else if (MEM_P (XEXP (src, 1)))
25838 addr = XEXP (XEXP (src, 1), 0);
25840 if (addr) {
25841 ix86_address parts;
25842 int ok = ix86_decompose_address (addr, &parts);
25843 gcc_assert (ok);
25845 if (rip_relative_addr_p (&parts))
25846 return false;
25849 test_if = SET_SRC (pc_set (condjmp));
25850 cond = XEXP (test_if, 0);
25851 ccode = GET_CODE (cond);
25852 /* Check whether the conditional jump uses the Sign or Overflow flags. */
25853 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
25854 && (ccode == GE
25855 || ccode == GT
25856 || ccode == LE
25857 || ccode == LT))
25858 return false;
25860 /* Return true for TYPE_TEST and TYPE_ICMP. */
25861 if (get_attr_type (condgen) == TYPE_TEST
25862 || get_attr_type (condgen) == TYPE_ICMP)
25863 return true;
25865 /* The following handles the macro-fusion case for alu + jmp. */
25866 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
25867 return false;
25869 /* No fusion for alu op with memory destination operand. */
25870 dest = SET_DEST (alu_set);
25871 if (MEM_P (dest))
25872 return false;
25874 /* Macro-fusion for inc/dec + unsigned conditional jump is not
25875 supported. */
25876 if (get_attr_type (condgen) == TYPE_INCDEC
25877 && (ccode == GEU
25878 || ccode == GTU
25879 || ccode == LEU
25880 || ccode == LTU))
25881 return false;
25883 return true;
25886 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
25887 execution. It is applied if
25888 (1) IMUL instruction is on the top of list;
25889 (2) There exists exactly one producer of an independent IMUL instruction
25890 in the ready list.
25891 Return index of IMUL producer if it was found and -1 otherwise. */
25892 static int
25893 do_reorder_for_imul (rtx *ready, int n_ready)
25895 rtx insn, set, insn1, insn2;
25896 sd_iterator_def sd_it;
25897 dep_t dep;
25898 int index = -1;
25899 int i;
25901 if (!TARGET_BONNELL)
25902 return index;
25904 /* Check that IMUL instruction is on the top of ready list. */
25905 insn = ready[n_ready - 1];
25906 set = single_set (insn);
25907 if (!set)
25908 return index;
25909 if (!(GET_CODE (SET_SRC (set)) == MULT
25910 && GET_MODE (SET_SRC (set)) == SImode))
25911 return index;
25913 /* Search for producer of independent IMUL instruction. */
25914 for (i = n_ready - 2; i >= 0; i--)
25916 insn = ready[i];
25917 if (!NONDEBUG_INSN_P (insn))
25918 continue;
25919 /* Skip IMUL instruction. */
25920 insn2 = PATTERN (insn);
25921 if (GET_CODE (insn2) == PARALLEL)
25922 insn2 = XVECEXP (insn2, 0, 0);
25923 if (GET_CODE (insn2) == SET
25924 && GET_CODE (SET_SRC (insn2)) == MULT
25925 && GET_MODE (SET_SRC (insn2)) == SImode)
25926 continue;
25928 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
25930 rtx con;
25931 con = DEP_CON (dep);
25932 if (!NONDEBUG_INSN_P (con))
25933 continue;
25934 insn1 = PATTERN (con);
25935 if (GET_CODE (insn1) == PARALLEL)
25936 insn1 = XVECEXP (insn1, 0, 0);
25938 if (GET_CODE (insn1) == SET
25939 && GET_CODE (SET_SRC (insn1)) == MULT
25940 && GET_MODE (SET_SRC (insn1)) == SImode)
25942 sd_iterator_def sd_it1;
25943 dep_t dep1;
25944 /* Check that there is no other producer for the IMUL. */
25945 index = i;
25946 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
25948 rtx pro;
25949 pro = DEP_PRO (dep1);
25950 if (!NONDEBUG_INSN_P (pro))
25951 continue;
25952 if (pro != insn)
25953 index = -1;
25955 if (index >= 0)
25956 break;
25959 if (index >= 0)
25960 break;
25962 return index;
25965 /* Try to find the best candidate at the top of the ready list if two insns
25966 have the same priority - a candidate is best if its dependees were
25967 scheduled earlier. Applied for Silvermont only.
25968 Return true if the top 2 insns must be interchanged. */
25969 static bool
25970 swap_top_of_ready_list (rtx *ready, int n_ready)
25972 rtx top = ready[n_ready - 1];
25973 rtx next = ready[n_ready - 2];
25974 rtx set;
25975 sd_iterator_def sd_it;
25976 dep_t dep;
25977 int clock1 = -1;
25978 int clock2 = -1;
25979 #define INSN_TICK(INSN) (HID (INSN)->tick)
25981 if (!TARGET_SILVERMONT && !TARGET_INTEL)
25982 return false;
25984 if (!NONDEBUG_INSN_P (top))
25985 return false;
25986 if (!NONJUMP_INSN_P (top))
25987 return false;
25988 if (!NONDEBUG_INSN_P (next))
25989 return false;
25990 if (!NONJUMP_INSN_P (next))
25991 return false;
25992 set = single_set (top);
25993 if (!set)
25994 return false;
25995 set = single_set (next);
25996 if (!set)
25997 return false;
25999 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26001 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26002 return false;
26003 /* Determine the winner more precisely. */
26004 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26006 rtx pro;
26007 pro = DEP_PRO (dep);
26008 if (!NONDEBUG_INSN_P (pro))
26009 continue;
26010 if (INSN_TICK (pro) > clock1)
26011 clock1 = INSN_TICK (pro);
26013 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26015 rtx pro;
26016 pro = DEP_PRO (dep);
26017 if (!NONDEBUG_INSN_P (pro))
26018 continue;
26019 if (INSN_TICK (pro) > clock2)
26020 clock2 = INSN_TICK (pro);
26023 if (clock1 == clock2)
26025 /* Determine the winner - a load must win. */
26026 enum attr_memory memory1, memory2;
26027 memory1 = get_attr_memory (top);
26028 memory2 = get_attr_memory (next);
26029 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26030 return true;
26032 return (bool) (clock2 < clock1);
26034 return false;
26035 #undef INSN_TICK
26038 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26039 Return the issue rate. */
26040 static int
26041 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
26042 int clock_var)
26044 int issue_rate = -1;
26045 int n_ready = *pn_ready;
26046 int i;
26047 rtx insn;
26048 int index = -1;
26050 /* Set up issue rate. */
26051 issue_rate = ix86_issue_rate ();
26053 /* Do reordering for BONNELL/SILVERMONT only. */
26054 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26055 return issue_rate;
26057 /* Nothing to do if ready list contains only 1 instruction. */
26058 if (n_ready <= 1)
26059 return issue_rate;
26061 /* Do reordering for the post-reload scheduler only. */
26062 if (!reload_completed)
26063 return issue_rate;
26065 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26067 if (sched_verbose > 1)
26068 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26069 INSN_UID (ready[index]));
26071 /* Put IMUL producer (ready[index]) at the top of ready list. */
26072 insn = ready[index];
26073 for (i = index; i < n_ready - 1; i++)
26074 ready[i] = ready[i + 1];
26075 ready[n_ready - 1] = insn;
26076 return issue_rate;
26078 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26080 if (sched_verbose > 1)
26081 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26082 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26083 /* Swap 2 top elements of ready list. */
26084 insn = ready[n_ready - 1];
26085 ready[n_ready - 1] = ready[n_ready - 2];
26086 ready[n_ready - 2] = insn;
26088 return issue_rate;
26091 static bool
26092 ix86_class_likely_spilled_p (reg_class_t);
26094 /* Return true if the lhs of INSN is a HW function argument register; set
26095 IS_SPILLED to true if it is a likely-spilled HW register. */
26096 static bool
26097 insn_is_function_arg (rtx insn, bool* is_spilled)
26099 rtx dst;
26101 if (!NONDEBUG_INSN_P (insn))
26102 return false;
26103 /* Call instructions are not movable; ignore them. */
26104 if (CALL_P (insn))
26105 return false;
26106 insn = PATTERN (insn);
26107 if (GET_CODE (insn) == PARALLEL)
26108 insn = XVECEXP (insn, 0, 0);
26109 if (GET_CODE (insn) != SET)
26110 return false;
26111 dst = SET_DEST (insn);
26112 if (REG_P (dst) && HARD_REGISTER_P (dst)
26113 && ix86_function_arg_regno_p (REGNO (dst)))
26115 /* Is it likely spilled HW register? */
26116 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26117 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26118 *is_spilled = true;
26119 return true;
26121 return false;
26124 /* Add output dependencies for a chain of adjacent function arguments, but
26125 only if there is a move to a likely-spilled HW register. Return the first
26126 argument if at least one dependence was added, or NULL otherwise. */
26127 static rtx
26128 add_parameter_dependencies (rtx call, rtx head)
26130 rtx insn;
26131 rtx last = call;
26132 rtx first_arg = NULL;
26133 bool is_spilled = false;
26135 head = PREV_INSN (head);
26137 /* Find the argument-passing instruction nearest to the call. */
26138 while (true)
26140 last = PREV_INSN (last);
26141 if (last == head)
26142 return NULL;
26143 if (!NONDEBUG_INSN_P (last))
26144 continue;
26145 if (insn_is_function_arg (last, &is_spilled))
26146 break;
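/* Any other real insn between the call and its argument set-up means
   there is no argument chain to protect; give up. */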
26147 return NULL;
26150 first_arg = last;
26151 while (true)
26153 insn = PREV_INSN (last);
26154 if (!INSN_P (insn))
26155 break;
26156 if (insn == head)
26157 break;
26158 if (!NONDEBUG_INSN_P (insn))
26160 last = insn;
26161 continue;
26163 if (insn_is_function_arg (insn, &is_spilled))
26165 /* Add an output dependence between two function arguments if the chain
26166 of output arguments contains likely-spilled HW registers. */
26167 if (is_spilled)
26168 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26169 first_arg = last = insn;
26171 else
26172 break;
26174 if (!is_spilled)
26175 return NULL;
26176 return first_arg;
26179 /* Add output or anti dependency from insn to first_arg to restrict its code
26180 motion. */
26181 static void
26182 avoid_func_arg_motion (rtx first_arg, rtx insn)
26184 rtx set;
26185 rtx tmp;
26187 set = single_set (insn);
26188 if (!set)
26189 return;
26190 tmp = SET_DEST (set);
26191 if (REG_P (tmp))
26193 /* Add output dependency to the first function argument. */
26194 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26195 return;
26197 /* Add anti dependency. */
26198 add_dependence (first_arg, insn, REG_DEP_ANTI);
26201 /* Avoid cross-block motion of a function argument by adding a dependency
26202 from the first non-jump instruction in bb. */
26203 static void
26204 add_dependee_for_func_arg (rtx arg, basic_block bb)
26206 rtx insn = BB_END (bb);
26208 while (insn)
26210 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26212 rtx set = single_set (insn);
26213 if (set)
26215 avoid_func_arg_motion (arg, insn);
26216 return;
26219 if (insn == BB_HEAD (bb))
26220 return;
26221 insn = PREV_INSN (insn);
26225 /* Hook for pre-reload schedule - avoid motion of function arguments
26226 passed in likely spilled HW registers. */
26227 static void
26228 ix86_dependencies_evaluation_hook (rtx head, rtx tail)
26230 rtx insn;
26231 rtx first_arg = NULL;
26232 if (reload_completed)
26233 return;
26234 while (head != tail && DEBUG_INSN_P (head))
26235 head = NEXT_INSN (head);
26236 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26237 if (INSN_P (insn) && CALL_P (insn))
26239 first_arg = add_parameter_dependencies (insn, head);
26240 if (first_arg)
26242 /* Add a dependee for the first argument to predecessors, but only if the
26243 region contains more than one block. */
26244 basic_block bb = BLOCK_FOR_INSN (insn);
26245 int rgn = CONTAINING_RGN (bb->index);
26246 int nr_blks = RGN_NR_BLOCKS (rgn);
26247 /* Skip trivial regions and region head blocks that can have
26248 predecessors outside of region. */
26249 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26251 edge e;
26252 edge_iterator ei;
26253 /* Assume that region is SCC, i.e. all immediate predecessors
26254 of non-head block are in the same region. */
26255 FOR_EACH_EDGE (e, ei, bb->preds)
26257 /* Avoid creating loop-carried dependencies by using
26258 the topological ordering in the region. */
26259 if (BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26260 add_dependee_for_func_arg (first_arg, e->src);
26263 insn = first_arg;
26264 if (insn == head)
26265 break;
26268 else if (first_arg)
26269 avoid_func_arg_motion (first_arg, insn);
26272 /* Hook for pre-reload schedule - set priority of moves from likely spilled
26273 HW registers to maximum, to schedule them as soon as possible. These are
26274 moves from function argument registers at the top of the function entry
26275 and moves from function return value registers after call. */
26276 static int
26277 ix86_adjust_priority (rtx insn, int priority)
26279 rtx set;
26281 if (reload_completed)
26282 return priority;
26284 if (!NONDEBUG_INSN_P (insn))
26285 return priority;
26287 set = single_set (insn);
26288 if (set)
26290 rtx tmp = SET_SRC (set);
26291 if (REG_P (tmp)
26292 && HARD_REGISTER_P (tmp)
26293 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26294 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26295 return current_sched_info->sched_max_insns_priority;
26298 return priority;
26301 /* Model decoder of Core 2/i7.
26302 The hooks below, used for multipass scheduling (see haifa-sched.c:max_issue),
26303 track the instruction fetch block boundaries and make sure that long
26304 (9+ bytes) instructions are assigned to D0. */
26306 /* Maximum length of an insn that can be handled by
26307 a secondary decoder unit. '8' for Core 2/i7. */
26308 static int core2i7_secondary_decoder_max_insn_size;
26310 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26311 '16' for Core 2/i7. */
26312 static int core2i7_ifetch_block_size;
26314 /* Maximum number of instructions decoder can handle per cycle.
26315 '6' for Core 2/i7. */
26316 static int core2i7_ifetch_block_max_insns;
26318 typedef struct ix86_first_cycle_multipass_data_ *
26319 ix86_first_cycle_multipass_data_t;
26320 typedef const struct ix86_first_cycle_multipass_data_ *
26321 const_ix86_first_cycle_multipass_data_t;
26323 /* A variable to store target state across calls to max_issue within
26324 one cycle. */
26325 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26326 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26328 /* Initialize DATA. */
26329 static void
26330 core2i7_first_cycle_multipass_init (void *_data)
26332 ix86_first_cycle_multipass_data_t data
26333 = (ix86_first_cycle_multipass_data_t) _data;
26335 data->ifetch_block_len = 0;
26336 data->ifetch_block_n_insns = 0;
26337 data->ready_try_change = NULL;
26338 data->ready_try_change_size = 0;
26341 /* Advancing the cycle; reset ifetch block counts. */
26342 static void
26343 core2i7_dfa_post_advance_cycle (void)
26345 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26347 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26349 data->ifetch_block_len = 0;
26350 data->ifetch_block_n_insns = 0;
26353 static int min_insn_size (rtx);
26355 /* Filter out insns from ready_try that the core will not be able to issue
26356 on current cycle due to decoder. */
26357 static void
26358 core2i7_first_cycle_multipass_filter_ready_try
26359 (const_ix86_first_cycle_multipass_data_t data,
26360 char *ready_try, int n_ready, bool first_cycle_insn_p)
26362 while (n_ready--)
26364 rtx insn;
26365 int insn_size;
26367 if (ready_try[n_ready])
26368 continue;
26370 insn = get_ready_element (n_ready);
26371 insn_size = min_insn_size (insn);
26373 if (/* If this is too long an insn for a secondary decoder ... */
26374 (!first_cycle_insn_p
26375 && insn_size > core2i7_secondary_decoder_max_insn_size)
26376 /* ... or it would not fit into the ifetch block ... */
26377 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26378 /* ... or the decoder is full already ... */
26379 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26380 /* ... mask the insn out. */
26382 ready_try[n_ready] = 1;
26384 if (data->ready_try_change)
26385 bitmap_set_bit (data->ready_try_change, n_ready);
26390 /* Prepare for a new round of multipass lookahead scheduling. */
26391 static void
26392 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
26393 bool first_cycle_insn_p)
26395 ix86_first_cycle_multipass_data_t data
26396 = (ix86_first_cycle_multipass_data_t) _data;
26397 const_ix86_first_cycle_multipass_data_t prev_data
26398 = ix86_first_cycle_multipass_data;
26400 /* Restore the state from the end of the previous round. */
26401 data->ifetch_block_len = prev_data->ifetch_block_len;
26402 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26404 /* Filter instructions that cannot be issued on current cycle due to
26405 decoder restrictions. */
26406 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26407 first_cycle_insn_p);
26410 /* INSN is being issued in current solution. Account for its impact on
26411 the decoder model. */
26412 static void
26413 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
26414 rtx insn, const void *_prev_data)
26416 ix86_first_cycle_multipass_data_t data
26417 = (ix86_first_cycle_multipass_data_t) _data;
26418 const_ix86_first_cycle_multipass_data_t prev_data
26419 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26421 int insn_size = min_insn_size (insn);
26423 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26424 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26425 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26426 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26428 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26429 if (!data->ready_try_change)
26431 data->ready_try_change = sbitmap_alloc (n_ready);
26432 data->ready_try_change_size = n_ready;
26434 else if (data->ready_try_change_size < n_ready)
26436 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26437 n_ready, 0);
26438 data->ready_try_change_size = n_ready;
26440 bitmap_clear (data->ready_try_change);
26442 /* Filter out insns from ready_try that the core will not be able to issue
26443 on current cycle due to decoder. */
26444 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26445 false);
26448 /* Revert the effect on ready_try. */
26449 static void
26450 core2i7_first_cycle_multipass_backtrack (const void *_data,
26451 char *ready_try,
26452 int n_ready ATTRIBUTE_UNUSED)
26454 const_ix86_first_cycle_multipass_data_t data
26455 = (const_ix86_first_cycle_multipass_data_t) _data;
26456 unsigned int i = 0;
26457 sbitmap_iterator sbi;
26459 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26460 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26462 ready_try[i] = 0;
26466 /* Save the result of multipass lookahead scheduling for the next round. */
26467 static void
26468 core2i7_first_cycle_multipass_end (const void *_data)
26470 const_ix86_first_cycle_multipass_data_t data
26471 = (const_ix86_first_cycle_multipass_data_t) _data;
26472 ix86_first_cycle_multipass_data_t next_data
26473 = ix86_first_cycle_multipass_data;
26475 if (data != NULL)
26477 next_data->ifetch_block_len = data->ifetch_block_len;
26478 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26482 /* Deallocate target data. */
26483 static void
26484 core2i7_first_cycle_multipass_fini (void *_data)
26486 ix86_first_cycle_multipass_data_t data
26487 = (ix86_first_cycle_multipass_data_t) _data;
26489 if (data->ready_try_change)
26491 sbitmap_free (data->ready_try_change);
26492 data->ready_try_change = NULL;
26493 data->ready_try_change_size = 0;
26497 /* Prepare for scheduling pass. */
26498 static void
26499 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
26500 int verbose ATTRIBUTE_UNUSED,
26501 int max_uid ATTRIBUTE_UNUSED)
26503 /* Install scheduling hooks for the current CPU.  Some of these hooks are used
26504 in time-critical parts of the scheduler, so we only set them up when
26505 they are actually used. */
26506 switch (ix86_tune)
26508 case PROCESSOR_CORE2:
26509 case PROCESSOR_NEHALEM:
26510 case PROCESSOR_SANDYBRIDGE:
26511 case PROCESSOR_HASWELL:
26512 /* Do not perform multipass scheduling for the pre-reload schedule,
26513 to save compile time. */
26514 if (reload_completed)
26516 targetm.sched.dfa_post_advance_cycle
26517 = core2i7_dfa_post_advance_cycle;
26518 targetm.sched.first_cycle_multipass_init
26519 = core2i7_first_cycle_multipass_init;
26520 targetm.sched.first_cycle_multipass_begin
26521 = core2i7_first_cycle_multipass_begin;
26522 targetm.sched.first_cycle_multipass_issue
26523 = core2i7_first_cycle_multipass_issue;
26524 targetm.sched.first_cycle_multipass_backtrack
26525 = core2i7_first_cycle_multipass_backtrack;
26526 targetm.sched.first_cycle_multipass_end
26527 = core2i7_first_cycle_multipass_end;
26528 targetm.sched.first_cycle_multipass_fini
26529 = core2i7_first_cycle_multipass_fini;
26531 /* Set decoder parameters. */
26532 core2i7_secondary_decoder_max_insn_size = 8;
26533 core2i7_ifetch_block_size = 16;
26534 core2i7_ifetch_block_max_insns = 6;
26535 break;
26537 /* ... Fall through ... */
26538 default:
26539 targetm.sched.dfa_post_advance_cycle = NULL;
26540 targetm.sched.first_cycle_multipass_init = NULL;
26541 targetm.sched.first_cycle_multipass_begin = NULL;
26542 targetm.sched.first_cycle_multipass_issue = NULL;
26543 targetm.sched.first_cycle_multipass_backtrack = NULL;
26544 targetm.sched.first_cycle_multipass_end = NULL;
26545 targetm.sched.first_cycle_multipass_fini = NULL;
26546 break;
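/* Worked example of the decoder limits set above (illustrative only):
   with a 16-byte ifetch block holding at most 6 insns, three insns of
   sizes 7, 7 and 3 bytes do not fit together (7 + 7 + 3 = 17 > 16), so
   the third one is deferred to the next block; and a 9-byte insn exceeds
   the 8-byte secondary-decoder limit, so the model only accepts it as the
   first insn of a cycle.  */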
26551 /* Compute the alignment given to a constant that is being placed in memory.
26552 EXP is the constant and ALIGN is the alignment that the object would
26553 ordinarily have.
26554 The value of this function is used instead of that alignment to align
26555 the object. */
26557 int
26558 ix86_constant_alignment (tree exp, int align)
26560 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26561 || TREE_CODE (exp) == INTEGER_CST)
26563 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26564 return 64;
26565 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26566 return 128;
26568 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26569 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26570 return BITS_PER_WORD;
26572 return align;
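/* Concretely (illustrative only): a DFmode REAL_CST such as a double
   literal is bumped to 64-bit alignment, a constant whose mode is
   128-bit-aligned (ALIGN_MODE_128) to 128 bits, and, unless optimizing
   for size, a STRING_CST of 31 or more bytes to BITS_PER_WORD.
   A standalone sketch of the same decision over a simplified constant
   description (hypothetical names, not GCC code):

     enum const_kind { KIND_FP64, KIND_ALIGN128, KIND_STRING, KIND_OTHER };

     static int
     sketch_constant_alignment (enum const_kind kind, int len, int align,
                                int bits_per_word, int optimize_size)
     {
       if (kind == KIND_FP64 && align < 64)
         return 64;
       if (kind == KIND_ALIGN128 && align < 128)
         return 128;
       if (!optimize_size && kind == KIND_STRING && len >= 31
           && align < bits_per_word)
         return bits_per_word;
       return align;
     }  */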
26575 /* Compute the alignment for a static variable.
26576 TYPE is the data type, and ALIGN is the alignment that
26577 the object would ordinarily have. The value of this function is used
26578 instead of that alignment to align the object. */
26580 int
26581 ix86_data_alignment (tree type, int align, bool opt)
26583 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26584 for symbols from other compilation units or symbols that don't need
26585 to bind locally. In order to preserve some ABI compatibility with
26586 those compilers, ensure we don't decrease alignment from what we
26587 used to assume. */
26589 int max_align_compat
26590 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
26592 /* A data structure equal to or greater than the size of a cache line
26593 (64 bytes in the Pentium 4 and other recent Intel processors, including
26594 processors based on the Intel Core microarchitecture) should be aligned
26595 so that its base address is a multiple of the cache-line size. */
26597 int max_align
26598 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
26600 if (max_align < BITS_PER_WORD)
26601 max_align = BITS_PER_WORD;
26603 if (opt
26604 && AGGREGATE_TYPE_P (type)
26605 && TYPE_SIZE (type)
26606 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
26608 if ((TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align_compat
26609 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
26610 && align < max_align_compat)
26611 align = max_align_compat;
26612 if ((TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
26613 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
26614 && align < max_align)
26615 align = max_align;
26618 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
26619 to a 16-byte boundary. */
26620 if (TARGET_64BIT)
26622 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26623 && TYPE_SIZE (type)
26624 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26625 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
26626 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
26627 return 128;
26630 if (!opt)
26631 return align;
26633 if (TREE_CODE (type) == ARRAY_TYPE)
26635 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26636 return 64;
26637 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26638 return 128;
26640 else if (TREE_CODE (type) == COMPLEX_TYPE)
26643 if (TYPE_MODE (type) == DCmode && align < 64)
26644 return 64;
26645 if ((TYPE_MODE (type) == XCmode
26646 || TYPE_MODE (type) == TCmode) && align < 128)
26647 return 128;
26649 else if ((TREE_CODE (type) == RECORD_TYPE
26650 || TREE_CODE (type) == UNION_TYPE
26651 || TREE_CODE (type) == QUAL_UNION_TYPE)
26652 && TYPE_FIELDS (type))
26654 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26655 return 64;
26656 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26657 return 128;
26659 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26660 || TREE_CODE (type) == INTEGER_TYPE)
26662 if (TYPE_MODE (type) == DFmode && align < 64)
26663 return 64;
26664 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26665 return 128;
26668 return align;
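/* Putting numbers on the limits above (illustrative only): with the usual
   64-byte prefetch block, max_align is 64 * 8 = 512 bits, so an aggregate
   of 64 bytes or more is given cache-line alignment when the object file
   format allows it, while max_align_compat caps the GCC 4.8 compatibility
   bump at 256 bits (or at BITS_PER_WORD when optimizing for size).  */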
26671 /* Compute the alignment for a local variable or a stack slot. EXP is
26672 the data type or decl itself, MODE is the widest mode available and
26673 ALIGN is the alignment that the object would ordinarily have. The
26674 value of this function is used instead of that alignment to align the
26675 object. */
26677 unsigned int
26678 ix86_local_alignment (tree exp, enum machine_mode mode,
26679 unsigned int align)
26681 tree type, decl;
26683 if (exp && DECL_P (exp))
26685 type = TREE_TYPE (exp);
26686 decl = exp;
26688 else
26690 type = exp;
26691 decl = NULL;
26694 /* Don't do dynamic stack realignment for long long objects with
26695 -mpreferred-stack-boundary=2. */
26696 if (!TARGET_64BIT
26697 && align == 64
26698 && ix86_preferred_stack_boundary < 64
26699 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26700 && (!type || !TYPE_USER_ALIGN (type))
26701 && (!decl || !DECL_USER_ALIGN (decl)))
26702 align = 32;
26704 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
26705 register in MODE.  We will return the larger of the XFmode and
26706 DFmode alignments. */
26707 if (!type)
26709 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26710 align = GET_MODE_ALIGNMENT (DFmode);
26711 return align;
26714 /* The x86-64 ABI requires arrays of at least 16 bytes to be aligned
26715 to a 16-byte boundary.  The exact wording is:
26717 An array uses the same alignment as its elements, except that a local or
26718 global array variable of length at least 16 bytes or
26719 a C99 variable-length array variable always has alignment of at least 16 bytes.
26721 This was added to allow use of aligned SSE instructions on arrays.  The
26722 rule is meant for static storage (where the compiler cannot do the analysis
26723 by itself).  We follow it for automatic variables only when convenient:
26724 we fully control everything in the function being compiled, and functions
26725 from other units cannot rely on the alignment.
26727 Exclude the va_list type.  It is the common case of a local array where
26728 we cannot benefit from the alignment.
26730 TODO: Probably one should optimize for size only when the variable does not escape. */
26731 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26732 && TARGET_SSE)
26734 if (AGGREGATE_TYPE_P (type)
26735 && (va_list_type_node == NULL_TREE
26736 || (TYPE_MAIN_VARIANT (type)
26737 != TYPE_MAIN_VARIANT (va_list_type_node)))
26738 && TYPE_SIZE (type)
26739 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26740 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
26741 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
26742 return 128;
26744 if (TREE_CODE (type) == ARRAY_TYPE)
26746 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26747 return 64;
26748 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26749 return 128;
26751 else if (TREE_CODE (type) == COMPLEX_TYPE)
26753 if (TYPE_MODE (type) == DCmode && align < 64)
26754 return 64;
26755 if ((TYPE_MODE (type) == XCmode
26756 || TYPE_MODE (type) == TCmode) && align < 128)
26757 return 128;
26759 else if ((TREE_CODE (type) == RECORD_TYPE
26760 || TREE_CODE (type) == UNION_TYPE
26761 || TREE_CODE (type) == QUAL_UNION_TYPE)
26762 && TYPE_FIELDS (type))
26764 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26765 return 64;
26766 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26767 return 128;
26769 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26770 || TREE_CODE (type) == INTEGER_TYPE)
26773 if (TYPE_MODE (type) == DFmode && align < 64)
26774 return 64;
26775 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26776 return 128;
26778 return align;
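/* A standalone sketch of the x86-64 local-array rule applied above
   (illustrative only, not GCC code): SIZE_BYTES is the object size, the
   two flags describe the type, and ALIGN is in bits.

     static int
     sketch_local_array_align (int is_aggregate, int is_va_list,
                               long size_bytes, int align)
     {
       if (is_aggregate && !is_va_list && size_bytes >= 16 && align < 128)
         return 128;
       return align;
     }

   The remaining cases mirror ix86_data_alignment: DFmode-based types are
   raised to 64 bits and 128-bit-aligned modes to 128 bits.  */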
26781 /* Compute the minimum required alignment for dynamic stack realignment
26782 purposes for a local variable, parameter or a stack slot. EXP is
26783 the data type or decl itself, MODE is its mode and ALIGN is the
26784 alignment that the object would ordinarily have. */
26786 unsigned int
26787 ix86_minimum_alignment (tree exp, enum machine_mode mode,
26788 unsigned int align)
26790 tree type, decl;
26792 if (exp && DECL_P (exp))
26794 type = TREE_TYPE (exp);
26795 decl = exp;
26797 else
26799 type = exp;
26800 decl = NULL;
26803 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
26804 return align;
26806 /* Don't do dynamic stack realignment for long long objects with
26807 -mpreferred-stack-boundary=2. */
26808 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
26809 && (!type || !TYPE_USER_ALIGN (type))
26810 && (!decl || !DECL_USER_ALIGN (decl)))
26811 return 32;
26813 return align;
26816 /* Find a location for the static chain incoming to a nested function.
26817 This is a register, unless all free registers are used by arguments. */
26819 static rtx
26820 ix86_static_chain (const_tree fndecl, bool incoming_p)
26822 unsigned regno;
26824 if (!DECL_STATIC_CHAIN (fndecl))
26825 return NULL;
26827 if (TARGET_64BIT)
26829 /* We always use R10 in 64-bit mode. */
26830 regno = R10_REG;
26832 else
26834 tree fntype;
26835 unsigned int ccvt;
26837 /* By default in 32-bit mode we use ECX to pass the static chain. */
26838 regno = CX_REG;
26840 fntype = TREE_TYPE (fndecl);
26841 ccvt = ix86_get_callcvt (fntype);
26842 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
26844 /* Fastcall functions use ecx/edx for arguments, which leaves
26845 us with EAX for the static chain.
26846 Thiscall functions use ecx for arguments, which also
26847 leaves us with EAX for the static chain. */
26848 regno = AX_REG;
26850 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
26852 /* Thiscall functions use ecx for arguments, which leaves
26853 us with EAX and EDX for the static chain.
26854 For ABI compatibility we use EAX. */
26855 regno = AX_REG;
26857 else if (ix86_function_regparm (fntype, fndecl) == 3)
26859 /* For regparm 3, we have no free call-clobbered registers in
26860 which to store the static chain. In order to implement this,
26861 we have the trampoline push the static chain to the stack.
26862 However, we can't push a value below the return address when
26863 we call the nested function directly, so we have to use an
26864 alternate entry point. For this we use ESI, and have the
26865 alternate entry point push ESI, so that things appear the
26866 same once we're executing the nested function. */
26867 if (incoming_p)
26869 if (fndecl == current_function_decl)
26870 ix86_static_chain_on_stack = true;
26871 return gen_frame_mem (SImode,
26872 plus_constant (Pmode,
26873 arg_pointer_rtx, -8));
26875 regno = SI_REG;
26879 return gen_rtx_REG (Pmode, regno);
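/* Summary of the choice above (illustrative only):

     64-bit                        -> R10
     32-bit, default conventions   -> ECX
     32-bit, fastcall or thiscall  -> EAX
     32-bit, regparm (3)           -> pushed on the stack by the trampoline,
                                      with an alternate entry point that
                                      pushes ESI for direct callers

   expressed as a plain decision function with hypothetical names:

     static const char *
     sketch_static_chain_reg (int is_64bit, int fastcall_or_thiscall,
                              int regparm_3)
     {
       if (is_64bit)
         return "r10";
       if (fastcall_or_thiscall)
         return "eax";
       if (regparm_3)
         return "stack (no free call-clobbered register)";
       return "ecx";
     }  */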
26882 /* Emit RTL insns to initialize the variable parts of a trampoline.
26883 FNDECL is the decl of the target address; M_TRAMP is a MEM for
26884 the trampoline, and CHAIN_VALUE is an RTX for the static chain
26885 to be passed to the target function. */
26887 static void
26888 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
26890 rtx mem, fnaddr;
26891 int opcode;
26892 int offset = 0;
26894 fnaddr = XEXP (DECL_RTL (fndecl), 0);
26896 if (TARGET_64BIT)
26898 int size;
26900 /* Load the function address into r11.  Try to load the address using
26901 the shorter movl instead of movabs.  We may want to support
26902 movq for kernel mode, but the kernel does not use trampolines at
26903 the moment.  FNADDR is a 32-bit address and may not be in
26904 DImode when ptr_mode == SImode.  Always use movl in this
26905 case. */
26906 if (ptr_mode == SImode
26907 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
26909 fnaddr = copy_addr_to_reg (fnaddr);
26911 mem = adjust_address (m_tramp, HImode, offset);
26912 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
26914 mem = adjust_address (m_tramp, SImode, offset + 2);
26915 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
26916 offset += 6;
26918 else
26920 mem = adjust_address (m_tramp, HImode, offset);
26921 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
26923 mem = adjust_address (m_tramp, DImode, offset + 2);
26924 emit_move_insn (mem, fnaddr);
26925 offset += 10;
26928 /* Load the static chain into r10 using movabs.  Use the shorter movl
26929 instead of movabs when ptr_mode == SImode. */
26930 if (ptr_mode == SImode)
26932 opcode = 0xba41;
26933 size = 6;
26935 else
26937 opcode = 0xba49;
26938 size = 10;
26941 mem = adjust_address (m_tramp, HImode, offset);
26942 emit_move_insn (mem, gen_int_mode (opcode, HImode));
26944 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
26945 emit_move_insn (mem, chain_value);
26946 offset += size;
26948 /* Jump to r11; the last (unused) byte is a nop, only there to
26949 pad the write out to a single 32-bit store. */
26950 mem = adjust_address (m_tramp, SImode, offset);
26951 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
26952 offset += 4;
26954 else
26956 rtx disp, chain;
26958 /* Depending on the static chain location, either load a register
26959 with a constant, or push the constant to the stack. All of the
26960 instructions are the same size. */
26961 chain = ix86_static_chain (fndecl, true);
26962 if (REG_P (chain))
26964 switch (REGNO (chain))
26966 case AX_REG:
26967 opcode = 0xb8; break;
26968 case CX_REG:
26969 opcode = 0xb9; break;
26970 default:
26971 gcc_unreachable ();
26974 else
26975 opcode = 0x68;
26977 mem = adjust_address (m_tramp, QImode, offset);
26978 emit_move_insn (mem, gen_int_mode (opcode, QImode));
26980 mem = adjust_address (m_tramp, SImode, offset + 1);
26981 emit_move_insn (mem, chain_value);
26982 offset += 5;
26984 mem = adjust_address (m_tramp, QImode, offset);
26985 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
26987 mem = adjust_address (m_tramp, SImode, offset + 1);
26989 /* Compute the offset from the end of the jmp to the target function.
26990 When the trampoline stores the static chain on
26991 the stack, we need to skip the target's first insn, which pushes the
26992 (call-saved) static chain register; this push is 1 byte. */
26993 offset += 5;
26994 disp = expand_binop (SImode, sub_optab, fnaddr,
26995 plus_constant (Pmode, XEXP (m_tramp, 0),
26996 offset - (MEM_P (chain) ? 1 : 0)),
26997 NULL_RTX, 1, OPTAB_DIRECT);
26998 emit_move_insn (mem, disp);
27001 gcc_assert (offset <= TRAMPOLINE_SIZE);
27003 #ifdef HAVE_ENABLE_EXECUTE_STACK
27004 #ifdef CHECK_EXECUTE_STACK_ENABLED
27005 if (CHECK_EXECUTE_STACK_ENABLED)
27006 #endif
27007 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27008 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27009 #endif
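/* The code above produces, for the common 64-bit case with a full 64-bit
   function address, a 24-byte trampoline laid out as follows (illustrative
   summary of the stores made above, operand bytes shown symbolically):

     offset  bytes                      insn
       0     49 bb <8-byte fnaddr>      movabs $fnaddr, %r11
      10     49 ba <8-byte chain>       movabs $chain,  %r10
      20     49 ff e3                   jmp    *%r11
      23     90                         nop (pads the last store to 4 bytes)

   and, for the usual 32-bit case with the static chain in a register, a
   10-byte trampoline:

       0     b8/b9 <4-byte chain>       mov  $chain, %eax / %ecx
       5     e9 <4-byte rel32>          jmp  fnaddr  */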
27012 /* The following file contains several enumerations and data structures
27013 built from the definitions in i386-builtin-types.def. */
27015 #include "i386-builtin-types.inc"
27017 /* Table for the ix86 builtin non-function types. */
27018 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27020 /* Retrieve an element from the above table, building some of
27021 the types lazily. */
27023 static tree
27024 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27026 unsigned int index;
27027 tree type, itype;
27029 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27031 type = ix86_builtin_type_tab[(int) tcode];
27032 if (type != NULL)
27033 return type;
27035 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27036 if (tcode <= IX86_BT_LAST_VECT)
27038 enum machine_mode mode;
27040 index = tcode - IX86_BT_LAST_PRIM - 1;
27041 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27042 mode = ix86_builtin_type_vect_mode[index];
27044 type = build_vector_type_for_mode (itype, mode);
27046 else
27048 int quals;
27050 index = tcode - IX86_BT_LAST_VECT - 1;
27051 if (tcode <= IX86_BT_LAST_PTR)
27052 quals = TYPE_UNQUALIFIED;
27053 else
27054 quals = TYPE_QUAL_CONST;
27056 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27057 if (quals != TYPE_UNQUALIFIED)
27058 itype = build_qualified_type (itype, quals);
27060 type = build_pointer_type (itype);
27063 ix86_builtin_type_tab[(int) tcode] = type;
27064 return type;
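/* The same memoization pattern in miniature (illustrative only, not GCC
   code): build an entry the first time it is requested, cache it, and
   return the cached pointer on every later request.

     static void *sketch_type_cache[64];   -- sized like the table above

     static void *
     sketch_get_type (int code, void *(*build) (int))
     {
       if (sketch_type_cache[code] == NULL)
         sketch_type_cache[code] = build (code);
       return sketch_type_cache[code];
     }  */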
27067 /* Table for the ix86 builtin function types. */
27068 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27070 /* Retrieve an element from the above table, building some of
27071 the types lazily. */
27073 static tree
27074 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27076 tree type;
27078 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27080 type = ix86_builtin_func_type_tab[(int) tcode];
27081 if (type != NULL)
27082 return type;
27084 if (tcode <= IX86_BT_LAST_FUNC)
27086 unsigned start = ix86_builtin_func_start[(int) tcode];
27087 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27088 tree rtype, atype, args = void_list_node;
27089 unsigned i;
27091 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27092 for (i = after - 1; i > start; --i)
27094 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27095 args = tree_cons (NULL, atype, args);
27098 type = build_function_type (rtype, args);
27100 else
27102 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27103 enum ix86_builtin_func_type icode;
27105 icode = ix86_builtin_func_alias_base[index];
27106 type = ix86_get_builtin_func_type (icode);
27109 ix86_builtin_func_type_tab[(int) tcode] = type;
27110 return type;
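/* Worked example of the loop above (illustrative, hypothetical entry):
   for a record whose slots read { FLOAT128, FLOAT128, FLOAT128 },
   describing __float128 f (__float128, __float128), the return type is
   taken from index START and the loop then walks indexes AFTER-1 down to
   START+1, consing each argument type onto void_list_node, so the
   resulting TREE_LIST reads (FLOAT128, FLOAT128, void) in source order
   before being handed to build_function_type.  */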
27114 /* Codes for all the SSE/MMX builtins. */
27115 enum ix86_builtins
27117 IX86_BUILTIN_ADDPS,
27118 IX86_BUILTIN_ADDSS,
27119 IX86_BUILTIN_DIVPS,
27120 IX86_BUILTIN_DIVSS,
27121 IX86_BUILTIN_MULPS,
27122 IX86_BUILTIN_MULSS,
27123 IX86_BUILTIN_SUBPS,
27124 IX86_BUILTIN_SUBSS,
27126 IX86_BUILTIN_CMPEQPS,
27127 IX86_BUILTIN_CMPLTPS,
27128 IX86_BUILTIN_CMPLEPS,
27129 IX86_BUILTIN_CMPGTPS,
27130 IX86_BUILTIN_CMPGEPS,
27131 IX86_BUILTIN_CMPNEQPS,
27132 IX86_BUILTIN_CMPNLTPS,
27133 IX86_BUILTIN_CMPNLEPS,
27134 IX86_BUILTIN_CMPNGTPS,
27135 IX86_BUILTIN_CMPNGEPS,
27136 IX86_BUILTIN_CMPORDPS,
27137 IX86_BUILTIN_CMPUNORDPS,
27138 IX86_BUILTIN_CMPEQSS,
27139 IX86_BUILTIN_CMPLTSS,
27140 IX86_BUILTIN_CMPLESS,
27141 IX86_BUILTIN_CMPNEQSS,
27142 IX86_BUILTIN_CMPNLTSS,
27143 IX86_BUILTIN_CMPNLESS,
27144 IX86_BUILTIN_CMPORDSS,
27145 IX86_BUILTIN_CMPUNORDSS,
27147 IX86_BUILTIN_COMIEQSS,
27148 IX86_BUILTIN_COMILTSS,
27149 IX86_BUILTIN_COMILESS,
27150 IX86_BUILTIN_COMIGTSS,
27151 IX86_BUILTIN_COMIGESS,
27152 IX86_BUILTIN_COMINEQSS,
27153 IX86_BUILTIN_UCOMIEQSS,
27154 IX86_BUILTIN_UCOMILTSS,
27155 IX86_BUILTIN_UCOMILESS,
27156 IX86_BUILTIN_UCOMIGTSS,
27157 IX86_BUILTIN_UCOMIGESS,
27158 IX86_BUILTIN_UCOMINEQSS,
27160 IX86_BUILTIN_CVTPI2PS,
27161 IX86_BUILTIN_CVTPS2PI,
27162 IX86_BUILTIN_CVTSI2SS,
27163 IX86_BUILTIN_CVTSI642SS,
27164 IX86_BUILTIN_CVTSS2SI,
27165 IX86_BUILTIN_CVTSS2SI64,
27166 IX86_BUILTIN_CVTTPS2PI,
27167 IX86_BUILTIN_CVTTSS2SI,
27168 IX86_BUILTIN_CVTTSS2SI64,
27170 IX86_BUILTIN_MAXPS,
27171 IX86_BUILTIN_MAXSS,
27172 IX86_BUILTIN_MINPS,
27173 IX86_BUILTIN_MINSS,
27175 IX86_BUILTIN_LOADUPS,
27176 IX86_BUILTIN_STOREUPS,
27177 IX86_BUILTIN_MOVSS,
27179 IX86_BUILTIN_MOVHLPS,
27180 IX86_BUILTIN_MOVLHPS,
27181 IX86_BUILTIN_LOADHPS,
27182 IX86_BUILTIN_LOADLPS,
27183 IX86_BUILTIN_STOREHPS,
27184 IX86_BUILTIN_STORELPS,
27186 IX86_BUILTIN_MASKMOVQ,
27187 IX86_BUILTIN_MOVMSKPS,
27188 IX86_BUILTIN_PMOVMSKB,
27190 IX86_BUILTIN_MOVNTPS,
27191 IX86_BUILTIN_MOVNTQ,
27193 IX86_BUILTIN_LOADDQU,
27194 IX86_BUILTIN_STOREDQU,
27196 IX86_BUILTIN_PACKSSWB,
27197 IX86_BUILTIN_PACKSSDW,
27198 IX86_BUILTIN_PACKUSWB,
27200 IX86_BUILTIN_PADDB,
27201 IX86_BUILTIN_PADDW,
27202 IX86_BUILTIN_PADDD,
27203 IX86_BUILTIN_PADDQ,
27204 IX86_BUILTIN_PADDSB,
27205 IX86_BUILTIN_PADDSW,
27206 IX86_BUILTIN_PADDUSB,
27207 IX86_BUILTIN_PADDUSW,
27208 IX86_BUILTIN_PSUBB,
27209 IX86_BUILTIN_PSUBW,
27210 IX86_BUILTIN_PSUBD,
27211 IX86_BUILTIN_PSUBQ,
27212 IX86_BUILTIN_PSUBSB,
27213 IX86_BUILTIN_PSUBSW,
27214 IX86_BUILTIN_PSUBUSB,
27215 IX86_BUILTIN_PSUBUSW,
27217 IX86_BUILTIN_PAND,
27218 IX86_BUILTIN_PANDN,
27219 IX86_BUILTIN_POR,
27220 IX86_BUILTIN_PXOR,
27222 IX86_BUILTIN_PAVGB,
27223 IX86_BUILTIN_PAVGW,
27225 IX86_BUILTIN_PCMPEQB,
27226 IX86_BUILTIN_PCMPEQW,
27227 IX86_BUILTIN_PCMPEQD,
27228 IX86_BUILTIN_PCMPGTB,
27229 IX86_BUILTIN_PCMPGTW,
27230 IX86_BUILTIN_PCMPGTD,
27232 IX86_BUILTIN_PMADDWD,
27234 IX86_BUILTIN_PMAXSW,
27235 IX86_BUILTIN_PMAXUB,
27236 IX86_BUILTIN_PMINSW,
27237 IX86_BUILTIN_PMINUB,
27239 IX86_BUILTIN_PMULHUW,
27240 IX86_BUILTIN_PMULHW,
27241 IX86_BUILTIN_PMULLW,
27243 IX86_BUILTIN_PSADBW,
27244 IX86_BUILTIN_PSHUFW,
27246 IX86_BUILTIN_PSLLW,
27247 IX86_BUILTIN_PSLLD,
27248 IX86_BUILTIN_PSLLQ,
27249 IX86_BUILTIN_PSRAW,
27250 IX86_BUILTIN_PSRAD,
27251 IX86_BUILTIN_PSRLW,
27252 IX86_BUILTIN_PSRLD,
27253 IX86_BUILTIN_PSRLQ,
27254 IX86_BUILTIN_PSLLWI,
27255 IX86_BUILTIN_PSLLDI,
27256 IX86_BUILTIN_PSLLQI,
27257 IX86_BUILTIN_PSRAWI,
27258 IX86_BUILTIN_PSRADI,
27259 IX86_BUILTIN_PSRLWI,
27260 IX86_BUILTIN_PSRLDI,
27261 IX86_BUILTIN_PSRLQI,
27263 IX86_BUILTIN_PUNPCKHBW,
27264 IX86_BUILTIN_PUNPCKHWD,
27265 IX86_BUILTIN_PUNPCKHDQ,
27266 IX86_BUILTIN_PUNPCKLBW,
27267 IX86_BUILTIN_PUNPCKLWD,
27268 IX86_BUILTIN_PUNPCKLDQ,
27270 IX86_BUILTIN_SHUFPS,
27272 IX86_BUILTIN_RCPPS,
27273 IX86_BUILTIN_RCPSS,
27274 IX86_BUILTIN_RSQRTPS,
27275 IX86_BUILTIN_RSQRTPS_NR,
27276 IX86_BUILTIN_RSQRTSS,
27277 IX86_BUILTIN_RSQRTF,
27278 IX86_BUILTIN_SQRTPS,
27279 IX86_BUILTIN_SQRTPS_NR,
27280 IX86_BUILTIN_SQRTSS,
27282 IX86_BUILTIN_UNPCKHPS,
27283 IX86_BUILTIN_UNPCKLPS,
27285 IX86_BUILTIN_ANDPS,
27286 IX86_BUILTIN_ANDNPS,
27287 IX86_BUILTIN_ORPS,
27288 IX86_BUILTIN_XORPS,
27290 IX86_BUILTIN_EMMS,
27291 IX86_BUILTIN_LDMXCSR,
27292 IX86_BUILTIN_STMXCSR,
27293 IX86_BUILTIN_SFENCE,
27295 IX86_BUILTIN_FXSAVE,
27296 IX86_BUILTIN_FXRSTOR,
27297 IX86_BUILTIN_FXSAVE64,
27298 IX86_BUILTIN_FXRSTOR64,
27300 IX86_BUILTIN_XSAVE,
27301 IX86_BUILTIN_XRSTOR,
27302 IX86_BUILTIN_XSAVE64,
27303 IX86_BUILTIN_XRSTOR64,
27305 IX86_BUILTIN_XSAVEOPT,
27306 IX86_BUILTIN_XSAVEOPT64,
27308 /* 3DNow! Original */
27309 IX86_BUILTIN_FEMMS,
27310 IX86_BUILTIN_PAVGUSB,
27311 IX86_BUILTIN_PF2ID,
27312 IX86_BUILTIN_PFACC,
27313 IX86_BUILTIN_PFADD,
27314 IX86_BUILTIN_PFCMPEQ,
27315 IX86_BUILTIN_PFCMPGE,
27316 IX86_BUILTIN_PFCMPGT,
27317 IX86_BUILTIN_PFMAX,
27318 IX86_BUILTIN_PFMIN,
27319 IX86_BUILTIN_PFMUL,
27320 IX86_BUILTIN_PFRCP,
27321 IX86_BUILTIN_PFRCPIT1,
27322 IX86_BUILTIN_PFRCPIT2,
27323 IX86_BUILTIN_PFRSQIT1,
27324 IX86_BUILTIN_PFRSQRT,
27325 IX86_BUILTIN_PFSUB,
27326 IX86_BUILTIN_PFSUBR,
27327 IX86_BUILTIN_PI2FD,
27328 IX86_BUILTIN_PMULHRW,
27330 /* 3DNow! Athlon Extensions */
27331 IX86_BUILTIN_PF2IW,
27332 IX86_BUILTIN_PFNACC,
27333 IX86_BUILTIN_PFPNACC,
27334 IX86_BUILTIN_PI2FW,
27335 IX86_BUILTIN_PSWAPDSI,
27336 IX86_BUILTIN_PSWAPDSF,
27338 /* SSE2 */
27339 IX86_BUILTIN_ADDPD,
27340 IX86_BUILTIN_ADDSD,
27341 IX86_BUILTIN_DIVPD,
27342 IX86_BUILTIN_DIVSD,
27343 IX86_BUILTIN_MULPD,
27344 IX86_BUILTIN_MULSD,
27345 IX86_BUILTIN_SUBPD,
27346 IX86_BUILTIN_SUBSD,
27348 IX86_BUILTIN_CMPEQPD,
27349 IX86_BUILTIN_CMPLTPD,
27350 IX86_BUILTIN_CMPLEPD,
27351 IX86_BUILTIN_CMPGTPD,
27352 IX86_BUILTIN_CMPGEPD,
27353 IX86_BUILTIN_CMPNEQPD,
27354 IX86_BUILTIN_CMPNLTPD,
27355 IX86_BUILTIN_CMPNLEPD,
27356 IX86_BUILTIN_CMPNGTPD,
27357 IX86_BUILTIN_CMPNGEPD,
27358 IX86_BUILTIN_CMPORDPD,
27359 IX86_BUILTIN_CMPUNORDPD,
27360 IX86_BUILTIN_CMPEQSD,
27361 IX86_BUILTIN_CMPLTSD,
27362 IX86_BUILTIN_CMPLESD,
27363 IX86_BUILTIN_CMPNEQSD,
27364 IX86_BUILTIN_CMPNLTSD,
27365 IX86_BUILTIN_CMPNLESD,
27366 IX86_BUILTIN_CMPORDSD,
27367 IX86_BUILTIN_CMPUNORDSD,
27369 IX86_BUILTIN_COMIEQSD,
27370 IX86_BUILTIN_COMILTSD,
27371 IX86_BUILTIN_COMILESD,
27372 IX86_BUILTIN_COMIGTSD,
27373 IX86_BUILTIN_COMIGESD,
27374 IX86_BUILTIN_COMINEQSD,
27375 IX86_BUILTIN_UCOMIEQSD,
27376 IX86_BUILTIN_UCOMILTSD,
27377 IX86_BUILTIN_UCOMILESD,
27378 IX86_BUILTIN_UCOMIGTSD,
27379 IX86_BUILTIN_UCOMIGESD,
27380 IX86_BUILTIN_UCOMINEQSD,
27382 IX86_BUILTIN_MAXPD,
27383 IX86_BUILTIN_MAXSD,
27384 IX86_BUILTIN_MINPD,
27385 IX86_BUILTIN_MINSD,
27387 IX86_BUILTIN_ANDPD,
27388 IX86_BUILTIN_ANDNPD,
27389 IX86_BUILTIN_ORPD,
27390 IX86_BUILTIN_XORPD,
27392 IX86_BUILTIN_SQRTPD,
27393 IX86_BUILTIN_SQRTSD,
27395 IX86_BUILTIN_UNPCKHPD,
27396 IX86_BUILTIN_UNPCKLPD,
27398 IX86_BUILTIN_SHUFPD,
27400 IX86_BUILTIN_LOADUPD,
27401 IX86_BUILTIN_STOREUPD,
27402 IX86_BUILTIN_MOVSD,
27404 IX86_BUILTIN_LOADHPD,
27405 IX86_BUILTIN_LOADLPD,
27407 IX86_BUILTIN_CVTDQ2PD,
27408 IX86_BUILTIN_CVTDQ2PS,
27410 IX86_BUILTIN_CVTPD2DQ,
27411 IX86_BUILTIN_CVTPD2PI,
27412 IX86_BUILTIN_CVTPD2PS,
27413 IX86_BUILTIN_CVTTPD2DQ,
27414 IX86_BUILTIN_CVTTPD2PI,
27416 IX86_BUILTIN_CVTPI2PD,
27417 IX86_BUILTIN_CVTSI2SD,
27418 IX86_BUILTIN_CVTSI642SD,
27420 IX86_BUILTIN_CVTSD2SI,
27421 IX86_BUILTIN_CVTSD2SI64,
27422 IX86_BUILTIN_CVTSD2SS,
27423 IX86_BUILTIN_CVTSS2SD,
27424 IX86_BUILTIN_CVTTSD2SI,
27425 IX86_BUILTIN_CVTTSD2SI64,
27427 IX86_BUILTIN_CVTPS2DQ,
27428 IX86_BUILTIN_CVTPS2PD,
27429 IX86_BUILTIN_CVTTPS2DQ,
27431 IX86_BUILTIN_MOVNTI,
27432 IX86_BUILTIN_MOVNTI64,
27433 IX86_BUILTIN_MOVNTPD,
27434 IX86_BUILTIN_MOVNTDQ,
27436 IX86_BUILTIN_MOVQ128,
27438 /* SSE2 MMX */
27439 IX86_BUILTIN_MASKMOVDQU,
27440 IX86_BUILTIN_MOVMSKPD,
27441 IX86_BUILTIN_PMOVMSKB128,
27443 IX86_BUILTIN_PACKSSWB128,
27444 IX86_BUILTIN_PACKSSDW128,
27445 IX86_BUILTIN_PACKUSWB128,
27447 IX86_BUILTIN_PADDB128,
27448 IX86_BUILTIN_PADDW128,
27449 IX86_BUILTIN_PADDD128,
27450 IX86_BUILTIN_PADDQ128,
27451 IX86_BUILTIN_PADDSB128,
27452 IX86_BUILTIN_PADDSW128,
27453 IX86_BUILTIN_PADDUSB128,
27454 IX86_BUILTIN_PADDUSW128,
27455 IX86_BUILTIN_PSUBB128,
27456 IX86_BUILTIN_PSUBW128,
27457 IX86_BUILTIN_PSUBD128,
27458 IX86_BUILTIN_PSUBQ128,
27459 IX86_BUILTIN_PSUBSB128,
27460 IX86_BUILTIN_PSUBSW128,
27461 IX86_BUILTIN_PSUBUSB128,
27462 IX86_BUILTIN_PSUBUSW128,
27464 IX86_BUILTIN_PAND128,
27465 IX86_BUILTIN_PANDN128,
27466 IX86_BUILTIN_POR128,
27467 IX86_BUILTIN_PXOR128,
27469 IX86_BUILTIN_PAVGB128,
27470 IX86_BUILTIN_PAVGW128,
27472 IX86_BUILTIN_PCMPEQB128,
27473 IX86_BUILTIN_PCMPEQW128,
27474 IX86_BUILTIN_PCMPEQD128,
27475 IX86_BUILTIN_PCMPGTB128,
27476 IX86_BUILTIN_PCMPGTW128,
27477 IX86_BUILTIN_PCMPGTD128,
27479 IX86_BUILTIN_PMADDWD128,
27481 IX86_BUILTIN_PMAXSW128,
27482 IX86_BUILTIN_PMAXUB128,
27483 IX86_BUILTIN_PMINSW128,
27484 IX86_BUILTIN_PMINUB128,
27486 IX86_BUILTIN_PMULUDQ,
27487 IX86_BUILTIN_PMULUDQ128,
27488 IX86_BUILTIN_PMULHUW128,
27489 IX86_BUILTIN_PMULHW128,
27490 IX86_BUILTIN_PMULLW128,
27492 IX86_BUILTIN_PSADBW128,
27493 IX86_BUILTIN_PSHUFHW,
27494 IX86_BUILTIN_PSHUFLW,
27495 IX86_BUILTIN_PSHUFD,
27497 IX86_BUILTIN_PSLLDQI128,
27498 IX86_BUILTIN_PSLLWI128,
27499 IX86_BUILTIN_PSLLDI128,
27500 IX86_BUILTIN_PSLLQI128,
27501 IX86_BUILTIN_PSRAWI128,
27502 IX86_BUILTIN_PSRADI128,
27503 IX86_BUILTIN_PSRLDQI128,
27504 IX86_BUILTIN_PSRLWI128,
27505 IX86_BUILTIN_PSRLDI128,
27506 IX86_BUILTIN_PSRLQI128,
27508 IX86_BUILTIN_PSLLDQ128,
27509 IX86_BUILTIN_PSLLW128,
27510 IX86_BUILTIN_PSLLD128,
27511 IX86_BUILTIN_PSLLQ128,
27512 IX86_BUILTIN_PSRAW128,
27513 IX86_BUILTIN_PSRAD128,
27514 IX86_BUILTIN_PSRLW128,
27515 IX86_BUILTIN_PSRLD128,
27516 IX86_BUILTIN_PSRLQ128,
27518 IX86_BUILTIN_PUNPCKHBW128,
27519 IX86_BUILTIN_PUNPCKHWD128,
27520 IX86_BUILTIN_PUNPCKHDQ128,
27521 IX86_BUILTIN_PUNPCKHQDQ128,
27522 IX86_BUILTIN_PUNPCKLBW128,
27523 IX86_BUILTIN_PUNPCKLWD128,
27524 IX86_BUILTIN_PUNPCKLDQ128,
27525 IX86_BUILTIN_PUNPCKLQDQ128,
27527 IX86_BUILTIN_CLFLUSH,
27528 IX86_BUILTIN_MFENCE,
27529 IX86_BUILTIN_LFENCE,
27530 IX86_BUILTIN_PAUSE,
27532 IX86_BUILTIN_FNSTENV,
27533 IX86_BUILTIN_FLDENV,
27534 IX86_BUILTIN_FNSTSW,
27535 IX86_BUILTIN_FNCLEX,
27537 IX86_BUILTIN_BSRSI,
27538 IX86_BUILTIN_BSRDI,
27539 IX86_BUILTIN_RDPMC,
27540 IX86_BUILTIN_RDTSC,
27541 IX86_BUILTIN_RDTSCP,
27542 IX86_BUILTIN_ROLQI,
27543 IX86_BUILTIN_ROLHI,
27544 IX86_BUILTIN_RORQI,
27545 IX86_BUILTIN_RORHI,
27547 /* SSE3. */
27548 IX86_BUILTIN_ADDSUBPS,
27549 IX86_BUILTIN_HADDPS,
27550 IX86_BUILTIN_HSUBPS,
27551 IX86_BUILTIN_MOVSHDUP,
27552 IX86_BUILTIN_MOVSLDUP,
27553 IX86_BUILTIN_ADDSUBPD,
27554 IX86_BUILTIN_HADDPD,
27555 IX86_BUILTIN_HSUBPD,
27556 IX86_BUILTIN_LDDQU,
27558 IX86_BUILTIN_MONITOR,
27559 IX86_BUILTIN_MWAIT,
27561 /* SSSE3. */
27562 IX86_BUILTIN_PHADDW,
27563 IX86_BUILTIN_PHADDD,
27564 IX86_BUILTIN_PHADDSW,
27565 IX86_BUILTIN_PHSUBW,
27566 IX86_BUILTIN_PHSUBD,
27567 IX86_BUILTIN_PHSUBSW,
27568 IX86_BUILTIN_PMADDUBSW,
27569 IX86_BUILTIN_PMULHRSW,
27570 IX86_BUILTIN_PSHUFB,
27571 IX86_BUILTIN_PSIGNB,
27572 IX86_BUILTIN_PSIGNW,
27573 IX86_BUILTIN_PSIGND,
27574 IX86_BUILTIN_PALIGNR,
27575 IX86_BUILTIN_PABSB,
27576 IX86_BUILTIN_PABSW,
27577 IX86_BUILTIN_PABSD,
27579 IX86_BUILTIN_PHADDW128,
27580 IX86_BUILTIN_PHADDD128,
27581 IX86_BUILTIN_PHADDSW128,
27582 IX86_BUILTIN_PHSUBW128,
27583 IX86_BUILTIN_PHSUBD128,
27584 IX86_BUILTIN_PHSUBSW128,
27585 IX86_BUILTIN_PMADDUBSW128,
27586 IX86_BUILTIN_PMULHRSW128,
27587 IX86_BUILTIN_PSHUFB128,
27588 IX86_BUILTIN_PSIGNB128,
27589 IX86_BUILTIN_PSIGNW128,
27590 IX86_BUILTIN_PSIGND128,
27591 IX86_BUILTIN_PALIGNR128,
27592 IX86_BUILTIN_PABSB128,
27593 IX86_BUILTIN_PABSW128,
27594 IX86_BUILTIN_PABSD128,
27596 /* AMDFAM10 - SSE4A New Instructions. */
27597 IX86_BUILTIN_MOVNTSD,
27598 IX86_BUILTIN_MOVNTSS,
27599 IX86_BUILTIN_EXTRQI,
27600 IX86_BUILTIN_EXTRQ,
27601 IX86_BUILTIN_INSERTQI,
27602 IX86_BUILTIN_INSERTQ,
27604 /* SSE4.1. */
27605 IX86_BUILTIN_BLENDPD,
27606 IX86_BUILTIN_BLENDPS,
27607 IX86_BUILTIN_BLENDVPD,
27608 IX86_BUILTIN_BLENDVPS,
27609 IX86_BUILTIN_PBLENDVB128,
27610 IX86_BUILTIN_PBLENDW128,
27612 IX86_BUILTIN_DPPD,
27613 IX86_BUILTIN_DPPS,
27615 IX86_BUILTIN_INSERTPS128,
27617 IX86_BUILTIN_MOVNTDQA,
27618 IX86_BUILTIN_MPSADBW128,
27619 IX86_BUILTIN_PACKUSDW128,
27620 IX86_BUILTIN_PCMPEQQ,
27621 IX86_BUILTIN_PHMINPOSUW128,
27623 IX86_BUILTIN_PMAXSB128,
27624 IX86_BUILTIN_PMAXSD128,
27625 IX86_BUILTIN_PMAXUD128,
27626 IX86_BUILTIN_PMAXUW128,
27628 IX86_BUILTIN_PMINSB128,
27629 IX86_BUILTIN_PMINSD128,
27630 IX86_BUILTIN_PMINUD128,
27631 IX86_BUILTIN_PMINUW128,
27633 IX86_BUILTIN_PMOVSXBW128,
27634 IX86_BUILTIN_PMOVSXBD128,
27635 IX86_BUILTIN_PMOVSXBQ128,
27636 IX86_BUILTIN_PMOVSXWD128,
27637 IX86_BUILTIN_PMOVSXWQ128,
27638 IX86_BUILTIN_PMOVSXDQ128,
27640 IX86_BUILTIN_PMOVZXBW128,
27641 IX86_BUILTIN_PMOVZXBD128,
27642 IX86_BUILTIN_PMOVZXBQ128,
27643 IX86_BUILTIN_PMOVZXWD128,
27644 IX86_BUILTIN_PMOVZXWQ128,
27645 IX86_BUILTIN_PMOVZXDQ128,
27647 IX86_BUILTIN_PMULDQ128,
27648 IX86_BUILTIN_PMULLD128,
27650 IX86_BUILTIN_ROUNDSD,
27651 IX86_BUILTIN_ROUNDSS,
27653 IX86_BUILTIN_ROUNDPD,
27654 IX86_BUILTIN_ROUNDPS,
27656 IX86_BUILTIN_FLOORPD,
27657 IX86_BUILTIN_CEILPD,
27658 IX86_BUILTIN_TRUNCPD,
27659 IX86_BUILTIN_RINTPD,
27660 IX86_BUILTIN_ROUNDPD_AZ,
27662 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27663 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27664 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27666 IX86_BUILTIN_FLOORPS,
27667 IX86_BUILTIN_CEILPS,
27668 IX86_BUILTIN_TRUNCPS,
27669 IX86_BUILTIN_RINTPS,
27670 IX86_BUILTIN_ROUNDPS_AZ,
27672 IX86_BUILTIN_FLOORPS_SFIX,
27673 IX86_BUILTIN_CEILPS_SFIX,
27674 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27676 IX86_BUILTIN_PTESTZ,
27677 IX86_BUILTIN_PTESTC,
27678 IX86_BUILTIN_PTESTNZC,
27680 IX86_BUILTIN_VEC_INIT_V2SI,
27681 IX86_BUILTIN_VEC_INIT_V4HI,
27682 IX86_BUILTIN_VEC_INIT_V8QI,
27683 IX86_BUILTIN_VEC_EXT_V2DF,
27684 IX86_BUILTIN_VEC_EXT_V2DI,
27685 IX86_BUILTIN_VEC_EXT_V4SF,
27686 IX86_BUILTIN_VEC_EXT_V4SI,
27687 IX86_BUILTIN_VEC_EXT_V8HI,
27688 IX86_BUILTIN_VEC_EXT_V2SI,
27689 IX86_BUILTIN_VEC_EXT_V4HI,
27690 IX86_BUILTIN_VEC_EXT_V16QI,
27691 IX86_BUILTIN_VEC_SET_V2DI,
27692 IX86_BUILTIN_VEC_SET_V4SF,
27693 IX86_BUILTIN_VEC_SET_V4SI,
27694 IX86_BUILTIN_VEC_SET_V8HI,
27695 IX86_BUILTIN_VEC_SET_V4HI,
27696 IX86_BUILTIN_VEC_SET_V16QI,
27698 IX86_BUILTIN_VEC_PACK_SFIX,
27699 IX86_BUILTIN_VEC_PACK_SFIX256,
27701 /* SSE4.2. */
27702 IX86_BUILTIN_CRC32QI,
27703 IX86_BUILTIN_CRC32HI,
27704 IX86_BUILTIN_CRC32SI,
27705 IX86_BUILTIN_CRC32DI,
27707 IX86_BUILTIN_PCMPESTRI128,
27708 IX86_BUILTIN_PCMPESTRM128,
27709 IX86_BUILTIN_PCMPESTRA128,
27710 IX86_BUILTIN_PCMPESTRC128,
27711 IX86_BUILTIN_PCMPESTRO128,
27712 IX86_BUILTIN_PCMPESTRS128,
27713 IX86_BUILTIN_PCMPESTRZ128,
27714 IX86_BUILTIN_PCMPISTRI128,
27715 IX86_BUILTIN_PCMPISTRM128,
27716 IX86_BUILTIN_PCMPISTRA128,
27717 IX86_BUILTIN_PCMPISTRC128,
27718 IX86_BUILTIN_PCMPISTRO128,
27719 IX86_BUILTIN_PCMPISTRS128,
27720 IX86_BUILTIN_PCMPISTRZ128,
27722 IX86_BUILTIN_PCMPGTQ,
27724 /* AES instructions */
27725 IX86_BUILTIN_AESENC128,
27726 IX86_BUILTIN_AESENCLAST128,
27727 IX86_BUILTIN_AESDEC128,
27728 IX86_BUILTIN_AESDECLAST128,
27729 IX86_BUILTIN_AESIMC128,
27730 IX86_BUILTIN_AESKEYGENASSIST128,
27732 /* PCLMUL instruction */
27733 IX86_BUILTIN_PCLMULQDQ128,
27735 /* AVX */
27736 IX86_BUILTIN_ADDPD256,
27737 IX86_BUILTIN_ADDPS256,
27738 IX86_BUILTIN_ADDSUBPD256,
27739 IX86_BUILTIN_ADDSUBPS256,
27740 IX86_BUILTIN_ANDPD256,
27741 IX86_BUILTIN_ANDPS256,
27742 IX86_BUILTIN_ANDNPD256,
27743 IX86_BUILTIN_ANDNPS256,
27744 IX86_BUILTIN_BLENDPD256,
27745 IX86_BUILTIN_BLENDPS256,
27746 IX86_BUILTIN_BLENDVPD256,
27747 IX86_BUILTIN_BLENDVPS256,
27748 IX86_BUILTIN_DIVPD256,
27749 IX86_BUILTIN_DIVPS256,
27750 IX86_BUILTIN_DPPS256,
27751 IX86_BUILTIN_HADDPD256,
27752 IX86_BUILTIN_HADDPS256,
27753 IX86_BUILTIN_HSUBPD256,
27754 IX86_BUILTIN_HSUBPS256,
27755 IX86_BUILTIN_MAXPD256,
27756 IX86_BUILTIN_MAXPS256,
27757 IX86_BUILTIN_MINPD256,
27758 IX86_BUILTIN_MINPS256,
27759 IX86_BUILTIN_MULPD256,
27760 IX86_BUILTIN_MULPS256,
27761 IX86_BUILTIN_ORPD256,
27762 IX86_BUILTIN_ORPS256,
27763 IX86_BUILTIN_SHUFPD256,
27764 IX86_BUILTIN_SHUFPS256,
27765 IX86_BUILTIN_SUBPD256,
27766 IX86_BUILTIN_SUBPS256,
27767 IX86_BUILTIN_XORPD256,
27768 IX86_BUILTIN_XORPS256,
27769 IX86_BUILTIN_CMPSD,
27770 IX86_BUILTIN_CMPSS,
27771 IX86_BUILTIN_CMPPD,
27772 IX86_BUILTIN_CMPPS,
27773 IX86_BUILTIN_CMPPD256,
27774 IX86_BUILTIN_CMPPS256,
27775 IX86_BUILTIN_CVTDQ2PD256,
27776 IX86_BUILTIN_CVTDQ2PS256,
27777 IX86_BUILTIN_CVTPD2PS256,
27778 IX86_BUILTIN_CVTPS2DQ256,
27779 IX86_BUILTIN_CVTPS2PD256,
27780 IX86_BUILTIN_CVTTPD2DQ256,
27781 IX86_BUILTIN_CVTPD2DQ256,
27782 IX86_BUILTIN_CVTTPS2DQ256,
27783 IX86_BUILTIN_EXTRACTF128PD256,
27784 IX86_BUILTIN_EXTRACTF128PS256,
27785 IX86_BUILTIN_EXTRACTF128SI256,
27786 IX86_BUILTIN_VZEROALL,
27787 IX86_BUILTIN_VZEROUPPER,
27788 IX86_BUILTIN_VPERMILVARPD,
27789 IX86_BUILTIN_VPERMILVARPS,
27790 IX86_BUILTIN_VPERMILVARPD256,
27791 IX86_BUILTIN_VPERMILVARPS256,
27792 IX86_BUILTIN_VPERMILPD,
27793 IX86_BUILTIN_VPERMILPS,
27794 IX86_BUILTIN_VPERMILPD256,
27795 IX86_BUILTIN_VPERMILPS256,
27796 IX86_BUILTIN_VPERMIL2PD,
27797 IX86_BUILTIN_VPERMIL2PS,
27798 IX86_BUILTIN_VPERMIL2PD256,
27799 IX86_BUILTIN_VPERMIL2PS256,
27800 IX86_BUILTIN_VPERM2F128PD256,
27801 IX86_BUILTIN_VPERM2F128PS256,
27802 IX86_BUILTIN_VPERM2F128SI256,
27803 IX86_BUILTIN_VBROADCASTSS,
27804 IX86_BUILTIN_VBROADCASTSD256,
27805 IX86_BUILTIN_VBROADCASTSS256,
27806 IX86_BUILTIN_VBROADCASTPD256,
27807 IX86_BUILTIN_VBROADCASTPS256,
27808 IX86_BUILTIN_VINSERTF128PD256,
27809 IX86_BUILTIN_VINSERTF128PS256,
27810 IX86_BUILTIN_VINSERTF128SI256,
27811 IX86_BUILTIN_LOADUPD256,
27812 IX86_BUILTIN_LOADUPS256,
27813 IX86_BUILTIN_STOREUPD256,
27814 IX86_BUILTIN_STOREUPS256,
27815 IX86_BUILTIN_LDDQU256,
27816 IX86_BUILTIN_MOVNTDQ256,
27817 IX86_BUILTIN_MOVNTPD256,
27818 IX86_BUILTIN_MOVNTPS256,
27819 IX86_BUILTIN_LOADDQU256,
27820 IX86_BUILTIN_STOREDQU256,
27821 IX86_BUILTIN_MASKLOADPD,
27822 IX86_BUILTIN_MASKLOADPS,
27823 IX86_BUILTIN_MASKSTOREPD,
27824 IX86_BUILTIN_MASKSTOREPS,
27825 IX86_BUILTIN_MASKLOADPD256,
27826 IX86_BUILTIN_MASKLOADPS256,
27827 IX86_BUILTIN_MASKSTOREPD256,
27828 IX86_BUILTIN_MASKSTOREPS256,
27829 IX86_BUILTIN_MOVSHDUP256,
27830 IX86_BUILTIN_MOVSLDUP256,
27831 IX86_BUILTIN_MOVDDUP256,
27833 IX86_BUILTIN_SQRTPD256,
27834 IX86_BUILTIN_SQRTPS256,
27835 IX86_BUILTIN_SQRTPS_NR256,
27836 IX86_BUILTIN_RSQRTPS256,
27837 IX86_BUILTIN_RSQRTPS_NR256,
27839 IX86_BUILTIN_RCPPS256,
27841 IX86_BUILTIN_ROUNDPD256,
27842 IX86_BUILTIN_ROUNDPS256,
27844 IX86_BUILTIN_FLOORPD256,
27845 IX86_BUILTIN_CEILPD256,
27846 IX86_BUILTIN_TRUNCPD256,
27847 IX86_BUILTIN_RINTPD256,
27848 IX86_BUILTIN_ROUNDPD_AZ256,
27850 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
27851 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
27852 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
27854 IX86_BUILTIN_FLOORPS256,
27855 IX86_BUILTIN_CEILPS256,
27856 IX86_BUILTIN_TRUNCPS256,
27857 IX86_BUILTIN_RINTPS256,
27858 IX86_BUILTIN_ROUNDPS_AZ256,
27860 IX86_BUILTIN_FLOORPS_SFIX256,
27861 IX86_BUILTIN_CEILPS_SFIX256,
27862 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
27864 IX86_BUILTIN_UNPCKHPD256,
27865 IX86_BUILTIN_UNPCKLPD256,
27866 IX86_BUILTIN_UNPCKHPS256,
27867 IX86_BUILTIN_UNPCKLPS256,
27869 IX86_BUILTIN_SI256_SI,
27870 IX86_BUILTIN_PS256_PS,
27871 IX86_BUILTIN_PD256_PD,
27872 IX86_BUILTIN_SI_SI256,
27873 IX86_BUILTIN_PS_PS256,
27874 IX86_BUILTIN_PD_PD256,
27876 IX86_BUILTIN_VTESTZPD,
27877 IX86_BUILTIN_VTESTCPD,
27878 IX86_BUILTIN_VTESTNZCPD,
27879 IX86_BUILTIN_VTESTZPS,
27880 IX86_BUILTIN_VTESTCPS,
27881 IX86_BUILTIN_VTESTNZCPS,
27882 IX86_BUILTIN_VTESTZPD256,
27883 IX86_BUILTIN_VTESTCPD256,
27884 IX86_BUILTIN_VTESTNZCPD256,
27885 IX86_BUILTIN_VTESTZPS256,
27886 IX86_BUILTIN_VTESTCPS256,
27887 IX86_BUILTIN_VTESTNZCPS256,
27888 IX86_BUILTIN_PTESTZ256,
27889 IX86_BUILTIN_PTESTC256,
27890 IX86_BUILTIN_PTESTNZC256,
27892 IX86_BUILTIN_MOVMSKPD256,
27893 IX86_BUILTIN_MOVMSKPS256,
27895 /* AVX2 */
27896 IX86_BUILTIN_MPSADBW256,
27897 IX86_BUILTIN_PABSB256,
27898 IX86_BUILTIN_PABSW256,
27899 IX86_BUILTIN_PABSD256,
27900 IX86_BUILTIN_PACKSSDW256,
27901 IX86_BUILTIN_PACKSSWB256,
27902 IX86_BUILTIN_PACKUSDW256,
27903 IX86_BUILTIN_PACKUSWB256,
27904 IX86_BUILTIN_PADDB256,
27905 IX86_BUILTIN_PADDW256,
27906 IX86_BUILTIN_PADDD256,
27907 IX86_BUILTIN_PADDQ256,
27908 IX86_BUILTIN_PADDSB256,
27909 IX86_BUILTIN_PADDSW256,
27910 IX86_BUILTIN_PADDUSB256,
27911 IX86_BUILTIN_PADDUSW256,
27912 IX86_BUILTIN_PALIGNR256,
27913 IX86_BUILTIN_AND256I,
27914 IX86_BUILTIN_ANDNOT256I,
27915 IX86_BUILTIN_PAVGB256,
27916 IX86_BUILTIN_PAVGW256,
27917 IX86_BUILTIN_PBLENDVB256,
27918 IX86_BUILTIN_PBLENDVW256,
27919 IX86_BUILTIN_PCMPEQB256,
27920 IX86_BUILTIN_PCMPEQW256,
27921 IX86_BUILTIN_PCMPEQD256,
27922 IX86_BUILTIN_PCMPEQQ256,
27923 IX86_BUILTIN_PCMPGTB256,
27924 IX86_BUILTIN_PCMPGTW256,
27925 IX86_BUILTIN_PCMPGTD256,
27926 IX86_BUILTIN_PCMPGTQ256,
27927 IX86_BUILTIN_PHADDW256,
27928 IX86_BUILTIN_PHADDD256,
27929 IX86_BUILTIN_PHADDSW256,
27930 IX86_BUILTIN_PHSUBW256,
27931 IX86_BUILTIN_PHSUBD256,
27932 IX86_BUILTIN_PHSUBSW256,
27933 IX86_BUILTIN_PMADDUBSW256,
27934 IX86_BUILTIN_PMADDWD256,
27935 IX86_BUILTIN_PMAXSB256,
27936 IX86_BUILTIN_PMAXSW256,
27937 IX86_BUILTIN_PMAXSD256,
27938 IX86_BUILTIN_PMAXUB256,
27939 IX86_BUILTIN_PMAXUW256,
27940 IX86_BUILTIN_PMAXUD256,
27941 IX86_BUILTIN_PMINSB256,
27942 IX86_BUILTIN_PMINSW256,
27943 IX86_BUILTIN_PMINSD256,
27944 IX86_BUILTIN_PMINUB256,
27945 IX86_BUILTIN_PMINUW256,
27946 IX86_BUILTIN_PMINUD256,
27947 IX86_BUILTIN_PMOVMSKB256,
27948 IX86_BUILTIN_PMOVSXBW256,
27949 IX86_BUILTIN_PMOVSXBD256,
27950 IX86_BUILTIN_PMOVSXBQ256,
27951 IX86_BUILTIN_PMOVSXWD256,
27952 IX86_BUILTIN_PMOVSXWQ256,
27953 IX86_BUILTIN_PMOVSXDQ256,
27954 IX86_BUILTIN_PMOVZXBW256,
27955 IX86_BUILTIN_PMOVZXBD256,
27956 IX86_BUILTIN_PMOVZXBQ256,
27957 IX86_BUILTIN_PMOVZXWD256,
27958 IX86_BUILTIN_PMOVZXWQ256,
27959 IX86_BUILTIN_PMOVZXDQ256,
27960 IX86_BUILTIN_PMULDQ256,
27961 IX86_BUILTIN_PMULHRSW256,
27962 IX86_BUILTIN_PMULHUW256,
27963 IX86_BUILTIN_PMULHW256,
27964 IX86_BUILTIN_PMULLW256,
27965 IX86_BUILTIN_PMULLD256,
27966 IX86_BUILTIN_PMULUDQ256,
27967 IX86_BUILTIN_POR256,
27968 IX86_BUILTIN_PSADBW256,
27969 IX86_BUILTIN_PSHUFB256,
27970 IX86_BUILTIN_PSHUFD256,
27971 IX86_BUILTIN_PSHUFHW256,
27972 IX86_BUILTIN_PSHUFLW256,
27973 IX86_BUILTIN_PSIGNB256,
27974 IX86_BUILTIN_PSIGNW256,
27975 IX86_BUILTIN_PSIGND256,
27976 IX86_BUILTIN_PSLLDQI256,
27977 IX86_BUILTIN_PSLLWI256,
27978 IX86_BUILTIN_PSLLW256,
27979 IX86_BUILTIN_PSLLDI256,
27980 IX86_BUILTIN_PSLLD256,
27981 IX86_BUILTIN_PSLLQI256,
27982 IX86_BUILTIN_PSLLQ256,
27983 IX86_BUILTIN_PSRAWI256,
27984 IX86_BUILTIN_PSRAW256,
27985 IX86_BUILTIN_PSRADI256,
27986 IX86_BUILTIN_PSRAD256,
27987 IX86_BUILTIN_PSRLDQI256,
27988 IX86_BUILTIN_PSRLWI256,
27989 IX86_BUILTIN_PSRLW256,
27990 IX86_BUILTIN_PSRLDI256,
27991 IX86_BUILTIN_PSRLD256,
27992 IX86_BUILTIN_PSRLQI256,
27993 IX86_BUILTIN_PSRLQ256,
27994 IX86_BUILTIN_PSUBB256,
27995 IX86_BUILTIN_PSUBW256,
27996 IX86_BUILTIN_PSUBD256,
27997 IX86_BUILTIN_PSUBQ256,
27998 IX86_BUILTIN_PSUBSB256,
27999 IX86_BUILTIN_PSUBSW256,
28000 IX86_BUILTIN_PSUBUSB256,
28001 IX86_BUILTIN_PSUBUSW256,
28002 IX86_BUILTIN_PUNPCKHBW256,
28003 IX86_BUILTIN_PUNPCKHWD256,
28004 IX86_BUILTIN_PUNPCKHDQ256,
28005 IX86_BUILTIN_PUNPCKHQDQ256,
28006 IX86_BUILTIN_PUNPCKLBW256,
28007 IX86_BUILTIN_PUNPCKLWD256,
28008 IX86_BUILTIN_PUNPCKLDQ256,
28009 IX86_BUILTIN_PUNPCKLQDQ256,
28010 IX86_BUILTIN_PXOR256,
28011 IX86_BUILTIN_MOVNTDQA256,
28012 IX86_BUILTIN_VBROADCASTSS_PS,
28013 IX86_BUILTIN_VBROADCASTSS_PS256,
28014 IX86_BUILTIN_VBROADCASTSD_PD256,
28015 IX86_BUILTIN_VBROADCASTSI256,
28016 IX86_BUILTIN_PBLENDD256,
28017 IX86_BUILTIN_PBLENDD128,
28018 IX86_BUILTIN_PBROADCASTB256,
28019 IX86_BUILTIN_PBROADCASTW256,
28020 IX86_BUILTIN_PBROADCASTD256,
28021 IX86_BUILTIN_PBROADCASTQ256,
28022 IX86_BUILTIN_PBROADCASTB128,
28023 IX86_BUILTIN_PBROADCASTW128,
28024 IX86_BUILTIN_PBROADCASTD128,
28025 IX86_BUILTIN_PBROADCASTQ128,
28026 IX86_BUILTIN_VPERMVARSI256,
28027 IX86_BUILTIN_VPERMDF256,
28028 IX86_BUILTIN_VPERMVARSF256,
28029 IX86_BUILTIN_VPERMDI256,
28030 IX86_BUILTIN_VPERMTI256,
28031 IX86_BUILTIN_VEXTRACT128I256,
28032 IX86_BUILTIN_VINSERT128I256,
28033 IX86_BUILTIN_MASKLOADD,
28034 IX86_BUILTIN_MASKLOADQ,
28035 IX86_BUILTIN_MASKLOADD256,
28036 IX86_BUILTIN_MASKLOADQ256,
28037 IX86_BUILTIN_MASKSTORED,
28038 IX86_BUILTIN_MASKSTOREQ,
28039 IX86_BUILTIN_MASKSTORED256,
28040 IX86_BUILTIN_MASKSTOREQ256,
28041 IX86_BUILTIN_PSLLVV4DI,
28042 IX86_BUILTIN_PSLLVV2DI,
28043 IX86_BUILTIN_PSLLVV8SI,
28044 IX86_BUILTIN_PSLLVV4SI,
28045 IX86_BUILTIN_PSRAVV8SI,
28046 IX86_BUILTIN_PSRAVV4SI,
28047 IX86_BUILTIN_PSRLVV4DI,
28048 IX86_BUILTIN_PSRLVV2DI,
28049 IX86_BUILTIN_PSRLVV8SI,
28050 IX86_BUILTIN_PSRLVV4SI,
28052 IX86_BUILTIN_GATHERSIV2DF,
28053 IX86_BUILTIN_GATHERSIV4DF,
28054 IX86_BUILTIN_GATHERDIV2DF,
28055 IX86_BUILTIN_GATHERDIV4DF,
28056 IX86_BUILTIN_GATHERSIV4SF,
28057 IX86_BUILTIN_GATHERSIV8SF,
28058 IX86_BUILTIN_GATHERDIV4SF,
28059 IX86_BUILTIN_GATHERDIV8SF,
28060 IX86_BUILTIN_GATHERSIV2DI,
28061 IX86_BUILTIN_GATHERSIV4DI,
28062 IX86_BUILTIN_GATHERDIV2DI,
28063 IX86_BUILTIN_GATHERDIV4DI,
28064 IX86_BUILTIN_GATHERSIV4SI,
28065 IX86_BUILTIN_GATHERSIV8SI,
28066 IX86_BUILTIN_GATHERDIV4SI,
28067 IX86_BUILTIN_GATHERDIV8SI,
28069 /* AVX512F */
28070 IX86_BUILTIN_ADDPD512,
28071 IX86_BUILTIN_ADDPS512,
28072 IX86_BUILTIN_ADDSD_ROUND,
28073 IX86_BUILTIN_ADDSS_ROUND,
28074 IX86_BUILTIN_ALIGND512,
28075 IX86_BUILTIN_ALIGNQ512,
28076 IX86_BUILTIN_BLENDMD512,
28077 IX86_BUILTIN_BLENDMPD512,
28078 IX86_BUILTIN_BLENDMPS512,
28079 IX86_BUILTIN_BLENDMQ512,
28080 IX86_BUILTIN_BROADCASTF32X4_512,
28081 IX86_BUILTIN_BROADCASTF64X4_512,
28082 IX86_BUILTIN_BROADCASTI32X4_512,
28083 IX86_BUILTIN_BROADCASTI64X4_512,
28084 IX86_BUILTIN_BROADCASTSD512,
28085 IX86_BUILTIN_BROADCASTSS512,
28086 IX86_BUILTIN_CMPD512,
28087 IX86_BUILTIN_CMPPD512,
28088 IX86_BUILTIN_CMPPS512,
28089 IX86_BUILTIN_CMPQ512,
28090 IX86_BUILTIN_CMPSD_MASK,
28091 IX86_BUILTIN_CMPSS_MASK,
28092 IX86_BUILTIN_COMIDF,
28093 IX86_BUILTIN_COMISF,
28094 IX86_BUILTIN_COMPRESSPD512,
28095 IX86_BUILTIN_COMPRESSPDSTORE512,
28096 IX86_BUILTIN_COMPRESSPS512,
28097 IX86_BUILTIN_COMPRESSPSSTORE512,
28098 IX86_BUILTIN_CVTDQ2PD512,
28099 IX86_BUILTIN_CVTDQ2PS512,
28100 IX86_BUILTIN_CVTPD2DQ512,
28101 IX86_BUILTIN_CVTPD2PS512,
28102 IX86_BUILTIN_CVTPD2UDQ512,
28103 IX86_BUILTIN_CVTPH2PS512,
28104 IX86_BUILTIN_CVTPS2DQ512,
28105 IX86_BUILTIN_CVTPS2PD512,
28106 IX86_BUILTIN_CVTPS2PH512,
28107 IX86_BUILTIN_CVTPS2UDQ512,
28108 IX86_BUILTIN_CVTSD2SS_ROUND,
28109 IX86_BUILTIN_CVTSI2SD64,
28110 IX86_BUILTIN_CVTSI2SS32,
28111 IX86_BUILTIN_CVTSI2SS64,
28112 IX86_BUILTIN_CVTSS2SD_ROUND,
28113 IX86_BUILTIN_CVTTPD2DQ512,
28114 IX86_BUILTIN_CVTTPD2UDQ512,
28115 IX86_BUILTIN_CVTTPS2DQ512,
28116 IX86_BUILTIN_CVTTPS2UDQ512,
28117 IX86_BUILTIN_CVTUDQ2PD512,
28118 IX86_BUILTIN_CVTUDQ2PS512,
28119 IX86_BUILTIN_CVTUSI2SD32,
28120 IX86_BUILTIN_CVTUSI2SD64,
28121 IX86_BUILTIN_CVTUSI2SS32,
28122 IX86_BUILTIN_CVTUSI2SS64,
28123 IX86_BUILTIN_DIVPD512,
28124 IX86_BUILTIN_DIVPS512,
28125 IX86_BUILTIN_DIVSD_ROUND,
28126 IX86_BUILTIN_DIVSS_ROUND,
28127 IX86_BUILTIN_EXPANDPD512,
28128 IX86_BUILTIN_EXPANDPD512Z,
28129 IX86_BUILTIN_EXPANDPDLOAD512,
28130 IX86_BUILTIN_EXPANDPDLOAD512Z,
28131 IX86_BUILTIN_EXPANDPS512,
28132 IX86_BUILTIN_EXPANDPS512Z,
28133 IX86_BUILTIN_EXPANDPSLOAD512,
28134 IX86_BUILTIN_EXPANDPSLOAD512Z,
28135 IX86_BUILTIN_EXTRACTF32X4,
28136 IX86_BUILTIN_EXTRACTF64X4,
28137 IX86_BUILTIN_EXTRACTI32X4,
28138 IX86_BUILTIN_EXTRACTI64X4,
28139 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28140 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28141 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28142 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28143 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28144 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28145 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28146 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28147 IX86_BUILTIN_GETEXPPD512,
28148 IX86_BUILTIN_GETEXPPS512,
28149 IX86_BUILTIN_GETEXPSD128,
28150 IX86_BUILTIN_GETEXPSS128,
28151 IX86_BUILTIN_GETMANTPD512,
28152 IX86_BUILTIN_GETMANTPS512,
28153 IX86_BUILTIN_GETMANTSD128,
28154 IX86_BUILTIN_GETMANTSS128,
28155 IX86_BUILTIN_INSERTF32X4,
28156 IX86_BUILTIN_INSERTF64X4,
28157 IX86_BUILTIN_INSERTI32X4,
28158 IX86_BUILTIN_INSERTI64X4,
28159 IX86_BUILTIN_LOADAPD512,
28160 IX86_BUILTIN_LOADAPS512,
28161 IX86_BUILTIN_LOADDQUDI512,
28162 IX86_BUILTIN_LOADDQUSI512,
28163 IX86_BUILTIN_LOADUPD512,
28164 IX86_BUILTIN_LOADUPS512,
28165 IX86_BUILTIN_MAXPD512,
28166 IX86_BUILTIN_MAXPS512,
28167 IX86_BUILTIN_MAXSD_ROUND,
28168 IX86_BUILTIN_MAXSS_ROUND,
28169 IX86_BUILTIN_MINPD512,
28170 IX86_BUILTIN_MINPS512,
28171 IX86_BUILTIN_MINSD_ROUND,
28172 IX86_BUILTIN_MINSS_ROUND,
28173 IX86_BUILTIN_MOVAPD512,
28174 IX86_BUILTIN_MOVAPS512,
28175 IX86_BUILTIN_MOVDDUP512,
28176 IX86_BUILTIN_MOVDQA32LOAD512,
28177 IX86_BUILTIN_MOVDQA32STORE512,
28178 IX86_BUILTIN_MOVDQA32_512,
28179 IX86_BUILTIN_MOVDQA64LOAD512,
28180 IX86_BUILTIN_MOVDQA64STORE512,
28181 IX86_BUILTIN_MOVDQA64_512,
28182 IX86_BUILTIN_MOVNTDQ512,
28183 IX86_BUILTIN_MOVNTDQA512,
28184 IX86_BUILTIN_MOVNTPD512,
28185 IX86_BUILTIN_MOVNTPS512,
28186 IX86_BUILTIN_MOVSHDUP512,
28187 IX86_BUILTIN_MOVSLDUP512,
28188 IX86_BUILTIN_MULPD512,
28189 IX86_BUILTIN_MULPS512,
28190 IX86_BUILTIN_MULSD_ROUND,
28191 IX86_BUILTIN_MULSS_ROUND,
28192 IX86_BUILTIN_PABSD512,
28193 IX86_BUILTIN_PABSQ512,
28194 IX86_BUILTIN_PADDD512,
28195 IX86_BUILTIN_PADDQ512,
28196 IX86_BUILTIN_PANDD512,
28197 IX86_BUILTIN_PANDND512,
28198 IX86_BUILTIN_PANDNQ512,
28199 IX86_BUILTIN_PANDQ512,
28200 IX86_BUILTIN_PBROADCASTD512,
28201 IX86_BUILTIN_PBROADCASTD512_GPR,
28202 IX86_BUILTIN_PBROADCASTMB512,
28203 IX86_BUILTIN_PBROADCASTMW512,
28204 IX86_BUILTIN_PBROADCASTQ512,
28205 IX86_BUILTIN_PBROADCASTQ512_GPR,
28206 IX86_BUILTIN_PBROADCASTQ512_MEM,
28207 IX86_BUILTIN_PCMPEQD512_MASK,
28208 IX86_BUILTIN_PCMPEQQ512_MASK,
28209 IX86_BUILTIN_PCMPGTD512_MASK,
28210 IX86_BUILTIN_PCMPGTQ512_MASK,
28211 IX86_BUILTIN_PCOMPRESSD512,
28212 IX86_BUILTIN_PCOMPRESSDSTORE512,
28213 IX86_BUILTIN_PCOMPRESSQ512,
28214 IX86_BUILTIN_PCOMPRESSQSTORE512,
28215 IX86_BUILTIN_PEXPANDD512,
28216 IX86_BUILTIN_PEXPANDD512Z,
28217 IX86_BUILTIN_PEXPANDDLOAD512,
28218 IX86_BUILTIN_PEXPANDDLOAD512Z,
28219 IX86_BUILTIN_PEXPANDQ512,
28220 IX86_BUILTIN_PEXPANDQ512Z,
28221 IX86_BUILTIN_PEXPANDQLOAD512,
28222 IX86_BUILTIN_PEXPANDQLOAD512Z,
28223 IX86_BUILTIN_PMAXSD512,
28224 IX86_BUILTIN_PMAXSQ512,
28225 IX86_BUILTIN_PMAXUD512,
28226 IX86_BUILTIN_PMAXUQ512,
28227 IX86_BUILTIN_PMINSD512,
28228 IX86_BUILTIN_PMINSQ512,
28229 IX86_BUILTIN_PMINUD512,
28230 IX86_BUILTIN_PMINUQ512,
28231 IX86_BUILTIN_PMOVDB512,
28232 IX86_BUILTIN_PMOVDB512_MEM,
28233 IX86_BUILTIN_PMOVDW512,
28234 IX86_BUILTIN_PMOVDW512_MEM,
28235 IX86_BUILTIN_PMOVQB512,
28236 IX86_BUILTIN_PMOVQB512_MEM,
28237 IX86_BUILTIN_PMOVQD512,
28238 IX86_BUILTIN_PMOVQD512_MEM,
28239 IX86_BUILTIN_PMOVQW512,
28240 IX86_BUILTIN_PMOVQW512_MEM,
28241 IX86_BUILTIN_PMOVSDB512,
28242 IX86_BUILTIN_PMOVSDB512_MEM,
28243 IX86_BUILTIN_PMOVSDW512,
28244 IX86_BUILTIN_PMOVSDW512_MEM,
28245 IX86_BUILTIN_PMOVSQB512,
28246 IX86_BUILTIN_PMOVSQB512_MEM,
28247 IX86_BUILTIN_PMOVSQD512,
28248 IX86_BUILTIN_PMOVSQD512_MEM,
28249 IX86_BUILTIN_PMOVSQW512,
28250 IX86_BUILTIN_PMOVSQW512_MEM,
28251 IX86_BUILTIN_PMOVSXBD512,
28252 IX86_BUILTIN_PMOVSXBQ512,
28253 IX86_BUILTIN_PMOVSXDQ512,
28254 IX86_BUILTIN_PMOVSXWD512,
28255 IX86_BUILTIN_PMOVSXWQ512,
28256 IX86_BUILTIN_PMOVUSDB512,
28257 IX86_BUILTIN_PMOVUSDB512_MEM,
28258 IX86_BUILTIN_PMOVUSDW512,
28259 IX86_BUILTIN_PMOVUSDW512_MEM,
28260 IX86_BUILTIN_PMOVUSQB512,
28261 IX86_BUILTIN_PMOVUSQB512_MEM,
28262 IX86_BUILTIN_PMOVUSQD512,
28263 IX86_BUILTIN_PMOVUSQD512_MEM,
28264 IX86_BUILTIN_PMOVUSQW512,
28265 IX86_BUILTIN_PMOVUSQW512_MEM,
28266 IX86_BUILTIN_PMOVZXBD512,
28267 IX86_BUILTIN_PMOVZXBQ512,
28268 IX86_BUILTIN_PMOVZXDQ512,
28269 IX86_BUILTIN_PMOVZXWD512,
28270 IX86_BUILTIN_PMOVZXWQ512,
28271 IX86_BUILTIN_PMULDQ512,
28272 IX86_BUILTIN_PMULLD512,
28273 IX86_BUILTIN_PMULUDQ512,
28274 IX86_BUILTIN_PORD512,
28275 IX86_BUILTIN_PORQ512,
28276 IX86_BUILTIN_PROLD512,
28277 IX86_BUILTIN_PROLQ512,
28278 IX86_BUILTIN_PROLVD512,
28279 IX86_BUILTIN_PROLVQ512,
28280 IX86_BUILTIN_PRORD512,
28281 IX86_BUILTIN_PRORQ512,
28282 IX86_BUILTIN_PRORVD512,
28283 IX86_BUILTIN_PRORVQ512,
28284 IX86_BUILTIN_PSHUFD512,
28285 IX86_BUILTIN_PSLLD512,
28286 IX86_BUILTIN_PSLLDI512,
28287 IX86_BUILTIN_PSLLQ512,
28288 IX86_BUILTIN_PSLLQI512,
28289 IX86_BUILTIN_PSLLVV16SI,
28290 IX86_BUILTIN_PSLLVV8DI,
28291 IX86_BUILTIN_PSRAD512,
28292 IX86_BUILTIN_PSRADI512,
28293 IX86_BUILTIN_PSRAQ512,
28294 IX86_BUILTIN_PSRAQI512,
28295 IX86_BUILTIN_PSRAVV16SI,
28296 IX86_BUILTIN_PSRAVV8DI,
28297 IX86_BUILTIN_PSRLD512,
28298 IX86_BUILTIN_PSRLDI512,
28299 IX86_BUILTIN_PSRLQ512,
28300 IX86_BUILTIN_PSRLQI512,
28301 IX86_BUILTIN_PSRLVV16SI,
28302 IX86_BUILTIN_PSRLVV8DI,
28303 IX86_BUILTIN_PSUBD512,
28304 IX86_BUILTIN_PSUBQ512,
28305 IX86_BUILTIN_PTESTMD512,
28306 IX86_BUILTIN_PTESTMQ512,
28307 IX86_BUILTIN_PTESTNMD512,
28308 IX86_BUILTIN_PTESTNMQ512,
28309 IX86_BUILTIN_PUNPCKHDQ512,
28310 IX86_BUILTIN_PUNPCKHQDQ512,
28311 IX86_BUILTIN_PUNPCKLDQ512,
28312 IX86_BUILTIN_PUNPCKLQDQ512,
28313 IX86_BUILTIN_PXORD512,
28314 IX86_BUILTIN_PXORQ512,
28315 IX86_BUILTIN_RCP14PD512,
28316 IX86_BUILTIN_RCP14PS512,
28317 IX86_BUILTIN_RCP14SD,
28318 IX86_BUILTIN_RCP14SS,
28319 IX86_BUILTIN_RNDSCALEPD,
28320 IX86_BUILTIN_RNDSCALEPS,
28321 IX86_BUILTIN_RNDSCALESD,
28322 IX86_BUILTIN_RNDSCALESS,
28323 IX86_BUILTIN_RSQRT14PD512,
28324 IX86_BUILTIN_RSQRT14PS512,
28325 IX86_BUILTIN_RSQRT14SD,
28326 IX86_BUILTIN_RSQRT14SS,
28327 IX86_BUILTIN_SCALEFPD512,
28328 IX86_BUILTIN_SCALEFPS512,
28329 IX86_BUILTIN_SCALEFSD,
28330 IX86_BUILTIN_SCALEFSS,
28331 IX86_BUILTIN_SHUFPD512,
28332 IX86_BUILTIN_SHUFPS512,
28333 IX86_BUILTIN_SHUF_F32x4,
28334 IX86_BUILTIN_SHUF_F64x2,
28335 IX86_BUILTIN_SHUF_I32x4,
28336 IX86_BUILTIN_SHUF_I64x2,
28337 IX86_BUILTIN_SQRTPD512,
28338 IX86_BUILTIN_SQRTPD512_MASK,
28339 IX86_BUILTIN_SQRTPS512_MASK,
28340 IX86_BUILTIN_SQRTPS_NR512,
28341 IX86_BUILTIN_SQRTSD_ROUND,
28342 IX86_BUILTIN_SQRTSS_ROUND,
28343 IX86_BUILTIN_STOREAPD512,
28344 IX86_BUILTIN_STOREAPS512,
28345 IX86_BUILTIN_STOREDQUDI512,
28346 IX86_BUILTIN_STOREDQUSI512,
28347 IX86_BUILTIN_STOREUPD512,
28348 IX86_BUILTIN_STOREUPS512,
28349 IX86_BUILTIN_SUBPD512,
28350 IX86_BUILTIN_SUBPS512,
28351 IX86_BUILTIN_SUBSD_ROUND,
28352 IX86_BUILTIN_SUBSS_ROUND,
28353 IX86_BUILTIN_UCMPD512,
28354 IX86_BUILTIN_UCMPQ512,
28355 IX86_BUILTIN_UNPCKHPD512,
28356 IX86_BUILTIN_UNPCKHPS512,
28357 IX86_BUILTIN_UNPCKLPD512,
28358 IX86_BUILTIN_UNPCKLPS512,
28359 IX86_BUILTIN_VCVTSD2SI32,
28360 IX86_BUILTIN_VCVTSD2SI64,
28361 IX86_BUILTIN_VCVTSD2USI32,
28362 IX86_BUILTIN_VCVTSD2USI64,
28363 IX86_BUILTIN_VCVTSS2SI32,
28364 IX86_BUILTIN_VCVTSS2SI64,
28365 IX86_BUILTIN_VCVTSS2USI32,
28366 IX86_BUILTIN_VCVTSS2USI64,
28367 IX86_BUILTIN_VCVTTSD2SI32,
28368 IX86_BUILTIN_VCVTTSD2SI64,
28369 IX86_BUILTIN_VCVTTSD2USI32,
28370 IX86_BUILTIN_VCVTTSD2USI64,
28371 IX86_BUILTIN_VCVTTSS2SI32,
28372 IX86_BUILTIN_VCVTTSS2SI64,
28373 IX86_BUILTIN_VCVTTSS2USI32,
28374 IX86_BUILTIN_VCVTTSS2USI64,
28375 IX86_BUILTIN_VFMADDPD512_MASK,
28376 IX86_BUILTIN_VFMADDPD512_MASK3,
28377 IX86_BUILTIN_VFMADDPD512_MASKZ,
28378 IX86_BUILTIN_VFMADDPS512_MASK,
28379 IX86_BUILTIN_VFMADDPS512_MASK3,
28380 IX86_BUILTIN_VFMADDPS512_MASKZ,
28381 IX86_BUILTIN_VFMADDSD3_ROUND,
28382 IX86_BUILTIN_VFMADDSS3_ROUND,
28383 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28384 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28385 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28386 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28387 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28388 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28389 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28390 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28391 IX86_BUILTIN_VFMSUBPD512_MASK3,
28392 IX86_BUILTIN_VFMSUBPS512_MASK3,
28393 IX86_BUILTIN_VFMSUBSD3_MASK3,
28394 IX86_BUILTIN_VFMSUBSS3_MASK3,
28395 IX86_BUILTIN_VFNMADDPD512_MASK,
28396 IX86_BUILTIN_VFNMADDPS512_MASK,
28397 IX86_BUILTIN_VFNMSUBPD512_MASK,
28398 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28399 IX86_BUILTIN_VFNMSUBPS512_MASK,
28400 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28401 IX86_BUILTIN_VPCLZCNTD512,
28402 IX86_BUILTIN_VPCLZCNTQ512,
28403 IX86_BUILTIN_VPCONFLICTD512,
28404 IX86_BUILTIN_VPCONFLICTQ512,
28405 IX86_BUILTIN_VPERMDF512,
28406 IX86_BUILTIN_VPERMDI512,
28407 IX86_BUILTIN_VPERMI2VARD512,
28408 IX86_BUILTIN_VPERMI2VARPD512,
28409 IX86_BUILTIN_VPERMI2VARPS512,
28410 IX86_BUILTIN_VPERMI2VARQ512,
28411 IX86_BUILTIN_VPERMILPD512,
28412 IX86_BUILTIN_VPERMILPS512,
28413 IX86_BUILTIN_VPERMILVARPD512,
28414 IX86_BUILTIN_VPERMILVARPS512,
28415 IX86_BUILTIN_VPERMT2VARD512,
28416 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28417 IX86_BUILTIN_VPERMT2VARPD512,
28418 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28419 IX86_BUILTIN_VPERMT2VARPS512,
28420 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28421 IX86_BUILTIN_VPERMT2VARQ512,
28422 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28423 IX86_BUILTIN_VPERMVARDF512,
28424 IX86_BUILTIN_VPERMVARDI512,
28425 IX86_BUILTIN_VPERMVARSF512,
28426 IX86_BUILTIN_VPERMVARSI512,
28427 IX86_BUILTIN_VTERNLOGD512_MASK,
28428 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28429 IX86_BUILTIN_VTERNLOGQ512_MASK,
28430 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28432 /* Mask arithmetic operations */
28433 IX86_BUILTIN_KAND16,
28434 IX86_BUILTIN_KANDN16,
28435 IX86_BUILTIN_KNOT16,
28436 IX86_BUILTIN_KOR16,
28437 IX86_BUILTIN_KORTESTC16,
28438 IX86_BUILTIN_KORTESTZ16,
28439 IX86_BUILTIN_KUNPCKBW,
28440 IX86_BUILTIN_KXNOR16,
28441 IX86_BUILTIN_KXOR16,
28442 IX86_BUILTIN_KMOV16,
28444 /* Alternate 4 and 8 element gather/scatter for the vectorizer
28445 where all operands are 32-byte or 64-byte wide respectively. */
28446 IX86_BUILTIN_GATHERALTSIV4DF,
28447 IX86_BUILTIN_GATHERALTDIV8SF,
28448 IX86_BUILTIN_GATHERALTSIV4DI,
28449 IX86_BUILTIN_GATHERALTDIV8SI,
28450 IX86_BUILTIN_GATHER3ALTDIV16SF,
28451 IX86_BUILTIN_GATHER3ALTDIV16SI,
28452 IX86_BUILTIN_GATHER3ALTSIV8DF,
28453 IX86_BUILTIN_GATHER3ALTSIV8DI,
28454 IX86_BUILTIN_GATHER3DIV16SF,
28455 IX86_BUILTIN_GATHER3DIV16SI,
28456 IX86_BUILTIN_GATHER3DIV8DF,
28457 IX86_BUILTIN_GATHER3DIV8DI,
28458 IX86_BUILTIN_GATHER3SIV16SF,
28459 IX86_BUILTIN_GATHER3SIV16SI,
28460 IX86_BUILTIN_GATHER3SIV8DF,
28461 IX86_BUILTIN_GATHER3SIV8DI,
28462 IX86_BUILTIN_SCATTERDIV16SF,
28463 IX86_BUILTIN_SCATTERDIV16SI,
28464 IX86_BUILTIN_SCATTERDIV8DF,
28465 IX86_BUILTIN_SCATTERDIV8DI,
28466 IX86_BUILTIN_SCATTERSIV16SF,
28467 IX86_BUILTIN_SCATTERSIV16SI,
28468 IX86_BUILTIN_SCATTERSIV8DF,
28469 IX86_BUILTIN_SCATTERSIV8DI,
28471 /* AVX512PF */
28472 IX86_BUILTIN_GATHERPFQPD,
28473 IX86_BUILTIN_GATHERPFDPS,
28474 IX86_BUILTIN_GATHERPFDPD,
28475 IX86_BUILTIN_GATHERPFQPS,
28476 IX86_BUILTIN_SCATTERPFDPD,
28477 IX86_BUILTIN_SCATTERPFDPS,
28478 IX86_BUILTIN_SCATTERPFQPD,
28479 IX86_BUILTIN_SCATTERPFQPS,
28481 /* AVX-512ER */
28482 IX86_BUILTIN_EXP2PD_MASK,
28483 IX86_BUILTIN_EXP2PS_MASK,
28484 IX86_BUILTIN_EXP2PS,
28485 IX86_BUILTIN_RCP28PD,
28486 IX86_BUILTIN_RCP28PS,
28487 IX86_BUILTIN_RCP28SD,
28488 IX86_BUILTIN_RCP28SS,
28489 IX86_BUILTIN_RSQRT28PD,
28490 IX86_BUILTIN_RSQRT28PS,
28491 IX86_BUILTIN_RSQRT28SD,
28492 IX86_BUILTIN_RSQRT28SS,
28494 /* SHA builtins. */
28495 IX86_BUILTIN_SHA1MSG1,
28496 IX86_BUILTIN_SHA1MSG2,
28497 IX86_BUILTIN_SHA1NEXTE,
28498 IX86_BUILTIN_SHA1RNDS4,
28499 IX86_BUILTIN_SHA256MSG1,
28500 IX86_BUILTIN_SHA256MSG2,
28501 IX86_BUILTIN_SHA256RNDS2,
28503 /* TFmode support builtins. */
28504 IX86_BUILTIN_INFQ,
28505 IX86_BUILTIN_HUGE_VALQ,
28506 IX86_BUILTIN_FABSQ,
28507 IX86_BUILTIN_COPYSIGNQ,
28509 /* Vectorizer support builtins. */
28510 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
28511 IX86_BUILTIN_CPYSGNPS,
28512 IX86_BUILTIN_CPYSGNPD,
28513 IX86_BUILTIN_CPYSGNPS256,
28514 IX86_BUILTIN_CPYSGNPS512,
28515 IX86_BUILTIN_CPYSGNPD256,
28516 IX86_BUILTIN_CPYSGNPD512,
28517 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
28518 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
28521 /* FMA4 instructions. */
28522 IX86_BUILTIN_VFMADDSS,
28523 IX86_BUILTIN_VFMADDSD,
28524 IX86_BUILTIN_VFMADDPS,
28525 IX86_BUILTIN_VFMADDPD,
28526 IX86_BUILTIN_VFMADDPS256,
28527 IX86_BUILTIN_VFMADDPD256,
28528 IX86_BUILTIN_VFMADDSUBPS,
28529 IX86_BUILTIN_VFMADDSUBPD,
28530 IX86_BUILTIN_VFMADDSUBPS256,
28531 IX86_BUILTIN_VFMADDSUBPD256,
28533 /* FMA3 instructions. */
28534 IX86_BUILTIN_VFMADDSS3,
28535 IX86_BUILTIN_VFMADDSD3,
28537 /* XOP instructions. */
28538 IX86_BUILTIN_VPCMOV,
28539 IX86_BUILTIN_VPCMOV_V2DI,
28540 IX86_BUILTIN_VPCMOV_V4SI,
28541 IX86_BUILTIN_VPCMOV_V8HI,
28542 IX86_BUILTIN_VPCMOV_V16QI,
28543 IX86_BUILTIN_VPCMOV_V4SF,
28544 IX86_BUILTIN_VPCMOV_V2DF,
28545 IX86_BUILTIN_VPCMOV256,
28546 IX86_BUILTIN_VPCMOV_V4DI256,
28547 IX86_BUILTIN_VPCMOV_V8SI256,
28548 IX86_BUILTIN_VPCMOV_V16HI256,
28549 IX86_BUILTIN_VPCMOV_V32QI256,
28550 IX86_BUILTIN_VPCMOV_V8SF256,
28551 IX86_BUILTIN_VPCMOV_V4DF256,
28553 IX86_BUILTIN_VPPERM,
28555 IX86_BUILTIN_VPMACSSWW,
28556 IX86_BUILTIN_VPMACSWW,
28557 IX86_BUILTIN_VPMACSSWD,
28558 IX86_BUILTIN_VPMACSWD,
28559 IX86_BUILTIN_VPMACSSDD,
28560 IX86_BUILTIN_VPMACSDD,
28561 IX86_BUILTIN_VPMACSSDQL,
28562 IX86_BUILTIN_VPMACSSDQH,
28563 IX86_BUILTIN_VPMACSDQL,
28564 IX86_BUILTIN_VPMACSDQH,
28565 IX86_BUILTIN_VPMADCSSWD,
28566 IX86_BUILTIN_VPMADCSWD,
28568 IX86_BUILTIN_VPHADDBW,
28569 IX86_BUILTIN_VPHADDBD,
28570 IX86_BUILTIN_VPHADDBQ,
28571 IX86_BUILTIN_VPHADDWD,
28572 IX86_BUILTIN_VPHADDWQ,
28573 IX86_BUILTIN_VPHADDDQ,
28574 IX86_BUILTIN_VPHADDUBW,
28575 IX86_BUILTIN_VPHADDUBD,
28576 IX86_BUILTIN_VPHADDUBQ,
28577 IX86_BUILTIN_VPHADDUWD,
28578 IX86_BUILTIN_VPHADDUWQ,
28579 IX86_BUILTIN_VPHADDUDQ,
28580 IX86_BUILTIN_VPHSUBBW,
28581 IX86_BUILTIN_VPHSUBWD,
28582 IX86_BUILTIN_VPHSUBDQ,
28584 IX86_BUILTIN_VPROTB,
28585 IX86_BUILTIN_VPROTW,
28586 IX86_BUILTIN_VPROTD,
28587 IX86_BUILTIN_VPROTQ,
28588 IX86_BUILTIN_VPROTB_IMM,
28589 IX86_BUILTIN_VPROTW_IMM,
28590 IX86_BUILTIN_VPROTD_IMM,
28591 IX86_BUILTIN_VPROTQ_IMM,
28593 IX86_BUILTIN_VPSHLB,
28594 IX86_BUILTIN_VPSHLW,
28595 IX86_BUILTIN_VPSHLD,
28596 IX86_BUILTIN_VPSHLQ,
28597 IX86_BUILTIN_VPSHAB,
28598 IX86_BUILTIN_VPSHAW,
28599 IX86_BUILTIN_VPSHAD,
28600 IX86_BUILTIN_VPSHAQ,
28602 IX86_BUILTIN_VFRCZSS,
28603 IX86_BUILTIN_VFRCZSD,
28604 IX86_BUILTIN_VFRCZPS,
28605 IX86_BUILTIN_VFRCZPD,
28606 IX86_BUILTIN_VFRCZPS256,
28607 IX86_BUILTIN_VFRCZPD256,
28609 IX86_BUILTIN_VPCOMEQUB,
28610 IX86_BUILTIN_VPCOMNEUB,
28611 IX86_BUILTIN_VPCOMLTUB,
28612 IX86_BUILTIN_VPCOMLEUB,
28613 IX86_BUILTIN_VPCOMGTUB,
28614 IX86_BUILTIN_VPCOMGEUB,
28615 IX86_BUILTIN_VPCOMFALSEUB,
28616 IX86_BUILTIN_VPCOMTRUEUB,
28618 IX86_BUILTIN_VPCOMEQUW,
28619 IX86_BUILTIN_VPCOMNEUW,
28620 IX86_BUILTIN_VPCOMLTUW,
28621 IX86_BUILTIN_VPCOMLEUW,
28622 IX86_BUILTIN_VPCOMGTUW,
28623 IX86_BUILTIN_VPCOMGEUW,
28624 IX86_BUILTIN_VPCOMFALSEUW,
28625 IX86_BUILTIN_VPCOMTRUEUW,
28627 IX86_BUILTIN_VPCOMEQUD,
28628 IX86_BUILTIN_VPCOMNEUD,
28629 IX86_BUILTIN_VPCOMLTUD,
28630 IX86_BUILTIN_VPCOMLEUD,
28631 IX86_BUILTIN_VPCOMGTUD,
28632 IX86_BUILTIN_VPCOMGEUD,
28633 IX86_BUILTIN_VPCOMFALSEUD,
28634 IX86_BUILTIN_VPCOMTRUEUD,
28636 IX86_BUILTIN_VPCOMEQUQ,
28637 IX86_BUILTIN_VPCOMNEUQ,
28638 IX86_BUILTIN_VPCOMLTUQ,
28639 IX86_BUILTIN_VPCOMLEUQ,
28640 IX86_BUILTIN_VPCOMGTUQ,
28641 IX86_BUILTIN_VPCOMGEUQ,
28642 IX86_BUILTIN_VPCOMFALSEUQ,
28643 IX86_BUILTIN_VPCOMTRUEUQ,
28645 IX86_BUILTIN_VPCOMEQB,
28646 IX86_BUILTIN_VPCOMNEB,
28647 IX86_BUILTIN_VPCOMLTB,
28648 IX86_BUILTIN_VPCOMLEB,
28649 IX86_BUILTIN_VPCOMGTB,
28650 IX86_BUILTIN_VPCOMGEB,
28651 IX86_BUILTIN_VPCOMFALSEB,
28652 IX86_BUILTIN_VPCOMTRUEB,
28654 IX86_BUILTIN_VPCOMEQW,
28655 IX86_BUILTIN_VPCOMNEW,
28656 IX86_BUILTIN_VPCOMLTW,
28657 IX86_BUILTIN_VPCOMLEW,
28658 IX86_BUILTIN_VPCOMGTW,
28659 IX86_BUILTIN_VPCOMGEW,
28660 IX86_BUILTIN_VPCOMFALSEW,
28661 IX86_BUILTIN_VPCOMTRUEW,
28663 IX86_BUILTIN_VPCOMEQD,
28664 IX86_BUILTIN_VPCOMNED,
28665 IX86_BUILTIN_VPCOMLTD,
28666 IX86_BUILTIN_VPCOMLED,
28667 IX86_BUILTIN_VPCOMGTD,
28668 IX86_BUILTIN_VPCOMGED,
28669 IX86_BUILTIN_VPCOMFALSED,
28670 IX86_BUILTIN_VPCOMTRUED,
28672 IX86_BUILTIN_VPCOMEQQ,
28673 IX86_BUILTIN_VPCOMNEQ,
28674 IX86_BUILTIN_VPCOMLTQ,
28675 IX86_BUILTIN_VPCOMLEQ,
28676 IX86_BUILTIN_VPCOMGTQ,
28677 IX86_BUILTIN_VPCOMGEQ,
28678 IX86_BUILTIN_VPCOMFALSEQ,
28679 IX86_BUILTIN_VPCOMTRUEQ,
28681 /* LWP instructions. */
28682 IX86_BUILTIN_LLWPCB,
28683 IX86_BUILTIN_SLWPCB,
28684 IX86_BUILTIN_LWPVAL32,
28685 IX86_BUILTIN_LWPVAL64,
28686 IX86_BUILTIN_LWPINS32,
28687 IX86_BUILTIN_LWPINS64,
28689 IX86_BUILTIN_CLZS,
28691 /* RTM */
28692 IX86_BUILTIN_XBEGIN,
28693 IX86_BUILTIN_XEND,
28694 IX86_BUILTIN_XABORT,
28695 IX86_BUILTIN_XTEST,
28697 /* BMI instructions. */
28698 IX86_BUILTIN_BEXTR32,
28699 IX86_BUILTIN_BEXTR64,
28700 IX86_BUILTIN_CTZS,
28702 /* TBM instructions. */
28703 IX86_BUILTIN_BEXTRI32,
28704 IX86_BUILTIN_BEXTRI64,
28706 /* BMI2 instructions. */
28707 IX86_BUILTIN_BZHI32,
28708 IX86_BUILTIN_BZHI64,
28709 IX86_BUILTIN_PDEP32,
28710 IX86_BUILTIN_PDEP64,
28711 IX86_BUILTIN_PEXT32,
28712 IX86_BUILTIN_PEXT64,
28714 /* ADX instructions. */
28715 IX86_BUILTIN_ADDCARRYX32,
28716 IX86_BUILTIN_ADDCARRYX64,
28718 /* FSGSBASE instructions. */
28719 IX86_BUILTIN_RDFSBASE32,
28720 IX86_BUILTIN_RDFSBASE64,
28721 IX86_BUILTIN_RDGSBASE32,
28722 IX86_BUILTIN_RDGSBASE64,
28723 IX86_BUILTIN_WRFSBASE32,
28724 IX86_BUILTIN_WRFSBASE64,
28725 IX86_BUILTIN_WRGSBASE32,
28726 IX86_BUILTIN_WRGSBASE64,
28728 /* RDRND instructions. */
28729 IX86_BUILTIN_RDRAND16_STEP,
28730 IX86_BUILTIN_RDRAND32_STEP,
28731 IX86_BUILTIN_RDRAND64_STEP,
28733 /* RDSEED instructions. */
28734 IX86_BUILTIN_RDSEED16_STEP,
28735 IX86_BUILTIN_RDSEED32_STEP,
28736 IX86_BUILTIN_RDSEED64_STEP,
28738 /* F16C instructions. */
28739 IX86_BUILTIN_CVTPH2PS,
28740 IX86_BUILTIN_CVTPH2PS256,
28741 IX86_BUILTIN_CVTPS2PH,
28742 IX86_BUILTIN_CVTPS2PH256,
28744 /* CFString built-in for darwin */
28745 IX86_BUILTIN_CFSTRING,
28747 /* Builtins to get CPU type and supported features. */
28748 IX86_BUILTIN_CPU_INIT,
28749 IX86_BUILTIN_CPU_IS,
28750 IX86_BUILTIN_CPU_SUPPORTS,
28752 /* Read/write FLAGS register built-ins. */
28753 IX86_BUILTIN_READ_FLAGS,
28754 IX86_BUILTIN_WRITE_FLAGS,
28756 IX86_BUILTIN_MAX
28757 };
28758
28759 /* Table for the ix86 builtin decls. */
28760 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
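/* Illustrative sketch: the decls saved in ix86_builtins are what the target
   builtin-decl hook later hands back to the front end, so a lookup is
   essentially a bounds-checked array access as below.  The function name
   here is a hypothetical example only, not part of the original file.  */
static tree ATTRIBUTE_UNUSED
ix86_example_builtin_lookup (unsigned int code)
{
  /* Out-of-range codes have no decl recorded for them.  */
  if (code >= (unsigned int) IX86_BUILTIN_MAX)
    return NULL_TREE;
  return ix86_builtins[code];
}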
28762 /* Table of all of the builtin functions that are possible with different ISAs
28763 but are waiting to be built until a function is declared to use that
28764 ISA. */
28765 struct builtin_isa {
28766 const char *name; /* function name */
28767 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
28768 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
28769 bool const_p; /* true if the declaration is constant */
28770 bool set_and_not_built_p; /* true if the builtin was deferred and its decl not yet built */
28771 };
28772
28773 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
28776 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
28777 of which isa_flags to use in the ix86_builtins_isa array. Stores the
28778 function decl in the ix86_builtins array. Returns the function decl, or
28779 NULL_TREE if the builtin was not added.
28781 If the front end has a special hook for builtin functions, delay adding
28782 builtin functions that aren't in the current ISA until the ISA is changed
28783 with function-specific optimization. Doing so can save about 300K for the
28784 default compiler. When the builtin is expanded, check at that time whether
28785 it is valid.
28787 If the front end doesn't have a special hook, record all builtins, even
28788 those whose instruction set isn't in the current ISA, in case the user uses
28789 function-specific options for a different ISA, so that we don't get scope
28790 errors if a builtin is added in the middle of a function scope. */
28792 static inline tree
28793 def_builtin (HOST_WIDE_INT mask, const char *name,
28794 enum ix86_builtin_func_type tcode,
28795 enum ix86_builtins code)
28796 {
28797   tree decl = NULL_TREE;
28798
28799   if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
28800     {
28801       ix86_builtins_isa[(int) code].isa = mask;
28802
28803       mask &= ~OPTION_MASK_ISA_64BIT;
28804       if (mask == 0
28805           || (mask & ix86_isa_flags) != 0
28806           || (lang_hooks.builtin_function
28807               == lang_hooks.builtin_function_ext_scope))
28808
28809         {
28810           tree type = ix86_get_builtin_func_type (tcode);
28811           decl = add_builtin_function (name, type, code, BUILT_IN_MD,
28812                                        NULL, NULL_TREE);
28813           ix86_builtins[(int) code] = decl;
28814           ix86_builtins_isa[(int) code].set_and_not_built_p = false;
28815         }
28816       else
28817         {
28818           ix86_builtins[(int) code] = NULL_TREE;
28819           ix86_builtins_isa[(int) code].tcode = tcode;
28820           ix86_builtins_isa[(int) code].name = name;
28821           ix86_builtins_isa[(int) code].const_p = false;
28822           ix86_builtins_isa[(int) code].set_and_not_built_p = true;
28823         }
28824     }
28825
28826   return decl;
28827 }
28828
28829 /* Like def_builtin, but also marks the function decl "const". */
28831 static inline tree
28832 def_builtin_const (HOST_WIDE_INT mask, const char *name,
28833 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
28834 {
28835   tree decl = def_builtin (mask, name, tcode, code);
28836   if (decl)
28837     TREE_READONLY (decl) = 1;
28838   else
28839     ix86_builtins_isa[(int) code].const_p = true;
28840
28841   return decl;
28842 }
28843
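/* Illustrative sketch of how the two helpers above are typically invoked
   during builtin initialization.  The builtin names, enum values and the
   pairing of ISA mask with function type below are hypothetical placeholders,
   so the calls are kept under "#if 0" rather than compiled.  */
#if 0
  /* Declared (or deferred) without the "const" marking.  */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example_store",
               VOID_FTYPE_VOID, IX86_BUILTIN_EXAMPLE_STORE);

  /* Same, but the resulting decl is marked TREE_READONLY, or const_p is
     recorded for later if the decl is deferred.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example_arith",
                     V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_EXAMPLE_ARITH);
#endif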
28844 /* Add any new builtin functions for a given ISA that may not have been
28845 declared. This saves a bit of space compared to adding all of the
28846 declarations to the tree, even if we didn't use them. */
28848 static void
28849 ix86_add_new_builtins (HOST_WIDE_INT isa)
28850 {
28851   int i;
28852
28853   for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
28854     {
28855       if ((ix86_builtins_isa[i].isa & isa) != 0
28856           && ix86_builtins_isa[i].set_and_not_built_p)
28857         {
28858           tree decl, type;
28859
28860           /* Don't define the builtin again. */
28861           ix86_builtins_isa[i].set_and_not_built_p = false;
28862
28863           type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
28864           decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
28865                                                  type, i, BUILT_IN_MD, NULL,
28866                                                  NULL_TREE);
28867
28868           ix86_builtins[i] = decl;
28869           if (ix86_builtins_isa[i].const_p)
28870             TREE_READONLY (decl) = 1;
28871         }
28872     }
28873 }
28874
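/* Illustrative sketch of the deferred-build flow implemented by def_builtin
   and ix86_add_new_builtins above.  The mask/name/type/enum combination is a
   hypothetical placeholder, so the block is kept under "#if 0".  */
#if 0
  /* With -mavx512f not enabled, this only records the builtin in
     ix86_builtins_isa and leaves set_and_not_built_p true.  */
  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_example512",
               V16SF_FTYPE_V16SF /* placeholder type code */,
               IX86_BUILTIN_EXAMPLE512);

  /* Later, once the ISA is switched on (for instance by a target attribute),
     the delayed decls are materialized from the table.  */
  ix86_add_new_builtins (ix86_isa_flags);
#endif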
28875 /* Bits for builtin_description.flag. */
28877 /* Set when we don't support the comparison natively, and should
28878 swap_comparison in order to support it. */
28879 #define BUILTIN_DESC_SWAP_OPERANDS 1
28881 struct builtin_description
28882 {
28883   const HOST_WIDE_INT mask;
28884   const enum insn_code icode;
28885   const char *const name;
28886   const enum ix86_builtins code;
28887   const enum rtx_code comparison;
28888   const int flag;
28889 };
28890
28891 static const struct builtin_description bdesc_comi[] =
28892 {
28893 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
28894 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
28895 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
28896 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
28897 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
28898 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
28899 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
28900 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
28901 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
28902 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
28903 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
28904 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
28905 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
28906 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
28907 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
28908 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
28909 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
28910 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
28911 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
28912 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
28913 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
28914 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
28915 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
28916 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
28917 };
28918
28919 static const struct builtin_description bdesc_pcmpestr[] =
28920 {
28921 /* SSE4.2 */
28922 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
28923 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
28924 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
28925 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
28926 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
28927 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
28928 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
28929 };
28930
28931 static const struct builtin_description bdesc_pcmpistr[] =
28932 {
28933 /* SSE4.2 */
28934 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
28935 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
28936 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
28937 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
28938 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
28939 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
28940 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
28941 };
28942
28943 /* Special builtins with variable number of arguments. */
28944 static const struct builtin_description bdesc_special_args[] =
28945 {
28946 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
28947 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
28948 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
28950 /* 80387 (for use internally for atomic compound assignment). */
28951 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
28952 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
28953 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) VOID_FTYPE_PUSHORT },
28954 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
28956 /* MMX */
28957 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
28959 /* 3DNow! */
28960 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
28962 /* FXSR, XSAVE and XSAVEOPT */
28963 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
28964 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
28965 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28966 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28967 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28969 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
28970 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
28971 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28972 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28973 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28975 /* SSE */
28976 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
28977 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
28978 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
28980 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
28981 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
28982 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
28983 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
28985 /* SSE or 3DNow!A */
28986 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
28987 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
28989 /* SSE2 */
28990 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
28991 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
28992 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
28993 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
28994 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
28995 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
28996 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
28997 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
28998 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
28999 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29001 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29002 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29004 /* SSE3 */
29005 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29007 /* SSE4.1 */
29008 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
29010 /* SSE4A */
29011 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29012 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29014 /* AVX */
29015 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
29016 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
29018 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29019 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29020 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29021 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
29022 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
29024 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29025 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29026 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29027 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29028 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29029 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
29030 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29032 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
29033 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29034 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29036 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
29037 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
29038 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
29039 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
29040 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
29041 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
29042 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
29043 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
29045 /* AVX2 */
29046 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
29047 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
29048 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
29049 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
29050 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
29051 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
29052 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
29053 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
29054 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
29056 /* AVX512F */
29057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
29078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
29079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
29080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
29081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29105 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
29106 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
29107 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
29108 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
29109 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
29110 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
29112 /* FSGSBASE */
29113 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29114 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29115 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29116 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29117 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29118 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29119 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29120 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29122 /* RTM */
29123 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29124 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
29125 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
29126 };
29127
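/* Illustrative sketch of how a description table such as bdesc_special_args
   is consumed: each entry's flag field carries the function type code, and
   the entry is handed to def_builtin.  The loop below is a simplified
   example, not the file's actual initialization code, so it is kept under
   "#if 0".  */
#if 0
  const struct builtin_description *d;
  size_t i;

  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;
      def_builtin (d->mask, d->name,
                   (enum ix86_builtin_func_type) d->flag, d->code);
    }
#endif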
29128 /* Builtins with variable number of arguments. */
29129 static const struct builtin_description bdesc_args[] =
29130 {
29131 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
29132 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
29133 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
29134 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29135 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29136 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29137 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29139 /* MMX */
29140 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29141 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29142 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29143 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29144 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29145 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29147 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29148 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29149 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29150 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29151 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29152 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29153 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29154 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29156 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29157 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29159 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29160 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29161 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29162 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29164 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29165 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29166 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29167 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29168 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29169 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29171 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29172 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29173 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29174 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29175 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
29176 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
29178 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29179 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
29180 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29182 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
29184 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29185 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29186 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29187 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29188 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29189 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29191 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29192 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29193 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29194 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29195 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29196 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29198 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29199 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29200 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29201 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29203 /* 3DNow! */
29204 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29205 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29206 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29207 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29209 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29210 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29211 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29212 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29213 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29214 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29215 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29216 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29217 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29218 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29219 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29220 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29221 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29222 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29223 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29225 /* 3DNow!A */
29226 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29227 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29228 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29229 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29230 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29231 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29233 /* SSE */
29234 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
29235 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29236 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29237 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29238 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29239 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29240 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29241 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29242 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29243 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29244 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29245 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29247 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29249 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29250 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29251 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29252 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29253 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29254 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29255 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29256 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29258 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29259 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29260 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29261 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29262 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29263 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29264 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29265 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29266 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29267 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29268 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29269 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29270 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29271 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29272 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29273 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29274 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29275 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29276 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29277 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29279 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29280 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29281 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29282 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29284 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29285 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29286 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29287 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29289 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29291 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29292 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29293 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29294 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29295 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29297 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
29298 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
29299 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
29301 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
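/* Scalar sqrt/rsqrt/rcp.  The _VEC_MERGE prototypes mean the operation is
   applied to the low element only and merged with the upper elements of
   the input, matching the SS-form instructions.  */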
29303 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29304 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29305 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29307 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
29308 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
29310 /* SSE MMX or 3DNow!A */
29311 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29312 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29313 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29315 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29316 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29317 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29318 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29320 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
29321 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
29323 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
29325 /* SSE2 */
29326 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29328 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
29329 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
29330 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29331 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
29332 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
29334 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29335 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29336 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
29337 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29338 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29340 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
29342 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29343 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29344 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29345 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29347 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29348 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
29349 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29351 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29352 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29353 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29354 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29355 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29356 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29357 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29358 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29360 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29361 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29362 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29363 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29364 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29365 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29366 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29367 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29368 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29369 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29370 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29371 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29372 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29373 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29374 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29375 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29376 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29377 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29378 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29379 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29381 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29382 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29383 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29384 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29386 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29387 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29388 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29389 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29391 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29393 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29394 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29395 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29397 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29399 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29400 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29401 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29402 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29403 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29404 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29405 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29406 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29408 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29412 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29415 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29417 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29418 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29420 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29421 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29422 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29423 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29429 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29433 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29435 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29436 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29437 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29438 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29440 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29441 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29442 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29443 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29444 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29445 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29446 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29447 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29449 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29450 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29451 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29453 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29454 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
29456 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
29457 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29459 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
29461 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
29462 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
29463 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
29464 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
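/* Shift builtins.  The *_SI_COUNT prototypes take the shift count as a
   scalar, the vector *_COUNT forms shift by the count held in a vector
   operand, and the _INT_CONVERT forms (pslldqi/psrldqi) are full-register
   shifts expanded through the V1TI patterns, the _CONVERT suffix marking
   the mode conversion.  */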
29466 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29467 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29468 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29469 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29470 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29471 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29472 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29474 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29475 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29476 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29477 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29478 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29479 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29480 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29482 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29483 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29484 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29485 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29487 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
29488 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29489 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29491 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
29493 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29495 /* SSE2 MMX */
29496 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29497 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29499 /* SSE3 */
29500 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29501 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29503 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29504 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29505 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29506 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29507 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29508 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29510 /* SSSE3 */
29511 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29512 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
29513 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29514 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
29515 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29516 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29518 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29519 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29520 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29521 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29522 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29523 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29524 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29525 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29526 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29527 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29528 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29529 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29530 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
29531 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
29532 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29533 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29534 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29535 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29536 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29537 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29538 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29539 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29540 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29541 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29543 /* SSSE3 palignr */
29544 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
29545 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
29547 /* SSE4.1 */
29548 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29549 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29550 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
29551 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
29552 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29553 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29554 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29555 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
29556 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
29557 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
29559 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29560 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29561 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29562 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29563 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29564 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29565 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29566 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29567 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29568 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29569 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29570 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29571 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29573 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29574 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29575 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29576 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29577 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29578 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29579 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29580 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29581 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29582 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29583 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29584 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29586 /* SSE4.1 rounding and ptest */
29587 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29588 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29589 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29590 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
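/* The floor/ceil/trunc/rint variants below reuse the SSE4.1 round
   patterns; the rounding-mode immediate (ROUND_FLOOR, ROUND_CEIL, ...)
   is carried in the field that normally holds the comparison rtx code,
   hence the casts.  */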
29592 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
29593 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
29594 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
29595 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
29597 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29598 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29600 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29601 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29603 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
29604 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
29605 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
29606 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
29608 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
29609 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
29611 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29612 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
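/* PTEST.  The rtx code selects which condition the expander tests:
   EQ for ptestz (ZF), LTU for ptestc (CF), GTU for ptestnzc.  */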
29614 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29615 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29616 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29618 /* SSE4.2 */
29619 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29620 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
29621 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
29622 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29623 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29625 /* SSE4A */
29626 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
29627 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
29628 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
29629 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
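/* A zero in the name slot means the user-visible builtin is declared
   elsewhere (for AES and PCLMUL, with their own ISA masks); the entry
   here only supplies the insn code and prototype used at expansion.  */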
29631 /* AES */
29632 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
29633 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29635 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29636 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29637 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29638 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29640 /* PCLMUL */
29641 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
29643 /* AVX */
29644 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29645 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29646 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29647 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29648 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29649 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29650 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29651 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29652 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29653 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29654 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29655 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29656 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29657 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29658 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29659 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29660 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29661 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29662 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29663 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29664 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29665 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29666 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29667 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29668 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29669 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29671 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
29672 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
29673 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
29674 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29676 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29677 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29678 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
29679 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
29680 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29681 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29682 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29683 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29684 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29685 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29686 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29687 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29688 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29689 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
29690 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
29691 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
29692 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
29693 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
29694 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
29695 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29696 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
29697 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29698 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29699 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29700 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29701 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29702 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29703 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29704 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29705 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29706 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29707 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
29708 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
29709 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
29711 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29712 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29713 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29715 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29716 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29717 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29719 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
29727 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
29728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
29729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
29731 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29732 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29737 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
29738 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
29739 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
29740 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
29742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
29743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
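/* The floor/ceil/trunc/rint entries above share their insn patterns with
   the generic round builtins; the ROUND_* value stored in the comparison
   slot supplies the rounding-control immediate at expansion time
   (cf. ix86_expand_sse_round).  */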
29745 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29746 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29749 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29750 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29751 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29753 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29754 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29755 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29756 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
29757 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
29758 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
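/* The si256_si/ps256_ps/pd256_pd entries widen a 128-bit value to 256 bits
   for the _mm256_cast*128_*256 intrinsics (the upper half is left
   unspecified), while the si_si256/ps_ps256/pd_pd256 entries extract the
   low 128 bits for the casts in the opposite direction.  */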
29760 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29761 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29762 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29763 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29764 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29765 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29766 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29767 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29768 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29769 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29770 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29771 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29772 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29773 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29774 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
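/* For the vtest and ptest entries the comparison code selects which flag
   the expander tests after the compare: EQ -> ZF (the *z* forms),
   LTU -> CF (the *c* forms), GTU -> neither flag set (the *nzc* forms);
   see ix86_expand_sse_ptest.  */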
29776 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
29777 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
29779 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29780 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29782 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
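/* Reader's note: each entry in this table is { ISA option mask,
   CODE_FOR_* insn pattern, builtin name, IX86_BUILTIN_* enumerator,
   optional sub-code (an rtx comparison code or rounding selector,
   UNKNOWN when unused), prototype enumerator cast to int }.  */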
29784 /* AVX2 */
29785 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
29786 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
29787 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
29788 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
29789 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29790 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29791 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29792 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29793 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29794 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29795 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29796 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29797 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29798 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29799 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29800 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29801 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
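/* The _CONVERT suffix on the prototype means the builtin is declared with
   vector modes different from those of the insn pattern (V4DI here versus
   the V2TI mode of avx2_palignrv2ti); the expander converts the operands
   with gen_lowpart.  */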
29802 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29803 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29804 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29805 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29806 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
29807 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
29808 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29809 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29810 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29811 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29812 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29813 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29814 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29815 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29816 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29817 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29818 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29819 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29820 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29821 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29822 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29823 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
29824 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29825 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29826 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29827 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29828 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29829 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29830 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29831 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29832 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29833 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29834 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29835 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29836 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
29837 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29838 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29839 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29840 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29841 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29842 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29843 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29844 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29845 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29846 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29847 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29848 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29849 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29850 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29851 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29852 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29853 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29854 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29855 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29856 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29857 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29858 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29859 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
29860 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29861 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29862 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29863 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29864 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29865 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29866 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29867 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29868 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29869 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29870 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29871 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29872 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29873 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29874 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29875 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29876 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29877 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29878 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29879 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29880 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29881 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29882 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
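/* Each shift comes in two forms sharing one pattern: the *_SI_COUNT and
   *_INT_COUNT prototypes take the count as a scalar integer (the
   immediate-count instructions), while the *_V8HI_COUNT, *_V4SI_COUNT and
   *_V2DI_COUNT prototypes take it from the low quadword of an XMM
   operand.  */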
29883 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29884 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29885 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29886 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29887 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29888 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29889 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29890 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29891 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29892 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29893 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29894 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29895 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29896 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29897 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29898 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29899 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29900 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29901 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29902 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29903 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29904 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
29905 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29906 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
29907 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
29908 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29909 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29910 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29911 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29912 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29913 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29914 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29915 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29916 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29917 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
29918 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
29919 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
29920 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
29921 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29922 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29923 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29924 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29925 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29926 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29927 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29928 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29929 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29930 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29932 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29934 /* BMI */
29935 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29936 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29937 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
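/* For the BMI bit-field extract builtins the second operand is a control
   word: start bit in bits [7:0], field length in bits [15:8].  E.g.
   __builtin_ia32_bextr_u32 (x, (8 << 8) | 4) extracts 8 bits of X
   starting at bit 4.  __builtin_ctzs (and __builtin_clzs above, guarded
   by LZCNT) operate on 16-bit values.  */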
29939 /* TBM */
29940 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29941 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29943 /* F16C */
29944 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
29945 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
29946 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
29947 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
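/* The F16C builtins convert between packed half-precision and
   single-precision floats; the vcvtps2ph forms take an extra immediate
   selecting the rounding mode for the narrowing conversion.  */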
29949 /* BMI2 */
29950 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29951 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29952 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29953 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29954 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29955 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
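/* BZHI zeroes all bits of the first operand above the bit index given by
   the second.  PDEP scatters the low-order bits of the source to the bit
   positions set in the mask; PEXT gathers the bits selected by the mask
   into the low-order bits of the result, e.g.
   __builtin_ia32_pext_si (0xabcd, 0x0f0f) == 0xbd.  */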
29957 /* AVX512F */
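/* Most AVX512F entries describe masked builtins: after the regular
   operands they take a merge source vector and a write mask (HI for
   16-element vectors, QI for 8-element ones).  Destination elements whose
   mask bit is clear are copied from the merge source; the _maskz variants
   zero them instead.  */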
29958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
29959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
29960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
29965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
29966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
29967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
29968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
29969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
29970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
29971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
29972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
29975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
29976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
29977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
29978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29981 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29982 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
29983 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
29984 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
29985 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
29986 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
29987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
29988 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
29989 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
29990 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29991 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29992 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29993 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29994 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29995 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29996 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29997 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29999 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30001 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
30007 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
30008 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
30009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
30010 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30011 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
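/* The pmov, pmovs and pmovus entries above are narrowing conversions:
   plain truncation, truncation with signed saturation and truncation with
   unsigned saturation respectively, each with the usual merge source and
   write mask operands.  */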
30050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
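/* The rcp14 and rsqrt14 builtins compute reciprocal and
   reciprocal-square-root approximations with a relative error of less
   than 2^-14; the sd/ss variants operate only on the low element.  */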
30107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30119 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30120 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30121 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30122 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
30131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
30151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
30152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
30153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30154 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
30156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30159 /* Mask arithmetic operations */
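/* A sketch of what these entries cover: they operate on the 16-bit AVX-512
   mask (k) registers, so every entry below takes and/or returns HImode mask
   values (HI_FTYPE_HI and HI_FTYPE_HI_HI prototypes).  */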
30160 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30161 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30162 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
30163 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
30167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
30171 /* SHA */
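/* SHA extension builtins (SHA1/SHA256 message scheduling, next-state and
   rounds); note the name field is left 0 for these entries.  */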
30172 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30173 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30174 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30175 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30176 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30177 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30178 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
30179 };
30181 /* Builtins with rounding support. */
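/* Each entry below pairs an ISA mask and a *_round insn pattern with the
   builtin name, its IX86_BUILTIN_* code, an optional rtx comparison code
   and the prototype enum; the trailing _INT in the prototype is the
   explicit rounding-mode immediate passed as the last argument.  */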
30182 static const struct builtin_description bdesc_round_args[] =
30183 {
30184 /* AVX512F */
30185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
30190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
30191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
30192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
30193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
30194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
30195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
30198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
30200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
30202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
30204 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
30205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
30206 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
30207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
30208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30213 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
30214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
30215 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
30216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30217 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30220 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30221 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30222 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30223 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30224 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30225 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30227 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30228 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30229 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30231 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30234 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30265 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30267 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30269 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30271 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30273 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30275 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30277 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30279 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
30287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
30288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30305 /* AVX512ER */
30306 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30307 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30308 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30309 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30310 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30311 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30312 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30313 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30314 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30315 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30316 };
30318 /* FMA4 and XOP. */
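/* The MULTI_ARG_* macros below are local shorthand for the V*_FTYPE_*
   prototype enums used by bdesc_multi_arg; the suffix encodes operand
   count and element type (e.g. 3_SF is three V4SF operands, 2_DI_CMP is
   two V2DI operands plus a comparison code).  */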
30319 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30320 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30321 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30322 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30323 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30324 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30325 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30326 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30327 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30328 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30329 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30330 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30331 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30332 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30333 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30334 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30335 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30336 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30337 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30338 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30339 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30340 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30341 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30342 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30343 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30344 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30345 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30346 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30347 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30348 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30349 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30350 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30351 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30352 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30353 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30354 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30355 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30356 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30357 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30358 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30359 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30360 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30361 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30362 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30363 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30364 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30365 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30366 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30367 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30368 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30369 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30370 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
30372 static const struct builtin_description bdesc_multi_arg[] =
30373 {
30374 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
30375 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
30376 UNKNOWN, (int)MULTI_ARG_3_SF },
30377 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
30378 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
30379 UNKNOWN, (int)MULTI_ARG_3_DF },
30381 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
30382 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
30383 UNKNOWN, (int)MULTI_ARG_3_SF },
30384 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
30385 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
30386 UNKNOWN, (int)MULTI_ARG_3_DF },
30388 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
30389 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
30390 UNKNOWN, (int)MULTI_ARG_3_SF },
30391 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
30392 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
30393 UNKNOWN, (int)MULTI_ARG_3_DF },
30394 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
30395 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
30396 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30397 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
30398 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
30399 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30401 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
30402 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
30403 UNKNOWN, (int)MULTI_ARG_3_SF },
30404 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
30405 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
30406 UNKNOWN, (int)MULTI_ARG_3_DF },
30407 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
30408 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
30409 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30410 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
30411 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
30412 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30414 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
30415 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
30416 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
30417 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
30418 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
30419 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
30420 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
30422 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30423 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30424 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
30425 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
30426 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
30427 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
30428 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
30430 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
30432 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30433 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30434 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30435 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30436 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30437 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30438 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30439 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30440 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30441 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30442 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30443 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30445 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30446 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
30447 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
30448 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
30449 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
30450 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
30451 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
30452 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
30453 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30454 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
30455 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
30456 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
30457 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30458 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
30459 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
30460 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
30462 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
30463 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
30464 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
30465 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
30466 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
30467 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
30469 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30470 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30471 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30472 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30473 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30474 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30475 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30476 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30477 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30478 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30479 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30481 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30482 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
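/* XOP vpcom* comparisons: each element width has signed (maskcmp) and
   unsigned (maskcmp_uns*) forms keyed by an rtx comparison code, plus
   PCOM_FALSE/PCOM_TRUE pseudo-comparisons; the *neq* spellings below are
   aliases that share the corresponding IX86_BUILTIN_VPCOMNE* codes.  */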
30485 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
30486 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30487 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
30489 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
30490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
30491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
30493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
30494 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30495 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
30497 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
30498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
30499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
30501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
30502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
30505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
30506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
30507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
30509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
30513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
30514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
30515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
30517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
30518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
30521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
30522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
30523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
30525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
30526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
30529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
30530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
30531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
30533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
30534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
30537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
30538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
30539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
30541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
30545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
30546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
30547 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
30549 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30550 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30551 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30552 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30553 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30554 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30555 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30556 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30558 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30559 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30560 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30562 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30563 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30564 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30565 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30567 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
30568 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
30569 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
30570 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
30574 /* TM vector builtins. */
30576 /* Reuse the existing x86-specific `struct builtin_description' because
30577 it is convenient.  Add casts to make them fit.  */
30578 static const struct builtin_description bdesc_tm[] =
30580 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30581 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30582 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30583 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30584 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30585 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30586 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30588 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30589 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30590 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30591 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30592 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30593 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30594 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30596 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30597 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30598 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30599 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30600 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30601 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30602 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30604 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30605 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30606 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
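/* Illustrative sketch of how the table above is consumed: inside a
   __transaction_atomic block, a 128-bit vector store such as

       __m128 v;
       ...
       *p = v;        // p points to transaction-shared memory

   is expected to be instrumented into a call to the libitm entry point
   _ITM_WM128 through the __builtin__ITM_WM128 builtin declared here; the
   other rows follow the same _ITM_<kind><size> naming.  The exact lowering
   is chosen by the trans-mem passes, so treat this only as a rough
   picture.  */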
30609 /* TM callbacks. */
30611 /* Return the builtin decl needed to load a vector of TYPE. */
30613 static tree
30614 ix86_builtin_tm_load (tree type)
30616 if (TREE_CODE (type) == VECTOR_TYPE)
30618 switch (tree_to_uhwi (TYPE_SIZE (type)))
30620 case 64:
30621 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
30622 case 128:
30623 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
30624 case 256:
30625 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
30628 return NULL_TREE;
30631 /* Return the builtin decl needed to store a vector of TYPE. */
30633 static tree
30634 ix86_builtin_tm_store (tree type)
30636 if (TREE_CODE (type) == VECTOR_TYPE)
30638 switch (tree_to_uhwi (TYPE_SIZE (type)))
30640 case 64:
30641 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
30642 case 128:
30643 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
30644 case 256:
30645 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
30648 return NULL_TREE;
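/* For illustration: for a vector type such as 4 x float, TYPE_SIZE is 128,
   so ix86_builtin_tm_load returns the decl for BUILT_IN_TM_LOAD_M128 and
   ix86_builtin_tm_store the decl for BUILT_IN_TM_STORE_M128; for scalar or
   other-sized types both hooks return NULL_TREE and the generic byte-wise
   TM barriers are used instead.  */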
30651 /* Initialize the transactional memory vector load/store builtins. */
30653 static void
30654 ix86_init_tm_builtins (void)
30656 enum ix86_builtin_func_type ftype;
30657 const struct builtin_description *d;
30658 size_t i;
30659 tree decl;
30660 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30661 tree attrs_log, attrs_type_log;
30663 if (!flag_tm)
30664 return;
30666 /* If there are no builtins defined, we must be compiling in a
30667 language without trans-mem support. */
30668 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30669 return;
30671 /* Use whatever attributes a normal TM load has. */
30672 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30673 attrs_load = DECL_ATTRIBUTES (decl);
30674 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30675 /* Use whatever attributes a normal TM store has. */
30676 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30677 attrs_store = DECL_ATTRIBUTES (decl);
30678 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30679 /* Use whatever attributes a normal TM log has. */
30680 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30681 attrs_log = DECL_ATTRIBUTES (decl);
30682 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30684 for (i = 0, d = bdesc_tm;
30685 i < ARRAY_SIZE (bdesc_tm);
30686 i++, d++)
30688 if ((d->mask & ix86_isa_flags) != 0
30689 || (lang_hooks.builtin_function
30690 == lang_hooks.builtin_function_ext_scope))
30692 tree type, attrs, attrs_type;
30693 enum built_in_function code = (enum built_in_function) d->code;
30695 ftype = (enum ix86_builtin_func_type) d->flag;
30696 type = ix86_get_builtin_func_type (ftype);
30698 if (BUILTIN_TM_LOAD_P (code))
30700 attrs = attrs_load;
30701 attrs_type = attrs_type_load;
30703 else if (BUILTIN_TM_STORE_P (code))
30705 attrs = attrs_store;
30706 attrs_type = attrs_type_store;
30708 else
30710 attrs = attrs_log;
30711 attrs_type = attrs_type_log;
30713 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30714 /* The builtin without the prefix for
30715 calling it directly. */
30716 d->name + strlen ("__builtin_"),
30717 attrs);
30718 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
30719 set the TYPE_ATTRIBUTES. */
30720 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30722 set_builtin_decl (code, decl, false);
30727 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
30728 not in the current target ISA, so that the user can compile particular
30729 modules with target-specific options that differ from the command-line
30730 options. */
30731 static void
30732 ix86_init_mmx_sse_builtins (void)
30734 const struct builtin_description * d;
30735 enum ix86_builtin_func_type ftype;
30736 size_t i;
30738 /* Add all special builtins with variable number of operands. */
30739 for (i = 0, d = bdesc_special_args;
30740 i < ARRAY_SIZE (bdesc_special_args);
30741 i++, d++)
30743 if (d->name == 0)
30744 continue;
30746 ftype = (enum ix86_builtin_func_type) d->flag;
30747 def_builtin (d->mask, d->name, ftype, d->code);
30750 /* Add all builtins with variable number of operands. */
30751 for (i = 0, d = bdesc_args;
30752 i < ARRAY_SIZE (bdesc_args);
30753 i++, d++)
30755 if (d->name == 0)
30756 continue;
30758 ftype = (enum ix86_builtin_func_type) d->flag;
30759 def_builtin_const (d->mask, d->name, ftype, d->code);
30762 /* Add all builtins with rounding. */
30763 for (i = 0, d = bdesc_round_args;
30764 i < ARRAY_SIZE (bdesc_round_args);
30765 i++, d++)
30767 if (d->name == 0)
30768 continue;
30770 ftype = (enum ix86_builtin_func_type) d->flag;
30771 def_builtin_const (d->mask, d->name, ftype, d->code);
30774 /* pcmpestr[im] insns. */
30775 for (i = 0, d = bdesc_pcmpestr;
30776 i < ARRAY_SIZE (bdesc_pcmpestr);
30777 i++, d++)
30779 if (d->code == IX86_BUILTIN_PCMPESTRM128)
30780 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
30781 else
30782 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
30783 def_builtin_const (d->mask, d->name, ftype, d->code);
30786 /* pcmpistr[im] insns. */
30787 for (i = 0, d = bdesc_pcmpistr;
30788 i < ARRAY_SIZE (bdesc_pcmpistr);
30789 i++, d++)
30791 if (d->code == IX86_BUILTIN_PCMPISTRM128)
30792 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
30793 else
30794 ftype = INT_FTYPE_V16QI_V16QI_INT;
30795 def_builtin_const (d->mask, d->name, ftype, d->code);
30798 /* comi/ucomi insns. */
30799 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
30801 if (d->mask == OPTION_MASK_ISA_SSE2)
30802 ftype = INT_FTYPE_V2DF_V2DF;
30803 else
30804 ftype = INT_FTYPE_V4SF_V4SF;
30805 def_builtin_const (d->mask, d->name, ftype, d->code);
30808 /* SSE */
30809 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
30810 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
30811 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
30812 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
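/* Minimal usage sketch (this is what the <xmmintrin.h> _mm_getcsr and
   _mm_setcsr wrappers use):

       unsigned int csr = __builtin_ia32_stmxcsr ();   // read MXCSR
       csr |= 0x8040;                                  // e.g. set FTZ and DAZ
       __builtin_ia32_ldmxcsr (csr);                   // write it back
 */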
30814 /* SSE or 3DNow!A */
30815 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
30816 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
30817 IX86_BUILTIN_MASKMOVQ);
30819 /* SSE2 */
30820 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
30821 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
30823 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
30824 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
30825 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
30826 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
30828 /* SSE3. */
30829 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
30830 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
30831 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
30832 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
30834 /* AES */
30835 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
30836 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
30837 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
30838 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
30839 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
30840 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
30841 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
30842 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
30843 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
30844 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
30845 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
30846 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
30848 /* PCLMUL */
30849 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
30850 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
30852 /* RDRND */
30853 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
30854 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
30855 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
30856 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
30857 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
30858 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
30859 IX86_BUILTIN_RDRAND64_STEP);
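/* Usage sketch (essentially what the <immintrin.h> _rdrand32_step wrapper
   expands to):

       unsigned int r;
       while (!__builtin_ia32_rdrand32_step (&r))
         ;   // retry until the hardware RNG delivers a value
 */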
30861 /* AVX2 */
30862 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
30863 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
30864 IX86_BUILTIN_GATHERSIV2DF);
30866 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
30867 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
30868 IX86_BUILTIN_GATHERSIV4DF);
30870 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
30871 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
30872 IX86_BUILTIN_GATHERDIV2DF);
30874 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
30875 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
30876 IX86_BUILTIN_GATHERDIV4DF);
30878 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
30879 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
30880 IX86_BUILTIN_GATHERSIV4SF);
30882 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
30883 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
30884 IX86_BUILTIN_GATHERSIV8SF);
30886 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
30887 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
30888 IX86_BUILTIN_GATHERDIV4SF);
30890 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
30891 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
30892 IX86_BUILTIN_GATHERDIV8SF);
30894 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
30895 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
30896 IX86_BUILTIN_GATHERSIV2DI);
30898 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
30899 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
30900 IX86_BUILTIN_GATHERSIV4DI);
30902 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
30903 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
30904 IX86_BUILTIN_GATHERDIV2DI);
30906 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
30907 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
30908 IX86_BUILTIN_GATHERDIV4DI);
30910 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
30911 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
30912 IX86_BUILTIN_GATHERSIV4SI);
30914 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
30915 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
30916 IX86_BUILTIN_GATHERSIV8SI);
30918 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
30919 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
30920 IX86_BUILTIN_GATHERDIV4SI);
30922 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
30923 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
30924 IX86_BUILTIN_GATHERDIV8SI);
30926 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
30927 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
30928 IX86_BUILTIN_GATHERALTSIV4DF);
30930 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
30931 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
30932 IX86_BUILTIN_GATHERALTDIV8SF);
30934 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
30935 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
30936 IX86_BUILTIN_GATHERALTSIV4DI);
30938 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
30939 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
30940 IX86_BUILTIN_GATHERALTDIV8SI);
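/* Illustrative call of one of the gathers declared above, with the vector
   operands spelled via the GNU vector extension just for the example:

       typedef double v4df __attribute__ ((vector_size (32)));
       typedef int    v4si __attribute__ ((vector_size (16)));

       v4df
       gather4 (v4df src, const double *base, v4si idx, v4df mask)
       {
         // mask selects which elements are loaded; the rest come from src
         return __builtin_ia32_gathersiv4df (src, base, idx, mask, 8);
       }

   The last operand is the scale and must be a constant 1, 2, 4 or 8.  */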
30942 /* AVX512F */
30943 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
30944 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
30945 IX86_BUILTIN_GATHER3SIV16SF);
30947 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
30948 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
30949 IX86_BUILTIN_GATHER3SIV8DF);
30951 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
30952 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
30953 IX86_BUILTIN_GATHER3DIV16SF);
30955 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
30956 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
30957 IX86_BUILTIN_GATHER3DIV8DF);
30959 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
30960 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
30961 IX86_BUILTIN_GATHER3SIV16SI);
30963 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
30964 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
30965 IX86_BUILTIN_GATHER3SIV8DI);
30967 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
30968 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
30969 IX86_BUILTIN_GATHER3DIV16SI);
30971 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
30972 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
30973 IX86_BUILTIN_GATHER3DIV8DI);
30975 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
30976 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
30977 IX86_BUILTIN_GATHER3ALTSIV8DF);
30979 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
30980 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
30981 IX86_BUILTIN_GATHER3ALTDIV16SF);
30983 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
30984 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
30985 IX86_BUILTIN_GATHER3ALTSIV8DI);
30987 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
30988 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
30989 IX86_BUILTIN_GATHER3ALTDIV16SI);
30991 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
30992 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
30993 IX86_BUILTIN_SCATTERSIV16SF);
30995 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
30996 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
30997 IX86_BUILTIN_SCATTERSIV8DF);
30999 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
31000 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
31001 IX86_BUILTIN_SCATTERDIV16SF);
31003 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
31004 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
31005 IX86_BUILTIN_SCATTERDIV8DF);
31007 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
31008 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
31009 IX86_BUILTIN_SCATTERSIV16SI);
31011 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
31012 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
31013 IX86_BUILTIN_SCATTERSIV8DI);
31015 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
31016 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
31017 IX86_BUILTIN_SCATTERDIV16SI);
31019 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
31020 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
31021 IX86_BUILTIN_SCATTERDIV8DI);
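/* Illustrative counterpart for the 512-bit scatters above: an unmasked
   32-bit-index scatter of 16 floats boils down to roughly

       // addr: float *, idx: v16si indices, val: v16sf values, scale 4
       __builtin_ia32_scattersiv16sf (addr, (unsigned short) -1, idx, val, 4);

   where the second operand is the 16-bit element mask and the last one
   the scale.  */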
31023 /* AVX512PF */
31024 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
31025 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31026 IX86_BUILTIN_GATHERPFDPD);
31027 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31028 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31029 IX86_BUILTIN_GATHERPFDPS);
31030 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31031 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31032 IX86_BUILTIN_GATHERPFQPD);
31033 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31034 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31035 IX86_BUILTIN_GATHERPFQPS);
31036 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31037 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31038 IX86_BUILTIN_SCATTERPFDPD);
31039 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31040 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31041 IX86_BUILTIN_SCATTERPFDPS);
31042 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31043 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31044 IX86_BUILTIN_SCATTERPFQPD);
31045 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31046 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31047 IX86_BUILTIN_SCATTERPFQPS);
31049 /* SHA */
31050 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31051 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31052 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31053 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31054 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31055 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31056 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31057 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31058 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31059 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31060 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31061 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31062 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31063 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31065 /* RTM. */
31066 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31067 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31069 /* MMX access to the vec_init patterns. */
31070 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31071 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31073 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31074 V4HI_FTYPE_HI_HI_HI_HI,
31075 IX86_BUILTIN_VEC_INIT_V4HI);
31077 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31078 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31079 IX86_BUILTIN_VEC_INIT_V8QI);
31081 /* Access to the vec_extract patterns. */
31082 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31083 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31084 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31085 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31086 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31087 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31088 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31089 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31090 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31091 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31093 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31094 "__builtin_ia32_vec_ext_v4hi",
31095 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31097 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31098 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31100 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31101 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31103 /* Access to the vec_set patterns. */
31104 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31105 "__builtin_ia32_vec_set_v2di",
31106 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31108 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31109 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31111 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31112 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31114 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31115 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31117 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31118 "__builtin_ia32_vec_set_v4hi",
31119 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31121 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31122 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31124 /* RDSEED */
31125 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31126 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31127 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31128 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31129 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31130 "__builtin_ia32_rdseed_di_step",
31131 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31133 /* ADCX */
31134 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31135 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31136 def_builtin (OPTION_MASK_ISA_64BIT,
31137 "__builtin_ia32_addcarryx_u64",
31138 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31139 IX86_BUILTIN_ADDCARRYX64);
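/* Usage sketch matching the UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED prototype
   above (a and b stand for arbitrary unsigned ints):

       unsigned int sum;
       unsigned char carry
         = __builtin_ia32_addcarryx_u32 (0, a, b, &sum);
       // carry receives the carry-out, sum receives a + b
 */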
31141 /* Read/write FLAGS. */
31142 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
31143 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31144 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31145 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31146 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
31147 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31148 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31149 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
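/* Illustrative use of the 64-bit variants declared just above:

       unsigned long long flags = __builtin_ia32_readeflags_u64 ();
       __builtin_ia32_writeeflags_u64 (flags);   // restore the saved FLAGS
 */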
31152 /* Add FMA4 multi-argument instructions.  */
31153 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31155 if (d->name == 0)
31156 continue;
31158 ftype = (enum ix86_builtin_func_type) d->flag;
31159 def_builtin_const (d->mask, d->name, ftype, d->code);
31163 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31164 to return a pointer to VERSION_DECL if the outcome of the expression
31165 formed by PREDICATE_CHAIN is true. This function will be called during
31166 version dispatch to decide which function version to execute. It returns
31167 the basic block at the end, to which more conditions can be added. */
31169 static basic_block
31170 add_condition_to_bb (tree function_decl, tree version_decl,
31171 tree predicate_chain, basic_block new_bb)
31173 gimple return_stmt;
31174 tree convert_expr, result_var;
31175 gimple convert_stmt;
31176 gimple call_cond_stmt;
31177 gimple if_else_stmt;
31179 basic_block bb1, bb2, bb3;
31180 edge e12, e23;
31182 tree cond_var, and_expr_var = NULL_TREE;
31183 gimple_seq gseq;
31185 tree predicate_decl, predicate_arg;
31187 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31189 gcc_assert (new_bb != NULL);
31190 gseq = bb_seq (new_bb);
31193 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31194 build_fold_addr_expr (version_decl));
31195 result_var = create_tmp_var (ptr_type_node, NULL);
31196 convert_stmt = gimple_build_assign (result_var, convert_expr);
31197 return_stmt = gimple_build_return (result_var);
31199 if (predicate_chain == NULL_TREE)
31201 gimple_seq_add_stmt (&gseq, convert_stmt);
31202 gimple_seq_add_stmt (&gseq, return_stmt);
31203 set_bb_seq (new_bb, gseq);
31204 gimple_set_bb (convert_stmt, new_bb);
31205 gimple_set_bb (return_stmt, new_bb);
31206 pop_cfun ();
31207 return new_bb;
31210 while (predicate_chain != NULL)
31212 cond_var = create_tmp_var (integer_type_node, NULL);
31213 predicate_decl = TREE_PURPOSE (predicate_chain);
31214 predicate_arg = TREE_VALUE (predicate_chain);
31215 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31216 gimple_call_set_lhs (call_cond_stmt, cond_var);
31218 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31219 gimple_set_bb (call_cond_stmt, new_bb);
31220 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31222 predicate_chain = TREE_CHAIN (predicate_chain);
31224 if (and_expr_var == NULL)
31225 and_expr_var = cond_var;
31226 else
31228 gimple assign_stmt;
31229 /* Use MIN_EXPR to check whether any predicate result is zero:
31230 and_expr_var = min_expr <cond_var, and_expr_var>.  */
31231 assign_stmt = gimple_build_assign (and_expr_var,
31232 build2 (MIN_EXPR, integer_type_node,
31233 cond_var, and_expr_var));
31235 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31236 gimple_set_bb (assign_stmt, new_bb);
31237 gimple_seq_add_stmt (&gseq, assign_stmt);
31241 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31242 integer_zero_node,
31243 NULL_TREE, NULL_TREE);
31244 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31245 gimple_set_bb (if_else_stmt, new_bb);
31246 gimple_seq_add_stmt (&gseq, if_else_stmt);
31248 gimple_seq_add_stmt (&gseq, convert_stmt);
31249 gimple_seq_add_stmt (&gseq, return_stmt);
31250 set_bb_seq (new_bb, gseq);
31252 bb1 = new_bb;
31253 e12 = split_block (bb1, if_else_stmt);
31254 bb2 = e12->dest;
31255 e12->flags &= ~EDGE_FALLTHRU;
31256 e12->flags |= EDGE_TRUE_VALUE;
31258 e23 = split_block (bb2, return_stmt);
31260 gimple_set_bb (convert_stmt, bb2);
31261 gimple_set_bb (return_stmt, bb2);
31263 bb3 = e23->dest;
31264 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31266 remove_edge (e23);
31267 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31269 pop_cfun ();
31271 return bb3;
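/* Roughly, each call of add_condition_to_bb appends the equivalent of

       c1 = predicate_1 (arg_1);
       ...
       cn = predicate_n (arg_n);
       t  = MIN (c1, ..., cn);      // all predicates must be non-zero
       if (t > 0)
         return (void *) &version_decl;
       // otherwise fall through to the next condition in the returned block

   to the resolver body; this is only a pseudo-C picture of the GIMPLE built
   above.  */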
31274 /* This parses the attribute arguments to target in DECL and determines
31275 the right builtin to use to match the platform specification.
31276 It returns the priority value for this version decl. If PREDICATE_LIST
31277 is not NULL, it stores the list of cpu features that need to be checked
31278 before dispatching this function. */
31280 static unsigned int
31281 get_builtin_code_for_version (tree decl, tree *predicate_list)
31283 tree attrs;
31284 struct cl_target_option cur_target;
31285 tree target_node;
31286 struct cl_target_option *new_target;
31287 const char *arg_str = NULL;
31288 const char *attrs_str = NULL;
31289 char *tok_str = NULL;
31290 char *token;
31292 /* Priority of i386 features, greater value is higher priority. This is
31293 used to decide the order in which function dispatch must happen. For
31294 instance, a version specialized for SSE4.2 should be checked for dispatch
31295 before a version for SSE3, as SSE4.2 implies SSE3. */
31296 enum feature_priority
31298 P_ZERO = 0,
31299 P_MMX,
31300 P_SSE,
31301 P_SSE2,
31302 P_SSE3,
31303 P_SSSE3,
31304 P_PROC_SSSE3,
31305 P_SSE4_A,
31306 P_PROC_SSE4_A,
31307 P_SSE4_1,
31308 P_SSE4_2,
31309 P_PROC_SSE4_2,
31310 P_POPCNT,
31311 P_AVX,
31312 P_PROC_AVX,
31313 P_FMA4,
31314 P_XOP,
31315 P_PROC_XOP,
31316 P_FMA,
31317 P_PROC_FMA,
31318 P_AVX2,
31319 P_PROC_AVX2
31322 enum feature_priority priority = P_ZERO;
31324 /* These are the target attribute strings for which a dispatcher is
31325 available, from fold_builtin_cpu. */
31327 static struct _feature_list
31329 const char *const name;
31330 const enum feature_priority priority;
31332 const feature_list[] =
31334 {"mmx", P_MMX},
31335 {"sse", P_SSE},
31336 {"sse2", P_SSE2},
31337 {"sse3", P_SSE3},
31338 {"sse4a", P_SSE4_A},
31339 {"ssse3", P_SSSE3},
31340 {"sse4.1", P_SSE4_1},
31341 {"sse4.2", P_SSE4_2},
31342 {"popcnt", P_POPCNT},
31343 {"avx", P_AVX},
31344 {"fma4", P_FMA4},
31345 {"xop", P_XOP},
31346 {"fma", P_FMA},
31347 {"avx2", P_AVX2}
31351 static unsigned int NUM_FEATURES
31352 = sizeof (feature_list) / sizeof (struct _feature_list);
31354 unsigned int i;
31356 tree predicate_chain = NULL_TREE;
31357 tree predicate_decl, predicate_arg;
31359 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31360 gcc_assert (attrs != NULL);
31362 attrs = TREE_VALUE (TREE_VALUE (attrs));
31364 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31365 attrs_str = TREE_STRING_POINTER (attrs);
31367 /* Return priority zero for default function. */
31368 if (strcmp (attrs_str, "default") == 0)
31369 return 0;
31371 /* Handle arch= if specified. For priority, set it to be 1 more than
31372 the best instruction set the processor can handle. For instance, if
31373 there is a version for atom and a version for ssse3 (the highest ISA
31374 priority for atom), the atom version must be checked for dispatch
31375 before the ssse3 version. */
31376 if (strstr (attrs_str, "arch=") != NULL)
31378 cl_target_option_save (&cur_target, &global_options);
31379 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31380 &global_options_set);
31382 gcc_assert (target_node);
31383 new_target = TREE_TARGET_OPTION (target_node);
31384 gcc_assert (new_target);
31386 if (new_target->arch_specified && new_target->arch > 0)
31388 switch (new_target->arch)
31390 case PROCESSOR_CORE2:
31391 arg_str = "core2";
31392 priority = P_PROC_SSSE3;
31393 break;
31394 case PROCESSOR_NEHALEM:
31395 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31396 arg_str = "westmere";
31397 else
31398 /* We translate "arch=corei7" and "arch=nehalem" to
31399 "corei7" so that it will be mapped to M_INTEL_COREI7
31400 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31401 arg_str = "corei7";
31402 priority = P_PROC_SSE4_2;
31403 break;
31404 case PROCESSOR_SANDYBRIDGE:
31405 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31406 arg_str = "ivybridge";
31407 else
31408 arg_str = "sandybridge";
31409 priority = P_PROC_AVX;
31410 break;
31411 case PROCESSOR_HASWELL:
31412 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31413 arg_str = "broadwell";
31414 else
31415 arg_str = "haswell";
31416 priority = P_PROC_AVX2;
31417 break;
31418 case PROCESSOR_BONNELL:
31419 arg_str = "bonnell";
31420 priority = P_PROC_SSSE3;
31421 break;
31422 case PROCESSOR_SILVERMONT:
31423 arg_str = "silvermont";
31424 priority = P_PROC_SSE4_2;
31425 break;
31426 case PROCESSOR_AMDFAM10:
31427 arg_str = "amdfam10h";
31428 priority = P_PROC_SSE4_A;
31429 break;
31430 case PROCESSOR_BTVER1:
31431 arg_str = "btver1";
31432 priority = P_PROC_SSE4_A;
31433 break;
31434 case PROCESSOR_BTVER2:
31435 arg_str = "btver2";
31436 priority = P_PROC_AVX;
31437 break;
31438 case PROCESSOR_BDVER1:
31439 arg_str = "bdver1";
31440 priority = P_PROC_XOP;
31441 break;
31442 case PROCESSOR_BDVER2:
31443 arg_str = "bdver2";
31444 priority = P_PROC_FMA;
31445 break;
31446 case PROCESSOR_BDVER3:
31447 arg_str = "bdver3";
31448 priority = P_PROC_FMA;
31449 break;
31450 case PROCESSOR_BDVER4:
31451 arg_str = "bdver4";
31452 priority = P_PROC_AVX2;
31453 break;
31457 cl_target_option_restore (&global_options, &cur_target);
31459 if (predicate_list && arg_str == NULL)
31461 error_at (DECL_SOURCE_LOCATION (decl),
31462 "No dispatcher found for the versioning attributes");
31463 return 0;
31466 if (predicate_list)
31468 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31469 /* For a C string literal the length includes the trailing NULL. */
31470 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31471 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31472 predicate_chain);
31476 /* Process feature name. */
31477 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31478 strcpy (tok_str, attrs_str);
31479 token = strtok (tok_str, ",");
31480 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31482 while (token != NULL)
31484 /* Do not process "arch=" */
31485 if (strncmp (token, "arch=", 5) == 0)
31487 token = strtok (NULL, ",");
31488 continue;
31490 for (i = 0; i < NUM_FEATURES; ++i)
31492 if (strcmp (token, feature_list[i].name) == 0)
31494 if (predicate_list)
31496 predicate_arg = build_string_literal (
31497 strlen (feature_list[i].name) + 1,
31498 feature_list[i].name);
31499 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31500 predicate_chain);
31502 /* Find the maximum priority feature. */
31503 if (feature_list[i].priority > priority)
31504 priority = feature_list[i].priority;
31506 break;
31509 if (predicate_list && i == NUM_FEATURES)
31511 error_at (DECL_SOURCE_LOCATION (decl),
31512 "No dispatcher found for %s", token);
31513 return 0;
31515 token = strtok (NULL, ",");
31517 free (tok_str);
31519 if (predicate_list && predicate_chain == NULL_TREE)
31521 error_at (DECL_SOURCE_LOCATION (decl),
31522 "No dispatcher found for the versioning attributes : %s",
31523 attrs_str);
31524 return 0;
31526 else if (predicate_list)
31528 predicate_chain = nreverse (predicate_chain);
31529 *predicate_list = predicate_chain;
31532 return priority;
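/* Worked example (illustrative): for a version declared as

       __attribute__ ((target ("arch=core2,ssse3"))) int foo (void);

   the predicate chain built here is equivalent to

       __builtin_cpu_is ("core2") && __builtin_cpu_supports ("ssse3")

   and the returned priority is that of the strongest feature involved
   (P_PROC_SSSE3 in this case), which determines the order of the dispatch
   checks.  */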
31535 /* This compares the priority of target features in function DECL1
31536 and DECL2. It returns positive value if DECL1 is higher priority,
31537 negative value if DECL2 is higher priority and 0 if they are the
31538 same. */
31540 static int
31541 ix86_compare_version_priority (tree decl1, tree decl2)
31543 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31544 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31546 return (int)priority1 - (int)priority2;
31549 /* V1 and V2 point to function versions with different priorities
31550 based on the target ISA. This function compares their priorities. */
31552 static int
31553 feature_compare (const void *v1, const void *v2)
31555 typedef struct _function_version_info
31557 tree version_decl;
31558 tree predicate_chain;
31559 unsigned int dispatch_priority;
31560 } function_version_info;
31562 const function_version_info c1 = *(const function_version_info *)v1;
31563 const function_version_info c2 = *(const function_version_info *)v2;
31564 return (c2.dispatch_priority - c1.dispatch_priority);
31567 /* This function generates the dispatch function for
31568 multi-versioned functions. DISPATCH_DECL is the function which will
31569 contain the dispatch logic. FNDECLS are the function choices for
31570 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
31571 in DISPATCH_DECL in which the dispatch code is generated. */
31573 static int
31574 dispatch_function_versions (tree dispatch_decl,
31575 void *fndecls_p,
31576 basic_block *empty_bb)
31578 tree default_decl;
31579 gimple ifunc_cpu_init_stmt;
31580 gimple_seq gseq;
31581 int ix;
31582 tree ele;
31583 vec<tree> *fndecls;
31584 unsigned int num_versions = 0;
31585 unsigned int actual_versions = 0;
31586 unsigned int i;
31588 struct _function_version_info
31590 tree version_decl;
31591 tree predicate_chain;
31592 unsigned int dispatch_priority;
31593 }*function_version_info;
31595 gcc_assert (dispatch_decl != NULL
31596 && fndecls_p != NULL
31597 && empty_bb != NULL);
31599 /* fndecls_p is actually a vector.  */
31600 fndecls = static_cast<vec<tree> *> (fndecls_p);
31602 /* At least one more version other than the default. */
31603 num_versions = fndecls->length ();
31604 gcc_assert (num_versions >= 2);
31606 function_version_info = (struct _function_version_info *)
31607 XNEWVEC (struct _function_version_info, (num_versions - 1));
31609 /* The first version in the vector is the default decl. */
31610 default_decl = (*fndecls)[0];
31612 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31614 gseq = bb_seq (*empty_bb);
31615 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31616 constructors, so explicitly call __builtin_cpu_init here. */
31617 ifunc_cpu_init_stmt = gimple_build_call_vec (
31618 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31619 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31620 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31621 set_bb_seq (*empty_bb, gseq);
31623 pop_cfun ();
31626 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31628 tree version_decl = ele;
31629 tree predicate_chain = NULL_TREE;
31630 unsigned int priority;
31631 /* Get attribute string, parse it and find the right predicate decl.
31632 The predicate function could be a lengthy combination of many
31633 features, like arch-type and various isa-variants. */
31634 priority = get_builtin_code_for_version (version_decl,
31635 &predicate_chain);
31637 if (predicate_chain == NULL_TREE)
31638 continue;
31640 function_version_info [actual_versions].version_decl = version_decl;
31641 function_version_info [actual_versions].predicate_chain
31642 = predicate_chain;
31643 function_version_info [actual_versions].dispatch_priority = priority;
31644 actual_versions++;
31647 /* Sort the versions according to descending order of dispatch priority. The
31648 priority is based on the ISA. This is not a perfect solution. There
31649 could still be ambiguity. If more than one function version is suitable
31650 to execute, which one should be dispatched? In future, allow the user
31651 to specify a dispatch priority next to the version. */
31652 qsort (function_version_info, actual_versions,
31653 sizeof (struct _function_version_info), feature_compare);
31655 for (i = 0; i < actual_versions; ++i)
31656 *empty_bb = add_condition_to_bb (dispatch_decl,
31657 function_version_info[i].version_decl,
31658 function_version_info[i].predicate_chain,
31659 *empty_bb);
31661 /* Dispatch the default version at the end. */
31662 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31663 NULL, *empty_bb);
31665 free (function_version_info);
31666 return 0;
31669 /* Comparator function to be used in qsort routine to sort attribute
31670 specification strings to "target". */
31672 static int
31673 attr_strcmp (const void *v1, const void *v2)
31675 const char *c1 = *(char *const*)v1;
31676 const char *c2 = *(char *const*)v2;
31677 return strcmp (c1, c2);
31680 /* ARGLIST is the argument to target attribute. This function tokenizes
31681 the comma separated arguments, sorts them and returns a string which
31682 is a unique identifier for the comma separated arguments. It also
31683 replaces non-identifier characters "=,-" with "_". */
31685 static char *
31686 sorted_attr_string (tree arglist)
31688 tree arg;
31689 size_t str_len_sum = 0;
31690 char **args = NULL;
31691 char *attr_str, *ret_str;
31692 char *attr = NULL;
31693 unsigned int argnum = 1;
31694 unsigned int i;
31696 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31698 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31699 size_t len = strlen (str);
31700 str_len_sum += len + 1;
31701 if (arg != arglist)
31702 argnum++;
31703 for (i = 0; i < strlen (str); i++)
31704 if (str[i] == ',')
31705 argnum++;
31708 attr_str = XNEWVEC (char, str_len_sum);
31709 str_len_sum = 0;
31710 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31712 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31713 size_t len = strlen (str);
31714 memcpy (attr_str + str_len_sum, str, len);
31715 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
31716 str_len_sum += len + 1;
31719 /* Replace "=,-" with "_". */
31720 for (i = 0; i < strlen (attr_str); i++)
31721 if (attr_str[i] == '=' || attr_str[i]== '-')
31722 attr_str[i] = '_';
31724 if (argnum == 1)
31725 return attr_str;
31727 args = XNEWVEC (char *, argnum);
31729 i = 0;
31730 attr = strtok (attr_str, ",");
31731 while (attr != NULL)
31733 args[i] = attr;
31734 i++;
31735 attr = strtok (NULL, ",");
31738 qsort (args, argnum, sizeof (char *), attr_strcmp);
31740 ret_str = XNEWVEC (char, str_len_sum);
31741 str_len_sum = 0;
31742 for (i = 0; i < argnum; i++)
31744 size_t len = strlen (args[i]);
31745 memcpy (ret_str + str_len_sum, args[i], len);
31746 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
31747 str_len_sum += len + 1;
31750 XDELETEVEC (args);
31751 XDELETEVEC (attr_str);
31752 return ret_str;
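/* Example: for target ("sse4.2,arch=core2") the arguments become "sse4.2"
   and "arch_core2" after the "=,-" rewriting, are sorted, and are joined,
   giving "arch_core2_sse4.2"; writing the same attributes in a different
   order yields the identical string.  */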
31755 /* This function changes the assembler name for functions that are
31756 versions. If DECL is a function version and has a "target"
31757 attribute, it appends the attribute string to its assembler name. */
31759 static tree
31760 ix86_mangle_function_version_assembler_name (tree decl, tree id)
31762 tree version_attr;
31763 const char *orig_name, *version_string;
31764 char *attr_str, *assembler_name;
31766 if (DECL_DECLARED_INLINE_P (decl)
31767 && lookup_attribute ("gnu_inline",
31768 DECL_ATTRIBUTES (decl)))
31769 error_at (DECL_SOURCE_LOCATION (decl),
31770 "Function versions cannot be marked as gnu_inline,"
31771 " bodies have to be generated");
31773 if (DECL_VIRTUAL_P (decl)
31774 || DECL_VINDEX (decl))
31775 sorry ("Virtual function multiversioning not supported");
31777 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31779 /* target attribute string cannot be NULL. */
31780 gcc_assert (version_attr != NULL_TREE);
31782 orig_name = IDENTIFIER_POINTER (id);
31783 version_string
31784 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
31786 if (strcmp (version_string, "default") == 0)
31787 return id;
31789 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
31790 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
31792 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
31794 /* Allow assembler name to be modified if already set. */
31795 if (DECL_ASSEMBLER_NAME_SET_P (decl))
31796 SET_DECL_RTL (decl, NULL);
31798 tree ret = get_identifier (assembler_name);
31799 XDELETEVEC (attr_str);
31800 XDELETEVEC (assembler_name);
31801 return ret;
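/* For instance, a version of foo declared with target ("avx,popcnt") gets
   the assembler name "foo.avx_popcnt" (attributes sorted as above), while
   the "default" version keeps its plain name.  */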
31804 /* This function returns true if FN1 and FN2 are versions of the same function,
31805 that is, the target strings of the function decls are different. This assumes
31806 that FN1 and FN2 have the same signature. */
31808 static bool
31809 ix86_function_versions (tree fn1, tree fn2)
31811 tree attr1, attr2;
31812 char *target1, *target2;
31813 bool result;
31815 if (TREE_CODE (fn1) != FUNCTION_DECL
31816 || TREE_CODE (fn2) != FUNCTION_DECL)
31817 return false;
31819 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
31820 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
31822 /* At least one function decl should have the target attribute specified. */
31823 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
31824 return false;
31826 /* Diagnose missing target attribute if one of the decls is already
31827 multi-versioned. */
31828 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
31830 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
31832 if (attr2 != NULL_TREE)
31834 tree tem = fn1;
31835 fn1 = fn2;
31836 fn2 = tem;
31837 attr1 = attr2;
31839 error_at (DECL_SOURCE_LOCATION (fn2),
31840 "missing %<target%> attribute for multi-versioned %D",
31841 fn2);
31842 inform (DECL_SOURCE_LOCATION (fn1),
31843 "previous declaration of %D", fn1);
31844 /* Prevent diagnosing of the same error multiple times. */
31845 DECL_ATTRIBUTES (fn2)
31846 = tree_cons (get_identifier ("target"),
31847 copy_node (TREE_VALUE (attr1)),
31848 DECL_ATTRIBUTES (fn2));
31850 return false;
31853 target1 = sorted_attr_string (TREE_VALUE (attr1));
31854 target2 = sorted_attr_string (TREE_VALUE (attr2));
31856 /* The sorted target strings must be different for fn1 and fn2
31857 to be versions. */
31858 if (strcmp (target1, target2) == 0)
31859 result = false;
31860 else
31861 result = true;
31863 XDELETEVEC (target1);
31864 XDELETEVEC (target2);
31866 return result;
31869 static tree
31870 ix86_mangle_decl_assembler_name (tree decl, tree id)
31872 /* For function version, add the target suffix to the assembler name. */
31873 if (TREE_CODE (decl) == FUNCTION_DECL
31874 && DECL_FUNCTION_VERSIONED (decl))
31875 id = ix86_mangle_function_version_assembler_name (decl, id);
31876 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
31877 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
31878 #endif
31880 return id;
31883 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
31884 is true, append the full path name of the source file. */
31886 static char *
31887 make_name (tree decl, const char *suffix, bool make_unique)
31889 char *global_var_name;
31890 int name_len;
31891 const char *name;
31892 const char *unique_name = NULL;
31894 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
31896 /* Get a unique name that can be used globally without any chances
31897 of collision at link time. */
31898 if (make_unique)
31899 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
31901 name_len = strlen (name) + strlen (suffix) + 2;
31903 if (make_unique)
31904 name_len += strlen (unique_name) + 1;
31905 global_var_name = XNEWVEC (char, name_len);
31907 /* Use '.' to concatenate names as it is demangler friendly. */
31908 if (make_unique)
31909 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
31910 suffix);
31911 else
31912 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
31914 return global_var_name;
31917 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
31919 /* Make a dispatcher declaration for the multi-versioned function DECL.
31920 Calls to DECL function will be replaced with calls to the dispatcher
31921 by the front-end. Return the decl created. */
31923 static tree
31924 make_dispatcher_decl (const tree decl)
31926 tree func_decl;
31927 char *func_name;
31928 tree fn_type, func_type;
31929 bool is_uniq = false;
31931 if (TREE_PUBLIC (decl) == 0)
31932 is_uniq = true;
31934 func_name = make_name (decl, "ifunc", is_uniq);
31936 fn_type = TREE_TYPE (decl);
31937 func_type = build_function_type (TREE_TYPE (fn_type),
31938 TYPE_ARG_TYPES (fn_type));
31940 func_decl = build_fn_decl (func_name, func_type);
31941 XDELETEVEC (func_name);
31942 TREE_USED (func_decl) = 1;
31943 DECL_CONTEXT (func_decl) = NULL_TREE;
31944 DECL_INITIAL (func_decl) = error_mark_node;
31945 DECL_ARTIFICIAL (func_decl) = 1;
31946 /* Mark this func as external, the resolver will flip it again if
31947 it gets generated. */
31948 DECL_EXTERNAL (func_decl) = 1;
31949 /* IFUNCs have to be externally visible, so make the decl public. */
31950 TREE_PUBLIC (func_decl) = 1;
31952 return func_decl;
31955 #endif
31957 /* Returns true if DECL is multi-versioned and is the default version,
31958 that is, it is not tagged with a target-specific attribute. */
31960 static bool
31961 is_function_default_version (const tree decl)
31963 if (TREE_CODE (decl) != FUNCTION_DECL
31964 || !DECL_FUNCTION_VERSIONED (decl))
31965 return false;
31966 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31967 gcc_assert (attr);
31968 attr = TREE_VALUE (TREE_VALUE (attr));
31969 return (TREE_CODE (attr) == STRING_CST
31970 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
31973 /* Make a dispatcher declaration for the multi-versioned function DECL.
31974 Calls to DECL function will be replaced with calls to the dispatcher
31975 by the front-end. Returns the decl of the dispatcher function. */
31977 static tree
31978 ix86_get_function_versions_dispatcher (void *decl)
31980 tree fn = (tree) decl;
31981 struct cgraph_node *node = NULL;
31982 struct cgraph_node *default_node = NULL;
31983 struct cgraph_function_version_info *node_v = NULL;
31984 struct cgraph_function_version_info *first_v = NULL;
31986 tree dispatch_decl = NULL;
31988 struct cgraph_function_version_info *default_version_info = NULL;
31990 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
31992 node = cgraph_get_node (fn);
31993 gcc_assert (node != NULL);
31995 node_v = get_cgraph_node_version (node);
31996 gcc_assert (node_v != NULL);
31998 if (node_v->dispatcher_resolver != NULL)
31999 return node_v->dispatcher_resolver;
32001 /* Find the default version and make it the first node. */
32002 first_v = node_v;
32003 /* Go to the beginning of the chain. */
32004 while (first_v->prev != NULL)
32005 first_v = first_v->prev;
32006 default_version_info = first_v;
32007 while (default_version_info != NULL)
32009 if (is_function_default_version
32010 (default_version_info->this_node->decl))
32011 break;
32012 default_version_info = default_version_info->next;
32015 /* If there is no default node, just return NULL. */
32016 if (default_version_info == NULL)
32017 return NULL;
32019 /* Make default info the first node. */
32020 if (first_v != default_version_info)
32022 default_version_info->prev->next = default_version_info->next;
32023 if (default_version_info->next)
32024 default_version_info->next->prev = default_version_info->prev;
32025 first_v->prev = default_version_info;
32026 default_version_info->next = first_v;
32027 default_version_info->prev = NULL;
32030 default_node = default_version_info->this_node;
32032 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32033 if (targetm.has_ifunc_p ())
32035 struct cgraph_function_version_info *it_v = NULL;
32036 struct cgraph_node *dispatcher_node = NULL;
32037 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32039 /* Right now, the dispatching is done via ifunc. */
32040 dispatch_decl = make_dispatcher_decl (default_node->decl);
32042 dispatcher_node = cgraph_get_create_node (dispatch_decl);
32043 gcc_assert (dispatcher_node != NULL);
32044 dispatcher_node->dispatcher_function = 1;
32045 dispatcher_version_info
32046 = insert_new_cgraph_node_version (dispatcher_node);
32047 dispatcher_version_info->next = default_version_info;
32048 dispatcher_node->definition = 1;
32050 /* Set the dispatcher for all the versions. */
32051 it_v = default_version_info;
32052 while (it_v != NULL)
32054 it_v->dispatcher_resolver = dispatch_decl;
32055 it_v = it_v->next;
32058 else
32059 #endif
32061 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32062 "multiversioning needs ifunc which is not supported "
32063 "on this target");
32066 return dispatch_decl;
32069 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
32070 it to CHAIN. */
32072 static tree
32073 make_attribute (const char *name, const char *arg_name, tree chain)
32075 tree attr_name;
32076 tree attr_arg_name;
32077 tree attr_args;
32078 tree attr;
32080 attr_name = get_identifier (name);
32081 attr_arg_name = build_string (strlen (arg_name), arg_name);
32082 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
32083 attr = tree_cons (attr_name, attr_args, chain);
32084 return attr;
32087 /* Make the resolver function decl to dispatch the versions of
32088 a multi-versioned function, DEFAULT_DECL. Create an
32089 empty basic block in the resolver and store the pointer in
32090 EMPTY_BB. Return the decl of the resolver function. */
32092 static tree
32093 make_resolver_func (const tree default_decl,
32094 const tree dispatch_decl,
32095 basic_block *empty_bb)
32097 char *resolver_name;
32098 tree decl, type, decl_name, t;
32099 bool is_uniq = false;
32101 /* IFUNC's have to be globally visible. So, if the default_decl is
32102 not, then the name of the IFUNC should be made unique. */
32103 if (TREE_PUBLIC (default_decl) == 0)
32104 is_uniq = true;
32106 /* Append the filename to the resolver function if the versions are
32107 not externally visible. This is because the resolver function has
32108 to be externally visible for the loader to find it. So, appending
32109 the filename will prevent conflicts with a resolver function from
32110 another module which is based on the same version name. */
32111 resolver_name = make_name (default_decl, "resolver", is_uniq);
32113 /* The resolver function should return a (void *). */
32114 type = build_function_type_list (ptr_type_node, NULL_TREE);
32116 decl = build_fn_decl (resolver_name, type);
32117 decl_name = get_identifier (resolver_name);
32118 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32120 DECL_NAME (decl) = decl_name;
32121 TREE_USED (decl) = 1;
32122 DECL_ARTIFICIAL (decl) = 1;
32123 DECL_IGNORED_P (decl) = 0;
32124 /* IFUNC resolvers have to be externally visible. */
32125 TREE_PUBLIC (decl) = 1;
32126 DECL_UNINLINABLE (decl) = 1;
32128 /* Resolver is not external, body is generated. */
32129 DECL_EXTERNAL (decl) = 0;
32130 DECL_EXTERNAL (dispatch_decl) = 0;
32132 DECL_CONTEXT (decl) = NULL_TREE;
32133 DECL_INITIAL (decl) = make_node (BLOCK);
32134 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32136 if (DECL_COMDAT_GROUP (default_decl)
32137 || TREE_PUBLIC (default_decl))
32139 /* In this case, each translation unit with a call to this
32140 versioned function will put out a resolver. Ensure it
32141 is comdat to keep just one copy. */
32142 DECL_COMDAT (decl) = 1;
32143 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32145 /* Build result decl and add to function_decl. */
32146 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32147 DECL_ARTIFICIAL (t) = 1;
32148 DECL_IGNORED_P (t) = 1;
32149 DECL_RESULT (decl) = t;
32151 gimplify_function_tree (decl);
32152 push_cfun (DECL_STRUCT_FUNCTION (decl));
32153 *empty_bb = init_lowered_empty_function (decl, false);
32155 cgraph_add_new_function (decl, true);
32156 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl));
32158 pop_cfun ();
32160 gcc_assert (dispatch_decl != NULL);
32161 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
32162 DECL_ATTRIBUTES (dispatch_decl)
32163 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
32165 /* Create the alias for dispatch to resolver here. */
32166 /*cgraph_create_function_alias (dispatch_decl, decl);*/
32167 cgraph_same_body_alias (NULL, dispatch_decl, decl);
32168 XDELETEVEC (resolver_name);
32169 return decl;
32172 /* Generate the dispatching code body to dispatch multi-versioned function
32173 DECL. The target hook is called to process the "target" attributes and
32174 provide the code to dispatch the right function at run-time. NODE points
32175 to the dispatcher decl whose body will be created. */
32177 static tree
32178 ix86_generate_version_dispatcher_body (void *node_p)
32180 tree resolver_decl;
32181 basic_block empty_bb;
32182 tree default_ver_decl;
32183 struct cgraph_node *versn;
32184 struct cgraph_node *node;
32186 struct cgraph_function_version_info *node_version_info = NULL;
32187 struct cgraph_function_version_info *versn_info = NULL;
32189 node = (cgraph_node *)node_p;
32191 node_version_info = get_cgraph_node_version (node);
32192 gcc_assert (node->dispatcher_function
32193 && node_version_info != NULL);
32195 if (node_version_info->dispatcher_resolver)
32196 return node_version_info->dispatcher_resolver;
32198 /* The first version in the chain corresponds to the default version. */
32199 default_ver_decl = node_version_info->next->this_node->decl;
32201 /* node is going to be an alias, so remove the finalized bit. */
32202 node->definition = false;
32204 resolver_decl = make_resolver_func (default_ver_decl,
32205 node->decl, &empty_bb);
32207 node_version_info->dispatcher_resolver = resolver_decl;
32209 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32211 auto_vec<tree, 2> fn_ver_vec;
32213 for (versn_info = node_version_info->next; versn_info;
32214 versn_info = versn_info->next)
32216 versn = versn_info->this_node;
32217 /* Check for virtual functions here again, as by this time it should
32218 have been determined if this function needs a vtable index or
32219 not. This happens for methods in derived classes that override
32220 virtual methods in base classes but are not explicitly marked as
32221 virtual. */
32222 if (DECL_VINDEX (versn->decl))
32223 sorry ("Virtual function multiversioning not supported");
32225 fn_ver_vec.safe_push (versn->decl);
32228 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32229 rebuild_cgraph_edges ();
32230 pop_cfun ();
32231 return resolver_decl;
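/* Illustrative user-level code (C++ function multiversioning) that drives
   this machinery; the function name and target strings are examples only:

     __attribute__ ((target ("default")))     int foo () { return 0; }
     __attribute__ ((target ("sse4.2")))      int foo () { return 1; }
     __attribute__ ((target ("arch=corei7"))) int foo () { return 2; }

   Calls to foo are routed through the IFUNC whose resolver body is
   generated here, so the best version is selected once at load time.  */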
32233 /* This builds the processor_model struct type defined in
32234 libgcc/config/i386/cpuinfo.c */
32236 static tree
32237 build_processor_model_struct (void)
32239 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32240 "__cpu_features"};
32241 tree field = NULL_TREE, field_chain = NULL_TREE;
32242 int i;
32243 tree type = make_node (RECORD_TYPE);
32245 /* The first 3 fields are unsigned int. */
32246 for (i = 0; i < 3; ++i)
32248 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32249 get_identifier (field_name[i]), unsigned_type_node);
32250 if (field_chain != NULL_TREE)
32251 DECL_CHAIN (field) = field_chain;
32252 field_chain = field;
32255 /* The last field is an array of unsigned integers of size one. */
32256 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32257 get_identifier (field_name[3]),
32258 build_array_type (unsigned_type_node,
32259 build_index_type (size_one_node)));
32260 if (field_chain != NULL_TREE)
32261 DECL_CHAIN (field) = field_chain;
32262 field_chain = field;
32264 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32265 return type;
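/* For reference, the type built above is intended to match the struct in
   libgcc/config/i386/cpuinfo.c, which (modulo the exact array bound) is:

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };  */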
32268 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
32270 static tree
32271 make_var_decl (tree type, const char *name)
32273 tree new_decl;
32275 new_decl = build_decl (UNKNOWN_LOCATION,
32276 VAR_DECL,
32277 get_identifier(name),
32278 type);
32280 DECL_EXTERNAL (new_decl) = 1;
32281 TREE_STATIC (new_decl) = 1;
32282 TREE_PUBLIC (new_decl) = 1;
32283 DECL_INITIAL (new_decl) = 0;
32284 DECL_ARTIFICIAL (new_decl) = 0;
32285 DECL_PRESERVE_P (new_decl) = 1;
32287 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32288 assemble_variable (new_decl, 0, 0, 0);
32290 return new_decl;
32293 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
32294 into a check against the __cpu_model data defined in libgcc/config/i386/cpuinfo.c */
32296 static tree
32297 fold_builtin_cpu (tree fndecl, tree *args)
32299 unsigned int i;
32300 enum ix86_builtins fn_code = (enum ix86_builtins)
32301 DECL_FUNCTION_CODE (fndecl);
32302 tree param_string_cst = NULL;
32304 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
32305 enum processor_features
32307 F_CMOV = 0,
32308 F_MMX,
32309 F_POPCNT,
32310 F_SSE,
32311 F_SSE2,
32312 F_SSE3,
32313 F_SSSE3,
32314 F_SSE4_1,
32315 F_SSE4_2,
32316 F_AVX,
32317 F_AVX2,
32318 F_SSE4_A,
32319 F_FMA4,
32320 F_XOP,
32321 F_FMA,
32322 F_MAX
32325 /* These are the values for vendor types, CPU types and subtypes
32326 in cpuinfo.c. CPU type and subtype values are offset by the
32327 corresponding start value, which must be subtracted before use. */
32328 enum processor_model
32330 M_INTEL = 1,
32331 M_AMD,
32332 M_CPU_TYPE_START,
32333 M_INTEL_BONNELL,
32334 M_INTEL_CORE2,
32335 M_INTEL_COREI7,
32336 M_AMDFAM10H,
32337 M_AMDFAM15H,
32338 M_INTEL_SILVERMONT,
32339 M_AMD_BTVER1,
32340 M_AMD_BTVER2,
32341 M_CPU_SUBTYPE_START,
32342 M_INTEL_COREI7_NEHALEM,
32343 M_INTEL_COREI7_WESTMERE,
32344 M_INTEL_COREI7_SANDYBRIDGE,
32345 M_AMDFAM10H_BARCELONA,
32346 M_AMDFAM10H_SHANGHAI,
32347 M_AMDFAM10H_ISTANBUL,
32348 M_AMDFAM15H_BDVER1,
32349 M_AMDFAM15H_BDVER2,
32350 M_AMDFAM15H_BDVER3,
32351 M_AMDFAM15H_BDVER4,
32352 M_INTEL_COREI7_IVYBRIDGE,
32353 M_INTEL_COREI7_HASWELL
32356 static struct _arch_names_table
32358 const char *const name;
32359 const enum processor_model model;
32361 const arch_names_table[] =
32363 {"amd", M_AMD},
32364 {"intel", M_INTEL},
32365 {"atom", M_INTEL_BONNELL},
32366 {"slm", M_INTEL_SILVERMONT},
32367 {"core2", M_INTEL_CORE2},
32368 {"corei7", M_INTEL_COREI7},
32369 {"nehalem", M_INTEL_COREI7_NEHALEM},
32370 {"westmere", M_INTEL_COREI7_WESTMERE},
32371 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32372 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32373 {"haswell", M_INTEL_COREI7_HASWELL},
32374 {"bonnell", M_INTEL_BONNELL},
32375 {"silvermont", M_INTEL_SILVERMONT},
32376 {"amdfam10h", M_AMDFAM10H},
32377 {"barcelona", M_AMDFAM10H_BARCELONA},
32378 {"shanghai", M_AMDFAM10H_SHANGHAI},
32379 {"istanbul", M_AMDFAM10H_ISTANBUL},
32380 {"btver1", M_AMD_BTVER1},
32381 {"amdfam15h", M_AMDFAM15H},
32382 {"bdver1", M_AMDFAM15H_BDVER1},
32383 {"bdver2", M_AMDFAM15H_BDVER2},
32384 {"bdver3", M_AMDFAM15H_BDVER3},
32385 {"bdver4", M_AMDFAM15H_BDVER4},
32386 {"btver2", M_AMD_BTVER2},
32389 static struct _isa_names_table
32391 const char *const name;
32392 const enum processor_features feature;
32394 const isa_names_table[] =
32396 {"cmov", F_CMOV},
32397 {"mmx", F_MMX},
32398 {"popcnt", F_POPCNT},
32399 {"sse", F_SSE},
32400 {"sse2", F_SSE2},
32401 {"sse3", F_SSE3},
32402 {"ssse3", F_SSSE3},
32403 {"sse4a", F_SSE4_A},
32404 {"sse4.1", F_SSE4_1},
32405 {"sse4.2", F_SSE4_2},
32406 {"avx", F_AVX},
32407 {"fma4", F_FMA4},
32408 {"xop", F_XOP},
32409 {"fma", F_FMA},
32410 {"avx2", F_AVX2}
32413 tree __processor_model_type = build_processor_model_struct ();
32414 tree __cpu_model_var = make_var_decl (__processor_model_type,
32415 "__cpu_model");
32418 varpool_add_new_variable (__cpu_model_var);
32420 gcc_assert ((args != NULL) && (*args != NULL));
32422 param_string_cst = *args;
32423 while (param_string_cst
32424 && TREE_CODE (param_string_cst) != STRING_CST)
32426 /* *args must be an expr that can contain other EXPRs leading to a
32427 STRING_CST. */
32428 if (!EXPR_P (param_string_cst))
32430 error ("Parameter to builtin must be a string constant or literal");
32431 return integer_zero_node;
32433 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32436 gcc_assert (param_string_cst);
32438 if (fn_code == IX86_BUILTIN_CPU_IS)
32440 tree ref;
32441 tree field;
32442 tree final;
32444 unsigned int field_val = 0;
32445 unsigned int NUM_ARCH_NAMES
32446 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32448 for (i = 0; i < NUM_ARCH_NAMES; i++)
32449 if (strcmp (arch_names_table[i].name,
32450 TREE_STRING_POINTER (param_string_cst)) == 0)
32451 break;
32453 if (i == NUM_ARCH_NAMES)
32455 error ("Parameter to builtin not valid: %s",
32456 TREE_STRING_POINTER (param_string_cst));
32457 return integer_zero_node;
32460 field = TYPE_FIELDS (__processor_model_type);
32461 field_val = arch_names_table[i].model;
32463 /* CPU types are stored in the next field. */
32464 if (field_val > M_CPU_TYPE_START
32465 && field_val < M_CPU_SUBTYPE_START)
32467 field = DECL_CHAIN (field);
32468 field_val -= M_CPU_TYPE_START;
32471 /* CPU subtypes are stored in the next field. */
32472 if (field_val > M_CPU_SUBTYPE_START)
32474 field = DECL_CHAIN (DECL_CHAIN (field));
32475 field_val -= M_CPU_SUBTYPE_START;
32478 /* Get the appropriate field in __cpu_model. */
32479 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32480 field, NULL_TREE);
32482 /* Check the value. */
32483 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32484 build_int_cstu (unsigned_type_node, field_val));
32485 return build1 (CONVERT_EXPR, integer_type_node, final);
32487 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32489 tree ref;
32490 tree array_elt;
32491 tree field;
32492 tree final;
32494 unsigned int field_val = 0;
32495 unsigned int NUM_ISA_NAMES
32496 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32498 for (i = 0; i < NUM_ISA_NAMES; i++)
32499 if (strcmp (isa_names_table[i].name,
32500 TREE_STRING_POINTER (param_string_cst)) == 0)
32501 break;
32503 if (i == NUM_ISA_NAMES)
32505 error ("Parameter to builtin not valid: %s",
32506 TREE_STRING_POINTER (param_string_cst));
32507 return integer_zero_node;
32510 field = TYPE_FIELDS (__processor_model_type);
32511 /* Get the last field, which is __cpu_features. */
32512 while (DECL_CHAIN (field))
32513 field = DECL_CHAIN (field);
32515 /* Get the appropriate field: __cpu_model.__cpu_features */
32516 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32517 field, NULL_TREE);
32519 /* Access the 0th element of __cpu_features array. */
32520 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32521 integer_zero_node, NULL_TREE, NULL_TREE);
32523 field_val = (1 << isa_names_table[i].feature);
32524 /* Return __cpu_model.__cpu_features[0] & field_val */
32525 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32526 build_int_cstu (unsigned_type_node, field_val));
32527 return build1 (CONVERT_EXPR, integer_type_node, final);
32529 gcc_unreachable ();
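/* A sketch of what the folding above produces, using the enum values
   defined earlier in this function:

     __builtin_cpu_supports ("avx2")
       ==> (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX2))

     __builtin_cpu_is ("corei7")
       ==> (int) (__cpu_model.__cpu_type == M_INTEL_COREI7 - M_CPU_TYPE_START)  */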
32532 static tree
32533 ix86_fold_builtin (tree fndecl, int n_args,
32534 tree *args, bool ignore ATTRIBUTE_UNUSED)
32536 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32538 enum ix86_builtins fn_code = (enum ix86_builtins)
32539 DECL_FUNCTION_CODE (fndecl);
32540 if (fn_code == IX86_BUILTIN_CPU_IS
32541 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32543 gcc_assert (n_args == 1);
32544 return fold_builtin_cpu (fndecl, args);
32548 #ifdef SUBTARGET_FOLD_BUILTIN
32549 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32550 #endif
32552 return NULL_TREE;
32555 /* Make builtins to detect cpu type and features supported. NAME is
32556 the builtin name, CODE is the builtin code, and FTYPE is the function
32557 type of the builtin. */
32559 static void
32560 make_cpu_type_builtin (const char* name, int code,
32561 enum ix86_builtin_func_type ftype, bool is_const)
32563 tree decl;
32564 tree type;
32566 type = ix86_get_builtin_func_type (ftype);
32567 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32568 NULL, NULL_TREE);
32569 gcc_assert (decl != NULL_TREE);
32570 ix86_builtins[(int) code] = decl;
32571 TREE_READONLY (decl) = is_const;
32574 /* Make builtins to get CPU type and features supported. The created
32575 builtins are :
32577 __builtin_cpu_init (), to detect cpu type and features,
32578 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32579 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
32582 static void
32583 ix86_init_platform_type_builtins (void)
32585 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32586 INT_FTYPE_VOID, false);
32587 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32588 INT_FTYPE_PCCHAR, true);
32589 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32590 INT_FTYPE_PCCHAR, true);
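/* Illustrative use of the builtins registered above (user code; the helper
   names are hypothetical):

     __builtin_cpu_init ();
     if (__builtin_cpu_is ("amdfam10h"))
       use_amdfam10h_path ();
     if (__builtin_cpu_supports ("sse4.2"))
       use_sse42_path ();

   In practice libgcc initializes the data from a constructor, so the
   explicit __builtin_cpu_init call is only needed in code that may run
   before constructors (e.g. IFUNC resolvers).  */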
32593 /* Internal method for ix86_init_builtins. */
32595 static void
32596 ix86_init_builtins_va_builtins_abi (void)
32598 tree ms_va_ref, sysv_va_ref;
32599 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32600 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32601 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32602 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32604 if (!TARGET_64BIT)
32605 return;
32606 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32607 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32608 ms_va_ref = build_reference_type (ms_va_list_type_node);
32609 sysv_va_ref =
32610 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32612 fnvoid_va_end_ms =
32613 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32614 fnvoid_va_start_ms =
32615 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32616 fnvoid_va_end_sysv =
32617 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32618 fnvoid_va_start_sysv =
32619 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32620 NULL_TREE);
32621 fnvoid_va_copy_ms =
32622 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32623 NULL_TREE);
32624 fnvoid_va_copy_sysv =
32625 build_function_type_list (void_type_node, sysv_va_ref,
32626 sysv_va_ref, NULL_TREE);
32628 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32629 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32630 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32631 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32632 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32633 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32634 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32635 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32636 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32637 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32638 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32639 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
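/* Illustrative use of the ABI-specific varargs builtins added above; the
   function itself is a made-up example:

     void __attribute__ ((ms_abi)) msfunc (int n, ...)
     {
       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, n);
       int v = __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
     }  */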
32642 static void
32643 ix86_init_builtin_types (void)
32645 tree float128_type_node, float80_type_node;
32647 /* The __float80 type. */
32648 float80_type_node = long_double_type_node;
32649 if (TYPE_MODE (float80_type_node) != XFmode)
32651 /* The __float80 type. */
32652 float80_type_node = make_node (REAL_TYPE);
32654 TYPE_PRECISION (float80_type_node) = 80;
32655 layout_type (float80_type_node);
32657 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32659 /* The __float128 type. */
32660 float128_type_node = make_node (REAL_TYPE);
32661 TYPE_PRECISION (float128_type_node) = 128;
32662 layout_type (float128_type_node);
32663 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32665 /* This macro is built by i386-builtin-types.awk. */
32666 DEFINE_BUILTIN_PRIMITIVE_TYPES;
32669 static void
32670 ix86_init_builtins (void)
32672 tree t;
32674 ix86_init_builtin_types ();
32676 /* Builtins to get CPU type and features. */
32677 ix86_init_platform_type_builtins ();
32679 /* TFmode support builtins. */
32680 def_builtin_const (0, "__builtin_infq",
32681 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32682 def_builtin_const (0, "__builtin_huge_valq",
32683 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32685 /* We will expand them to a normal call if SSE isn't available, since
32686 they are used by libgcc. */
32687 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32688 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
32689 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32690 TREE_READONLY (t) = 1;
32691 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
32693 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32694 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
32695 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
32696 TREE_READONLY (t) = 1;
32697 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
32699 ix86_init_tm_builtins ();
32700 ix86_init_mmx_sse_builtins ();
32702 if (TARGET_LP64)
32703 ix86_init_builtins_va_builtins_abi ();
32705 #ifdef SUBTARGET_INIT_BUILTINS
32706 SUBTARGET_INIT_BUILTINS;
32707 #endif
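/* Illustrative use of the TFmode builtins defined above (user code):

     __float128 x = __builtin_infq ();
     __float128 y = __builtin_fabsq (x);
     __float128 z = __builtin_copysignq (y, x);

   Without SSE these become calls to the libgcc routines named above
   (__fabstf2, __copysigntf3).  */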
32710 /* Return the ix86 builtin for CODE. */
32712 static tree
32713 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
32715 if (code >= IX86_BUILTIN_MAX)
32716 return error_mark_node;
32718 return ix86_builtins[code];
32721 /* Errors in the source file can cause expand_expr to return const0_rtx
32722 where we expect a vector. To avoid crashing, use one of the vector
32723 clear instructions. */
32724 static rtx
32725 safe_vector_operand (rtx x, enum machine_mode mode)
32727 if (x == const0_rtx)
32728 x = CONST0_RTX (mode);
32729 return x;
32732 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
32734 static rtx
32735 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32737 rtx pat;
32738 tree arg0 = CALL_EXPR_ARG (exp, 0);
32739 tree arg1 = CALL_EXPR_ARG (exp, 1);
32740 rtx op0 = expand_normal (arg0);
32741 rtx op1 = expand_normal (arg1);
32742 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32743 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32744 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
32746 if (VECTOR_MODE_P (mode0))
32747 op0 = safe_vector_operand (op0, mode0);
32748 if (VECTOR_MODE_P (mode1))
32749 op1 = safe_vector_operand (op1, mode1);
32751 if (optimize || !target
32752 || GET_MODE (target) != tmode
32753 || !insn_data[icode].operand[0].predicate (target, tmode))
32754 target = gen_reg_rtx (tmode);
32756 if (GET_MODE (op1) == SImode && mode1 == TImode)
32758 rtx x = gen_reg_rtx (V4SImode);
32759 emit_insn (gen_sse2_loadd (x, op1));
32760 op1 = gen_lowpart (TImode, x);
32763 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32764 op0 = copy_to_mode_reg (mode0, op0);
32765 if (!insn_data[icode].operand[2].predicate (op1, mode1))
32766 op1 = copy_to_mode_reg (mode1, op1);
32768 pat = GEN_FCN (icode) (target, op0, op1);
32769 if (! pat)
32770 return 0;
32772 emit_insn (pat);
32774 return target;
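/* For illustration, a typical two-operand builtin that reaches this routine
   (assuming the usual descriptor mapping, e.g. CODE_FOR_addv4sf3 for
   __builtin_ia32_addps behind _mm_add_ps):

     #include <xmmintrin.h>
     __m128 add (__m128 a, __m128 b) { return _mm_add_ps (a, b); }  */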
32777 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
32779 static rtx
32780 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
32781 enum ix86_builtin_func_type m_type,
32782 enum rtx_code sub_code)
32784 rtx pat;
32785 int i;
32786 int nargs;
32787 bool comparison_p = false;
32788 bool tf_p = false;
32789 bool last_arg_constant = false;
32790 int num_memory = 0;
32791 struct {
32792 rtx op;
32793 enum machine_mode mode;
32794 } args[4];
32796 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32798 switch (m_type)
32800 case MULTI_ARG_4_DF2_DI_I:
32801 case MULTI_ARG_4_DF2_DI_I1:
32802 case MULTI_ARG_4_SF2_SI_I:
32803 case MULTI_ARG_4_SF2_SI_I1:
32804 nargs = 4;
32805 last_arg_constant = true;
32806 break;
32808 case MULTI_ARG_3_SF:
32809 case MULTI_ARG_3_DF:
32810 case MULTI_ARG_3_SF2:
32811 case MULTI_ARG_3_DF2:
32812 case MULTI_ARG_3_DI:
32813 case MULTI_ARG_3_SI:
32814 case MULTI_ARG_3_SI_DI:
32815 case MULTI_ARG_3_HI:
32816 case MULTI_ARG_3_HI_SI:
32817 case MULTI_ARG_3_QI:
32818 case MULTI_ARG_3_DI2:
32819 case MULTI_ARG_3_SI2:
32820 case MULTI_ARG_3_HI2:
32821 case MULTI_ARG_3_QI2:
32822 nargs = 3;
32823 break;
32825 case MULTI_ARG_2_SF:
32826 case MULTI_ARG_2_DF:
32827 case MULTI_ARG_2_DI:
32828 case MULTI_ARG_2_SI:
32829 case MULTI_ARG_2_HI:
32830 case MULTI_ARG_2_QI:
32831 nargs = 2;
32832 break;
32834 case MULTI_ARG_2_DI_IMM:
32835 case MULTI_ARG_2_SI_IMM:
32836 case MULTI_ARG_2_HI_IMM:
32837 case MULTI_ARG_2_QI_IMM:
32838 nargs = 2;
32839 last_arg_constant = true;
32840 break;
32842 case MULTI_ARG_1_SF:
32843 case MULTI_ARG_1_DF:
32844 case MULTI_ARG_1_SF2:
32845 case MULTI_ARG_1_DF2:
32846 case MULTI_ARG_1_DI:
32847 case MULTI_ARG_1_SI:
32848 case MULTI_ARG_1_HI:
32849 case MULTI_ARG_1_QI:
32850 case MULTI_ARG_1_SI_DI:
32851 case MULTI_ARG_1_HI_DI:
32852 case MULTI_ARG_1_HI_SI:
32853 case MULTI_ARG_1_QI_DI:
32854 case MULTI_ARG_1_QI_SI:
32855 case MULTI_ARG_1_QI_HI:
32856 nargs = 1;
32857 break;
32859 case MULTI_ARG_2_DI_CMP:
32860 case MULTI_ARG_2_SI_CMP:
32861 case MULTI_ARG_2_HI_CMP:
32862 case MULTI_ARG_2_QI_CMP:
32863 nargs = 2;
32864 comparison_p = true;
32865 break;
32867 case MULTI_ARG_2_SF_TF:
32868 case MULTI_ARG_2_DF_TF:
32869 case MULTI_ARG_2_DI_TF:
32870 case MULTI_ARG_2_SI_TF:
32871 case MULTI_ARG_2_HI_TF:
32872 case MULTI_ARG_2_QI_TF:
32873 nargs = 2;
32874 tf_p = true;
32875 break;
32877 default:
32878 gcc_unreachable ();
32881 if (optimize || !target
32882 || GET_MODE (target) != tmode
32883 || !insn_data[icode].operand[0].predicate (target, tmode))
32884 target = gen_reg_rtx (tmode);
32886 gcc_assert (nargs <= 4);
32888 for (i = 0; i < nargs; i++)
32890 tree arg = CALL_EXPR_ARG (exp, i);
32891 rtx op = expand_normal (arg);
32892 int adjust = (comparison_p) ? 1 : 0;
32893 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
32895 if (last_arg_constant && i == nargs - 1)
32897 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
32899 enum insn_code new_icode = icode;
32900 switch (icode)
32902 case CODE_FOR_xop_vpermil2v2df3:
32903 case CODE_FOR_xop_vpermil2v4sf3:
32904 case CODE_FOR_xop_vpermil2v4df3:
32905 case CODE_FOR_xop_vpermil2v8sf3:
32906 error ("the last argument must be a 2-bit immediate");
32907 return gen_reg_rtx (tmode);
32908 case CODE_FOR_xop_rotlv2di3:
32909 new_icode = CODE_FOR_rotlv2di3;
32910 goto xop_rotl;
32911 case CODE_FOR_xop_rotlv4si3:
32912 new_icode = CODE_FOR_rotlv4si3;
32913 goto xop_rotl;
32914 case CODE_FOR_xop_rotlv8hi3:
32915 new_icode = CODE_FOR_rotlv8hi3;
32916 goto xop_rotl;
32917 case CODE_FOR_xop_rotlv16qi3:
32918 new_icode = CODE_FOR_rotlv16qi3;
32919 xop_rotl:
32920 if (CONST_INT_P (op))
32922 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
32923 op = GEN_INT (INTVAL (op) & mask);
32924 gcc_checking_assert
32925 (insn_data[icode].operand[i + 1].predicate (op, mode));
32927 else
32929 gcc_checking_assert
32930 (nargs == 2
32931 && insn_data[new_icode].operand[0].mode == tmode
32932 && insn_data[new_icode].operand[1].mode == tmode
32933 && insn_data[new_icode].operand[2].mode == mode
32934 && insn_data[new_icode].operand[0].predicate
32935 == insn_data[icode].operand[0].predicate
32936 && insn_data[new_icode].operand[1].predicate
32937 == insn_data[icode].operand[1].predicate);
32938 icode = new_icode;
32939 goto non_constant;
32941 break;
32942 default:
32943 gcc_unreachable ();
32947 else
32949 non_constant:
32950 if (VECTOR_MODE_P (mode))
32951 op = safe_vector_operand (op, mode);
32953 /* If we aren't optimizing, only allow one memory operand to be
32954 generated. */
32955 if (memory_operand (op, mode))
32956 num_memory++;
32958 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
32960 if (optimize
32961 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
32962 || num_memory > 1)
32963 op = force_reg (mode, op);
32966 args[i].op = op;
32967 args[i].mode = mode;
32970 switch (nargs)
32972 case 1:
32973 pat = GEN_FCN (icode) (target, args[0].op);
32974 break;
32976 case 2:
32977 if (tf_p)
32978 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
32979 GEN_INT ((int)sub_code));
32980 else if (! comparison_p)
32981 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
32982 else
32984 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
32985 args[0].op,
32986 args[1].op);
32988 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
32990 break;
32992 case 3:
32993 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
32994 break;
32996 case 4:
32997 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
32998 break;
33000 default:
33001 gcc_unreachable ();
33004 if (! pat)
33005 return 0;
33007 emit_insn (pat);
33008 return target;
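/* For illustration, a three-operand builtin handled here under the
   MULTI_ARG_3_SF classification (assuming the FMA4 descriptor table, e.g.
   __builtin_ia32_vfmaddps behind _mm_macc_ps; requires -mfma4):

     #include <x86intrin.h>
     __m128 fma4 (__m128 a, __m128 b, __m128 c) { return _mm_macc_ps (a, b, c); }  */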
33011 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
33012 insns with vec_merge. */
33014 static rtx
33015 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
33016 rtx target)
33018 rtx pat;
33019 tree arg0 = CALL_EXPR_ARG (exp, 0);
33020 rtx op1, op0 = expand_normal (arg0);
33021 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33022 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
33024 if (optimize || !target
33025 || GET_MODE (target) != tmode
33026 || !insn_data[icode].operand[0].predicate (target, tmode))
33027 target = gen_reg_rtx (tmode);
33029 if (VECTOR_MODE_P (mode0))
33030 op0 = safe_vector_operand (op0, mode0);
33032 if ((optimize && !register_operand (op0, mode0))
33033 || !insn_data[icode].operand[1].predicate (op0, mode0))
33034 op0 = copy_to_mode_reg (mode0, op0);
33036 op1 = op0;
33037 if (!insn_data[icode].operand[2].predicate (op1, mode0))
33038 op1 = copy_to_mode_reg (mode0, op1);
33040 pat = GEN_FCN (icode) (target, op0, op1);
33041 if (! pat)
33042 return 0;
33043 emit_insn (pat);
33044 return target;
33047 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
33049 static rtx
33050 ix86_expand_sse_compare (const struct builtin_description *d,
33051 tree exp, rtx target, bool swap)
33053 rtx pat;
33054 tree arg0 = CALL_EXPR_ARG (exp, 0);
33055 tree arg1 = CALL_EXPR_ARG (exp, 1);
33056 rtx op0 = expand_normal (arg0);
33057 rtx op1 = expand_normal (arg1);
33058 rtx op2;
33059 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33060 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33061 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33062 enum rtx_code comparison = d->comparison;
33064 if (VECTOR_MODE_P (mode0))
33065 op0 = safe_vector_operand (op0, mode0);
33066 if (VECTOR_MODE_P (mode1))
33067 op1 = safe_vector_operand (op1, mode1);
33069 /* Swap operands if we have a comparison that isn't available in
33070 hardware. */
33071 if (swap)
33073 rtx tmp = gen_reg_rtx (mode1);
33074 emit_move_insn (tmp, op1);
33075 op1 = op0;
33076 op0 = tmp;
33079 if (optimize || !target
33080 || GET_MODE (target) != tmode
33081 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33082 target = gen_reg_rtx (tmode);
33084 if ((optimize && !register_operand (op0, mode0))
33085 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
33086 op0 = copy_to_mode_reg (mode0, op0);
33087 if ((optimize && !register_operand (op1, mode1))
33088 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
33089 op1 = copy_to_mode_reg (mode1, op1);
33091 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
33092 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33093 if (! pat)
33094 return 0;
33095 emit_insn (pat);
33096 return target;
33099 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
33101 static rtx
33102 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
33103 rtx target)
33105 rtx pat;
33106 tree arg0 = CALL_EXPR_ARG (exp, 0);
33107 tree arg1 = CALL_EXPR_ARG (exp, 1);
33108 rtx op0 = expand_normal (arg0);
33109 rtx op1 = expand_normal (arg1);
33110 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33111 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33112 enum rtx_code comparison = d->comparison;
33114 if (VECTOR_MODE_P (mode0))
33115 op0 = safe_vector_operand (op0, mode0);
33116 if (VECTOR_MODE_P (mode1))
33117 op1 = safe_vector_operand (op1, mode1);
33119 /* Swap operands if we have a comparison that isn't available in
33120 hardware. */
33121 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
33123 rtx tmp = op1;
33124 op1 = op0;
33125 op0 = tmp;
33128 target = gen_reg_rtx (SImode);
33129 emit_move_insn (target, const0_rtx);
33130 target = gen_rtx_SUBREG (QImode, target, 0);
33132 if ((optimize && !register_operand (op0, mode0))
33133 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33134 op0 = copy_to_mode_reg (mode0, op0);
33135 if ((optimize && !register_operand (op1, mode1))
33136 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33137 op1 = copy_to_mode_reg (mode1, op1);
33139 pat = GEN_FCN (d->icode) (op0, op1);
33140 if (! pat)
33141 return 0;
33142 emit_insn (pat);
33143 emit_insn (gen_rtx_SET (VOIDmode,
33144 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33145 gen_rtx_fmt_ee (comparison, QImode,
33146 SET_DEST (pat),
33147 const0_rtx)));
33149 return SUBREG_REG (target);
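/* For illustration, a comi-style builtin handled here: the comparison from
   the descriptor is materialised with a setcc reading the flags set by the
   comi pattern (e.g. _mm_comilt_ss):

     #include <xmmintrin.h>
     int lt (__m128 a, __m128 b) { return _mm_comilt_ss (a, b); }  */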
33152 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
33154 static rtx
33155 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
33156 rtx target)
33158 rtx pat;
33159 tree arg0 = CALL_EXPR_ARG (exp, 0);
33160 rtx op1, op0 = expand_normal (arg0);
33161 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33162 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33164 if (optimize || target == 0
33165 || GET_MODE (target) != tmode
33166 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33167 target = gen_reg_rtx (tmode);
33169 if (VECTOR_MODE_P (mode0))
33170 op0 = safe_vector_operand (op0, mode0);
33172 if ((optimize && !register_operand (op0, mode0))
33173 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33174 op0 = copy_to_mode_reg (mode0, op0);
33176 op1 = GEN_INT (d->comparison);
33178 pat = GEN_FCN (d->icode) (target, op0, op1);
33179 if (! pat)
33180 return 0;
33181 emit_insn (pat);
33182 return target;
33185 static rtx
33186 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
33187 tree exp, rtx target)
33189 rtx pat;
33190 tree arg0 = CALL_EXPR_ARG (exp, 0);
33191 tree arg1 = CALL_EXPR_ARG (exp, 1);
33192 rtx op0 = expand_normal (arg0);
33193 rtx op1 = expand_normal (arg1);
33194 rtx op2;
33195 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33196 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33197 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33199 if (optimize || target == 0
33200 || GET_MODE (target) != tmode
33201 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33202 target = gen_reg_rtx (tmode);
33204 op0 = safe_vector_operand (op0, mode0);
33205 op1 = safe_vector_operand (op1, mode1);
33207 if ((optimize && !register_operand (op0, mode0))
33208 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33209 op0 = copy_to_mode_reg (mode0, op0);
33210 if ((optimize && !register_operand (op1, mode1))
33211 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33212 op1 = copy_to_mode_reg (mode1, op1);
33214 op2 = GEN_INT (d->comparison);
33216 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33217 if (! pat)
33218 return 0;
33219 emit_insn (pat);
33220 return target;
33223 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
33225 static rtx
33226 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33227 rtx target)
33229 rtx pat;
33230 tree arg0 = CALL_EXPR_ARG (exp, 0);
33231 tree arg1 = CALL_EXPR_ARG (exp, 1);
33232 rtx op0 = expand_normal (arg0);
33233 rtx op1 = expand_normal (arg1);
33234 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33235 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33236 enum rtx_code comparison = d->comparison;
33238 if (VECTOR_MODE_P (mode0))
33239 op0 = safe_vector_operand (op0, mode0);
33240 if (VECTOR_MODE_P (mode1))
33241 op1 = safe_vector_operand (op1, mode1);
33243 target = gen_reg_rtx (SImode);
33244 emit_move_insn (target, const0_rtx);
33245 target = gen_rtx_SUBREG (QImode, target, 0);
33247 if ((optimize && !register_operand (op0, mode0))
33248 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33249 op0 = copy_to_mode_reg (mode0, op0);
33250 if ((optimize && !register_operand (op1, mode1))
33251 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33252 op1 = copy_to_mode_reg (mode1, op1);
33254 pat = GEN_FCN (d->icode) (op0, op1);
33255 if (! pat)
33256 return 0;
33257 emit_insn (pat);
33258 emit_insn (gen_rtx_SET (VOIDmode,
33259 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33260 gen_rtx_fmt_ee (comparison, QImode,
33261 SET_DEST (pat),
33262 const0_rtx)));
33264 return SUBREG_REG (target);
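/* For illustration, a ptest builtin handled here (e.g. _mm_testz_si128;
   the descriptor's EQ comparison turns the ZF result into 0/1):

     #include <smmintrin.h>
     int testz (__m128i a, __m128i b) { return _mm_testz_si128 (a, b); }  */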
33267 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
33269 static rtx
33270 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33271 tree exp, rtx target)
33273 rtx pat;
33274 tree arg0 = CALL_EXPR_ARG (exp, 0);
33275 tree arg1 = CALL_EXPR_ARG (exp, 1);
33276 tree arg2 = CALL_EXPR_ARG (exp, 2);
33277 tree arg3 = CALL_EXPR_ARG (exp, 3);
33278 tree arg4 = CALL_EXPR_ARG (exp, 4);
33279 rtx scratch0, scratch1;
33280 rtx op0 = expand_normal (arg0);
33281 rtx op1 = expand_normal (arg1);
33282 rtx op2 = expand_normal (arg2);
33283 rtx op3 = expand_normal (arg3);
33284 rtx op4 = expand_normal (arg4);
33285 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33287 tmode0 = insn_data[d->icode].operand[0].mode;
33288 tmode1 = insn_data[d->icode].operand[1].mode;
33289 modev2 = insn_data[d->icode].operand[2].mode;
33290 modei3 = insn_data[d->icode].operand[3].mode;
33291 modev4 = insn_data[d->icode].operand[4].mode;
33292 modei5 = insn_data[d->icode].operand[5].mode;
33293 modeimm = insn_data[d->icode].operand[6].mode;
33295 if (VECTOR_MODE_P (modev2))
33296 op0 = safe_vector_operand (op0, modev2);
33297 if (VECTOR_MODE_P (modev4))
33298 op2 = safe_vector_operand (op2, modev4);
33300 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33301 op0 = copy_to_mode_reg (modev2, op0);
33302 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33303 op1 = copy_to_mode_reg (modei3, op1);
33304 if ((optimize && !register_operand (op2, modev4))
33305 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33306 op2 = copy_to_mode_reg (modev4, op2);
33307 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33308 op3 = copy_to_mode_reg (modei5, op3);
33310 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33312 error ("the fifth argument must be an 8-bit immediate");
33313 return const0_rtx;
33316 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33318 if (optimize || !target
33319 || GET_MODE (target) != tmode0
33320 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33321 target = gen_reg_rtx (tmode0);
33323 scratch1 = gen_reg_rtx (tmode1);
33325 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33327 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33329 if (optimize || !target
33330 || GET_MODE (target) != tmode1
33331 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33332 target = gen_reg_rtx (tmode1);
33334 scratch0 = gen_reg_rtx (tmode0);
33336 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33338 else
33340 gcc_assert (d->flag);
33342 scratch0 = gen_reg_rtx (tmode0);
33343 scratch1 = gen_reg_rtx (tmode1);
33345 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33348 if (! pat)
33349 return 0;
33351 emit_insn (pat);
33353 if (d->flag)
33355 target = gen_reg_rtx (SImode);
33356 emit_move_insn (target, const0_rtx);
33357 target = gen_rtx_SUBREG (QImode, target, 0);
33359 emit_insn
33360 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33361 gen_rtx_fmt_ee (EQ, QImode,
33362 gen_rtx_REG ((enum machine_mode) d->flag,
33363 FLAGS_REG),
33364 const0_rtx)));
33365 return SUBREG_REG (target);
33367 else
33368 return target;
33372 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33374 static rtx
33375 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33376 tree exp, rtx target)
33378 rtx pat;
33379 tree arg0 = CALL_EXPR_ARG (exp, 0);
33380 tree arg1 = CALL_EXPR_ARG (exp, 1);
33381 tree arg2 = CALL_EXPR_ARG (exp, 2);
33382 rtx scratch0, scratch1;
33383 rtx op0 = expand_normal (arg0);
33384 rtx op1 = expand_normal (arg1);
33385 rtx op2 = expand_normal (arg2);
33386 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33388 tmode0 = insn_data[d->icode].operand[0].mode;
33389 tmode1 = insn_data[d->icode].operand[1].mode;
33390 modev2 = insn_data[d->icode].operand[2].mode;
33391 modev3 = insn_data[d->icode].operand[3].mode;
33392 modeimm = insn_data[d->icode].operand[4].mode;
33394 if (VECTOR_MODE_P (modev2))
33395 op0 = safe_vector_operand (op0, modev2);
33396 if (VECTOR_MODE_P (modev3))
33397 op1 = safe_vector_operand (op1, modev3);
33399 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33400 op0 = copy_to_mode_reg (modev2, op0);
33401 if ((optimize && !register_operand (op1, modev3))
33402 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33403 op1 = copy_to_mode_reg (modev3, op1);
33405 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33407 error ("the third argument must be an 8-bit immediate");
33408 return const0_rtx;
33411 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33413 if (optimize || !target
33414 || GET_MODE (target) != tmode0
33415 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33416 target = gen_reg_rtx (tmode0);
33418 scratch1 = gen_reg_rtx (tmode1);
33420 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33422 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33424 if (optimize || !target
33425 || GET_MODE (target) != tmode1
33426 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33427 target = gen_reg_rtx (tmode1);
33429 scratch0 = gen_reg_rtx (tmode0);
33431 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33433 else
33435 gcc_assert (d->flag);
33437 scratch0 = gen_reg_rtx (tmode0);
33438 scratch1 = gen_reg_rtx (tmode1);
33440 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33443 if (! pat)
33444 return 0;
33446 emit_insn (pat);
33448 if (d->flag)
33450 target = gen_reg_rtx (SImode);
33451 emit_move_insn (target, const0_rtx);
33452 target = gen_rtx_SUBREG (QImode, target, 0);
33454 emit_insn
33455 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33456 gen_rtx_fmt_ee (EQ, QImode,
33457 gen_rtx_REG ((enum machine_mode) d->flag,
33458 FLAGS_REG),
33459 const0_rtx)));
33460 return SUBREG_REG (target);
33462 else
33463 return target;
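/* For illustration, a pcmpistr builtin handled here; the last argument must
   be a compile-time 8-bit immediate, as enforced above:

     #include <nmmintrin.h>
     int idx (__m128i a, __m128i b)
     { return _mm_cmpistri (a, b, _SIDD_CMP_EQUAL_EACH); }  */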
33466 /* Subroutine of ix86_expand_builtin to take care of insns with
33467 variable number of operands. */
33469 static rtx
33470 ix86_expand_args_builtin (const struct builtin_description *d,
33471 tree exp, rtx target)
33473 rtx pat, real_target;
33474 unsigned int i, nargs;
33475 unsigned int nargs_constant = 0;
33476 unsigned int mask_pos = 0;
33477 int num_memory = 0;
33478 struct
33480 rtx op;
33481 enum machine_mode mode;
33482 } args[6];
33483 bool last_arg_count = false;
33484 enum insn_code icode = d->icode;
33485 const struct insn_data_d *insn_p = &insn_data[icode];
33486 enum machine_mode tmode = insn_p->operand[0].mode;
33487 enum machine_mode rmode = VOIDmode;
33488 bool swap = false;
33489 enum rtx_code comparison = d->comparison;
33491 switch ((enum ix86_builtin_func_type) d->flag)
33493 case V2DF_FTYPE_V2DF_ROUND:
33494 case V4DF_FTYPE_V4DF_ROUND:
33495 case V4SF_FTYPE_V4SF_ROUND:
33496 case V8SF_FTYPE_V8SF_ROUND:
33497 case V4SI_FTYPE_V4SF_ROUND:
33498 case V8SI_FTYPE_V8SF_ROUND:
33499 return ix86_expand_sse_round (d, exp, target);
33500 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33501 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33502 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33503 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33504 case INT_FTYPE_V8SF_V8SF_PTEST:
33505 case INT_FTYPE_V4DI_V4DI_PTEST:
33506 case INT_FTYPE_V4DF_V4DF_PTEST:
33507 case INT_FTYPE_V4SF_V4SF_PTEST:
33508 case INT_FTYPE_V2DI_V2DI_PTEST:
33509 case INT_FTYPE_V2DF_V2DF_PTEST:
33510 return ix86_expand_sse_ptest (d, exp, target);
33511 case FLOAT128_FTYPE_FLOAT128:
33512 case FLOAT_FTYPE_FLOAT:
33513 case INT_FTYPE_INT:
33514 case UINT64_FTYPE_INT:
33515 case UINT16_FTYPE_UINT16:
33516 case INT64_FTYPE_INT64:
33517 case INT64_FTYPE_V4SF:
33518 case INT64_FTYPE_V2DF:
33519 case INT_FTYPE_V16QI:
33520 case INT_FTYPE_V8QI:
33521 case INT_FTYPE_V8SF:
33522 case INT_FTYPE_V4DF:
33523 case INT_FTYPE_V4SF:
33524 case INT_FTYPE_V2DF:
33525 case INT_FTYPE_V32QI:
33526 case V16QI_FTYPE_V16QI:
33527 case V8SI_FTYPE_V8SF:
33528 case V8SI_FTYPE_V4SI:
33529 case V8HI_FTYPE_V8HI:
33530 case V8HI_FTYPE_V16QI:
33531 case V8QI_FTYPE_V8QI:
33532 case V8SF_FTYPE_V8SF:
33533 case V8SF_FTYPE_V8SI:
33534 case V8SF_FTYPE_V4SF:
33535 case V8SF_FTYPE_V8HI:
33536 case V4SI_FTYPE_V4SI:
33537 case V4SI_FTYPE_V16QI:
33538 case V4SI_FTYPE_V4SF:
33539 case V4SI_FTYPE_V8SI:
33540 case V4SI_FTYPE_V8HI:
33541 case V4SI_FTYPE_V4DF:
33542 case V4SI_FTYPE_V2DF:
33543 case V4HI_FTYPE_V4HI:
33544 case V4DF_FTYPE_V4DF:
33545 case V4DF_FTYPE_V4SI:
33546 case V4DF_FTYPE_V4SF:
33547 case V4DF_FTYPE_V2DF:
33548 case V4SF_FTYPE_V4SF:
33549 case V4SF_FTYPE_V4SI:
33550 case V4SF_FTYPE_V8SF:
33551 case V4SF_FTYPE_V4DF:
33552 case V4SF_FTYPE_V8HI:
33553 case V4SF_FTYPE_V2DF:
33554 case V2DI_FTYPE_V2DI:
33555 case V2DI_FTYPE_V16QI:
33556 case V2DI_FTYPE_V8HI:
33557 case V2DI_FTYPE_V4SI:
33558 case V2DF_FTYPE_V2DF:
33559 case V2DF_FTYPE_V4SI:
33560 case V2DF_FTYPE_V4DF:
33561 case V2DF_FTYPE_V4SF:
33562 case V2DF_FTYPE_V2SI:
33563 case V2SI_FTYPE_V2SI:
33564 case V2SI_FTYPE_V4SF:
33565 case V2SI_FTYPE_V2SF:
33566 case V2SI_FTYPE_V2DF:
33567 case V2SF_FTYPE_V2SF:
33568 case V2SF_FTYPE_V2SI:
33569 case V32QI_FTYPE_V32QI:
33570 case V32QI_FTYPE_V16QI:
33571 case V16HI_FTYPE_V16HI:
33572 case V16HI_FTYPE_V8HI:
33573 case V8SI_FTYPE_V8SI:
33574 case V16HI_FTYPE_V16QI:
33575 case V8SI_FTYPE_V16QI:
33576 case V4DI_FTYPE_V16QI:
33577 case V8SI_FTYPE_V8HI:
33578 case V4DI_FTYPE_V8HI:
33579 case V4DI_FTYPE_V4SI:
33580 case V4DI_FTYPE_V2DI:
33581 case HI_FTYPE_HI:
33582 case UINT_FTYPE_V2DF:
33583 case UINT_FTYPE_V4SF:
33584 case UINT64_FTYPE_V2DF:
33585 case UINT64_FTYPE_V4SF:
33586 case V16QI_FTYPE_V8DI:
33587 case V16HI_FTYPE_V16SI:
33588 case V16SI_FTYPE_HI:
33589 case V16SI_FTYPE_V16SI:
33590 case V16SI_FTYPE_INT:
33591 case V16SF_FTYPE_FLOAT:
33592 case V16SF_FTYPE_V4SF:
33593 case V16SF_FTYPE_V16SF:
33594 case V8HI_FTYPE_V8DI:
33595 case V8UHI_FTYPE_V8UHI:
33596 case V8SI_FTYPE_V8DI:
33597 case V8USI_FTYPE_V8USI:
33598 case V8SF_FTYPE_V8DF:
33599 case V8DI_FTYPE_QI:
33600 case V8DI_FTYPE_INT64:
33601 case V8DI_FTYPE_V4DI:
33602 case V8DI_FTYPE_V8DI:
33603 case V8DF_FTYPE_DOUBLE:
33604 case V8DF_FTYPE_V4DF:
33605 case V8DF_FTYPE_V8DF:
33606 case V8DF_FTYPE_V8SI:
33607 nargs = 1;
33608 break;
33609 case V4SF_FTYPE_V4SF_VEC_MERGE:
33610 case V2DF_FTYPE_V2DF_VEC_MERGE:
33611 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33612 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33613 case V16QI_FTYPE_V16QI_V16QI:
33614 case V16QI_FTYPE_V8HI_V8HI:
33615 case V16SI_FTYPE_V16SI_V16SI:
33616 case V16SF_FTYPE_V16SF_V16SF:
33617 case V16SF_FTYPE_V16SF_V16SI:
33618 case V8QI_FTYPE_V8QI_V8QI:
33619 case V8QI_FTYPE_V4HI_V4HI:
33620 case V8HI_FTYPE_V8HI_V8HI:
33621 case V8HI_FTYPE_V16QI_V16QI:
33622 case V8HI_FTYPE_V4SI_V4SI:
33623 case V8SF_FTYPE_V8SF_V8SF:
33624 case V8SF_FTYPE_V8SF_V8SI:
33625 case V8DI_FTYPE_V8DI_V8DI:
33626 case V8DF_FTYPE_V8DF_V8DF:
33627 case V8DF_FTYPE_V8DF_V8DI:
33628 case V4SI_FTYPE_V4SI_V4SI:
33629 case V4SI_FTYPE_V8HI_V8HI:
33630 case V4SI_FTYPE_V4SF_V4SF:
33631 case V4SI_FTYPE_V2DF_V2DF:
33632 case V4HI_FTYPE_V4HI_V4HI:
33633 case V4HI_FTYPE_V8QI_V8QI:
33634 case V4HI_FTYPE_V2SI_V2SI:
33635 case V4DF_FTYPE_V4DF_V4DF:
33636 case V4DF_FTYPE_V4DF_V4DI:
33637 case V4SF_FTYPE_V4SF_V4SF:
33638 case V4SF_FTYPE_V4SF_V4SI:
33639 case V4SF_FTYPE_V4SF_V2SI:
33640 case V4SF_FTYPE_V4SF_V2DF:
33641 case V4SF_FTYPE_V4SF_UINT:
33642 case V4SF_FTYPE_V4SF_UINT64:
33643 case V4SF_FTYPE_V4SF_DI:
33644 case V4SF_FTYPE_V4SF_SI:
33645 case V2DI_FTYPE_V2DI_V2DI:
33646 case V2DI_FTYPE_V16QI_V16QI:
33647 case V2DI_FTYPE_V4SI_V4SI:
33648 case V2UDI_FTYPE_V4USI_V4USI:
33649 case V2DI_FTYPE_V2DI_V16QI:
33650 case V2DI_FTYPE_V2DF_V2DF:
33651 case V2SI_FTYPE_V2SI_V2SI:
33652 case V2SI_FTYPE_V4HI_V4HI:
33653 case V2SI_FTYPE_V2SF_V2SF:
33654 case V2DF_FTYPE_V2DF_V2DF:
33655 case V2DF_FTYPE_V2DF_V4SF:
33656 case V2DF_FTYPE_V2DF_V2DI:
33657 case V2DF_FTYPE_V2DF_DI:
33658 case V2DF_FTYPE_V2DF_SI:
33659 case V2DF_FTYPE_V2DF_UINT:
33660 case V2DF_FTYPE_V2DF_UINT64:
33661 case V2SF_FTYPE_V2SF_V2SF:
33662 case V1DI_FTYPE_V1DI_V1DI:
33663 case V1DI_FTYPE_V8QI_V8QI:
33664 case V1DI_FTYPE_V2SI_V2SI:
33665 case V32QI_FTYPE_V16HI_V16HI:
33666 case V16HI_FTYPE_V8SI_V8SI:
33667 case V32QI_FTYPE_V32QI_V32QI:
33668 case V16HI_FTYPE_V32QI_V32QI:
33669 case V16HI_FTYPE_V16HI_V16HI:
33670 case V8SI_FTYPE_V4DF_V4DF:
33671 case V8SI_FTYPE_V8SI_V8SI:
33672 case V8SI_FTYPE_V16HI_V16HI:
33673 case V4DI_FTYPE_V4DI_V4DI:
33674 case V4DI_FTYPE_V8SI_V8SI:
33675 case V4UDI_FTYPE_V8USI_V8USI:
33676 case QI_FTYPE_V8DI_V8DI:
33677 case HI_FTYPE_V16SI_V16SI:
33678 if (comparison == UNKNOWN)
33679 return ix86_expand_binop_builtin (icode, exp, target);
33680 nargs = 2;
33681 break;
33682 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33683 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33684 gcc_assert (comparison != UNKNOWN);
33685 nargs = 2;
33686 swap = true;
33687 break;
33688 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33689 case V16HI_FTYPE_V16HI_SI_COUNT:
33690 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33691 case V8SI_FTYPE_V8SI_SI_COUNT:
33692 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33693 case V4DI_FTYPE_V4DI_INT_COUNT:
33694 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33695 case V8HI_FTYPE_V8HI_SI_COUNT:
33696 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33697 case V4SI_FTYPE_V4SI_SI_COUNT:
33698 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33699 case V4HI_FTYPE_V4HI_SI_COUNT:
33700 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33701 case V2DI_FTYPE_V2DI_SI_COUNT:
33702 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33703 case V2SI_FTYPE_V2SI_SI_COUNT:
33704 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33705 case V1DI_FTYPE_V1DI_SI_COUNT:
33706 nargs = 2;
33707 last_arg_count = true;
33708 break;
33709 case UINT64_FTYPE_UINT64_UINT64:
33710 case UINT_FTYPE_UINT_UINT:
33711 case UINT_FTYPE_UINT_USHORT:
33712 case UINT_FTYPE_UINT_UCHAR:
33713 case UINT16_FTYPE_UINT16_INT:
33714 case UINT8_FTYPE_UINT8_INT:
33715 case HI_FTYPE_HI_HI:
33716 case V16SI_FTYPE_V8DF_V8DF:
33717 nargs = 2;
33718 break;
33719 case V2DI_FTYPE_V2DI_INT_CONVERT:
33720 nargs = 2;
33721 rmode = V1TImode;
33722 nargs_constant = 1;
33723 break;
33724 case V4DI_FTYPE_V4DI_INT_CONVERT:
33725 nargs = 2;
33726 rmode = V2TImode;
33727 nargs_constant = 1;
33728 break;
33729 case V8HI_FTYPE_V8HI_INT:
33730 case V8HI_FTYPE_V8SF_INT:
33731 case V16HI_FTYPE_V16SF_INT:
33732 case V8HI_FTYPE_V4SF_INT:
33733 case V8SF_FTYPE_V8SF_INT:
33734 case V4SF_FTYPE_V16SF_INT:
33735 case V16SF_FTYPE_V16SF_INT:
33736 case V4SI_FTYPE_V4SI_INT:
33737 case V4SI_FTYPE_V8SI_INT:
33738 case V4HI_FTYPE_V4HI_INT:
33739 case V4DF_FTYPE_V4DF_INT:
33740 case V4DF_FTYPE_V8DF_INT:
33741 case V4SF_FTYPE_V4SF_INT:
33742 case V4SF_FTYPE_V8SF_INT:
33743 case V2DI_FTYPE_V2DI_INT:
33744 case V2DF_FTYPE_V2DF_INT:
33745 case V2DF_FTYPE_V4DF_INT:
33746 case V16HI_FTYPE_V16HI_INT:
33747 case V8SI_FTYPE_V8SI_INT:
33748 case V16SI_FTYPE_V16SI_INT:
33749 case V4SI_FTYPE_V16SI_INT:
33750 case V4DI_FTYPE_V4DI_INT:
33751 case V2DI_FTYPE_V4DI_INT:
33752 case V4DI_FTYPE_V8DI_INT:
33753 case HI_FTYPE_HI_INT:
33754 nargs = 2;
33755 nargs_constant = 1;
33756 break;
33757 case V16QI_FTYPE_V16QI_V16QI_V16QI:
33758 case V8SF_FTYPE_V8SF_V8SF_V8SF:
33759 case V4DF_FTYPE_V4DF_V4DF_V4DF:
33760 case V4SF_FTYPE_V4SF_V4SF_V4SF:
33761 case V2DF_FTYPE_V2DF_V2DF_V2DF:
33762 case V32QI_FTYPE_V32QI_V32QI_V32QI:
33763 case HI_FTYPE_V16SI_V16SI_HI:
33764 case QI_FTYPE_V8DI_V8DI_QI:
33765 case V16HI_FTYPE_V16SI_V16HI_HI:
33766 case V16QI_FTYPE_V16SI_V16QI_HI:
33767 case V16QI_FTYPE_V8DI_V16QI_QI:
33768 case V16SF_FTYPE_V16SF_V16SF_HI:
33769 case V16SF_FTYPE_V16SF_V16SF_V16SF:
33770 case V16SF_FTYPE_V16SF_V16SI_V16SF:
33771 case V16SF_FTYPE_V16SI_V16SF_HI:
33772 case V16SF_FTYPE_V16SI_V16SF_V16SF:
33773 case V16SF_FTYPE_V4SF_V16SF_HI:
33774 case V16SI_FTYPE_SI_V16SI_HI:
33775 case V16SI_FTYPE_V16HI_V16SI_HI:
33776 case V16SI_FTYPE_V16QI_V16SI_HI:
33777 case V16SI_FTYPE_V16SF_V16SI_HI:
33778 case V16SI_FTYPE_V16SI_V16SI_HI:
33779 case V16SI_FTYPE_V16SI_V16SI_V16SI:
33780 case V16SI_FTYPE_V4SI_V16SI_HI:
33781 case V2DI_FTYPE_V2DI_V2DI_V2DI:
33782 case V4DI_FTYPE_V4DI_V4DI_V4DI:
33783 case V8DF_FTYPE_V2DF_V8DF_QI:
33784 case V8DF_FTYPE_V4DF_V8DF_QI:
33785 case V8DF_FTYPE_V8DF_V8DF_QI:
33786 case V8DF_FTYPE_V8DF_V8DF_V8DF:
33787 case V8DF_FTYPE_V8DF_V8DI_V8DF:
33788 case V8DF_FTYPE_V8DI_V8DF_V8DF:
33789 case V8DF_FTYPE_V8SF_V8DF_QI:
33790 case V8DF_FTYPE_V8SI_V8DF_QI:
33791 case V8DI_FTYPE_DI_V8DI_QI:
33792 case V8DI_FTYPE_V16QI_V8DI_QI:
33793 case V8DI_FTYPE_V2DI_V8DI_QI:
33794 case V8DI_FTYPE_V4DI_V8DI_QI:
33795 case V8DI_FTYPE_V8DI_V8DI_QI:
33796 case V8DI_FTYPE_V8DI_V8DI_V8DI:
33797 case V8DI_FTYPE_V8HI_V8DI_QI:
33798 case V8DI_FTYPE_V8SI_V8DI_QI:
33799 case V8HI_FTYPE_V8DI_V8HI_QI:
33800 case V8SF_FTYPE_V8DF_V8SF_QI:
33801 case V8SI_FTYPE_V8DF_V8SI_QI:
33802 case V8SI_FTYPE_V8DI_V8SI_QI:
33803 case V4SI_FTYPE_V4SI_V4SI_V4SI:
33804 nargs = 3;
33805 break;
33806 case V32QI_FTYPE_V32QI_V32QI_INT:
33807 case V16HI_FTYPE_V16HI_V16HI_INT:
33808 case V16QI_FTYPE_V16QI_V16QI_INT:
33809 case V4DI_FTYPE_V4DI_V4DI_INT:
33810 case V8HI_FTYPE_V8HI_V8HI_INT:
33811 case V8SI_FTYPE_V8SI_V8SI_INT:
33812 case V8SI_FTYPE_V8SI_V4SI_INT:
33813 case V8SF_FTYPE_V8SF_V8SF_INT:
33814 case V8SF_FTYPE_V8SF_V4SF_INT:
33815 case V4SI_FTYPE_V4SI_V4SI_INT:
33816 case V4DF_FTYPE_V4DF_V4DF_INT:
33817 case V16SF_FTYPE_V16SF_V16SF_INT:
33818 case V16SF_FTYPE_V16SF_V4SF_INT:
33819 case V16SI_FTYPE_V16SI_V4SI_INT:
33820 case V4DF_FTYPE_V4DF_V2DF_INT:
33821 case V4SF_FTYPE_V4SF_V4SF_INT:
33822 case V2DI_FTYPE_V2DI_V2DI_INT:
33823 case V4DI_FTYPE_V4DI_V2DI_INT:
33824 case V2DF_FTYPE_V2DF_V2DF_INT:
33825 case QI_FTYPE_V8DI_V8DI_INT:
33826 case QI_FTYPE_V8DF_V8DF_INT:
33827 case QI_FTYPE_V2DF_V2DF_INT:
33828 case QI_FTYPE_V4SF_V4SF_INT:
33829 case HI_FTYPE_V16SI_V16SI_INT:
33830 case HI_FTYPE_V16SF_V16SF_INT:
33831 nargs = 3;
33832 nargs_constant = 1;
33833 break;
33834 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
33835 nargs = 3;
33836 rmode = V4DImode;
33837 nargs_constant = 1;
33838 break;
33839 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
33840 nargs = 3;
33841 rmode = V2DImode;
33842 nargs_constant = 1;
33843 break;
33844 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
33845 nargs = 3;
33846 rmode = DImode;
33847 nargs_constant = 1;
33848 break;
33849 case V2DI_FTYPE_V2DI_UINT_UINT:
33850 nargs = 3;
33851 nargs_constant = 2;
33852 break;
33853 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
33854 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
33855 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
33856 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
33857 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
33858 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
33859 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
33860 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
33861 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
33862 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
33863 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
33864 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
33865 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
33866 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
33867 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
33868 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
33869 nargs = 4;
33870 break;
33871 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
33872 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
33873 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
33874 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
33875 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
33876 nargs = 4;
33877 nargs_constant = 1;
33878 break;
33879 case QI_FTYPE_V2DF_V2DF_INT_QI:
33880 case QI_FTYPE_V4SF_V4SF_INT_QI:
33881 nargs = 4;
33882 mask_pos = 1;
33883 nargs_constant = 1;
33884 break;
33885 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
33886 nargs = 4;
33887 nargs_constant = 2;
33888 break;
33889 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
33890 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
33891 nargs = 4;
33892 break;
33893 case QI_FTYPE_V8DI_V8DI_INT_QI:
33894 case HI_FTYPE_V16SI_V16SI_INT_HI:
33895 case QI_FTYPE_V8DF_V8DF_INT_QI:
33896 case HI_FTYPE_V16SF_V16SF_INT_HI:
33897 mask_pos = 1;
33898 nargs = 4;
33899 nargs_constant = 1;
33900 break;
33901 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
33902 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
33903 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
33904 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
33905 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
33906 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
33907 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
33908 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
33909 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
33910 nargs = 4;
33911 mask_pos = 2;
33912 nargs_constant = 1;
33913 break;
33914 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
33915 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
33916 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
33917 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
33918 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
33919 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
33920 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
33921 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
33922 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
33923 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
33924 nargs = 5;
33925 mask_pos = 2;
33926 nargs_constant = 1;
33927 break;
33928 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
33929 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
33930 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
33931 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
33932 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
33933 nargs = 5;
33934 mask_pos = 1;
33935 nargs_constant = 1;
33936 break;
33938 default:
33939 gcc_unreachable ();
33942 gcc_assert (nargs <= ARRAY_SIZE (args));
33944 if (comparison != UNKNOWN)
33946 gcc_assert (nargs == 2);
33947 return ix86_expand_sse_compare (d, exp, target, swap);
33950 if (rmode == VOIDmode || rmode == tmode)
33952 if (optimize
33953 || target == 0
33954 || GET_MODE (target) != tmode
33955 || !insn_p->operand[0].predicate (target, tmode))
33956 target = gen_reg_rtx (tmode);
33957 real_target = target;
33959 else
33961 real_target = gen_reg_rtx (tmode);
33962 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
33965 for (i = 0; i < nargs; i++)
33967 tree arg = CALL_EXPR_ARG (exp, i);
33968 rtx op = expand_normal (arg);
33969 enum machine_mode mode = insn_p->operand[i + 1].mode;
33970 bool match = insn_p->operand[i + 1].predicate (op, mode);
33972 if (last_arg_count && (i + 1) == nargs)
33974 /* SIMD shift insns take either an 8-bit immediate or a
33975 register as the count. But builtin functions take an int as
33976 the count. If the count doesn't match, we put it in a register. */
33977 if (!match)
33979 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
33980 if (!insn_p->operand[i + 1].predicate (op, mode))
33981 op = copy_to_reg (op);
33984 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
33985 (!mask_pos && (nargs - i) <= nargs_constant))
33987 if (!match)
33988 switch (icode)
33990 case CODE_FOR_avx2_inserti128:
33991 case CODE_FOR_avx2_extracti128:
33992 error ("the last argument must be an 1-bit immediate");
33993 return const0_rtx;
33995 case CODE_FOR_avx512f_cmpv8di3_mask:
33996 case CODE_FOR_avx512f_cmpv16si3_mask:
33997 case CODE_FOR_avx512f_ucmpv8di3_mask:
33998 case CODE_FOR_avx512f_ucmpv16si3_mask:
33999 error ("the last argument must be a 3-bit immediate");
34000 return const0_rtx;
34002 case CODE_FOR_sse4_1_roundsd:
34003 case CODE_FOR_sse4_1_roundss:
34005 case CODE_FOR_sse4_1_roundpd:
34006 case CODE_FOR_sse4_1_roundps:
34007 case CODE_FOR_avx_roundpd256:
34008 case CODE_FOR_avx_roundps256:
34010 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
34011 case CODE_FOR_sse4_1_roundps_sfix:
34012 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
34013 case CODE_FOR_avx_roundps_sfix256:
34015 case CODE_FOR_sse4_1_blendps:
34016 case CODE_FOR_avx_blendpd256:
34017 case CODE_FOR_avx_vpermilv4df:
34018 case CODE_FOR_avx512f_getmantv8df_mask:
34019 case CODE_FOR_avx512f_getmantv16sf_mask:
34020 error ("the last argument must be a 4-bit immediate");
34021 return const0_rtx;
34023 case CODE_FOR_sha1rnds4:
34024 case CODE_FOR_sse4_1_blendpd:
34025 case CODE_FOR_avx_vpermilv2df:
34026 case CODE_FOR_xop_vpermil2v2df3:
34027 case CODE_FOR_xop_vpermil2v4sf3:
34028 case CODE_FOR_xop_vpermil2v4df3:
34029 case CODE_FOR_xop_vpermil2v8sf3:
34030 case CODE_FOR_avx512f_vinsertf32x4_mask:
34031 case CODE_FOR_avx512f_vinserti32x4_mask:
34032 case CODE_FOR_avx512f_vextractf32x4_mask:
34033 case CODE_FOR_avx512f_vextracti32x4_mask:
34034 error ("the last argument must be a 2-bit immediate");
34035 return const0_rtx;
34037 case CODE_FOR_avx_vextractf128v4df:
34038 case CODE_FOR_avx_vextractf128v8sf:
34039 case CODE_FOR_avx_vextractf128v8si:
34040 case CODE_FOR_avx_vinsertf128v4df:
34041 case CODE_FOR_avx_vinsertf128v8sf:
34042 case CODE_FOR_avx_vinsertf128v8si:
34043 case CODE_FOR_avx512f_vinsertf64x4_mask:
34044 case CODE_FOR_avx512f_vinserti64x4_mask:
34045 case CODE_FOR_avx512f_vextractf64x4_mask:
34046 case CODE_FOR_avx512f_vextracti64x4_mask:
34047 error ("the last argument must be a 1-bit immediate");
34048 return const0_rtx;
34050 case CODE_FOR_avx_vmcmpv2df3:
34051 case CODE_FOR_avx_vmcmpv4sf3:
34052 case CODE_FOR_avx_cmpv2df3:
34053 case CODE_FOR_avx_cmpv4sf3:
34054 case CODE_FOR_avx_cmpv4df3:
34055 case CODE_FOR_avx_cmpv8sf3:
34056 case CODE_FOR_avx512f_cmpv8df3_mask:
34057 case CODE_FOR_avx512f_cmpv16sf3_mask:
34058 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34059 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34060 error ("the last argument must be a 5-bit immediate");
34061 return const0_rtx;
34063 default:
34064 switch (nargs_constant)
34066 case 2:
34067 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34068 (!mask_pos && (nargs - i) == nargs_constant))
34070 error ("the next-to-last argument must be an 8-bit immediate");
34071 break;
34073 case 1:
34074 error ("the last argument must be an 8-bit immediate");
34075 break;
34076 default:
34077 gcc_unreachable ();
34079 return const0_rtx;
34082 else
34084 if (VECTOR_MODE_P (mode))
34085 op = safe_vector_operand (op, mode);
34087 /* If we aren't optimizing, only allow one memory operand to
34088 be generated. */
34089 if (memory_operand (op, mode))
34090 num_memory++;
34092 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34094 if (optimize || !match || num_memory > 1)
34095 op = copy_to_mode_reg (mode, op);
34097 else
34099 op = copy_to_reg (op);
34100 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34104 args[i].op = op;
34105 args[i].mode = mode;
34108 switch (nargs)
34110 case 1:
34111 pat = GEN_FCN (icode) (real_target, args[0].op);
34112 break;
34113 case 2:
34114 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34115 break;
34116 case 3:
34117 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34118 args[2].op);
34119 break;
34120 case 4:
34121 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34122 args[2].op, args[3].op);
34123 break;
34124 case 5:
34125 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34126 args[2].op, args[3].op, args[4].op);
break;
34127 case 6:
34128 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34129 args[2].op, args[3].op, args[4].op,
34130 args[5].op);
34131 break;
34132 default:
34133 gcc_unreachable ();
34136 if (! pat)
34137 return 0;
34139 emit_insn (pat);
34140 return target;
34143 /* Transform a pattern of the following layout:
34144 (parallel [
34145 (set (A B))
34146 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
34148 into:
34149 (set (A B))
and similarly a pattern of the layout:
34152 (parallel [ A B
...
34154 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
... ])
34157 into:
34158 (parallel [ A B ... ]) */
34160 static rtx
34161 ix86_erase_embedded_rounding (rtx pat)
34163 if (GET_CODE (pat) == INSN)
34164 pat = PATTERN (pat);
34166 gcc_assert (GET_CODE (pat) == PARALLEL);
34168 if (XVECLEN (pat, 0) == 2)
34170 rtx p0 = XVECEXP (pat, 0, 0);
34171 rtx p1 = XVECEXP (pat, 0, 1);
34173 gcc_assert (GET_CODE (p0) == SET
34174 && GET_CODE (p1) == UNSPEC
34175 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
34177 return p0;
34179 else
34181 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
34182 int i = 0;
34183 int j = 0;
34185 for (; i < XVECLEN (pat, 0); ++i)
34187 rtx elem = XVECEXP (pat, 0, i);
34188 if (GET_CODE (elem) != UNSPEC
34189 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
34190 res [j++] = elem;
34193 /* No more than 1 occurrence was removed. */
34194 gcc_assert (j >= XVECLEN (pat, 0) - 1);
34196 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
34200 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34201 with rounding. */
34202 static rtx
34203 ix86_expand_sse_comi_round (const struct builtin_description *d,
34204 tree exp, rtx target)
34206 rtx pat, set_dst;
34207 tree arg0 = CALL_EXPR_ARG (exp, 0);
34208 tree arg1 = CALL_EXPR_ARG (exp, 1);
34209 tree arg2 = CALL_EXPR_ARG (exp, 2);
34210 tree arg3 = CALL_EXPR_ARG (exp, 3);
34211 rtx op0 = expand_normal (arg0);
34212 rtx op1 = expand_normal (arg1);
34213 rtx op2 = expand_normal (arg2);
34214 rtx op3 = expand_normal (arg3);
34215 enum insn_code icode = d->icode;
34216 const struct insn_data_d *insn_p = &insn_data[icode];
34217 enum machine_mode mode0 = insn_p->operand[0].mode;
34218 enum machine_mode mode1 = insn_p->operand[1].mode;
34219 enum rtx_code comparison = UNEQ;
34220 bool need_ucomi = false;
34222 /* See avxintrin.h for values. */
34223 enum rtx_code comi_comparisons[32] =
34225 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34226 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34227 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34229 bool need_ucomi_values[32] =
34231 true, false, false, true, true, false, false, true,
34232 true, false, false, true, true, false, false, true,
34233 false, true, true, false, false, true, true, false,
34234 false, true, true, false, false, true, true, false
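/* For example, predicate 0 (_CMP_EQ_OQ in avxintrin.h) maps to UNEQ
   in the table above and uses the unordered (ucomi) form. */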
34237 if (!CONST_INT_P (op2))
34239 error ("the third argument must be a comparison constant");
34240 return const0_rtx;
34242 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34244 error ("incorrect comparison mode");
34245 return const0_rtx;
34248 if (!insn_p->operand[2].predicate (op3, SImode))
34250 error ("incorrect rounding operand");
34251 return const0_rtx;
34254 comparison = comi_comparisons[INTVAL (op2)];
34255 need_ucomi = need_ucomi_values[INTVAL (op2)];
34257 if (VECTOR_MODE_P (mode0))
34258 op0 = safe_vector_operand (op0, mode0);
34259 if (VECTOR_MODE_P (mode1))
34260 op1 = safe_vector_operand (op1, mode1);
34262 target = gen_reg_rtx (SImode);
34263 emit_move_insn (target, const0_rtx);
34264 target = gen_rtx_SUBREG (QImode, target, 0);
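/* The comparison result is written into the low byte via the
   STRICT_LOW_PART set below; SUBREG_REG (target) then returns the
   zero-initialized SImode register at the end. */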
34266 if ((optimize && !register_operand (op0, mode0))
34267 || !insn_p->operand[0].predicate (op0, mode0))
34268 op0 = copy_to_mode_reg (mode0, op0);
34269 if ((optimize && !register_operand (op1, mode1))
34270 || !insn_p->operand[1].predicate (op1, mode1))
34271 op1 = copy_to_mode_reg (mode1, op1);
34273 if (need_ucomi)
34274 icode = icode == CODE_FOR_sse_comi_round
34275 ? CODE_FOR_sse_ucomi_round
34276 : CODE_FOR_sse2_ucomi_round;
34278 pat = GEN_FCN (icode) (op0, op1, op3);
34279 if (! pat)
34280 return 0;
34282 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34283 if (INTVAL (op3) == NO_ROUND)
34285 pat = ix86_erase_embedded_rounding (pat);
34286 if (! pat)
34287 return 0;
34289 set_dst = SET_DEST (pat);
34291 else
34293 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
34294 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
34297 emit_insn (pat);
34298 emit_insn (gen_rtx_SET (VOIDmode,
34299 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34300 gen_rtx_fmt_ee (comparison, QImode,
34301 set_dst,
34302 const0_rtx)));
34304 return SUBREG_REG (target);
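/* Subroutine of ix86_expand_builtin to take care of insns with embedded
   rounding: the last argument selects the rounding mode, and when it is
   NO_ROUND the embedded-rounding parallel is stripped again via
   ix86_erase_embedded_rounding. */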
34307 static rtx
34308 ix86_expand_round_builtin (const struct builtin_description *d,
34309 tree exp, rtx target)
34311 rtx pat;
34312 unsigned int i, nargs;
34313 struct
34315 rtx op;
34316 enum machine_mode mode;
34317 } args[6];
34318 enum insn_code icode = d->icode;
34319 const struct insn_data_d *insn_p = &insn_data[icode];
34320 enum machine_mode tmode = insn_p->operand[0].mode;
34321 unsigned int nargs_constant = 0;
34322 unsigned int redundant_embed_rnd = 0;
34324 switch ((enum ix86_builtin_func_type) d->flag)
34326 case UINT64_FTYPE_V2DF_INT:
34327 case UINT64_FTYPE_V4SF_INT:
34328 case UINT_FTYPE_V2DF_INT:
34329 case UINT_FTYPE_V4SF_INT:
34330 case INT64_FTYPE_V2DF_INT:
34331 case INT64_FTYPE_V4SF_INT:
34332 case INT_FTYPE_V2DF_INT:
34333 case INT_FTYPE_V4SF_INT:
34334 nargs = 2;
34335 break;
34336 case V4SF_FTYPE_V4SF_UINT_INT:
34337 case V4SF_FTYPE_V4SF_UINT64_INT:
34338 case V2DF_FTYPE_V2DF_UINT64_INT:
34339 case V4SF_FTYPE_V4SF_INT_INT:
34340 case V4SF_FTYPE_V4SF_INT64_INT:
34341 case V2DF_FTYPE_V2DF_INT64_INT:
34342 case V4SF_FTYPE_V4SF_V4SF_INT:
34343 case V2DF_FTYPE_V2DF_V2DF_INT:
34344 case V4SF_FTYPE_V4SF_V2DF_INT:
34345 case V2DF_FTYPE_V2DF_V4SF_INT:
34346 nargs = 3;
34347 break;
34348 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34349 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34350 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34351 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34352 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34353 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34354 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34355 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34356 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34357 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34358 nargs = 4;
34359 break;
34360 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34361 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34362 nargs_constant = 2;
34363 nargs = 4;
34364 break;
34365 case INT_FTYPE_V4SF_V4SF_INT_INT:
34366 case INT_FTYPE_V2DF_V2DF_INT_INT:
34367 return ix86_expand_sse_comi_round (d, exp, target);
34368 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
34369 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34370 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34371 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34372 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34373 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34374 nargs = 5;
34375 break;
34376 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34377 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34378 nargs_constant = 4;
34379 nargs = 5;
34380 break;
34381 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
34382 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
34383 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
34384 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
34385 nargs_constant = 3;
34386 nargs = 5;
34387 break;
34388 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34389 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34390 nargs = 6;
34391 nargs_constant = 4;
34392 break;
34393 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34394 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34395 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34396 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34397 nargs = 6;
34398 nargs_constant = 3;
34399 break;
34400 default:
34401 gcc_unreachable ();
34403 gcc_assert (nargs <= ARRAY_SIZE (args));
34405 if (optimize
34406 || target == 0
34407 || GET_MODE (target) != tmode
34408 || !insn_p->operand[0].predicate (target, tmode))
34409 target = gen_reg_rtx (tmode);
34411 for (i = 0; i < nargs; i++)
34413 tree arg = CALL_EXPR_ARG (exp, i);
34414 rtx op = expand_normal (arg);
34415 enum machine_mode mode = insn_p->operand[i + 1].mode;
34416 bool match = insn_p->operand[i + 1].predicate (op, mode);
34418 if (i == nargs - nargs_constant)
34420 if (!match)
34422 switch (icode)
34424 case CODE_FOR_avx512f_getmantv8df_mask_round:
34425 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34426 case CODE_FOR_avx512f_getmantv2df_round:
34427 case CODE_FOR_avx512f_getmantv4sf_round:
34428 error ("the immediate argument must be a 4-bit immediate");
34429 return const0_rtx;
34430 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34431 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34432 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34433 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34434 error ("the immediate argument must be a 5-bit immediate");
34435 return const0_rtx;
34436 default:
34437 error ("the immediate argument must be an 8-bit immediate");
34438 return const0_rtx;
34442 else if (i == nargs - 1)
34444 if (!insn_p->operand[nargs].predicate (op, SImode))
34446 error ("incorrect rounding operand");
34447 return const0_rtx;
34450 /* If there is no rounding use normal version of the pattern. */
34451 if (INTVAL (op) == NO_ROUND)
34452 redundant_embed_rnd = 1;
34454 else
34456 if (VECTOR_MODE_P (mode))
34457 op = safe_vector_operand (op, mode);
34459 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34461 if (optimize || !match)
34462 op = copy_to_mode_reg (mode, op);
34464 else
34466 op = copy_to_reg (op);
34467 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34471 args[i].op = op;
34472 args[i].mode = mode;
34475 switch (nargs)
34477 case 1:
34478 pat = GEN_FCN (icode) (target, args[0].op);
34479 break;
34480 case 2:
34481 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34482 break;
34483 case 3:
34484 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34485 args[2].op);
34486 break;
34487 case 4:
34488 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34489 args[2].op, args[3].op);
34490 break;
34491 case 5:
34492 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34493 args[2].op, args[3].op, args[4].op);
break;
34494 case 6:
34495 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34496 args[2].op, args[3].op, args[4].op,
34497 args[5].op);
34498 break;
34499 default:
34500 gcc_unreachable ();
34503 if (!pat)
34504 return 0;
34506 if (redundant_embed_rnd)
34507 pat = ix86_erase_embedded_rounding (pat);
34509 emit_insn (pat);
34510 return target;
34513 /* Subroutine of ix86_expand_builtin to take care of special insns
34514 with variable number of operands. */
34516 static rtx
34517 ix86_expand_special_args_builtin (const struct builtin_description *d,
34518 tree exp, rtx target)
34520 tree arg;
34521 rtx pat, op;
34522 unsigned int i, nargs, arg_adjust, memory;
34523 bool aligned_mem = false;
34524 struct
34526 rtx op;
34527 enum machine_mode mode;
34528 } args[3];
34529 enum insn_code icode = d->icode;
34530 bool last_arg_constant = false;
34531 const struct insn_data_d *insn_p = &insn_data[icode];
34532 enum machine_mode tmode = insn_p->operand[0].mode;
34533 enum { load, store } klass;
34535 switch ((enum ix86_builtin_func_type) d->flag)
34537 case VOID_FTYPE_VOID:
34538 emit_insn (GEN_FCN (icode) (target));
34539 return 0;
34540 case VOID_FTYPE_UINT64:
34541 case VOID_FTYPE_UNSIGNED:
34542 nargs = 0;
34543 klass = store;
34544 memory = 0;
34545 break;
34547 case INT_FTYPE_VOID:
34548 case UINT64_FTYPE_VOID:
34549 case UNSIGNED_FTYPE_VOID:
34550 nargs = 0;
34551 klass = load;
34552 memory = 0;
34553 break;
34554 case UINT64_FTYPE_PUNSIGNED:
34555 case V2DI_FTYPE_PV2DI:
34556 case V4DI_FTYPE_PV4DI:
34557 case V32QI_FTYPE_PCCHAR:
34558 case V16QI_FTYPE_PCCHAR:
34559 case V8SF_FTYPE_PCV4SF:
34560 case V8SF_FTYPE_PCFLOAT:
34561 case V4SF_FTYPE_PCFLOAT:
34562 case V4DF_FTYPE_PCV2DF:
34563 case V4DF_FTYPE_PCDOUBLE:
34564 case V2DF_FTYPE_PCDOUBLE:
34565 case VOID_FTYPE_PVOID:
34566 case V16SI_FTYPE_PV4SI:
34567 case V16SF_FTYPE_PV4SF:
34568 case V8DI_FTYPE_PV4DI:
34569 case V8DI_FTYPE_PV8DI:
34570 case V8DF_FTYPE_PV4DF:
34571 nargs = 1;
34572 klass = load;
34573 memory = 0;
34574 switch (icode)
34576 case CODE_FOR_sse4_1_movntdqa:
34577 case CODE_FOR_avx2_movntdqa:
34578 case CODE_FOR_avx512f_movntdqa:
34579 aligned_mem = true;
34580 break;
34581 default:
34582 break;
34584 break;
34585 case VOID_FTYPE_PV2SF_V4SF:
34586 case VOID_FTYPE_PV8DI_V8DI:
34587 case VOID_FTYPE_PV4DI_V4DI:
34588 case VOID_FTYPE_PV2DI_V2DI:
34589 case VOID_FTYPE_PCHAR_V32QI:
34590 case VOID_FTYPE_PCHAR_V16QI:
34591 case VOID_FTYPE_PFLOAT_V16SF:
34592 case VOID_FTYPE_PFLOAT_V8SF:
34593 case VOID_FTYPE_PFLOAT_V4SF:
34594 case VOID_FTYPE_PDOUBLE_V8DF:
34595 case VOID_FTYPE_PDOUBLE_V4DF:
34596 case VOID_FTYPE_PDOUBLE_V2DF:
34597 case VOID_FTYPE_PLONGLONG_LONGLONG:
34598 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34599 case VOID_FTYPE_PINT_INT:
34600 nargs = 1;
34601 klass = store;
34602 /* Reserve memory operand for target. */
34603 memory = ARRAY_SIZE (args);
34604 switch (icode)
34606 /* These builtins and instructions require the memory
34607 to be properly aligned. */
34608 case CODE_FOR_avx_movntv4di:
34609 case CODE_FOR_sse2_movntv2di:
34610 case CODE_FOR_avx_movntv8sf:
34611 case CODE_FOR_sse_movntv4sf:
34612 case CODE_FOR_sse4a_vmmovntv4sf:
34613 case CODE_FOR_avx_movntv4df:
34614 case CODE_FOR_sse2_movntv2df:
34615 case CODE_FOR_sse4a_vmmovntv2df:
34616 case CODE_FOR_sse2_movntidi:
34617 case CODE_FOR_sse_movntq:
34618 case CODE_FOR_sse2_movntisi:
34619 case CODE_FOR_avx512f_movntv16sf:
34620 case CODE_FOR_avx512f_movntv8df:
34621 case CODE_FOR_avx512f_movntv8di:
34622 aligned_mem = true;
34623 break;
34624 default:
34625 break;
34627 break;
34628 case V4SF_FTYPE_V4SF_PCV2SF:
34629 case V2DF_FTYPE_V2DF_PCDOUBLE:
34630 nargs = 2;
34631 klass = load;
34632 memory = 1;
34633 break;
34634 case V8SF_FTYPE_PCV8SF_V8SI:
34635 case V4DF_FTYPE_PCV4DF_V4DI:
34636 case V4SF_FTYPE_PCV4SF_V4SI:
34637 case V2DF_FTYPE_PCV2DF_V2DI:
34638 case V8SI_FTYPE_PCV8SI_V8SI:
34639 case V4DI_FTYPE_PCV4DI_V4DI:
34640 case V4SI_FTYPE_PCV4SI_V4SI:
34641 case V2DI_FTYPE_PCV2DI_V2DI:
34642 nargs = 2;
34643 klass = load;
34644 memory = 0;
34645 break;
34646 case VOID_FTYPE_PV8DF_V8DF_QI:
34647 case VOID_FTYPE_PV16SF_V16SF_HI:
34648 case VOID_FTYPE_PV8DI_V8DI_QI:
34649 case VOID_FTYPE_PV16SI_V16SI_HI:
34650 switch (icode)
34652 /* These builtins and instructions require the memory
34653 to be properly aligned. */
34654 case CODE_FOR_avx512f_storev16sf_mask:
34655 case CODE_FOR_avx512f_storev16si_mask:
34656 case CODE_FOR_avx512f_storev8df_mask:
34657 case CODE_FOR_avx512f_storev8di_mask:
34658 aligned_mem = true;
34659 break;
34660 default:
34661 break;
34663 /* FALLTHRU */
34664 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34665 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34666 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34667 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34668 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34669 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34670 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34671 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34672 case VOID_FTYPE_PDOUBLE_V2DF_QI:
34673 case VOID_FTYPE_PFLOAT_V4SF_QI:
34674 case VOID_FTYPE_PV8SI_V8DI_QI:
34675 case VOID_FTYPE_PV8HI_V8DI_QI:
34676 case VOID_FTYPE_PV16HI_V16SI_HI:
34677 case VOID_FTYPE_PV16QI_V8DI_QI:
34678 case VOID_FTYPE_PV16QI_V16SI_HI:
34679 nargs = 2;
34680 klass = store;
34681 /* Reserve memory operand for target. */
34682 memory = ARRAY_SIZE (args);
34683 break;
34684 case V16SF_FTYPE_PCV16SF_V16SF_HI:
34685 case V16SI_FTYPE_PCV16SI_V16SI_HI:
34686 case V8DF_FTYPE_PCV8DF_V8DF_QI:
34687 case V8DI_FTYPE_PCV8DI_V8DI_QI:
34688 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
34689 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
34690 nargs = 3;
34691 klass = load;
34692 memory = 0;
34693 switch (icode)
34695 /* These builtins and instructions require the memory
34696 to be properly aligned. */
34697 case CODE_FOR_avx512f_loadv16sf_mask:
34698 case CODE_FOR_avx512f_loadv16si_mask:
34699 case CODE_FOR_avx512f_loadv8df_mask:
34700 case CODE_FOR_avx512f_loadv8di_mask:
34701 aligned_mem = true;
34702 break;
34703 default:
34704 break;
34706 break;
34707 case VOID_FTYPE_UINT_UINT_UINT:
34708 case VOID_FTYPE_UINT64_UINT_UINT:
34709 case UCHAR_FTYPE_UINT_UINT_UINT:
34710 case UCHAR_FTYPE_UINT64_UINT_UINT:
34711 nargs = 3;
34712 klass = load;
34713 memory = ARRAY_SIZE (args);
34714 last_arg_constant = true;
34715 break;
34716 default:
34717 gcc_unreachable ();
34720 gcc_assert (nargs <= ARRAY_SIZE (args));
34722 if (klass == store)
34724 arg = CALL_EXPR_ARG (exp, 0);
34725 op = expand_normal (arg);
34726 gcc_assert (target == 0);
34727 if (memory)
34729 op = ix86_zero_extend_to_Pmode (op);
34730 target = gen_rtx_MEM (tmode, op);
34731 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
34732 on it. Try to improve it using get_pointer_alignment,
34733 and if the special builtin is one that requires strict
34734 mode alignment, also from its GET_MODE_ALIGNMENT.
34735 Failure to do so could lead to ix86_legitimate_combined_insn
34736 rejecting all changes to such insns. */
34737 unsigned int align = get_pointer_alignment (arg);
34738 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
34739 align = GET_MODE_ALIGNMENT (tmode);
34740 if (MEM_ALIGN (target) < align)
34741 set_mem_align (target, align);
34743 else
34744 target = force_reg (tmode, op);
34745 arg_adjust = 1;
34747 else
34749 arg_adjust = 0;
34750 if (optimize
34751 || target == 0
34752 || !register_operand (target, tmode)
34753 || GET_MODE (target) != tmode)
34754 target = gen_reg_rtx (tmode);
34757 for (i = 0; i < nargs; i++)
34759 enum machine_mode mode = insn_p->operand[i + 1].mode;
34760 bool match;
34762 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
34763 op = expand_normal (arg);
34764 match = insn_p->operand[i + 1].predicate (op, mode);
34766 if (last_arg_constant && (i + 1) == nargs)
34768 if (!match)
34770 if (icode == CODE_FOR_lwp_lwpvalsi3
34771 || icode == CODE_FOR_lwp_lwpinssi3
34772 || icode == CODE_FOR_lwp_lwpvaldi3
34773 || icode == CODE_FOR_lwp_lwpinsdi3)
34774 error ("the last argument must be a 32-bit immediate");
34775 else
34776 error ("the last argument must be an 8-bit immediate");
34777 return const0_rtx;
34780 else
34782 if (i == memory)
34784 /* This must be the memory operand. */
34785 op = ix86_zero_extend_to_Pmode (op);
34786 op = gen_rtx_MEM (mode, op);
34787 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
34788 on it. Try to improve it using get_pointer_alignment,
34789 and if the special builtin is one that requires strict
34790 mode alignment, also from its GET_MODE_ALIGNMENT.
34791 Failure to do so could lead to ix86_legitimate_combined_insn
34792 rejecting all changes to such insns. */
34793 unsigned int align = get_pointer_alignment (arg);
34794 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
34795 align = GET_MODE_ALIGNMENT (mode);
34796 if (MEM_ALIGN (op) < align)
34797 set_mem_align (op, align);
34799 else
34801 /* This must be a register. */
34802 if (VECTOR_MODE_P (mode))
34803 op = safe_vector_operand (op, mode);
34805 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34806 op = copy_to_mode_reg (mode, op);
34807 else
34809 op = copy_to_reg (op);
34810 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34815 args[i].op = op;
34816 args[i].mode = mode;
34819 switch (nargs)
34821 case 0:
34822 pat = GEN_FCN (icode) (target);
34823 break;
34824 case 1:
34825 pat = GEN_FCN (icode) (target, args[0].op);
34826 break;
34827 case 2:
34828 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34829 break;
34830 case 3:
34831 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34832 break;
34833 default:
34834 gcc_unreachable ();
34837 if (! pat)
34838 return 0;
34839 emit_insn (pat);
34840 return klass == store ? 0 : target;
34843 /* Return the integer constant in ARG. Constrain it to be in the range
34844 of the subparts of VEC_TYPE; issue an error if not. */
34846 static int
34847 get_element_number (tree vec_type, tree arg)
34849 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
34851 if (!tree_fits_uhwi_p (arg)
34852 || (elt = tree_to_uhwi (arg), elt > max))
34854 error ("selector must be an integer constant in the range 0..%wi", max);
34855 return 0;
34858 return elt;
34861 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34862 ix86_expand_vector_init. We DO have language-level syntax for this, in
34863 the form of (type){ init-list }. Except that since we can't place emms
34864 instructions from inside the compiler, we can't allow the use of MMX
34865 registers unless the user explicitly asks for it. So we do *not* define
34866 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
34867 we have builtins invoked by mmintrin.h that give us license to emit
34868 these sorts of instructions. */
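/* For instance, _mm_set_pi32 in mmintrin.h expands to
   __builtin_ia32_vec_init_v2si, which reaches this routine through
   IX86_BUILTIN_VEC_INIT_V2SI below. */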
34870 static rtx
34871 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
34873 enum machine_mode tmode = TYPE_MODE (type);
34874 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
34875 int i, n_elt = GET_MODE_NUNITS (tmode);
34876 rtvec v = rtvec_alloc (n_elt);
34878 gcc_assert (VECTOR_MODE_P (tmode));
34879 gcc_assert (call_expr_nargs (exp) == n_elt);
34881 for (i = 0; i < n_elt; ++i)
34883 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
34884 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
34887 if (!target || !register_operand (target, tmode))
34888 target = gen_reg_rtx (tmode);
34890 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
34891 return target;
34894 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34895 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
34896 had a language-level syntax for referencing vector elements. */
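/* For example, _mm_extract_pi16 in xmmintrin.h expands to
   __builtin_ia32_vec_ext_v4hi and is routed here via
   IX86_BUILTIN_VEC_EXT_V4HI below. */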
34898 static rtx
34899 ix86_expand_vec_ext_builtin (tree exp, rtx target)
34901 enum machine_mode tmode, mode0;
34902 tree arg0, arg1;
34903 int elt;
34904 rtx op0;
34906 arg0 = CALL_EXPR_ARG (exp, 0);
34907 arg1 = CALL_EXPR_ARG (exp, 1);
34909 op0 = expand_normal (arg0);
34910 elt = get_element_number (TREE_TYPE (arg0), arg1);
34912 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34913 mode0 = TYPE_MODE (TREE_TYPE (arg0));
34914 gcc_assert (VECTOR_MODE_P (mode0));
34916 op0 = force_reg (mode0, op0);
34918 if (optimize || !target || !register_operand (target, tmode))
34919 target = gen_reg_rtx (tmode);
34921 ix86_expand_vector_extract (true, target, op0, elt);
34923 return target;
34926 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34927 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
34928 a language-level syntax for referencing vector elements. */
34930 static rtx
34931 ix86_expand_vec_set_builtin (tree exp)
34933 enum machine_mode tmode, mode1;
34934 tree arg0, arg1, arg2;
34935 int elt;
34936 rtx op0, op1, target;
34938 arg0 = CALL_EXPR_ARG (exp, 0);
34939 arg1 = CALL_EXPR_ARG (exp, 1);
34940 arg2 = CALL_EXPR_ARG (exp, 2);
34942 tmode = TYPE_MODE (TREE_TYPE (arg0));
34943 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34944 gcc_assert (VECTOR_MODE_P (tmode));
34946 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
34947 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
34948 elt = get_element_number (TREE_TYPE (arg0), arg2);
34950 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
34951 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
34953 op0 = force_reg (tmode, op0);
34954 op1 = force_reg (mode1, op1);
34956 /* OP0 is the source of these builtin functions and shouldn't be
34957 modified. Create a copy, use it and return it as target. */
34958 target = gen_reg_rtx (tmode);
34959 emit_move_insn (target, op0);
34960 ix86_expand_vector_set (true, target, op1, elt);
34962 return target;
34965 /* Expand an expression EXP that calls a built-in function,
34966 with result going to TARGET if that's convenient
34967 (and in mode MODE if that's convenient).
34968 SUBTARGET may be used as the target for computing one of EXP's operands.
34969 IGNORE is nonzero if the value is to be ignored. */
34971 static rtx
34972 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
34973 enum machine_mode mode, int ignore)
34975 const struct builtin_description *d;
34976 size_t i;
34977 enum insn_code icode;
34978 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
34979 tree arg0, arg1, arg2, arg3, arg4;
34980 rtx op0, op1, op2, op3, op4, pat, insn;
34981 enum machine_mode mode0, mode1, mode2, mode3, mode4;
34982 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
34984 /* For CPU builtins that can be folded, fold first and expand the fold. */
34985 switch (fcode)
34987 case IX86_BUILTIN_CPU_INIT:
34989 /* Make it call __cpu_indicator_init in libgcc. */
34990 tree call_expr, fndecl, type;
34991 type = build_function_type_list (integer_type_node, NULL_TREE);
34992 fndecl = build_fn_decl ("__cpu_indicator_init", type);
34993 call_expr = build_call_expr (fndecl, 0);
34994 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
34996 case IX86_BUILTIN_CPU_IS:
34997 case IX86_BUILTIN_CPU_SUPPORTS:
34999 tree arg0 = CALL_EXPR_ARG (exp, 0);
35000 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
35001 gcc_assert (fold_expr != NULL_TREE);
35002 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
35006 /* Determine whether the builtin function is available under the current ISA.
35007 Originally the builtin was not created if it wasn't applicable to the
35008 current ISA based on the command line switches. With function specific
35009 options, we need to check in the context of the function making the call
35010 whether it is supported. */
35011 if (ix86_builtins_isa[fcode].isa
35012 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
35014 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
35015 NULL, (enum fpmath_unit) 0, false);
35017 if (!opts)
35018 error ("%qE needs unknown isa option", fndecl);
35019 else
35021 gcc_assert (opts != NULL);
35022 error ("%qE needs isa option %s", fndecl, opts);
35023 free (opts);
35025 return const0_rtx;
35028 switch (fcode)
35030 case IX86_BUILTIN_MASKMOVQ:
35031 case IX86_BUILTIN_MASKMOVDQU:
35032 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35033 ? CODE_FOR_mmx_maskmovq
35034 : CODE_FOR_sse2_maskmovdqu);
35035 /* Note the arg order is different from the operand order. */
35036 arg1 = CALL_EXPR_ARG (exp, 0);
35037 arg2 = CALL_EXPR_ARG (exp, 1);
35038 arg0 = CALL_EXPR_ARG (exp, 2);
35039 op0 = expand_normal (arg0);
35040 op1 = expand_normal (arg1);
35041 op2 = expand_normal (arg2);
35042 mode0 = insn_data[icode].operand[0].mode;
35043 mode1 = insn_data[icode].operand[1].mode;
35044 mode2 = insn_data[icode].operand[2].mode;
35046 op0 = ix86_zero_extend_to_Pmode (op0);
35047 op0 = gen_rtx_MEM (mode1, op0);
35049 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35050 op0 = copy_to_mode_reg (mode0, op0);
35051 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35052 op1 = copy_to_mode_reg (mode1, op1);
35053 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35054 op2 = copy_to_mode_reg (mode2, op2);
35055 pat = GEN_FCN (icode) (op0, op1, op2);
35056 if (! pat)
35057 return 0;
35058 emit_insn (pat);
35059 return 0;
35061 case IX86_BUILTIN_LDMXCSR:
35062 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35063 target = assign_386_stack_local (SImode, SLOT_TEMP);
35064 emit_move_insn (target, op0);
35065 emit_insn (gen_sse_ldmxcsr (target));
35066 return 0;
35068 case IX86_BUILTIN_STMXCSR:
35069 target = assign_386_stack_local (SImode, SLOT_TEMP);
35070 emit_insn (gen_sse_stmxcsr (target));
35071 return copy_to_mode_reg (SImode, target);
35073 case IX86_BUILTIN_CLFLUSH:
35074 arg0 = CALL_EXPR_ARG (exp, 0);
35075 op0 = expand_normal (arg0);
35076 icode = CODE_FOR_sse2_clflush;
35077 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35078 op0 = ix86_zero_extend_to_Pmode (op0);
35080 emit_insn (gen_sse2_clflush (op0));
35081 return 0;
35083 case IX86_BUILTIN_MONITOR:
35084 arg0 = CALL_EXPR_ARG (exp, 0);
35085 arg1 = CALL_EXPR_ARG (exp, 1);
35086 arg2 = CALL_EXPR_ARG (exp, 2);
35087 op0 = expand_normal (arg0);
35088 op1 = expand_normal (arg1);
35089 op2 = expand_normal (arg2);
35090 if (!REG_P (op0))
35091 op0 = ix86_zero_extend_to_Pmode (op0);
35092 if (!REG_P (op1))
35093 op1 = copy_to_mode_reg (SImode, op1);
35094 if (!REG_P (op2))
35095 op2 = copy_to_mode_reg (SImode, op2);
35096 emit_insn (ix86_gen_monitor (op0, op1, op2));
35097 return 0;
35099 case IX86_BUILTIN_MWAIT:
35100 arg0 = CALL_EXPR_ARG (exp, 0);
35101 arg1 = CALL_EXPR_ARG (exp, 1);
35102 op0 = expand_normal (arg0);
35103 op1 = expand_normal (arg1);
35104 if (!REG_P (op0))
35105 op0 = copy_to_mode_reg (SImode, op0);
35106 if (!REG_P (op1))
35107 op1 = copy_to_mode_reg (SImode, op1);
35108 emit_insn (gen_sse3_mwait (op0, op1));
35109 return 0;
35111 case IX86_BUILTIN_VEC_INIT_V2SI:
35112 case IX86_BUILTIN_VEC_INIT_V4HI:
35113 case IX86_BUILTIN_VEC_INIT_V8QI:
35114 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35116 case IX86_BUILTIN_VEC_EXT_V2DF:
35117 case IX86_BUILTIN_VEC_EXT_V2DI:
35118 case IX86_BUILTIN_VEC_EXT_V4SF:
35119 case IX86_BUILTIN_VEC_EXT_V4SI:
35120 case IX86_BUILTIN_VEC_EXT_V8HI:
35121 case IX86_BUILTIN_VEC_EXT_V2SI:
35122 case IX86_BUILTIN_VEC_EXT_V4HI:
35123 case IX86_BUILTIN_VEC_EXT_V16QI:
35124 return ix86_expand_vec_ext_builtin (exp, target);
35126 case IX86_BUILTIN_VEC_SET_V2DI:
35127 case IX86_BUILTIN_VEC_SET_V4SF:
35128 case IX86_BUILTIN_VEC_SET_V4SI:
35129 case IX86_BUILTIN_VEC_SET_V8HI:
35130 case IX86_BUILTIN_VEC_SET_V4HI:
35131 case IX86_BUILTIN_VEC_SET_V16QI:
35132 return ix86_expand_vec_set_builtin (exp);
35134 case IX86_BUILTIN_INFQ:
35135 case IX86_BUILTIN_HUGE_VALQ:
35137 REAL_VALUE_TYPE inf;
35138 rtx tmp;
35140 real_inf (&inf);
35141 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
35143 tmp = validize_mem (force_const_mem (mode, tmp));
35145 if (target == 0)
35146 target = gen_reg_rtx (mode);
35148 emit_move_insn (target, tmp);
35149 return target;
35152 case IX86_BUILTIN_RDPMC:
35153 case IX86_BUILTIN_RDTSC:
35154 case IX86_BUILTIN_RDTSCP:
35156 op0 = gen_reg_rtx (DImode);
35157 op1 = gen_reg_rtx (DImode);
35159 if (fcode == IX86_BUILTIN_RDPMC)
35161 arg0 = CALL_EXPR_ARG (exp, 0);
35162 op2 = expand_normal (arg0);
35163 if (!register_operand (op2, SImode))
35164 op2 = copy_to_mode_reg (SImode, op2);
35166 insn = (TARGET_64BIT
35167 ? gen_rdpmc_rex64 (op0, op1, op2)
35168 : gen_rdpmc (op0, op2));
35169 emit_insn (insn);
35171 else if (fcode == IX86_BUILTIN_RDTSC)
35173 insn = (TARGET_64BIT
35174 ? gen_rdtsc_rex64 (op0, op1)
35175 : gen_rdtsc (op0));
35176 emit_insn (insn);
35178 else
35180 op2 = gen_reg_rtx (SImode);
35182 insn = (TARGET_64BIT
35183 ? gen_rdtscp_rex64 (op0, op1, op2)
35184 : gen_rdtscp (op0, op2));
35185 emit_insn (insn);
35187 arg0 = CALL_EXPR_ARG (exp, 0);
35188 op4 = expand_normal (arg0);
35189 if (!address_operand (op4, VOIDmode))
35191 op4 = convert_memory_address (Pmode, op4);
35192 op4 = copy_addr_to_reg (op4);
35194 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35197 if (target == 0)
35199 /* mode is VOIDmode if __builtin_rd* has been called
35200 without lhs. */
35201 if (mode == VOIDmode)
35202 return target;
35203 target = gen_reg_rtx (mode);
35206 if (TARGET_64BIT)
35208 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35209 op1, 1, OPTAB_DIRECT);
35210 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35211 op0, 1, OPTAB_DIRECT);
35214 emit_move_insn (target, op0);
35215 return target;
35217 case IX86_BUILTIN_FXSAVE:
35218 case IX86_BUILTIN_FXRSTOR:
35219 case IX86_BUILTIN_FXSAVE64:
35220 case IX86_BUILTIN_FXRSTOR64:
35221 case IX86_BUILTIN_FNSTENV:
35222 case IX86_BUILTIN_FLDENV:
35223 case IX86_BUILTIN_FNSTSW:
35224 mode0 = BLKmode;
35225 switch (fcode)
35227 case IX86_BUILTIN_FXSAVE:
35228 icode = CODE_FOR_fxsave;
35229 break;
35230 case IX86_BUILTIN_FXRSTOR:
35231 icode = CODE_FOR_fxrstor;
35232 break;
35233 case IX86_BUILTIN_FXSAVE64:
35234 icode = CODE_FOR_fxsave64;
35235 break;
35236 case IX86_BUILTIN_FXRSTOR64:
35237 icode = CODE_FOR_fxrstor64;
35238 break;
35239 case IX86_BUILTIN_FNSTENV:
35240 icode = CODE_FOR_fnstenv;
35241 break;
35242 case IX86_BUILTIN_FLDENV:
35243 icode = CODE_FOR_fldenv;
35244 break;
35245 case IX86_BUILTIN_FNSTSW:
35246 icode = CODE_FOR_fnstsw;
35247 mode0 = HImode;
35248 break;
35249 default:
35250 gcc_unreachable ();
35253 arg0 = CALL_EXPR_ARG (exp, 0);
35254 op0 = expand_normal (arg0);
35256 if (!address_operand (op0, VOIDmode))
35258 op0 = convert_memory_address (Pmode, op0);
35259 op0 = copy_addr_to_reg (op0);
35261 op0 = gen_rtx_MEM (mode0, op0);
35263 pat = GEN_FCN (icode) (op0);
35264 if (pat)
35265 emit_insn (pat);
35266 return 0;
35268 case IX86_BUILTIN_XSAVE:
35269 case IX86_BUILTIN_XRSTOR:
35270 case IX86_BUILTIN_XSAVE64:
35271 case IX86_BUILTIN_XRSTOR64:
35272 case IX86_BUILTIN_XSAVEOPT:
35273 case IX86_BUILTIN_XSAVEOPT64:
35274 arg0 = CALL_EXPR_ARG (exp, 0);
35275 arg1 = CALL_EXPR_ARG (exp, 1);
35276 op0 = expand_normal (arg0);
35277 op1 = expand_normal (arg1);
35279 if (!address_operand (op0, VOIDmode))
35281 op0 = convert_memory_address (Pmode, op0);
35282 op0 = copy_addr_to_reg (op0);
35284 op0 = gen_rtx_MEM (BLKmode, op0);
35286 op1 = force_reg (DImode, op1);
35288 if (TARGET_64BIT)
35290 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35291 NULL, 1, OPTAB_DIRECT);
35292 switch (fcode)
35294 case IX86_BUILTIN_XSAVE:
35295 icode = CODE_FOR_xsave_rex64;
35296 break;
35297 case IX86_BUILTIN_XRSTOR:
35298 icode = CODE_FOR_xrstor_rex64;
35299 break;
35300 case IX86_BUILTIN_XSAVE64:
35301 icode = CODE_FOR_xsave64;
35302 break;
35303 case IX86_BUILTIN_XRSTOR64:
35304 icode = CODE_FOR_xrstor64;
35305 break;
35306 case IX86_BUILTIN_XSAVEOPT:
35307 icode = CODE_FOR_xsaveopt_rex64;
35308 break;
35309 case IX86_BUILTIN_XSAVEOPT64:
35310 icode = CODE_FOR_xsaveopt64;
35311 break;
35312 default:
35313 gcc_unreachable ();
35316 op2 = gen_lowpart (SImode, op2);
35317 op1 = gen_lowpart (SImode, op1);
35318 pat = GEN_FCN (icode) (op0, op1, op2);
35320 else
35322 switch (fcode)
35324 case IX86_BUILTIN_XSAVE:
35325 icode = CODE_FOR_xsave;
35326 break;
35327 case IX86_BUILTIN_XRSTOR:
35328 icode = CODE_FOR_xrstor;
35329 break;
35330 case IX86_BUILTIN_XSAVEOPT:
35331 icode = CODE_FOR_xsaveopt;
35332 break;
35333 default:
35334 gcc_unreachable ();
35336 pat = GEN_FCN (icode) (op0, op1);
35339 if (pat)
35340 emit_insn (pat);
35341 return 0;
35343 case IX86_BUILTIN_LLWPCB:
35344 arg0 = CALL_EXPR_ARG (exp, 0);
35345 op0 = expand_normal (arg0);
35346 icode = CODE_FOR_lwp_llwpcb;
35347 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35348 op0 = ix86_zero_extend_to_Pmode (op0);
35349 emit_insn (gen_lwp_llwpcb (op0));
35350 return 0;
35352 case IX86_BUILTIN_SLWPCB:
35353 icode = CODE_FOR_lwp_slwpcb;
35354 if (!target
35355 || !insn_data[icode].operand[0].predicate (target, Pmode))
35356 target = gen_reg_rtx (Pmode);
35357 emit_insn (gen_lwp_slwpcb (target));
35358 return target;
35360 case IX86_BUILTIN_BEXTRI32:
35361 case IX86_BUILTIN_BEXTRI64:
35362 arg0 = CALL_EXPR_ARG (exp, 0);
35363 arg1 = CALL_EXPR_ARG (exp, 1);
35364 op0 = expand_normal (arg0);
35365 op1 = expand_normal (arg1);
35366 icode = (fcode == IX86_BUILTIN_BEXTRI32
35367 ? CODE_FOR_tbm_bextri_si
35368 : CODE_FOR_tbm_bextri_di);
35369 if (!CONST_INT_P (op1))
35371 error ("the last argument must be an immediate");
35372 return const0_rtx;
35374 else
35376 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35377 unsigned char lsb_index = INTVAL (op1) & 0xFF;
35378 op1 = GEN_INT (length);
35379 op2 = GEN_INT (lsb_index);
35380 pat = GEN_FCN (icode) (target, op0, op1, op2);
35381 if (pat)
35382 emit_insn (pat);
35383 return target;
35386 case IX86_BUILTIN_RDRAND16_STEP:
35387 icode = CODE_FOR_rdrandhi_1;
35388 mode0 = HImode;
35389 goto rdrand_step;
35391 case IX86_BUILTIN_RDRAND32_STEP:
35392 icode = CODE_FOR_rdrandsi_1;
35393 mode0 = SImode;
35394 goto rdrand_step;
35396 case IX86_BUILTIN_RDRAND64_STEP:
35397 icode = CODE_FOR_rdranddi_1;
35398 mode0 = DImode;
35400 rdrand_step:
35401 op0 = gen_reg_rtx (mode0);
35402 emit_insn (GEN_FCN (icode) (op0));
35404 arg0 = CALL_EXPR_ARG (exp, 0);
35405 op1 = expand_normal (arg0);
35406 if (!address_operand (op1, VOIDmode))
35408 op1 = convert_memory_address (Pmode, op1);
35409 op1 = copy_addr_to_reg (op1);
35411 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35413 op1 = gen_reg_rtx (SImode);
35414 emit_move_insn (op1, CONST1_RTX (SImode));
35416 /* Emit SImode conditional move. */
35417 if (mode0 == HImode)
35419 op2 = gen_reg_rtx (SImode);
35420 emit_insn (gen_zero_extendhisi2 (op2, op0));
35422 else if (mode0 == SImode)
35423 op2 = op0;
35424 else
35425 op2 = gen_rtx_SUBREG (SImode, op0, 0);
35427 if (target == 0)
35428 target = gen_reg_rtx (SImode);
35430 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
35431 const0_rtx);
35432 emit_insn (gen_rtx_SET (VOIDmode, target,
35433 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
35434 return target;
35436 case IX86_BUILTIN_RDSEED16_STEP:
35437 icode = CODE_FOR_rdseedhi_1;
35438 mode0 = HImode;
35439 goto rdseed_step;
35441 case IX86_BUILTIN_RDSEED32_STEP:
35442 icode = CODE_FOR_rdseedsi_1;
35443 mode0 = SImode;
35444 goto rdseed_step;
35446 case IX86_BUILTIN_RDSEED64_STEP:
35447 icode = CODE_FOR_rdseeddi_1;
35448 mode0 = DImode;
35450 rdseed_step:
35451 op0 = gen_reg_rtx (mode0);
35452 emit_insn (GEN_FCN (icode) (op0));
35454 arg0 = CALL_EXPR_ARG (exp, 0);
35455 op1 = expand_normal (arg0);
35456 if (!address_operand (op1, VOIDmode))
35458 op1 = convert_memory_address (Pmode, op1);
35459 op1 = copy_addr_to_reg (op1);
35461 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35463 op2 = gen_reg_rtx (QImode);
35465 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
35466 const0_rtx);
35467 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
35469 if (target == 0)
35470 target = gen_reg_rtx (SImode);
35472 emit_insn (gen_zero_extendqisi2 (target, op2));
35473 return target;
35475 case IX86_BUILTIN_ADDCARRYX32:
35476 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
35477 mode0 = SImode;
35478 goto addcarryx;
35480 case IX86_BUILTIN_ADDCARRYX64:
35481 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
35482 mode0 = DImode;
35484 addcarryx:
35485 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
35486 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
35487 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
35488 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
35490 op0 = gen_reg_rtx (QImode);
35492 /* Generate CF from input operand. */
35493 op1 = expand_normal (arg0);
35494 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
35495 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
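/* Adding 0xff to the QImode c_in carries out exactly when c_in is
   nonzero, so the carry flag now mirrors the incoming carry. */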
35497 /* Gen ADCX instruction to compute X+Y+CF. */
35498 op2 = expand_normal (arg1);
35499 op3 = expand_normal (arg2);
35501 if (!REG_P (op2))
35502 op2 = copy_to_mode_reg (mode0, op2);
35503 if (!REG_P (op3))
35504 op3 = copy_to_mode_reg (mode0, op3);
35506 op0 = gen_reg_rtx (mode0);
35508 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
35509 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
35510 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
35512 /* Store the result. */
35513 op4 = expand_normal (arg3);
35514 if (!address_operand (op4, VOIDmode))
35516 op4 = convert_memory_address (Pmode, op4);
35517 op4 = copy_addr_to_reg (op4);
35519 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
35521 /* Return current CF value. */
35522 if (target == 0)
35523 target = gen_reg_rtx (QImode);
35525 PUT_MODE (pat, QImode);
35526 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
35527 return target;
35529 case IX86_BUILTIN_READ_FLAGS:
35530 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
35532 if (optimize
35533 || target == NULL_RTX
35534 || !nonimmediate_operand (target, word_mode)
35535 || GET_MODE (target) != word_mode)
35536 target = gen_reg_rtx (word_mode);
35538 emit_insn (gen_pop (target));
35539 return target;
35541 case IX86_BUILTIN_WRITE_FLAGS:
35543 arg0 = CALL_EXPR_ARG (exp, 0);
35544 op0 = expand_normal (arg0);
35545 if (!general_no_elim_operand (op0, word_mode))
35546 op0 = copy_to_mode_reg (word_mode, op0);
35548 emit_insn (gen_push (op0));
35549 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
35550 return 0;
35552 case IX86_BUILTIN_KORTESTC16:
35553 icode = CODE_FOR_kortestchi;
35554 mode0 = HImode;
35555 mode1 = CCCmode;
35556 goto kortest;
35558 case IX86_BUILTIN_KORTESTZ16:
35559 icode = CODE_FOR_kortestzhi;
35560 mode0 = HImode;
35561 mode1 = CCZmode;
35563 kortest:
35564 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
35565 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
35566 op0 = expand_normal (arg0);
35567 op1 = expand_normal (arg1);
35569 op0 = copy_to_reg (op0);
35570 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35571 op1 = copy_to_reg (op1);
35572 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
35574 target = gen_reg_rtx (QImode);
35575 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
35577 /* Emit kortest. */
35578 emit_insn (GEN_FCN (icode) (op0, op1));
35579 /* And use setcc to return result from flags. */
35580 ix86_expand_setcc (target, EQ,
35581 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
35582 return target;
35584 case IX86_BUILTIN_GATHERSIV2DF:
35585 icode = CODE_FOR_avx2_gathersiv2df;
35586 goto gather_gen;
35587 case IX86_BUILTIN_GATHERSIV4DF:
35588 icode = CODE_FOR_avx2_gathersiv4df;
35589 goto gather_gen;
35590 case IX86_BUILTIN_GATHERDIV2DF:
35591 icode = CODE_FOR_avx2_gatherdiv2df;
35592 goto gather_gen;
35593 case IX86_BUILTIN_GATHERDIV4DF:
35594 icode = CODE_FOR_avx2_gatherdiv4df;
35595 goto gather_gen;
35596 case IX86_BUILTIN_GATHERSIV4SF:
35597 icode = CODE_FOR_avx2_gathersiv4sf;
35598 goto gather_gen;
35599 case IX86_BUILTIN_GATHERSIV8SF:
35600 icode = CODE_FOR_avx2_gathersiv8sf;
35601 goto gather_gen;
35602 case IX86_BUILTIN_GATHERDIV4SF:
35603 icode = CODE_FOR_avx2_gatherdiv4sf;
35604 goto gather_gen;
35605 case IX86_BUILTIN_GATHERDIV8SF:
35606 icode = CODE_FOR_avx2_gatherdiv8sf;
35607 goto gather_gen;
35608 case IX86_BUILTIN_GATHERSIV2DI:
35609 icode = CODE_FOR_avx2_gathersiv2di;
35610 goto gather_gen;
35611 case IX86_BUILTIN_GATHERSIV4DI:
35612 icode = CODE_FOR_avx2_gathersiv4di;
35613 goto gather_gen;
35614 case IX86_BUILTIN_GATHERDIV2DI:
35615 icode = CODE_FOR_avx2_gatherdiv2di;
35616 goto gather_gen;
35617 case IX86_BUILTIN_GATHERDIV4DI:
35618 icode = CODE_FOR_avx2_gatherdiv4di;
35619 goto gather_gen;
35620 case IX86_BUILTIN_GATHERSIV4SI:
35621 icode = CODE_FOR_avx2_gathersiv4si;
35622 goto gather_gen;
35623 case IX86_BUILTIN_GATHERSIV8SI:
35624 icode = CODE_FOR_avx2_gathersiv8si;
35625 goto gather_gen;
35626 case IX86_BUILTIN_GATHERDIV4SI:
35627 icode = CODE_FOR_avx2_gatherdiv4si;
35628 goto gather_gen;
35629 case IX86_BUILTIN_GATHERDIV8SI:
35630 icode = CODE_FOR_avx2_gatherdiv8si;
35631 goto gather_gen;
35632 case IX86_BUILTIN_GATHERALTSIV4DF:
35633 icode = CODE_FOR_avx2_gathersiv4df;
35634 goto gather_gen;
35635 case IX86_BUILTIN_GATHERALTDIV8SF:
35636 icode = CODE_FOR_avx2_gatherdiv8sf;
35637 goto gather_gen;
35638 case IX86_BUILTIN_GATHERALTSIV4DI:
35639 icode = CODE_FOR_avx2_gathersiv4di;
35640 goto gather_gen;
35641 case IX86_BUILTIN_GATHERALTDIV8SI:
35642 icode = CODE_FOR_avx2_gatherdiv8si;
35643 goto gather_gen;
35644 case IX86_BUILTIN_GATHER3SIV16SF:
35645 icode = CODE_FOR_avx512f_gathersiv16sf;
35646 goto gather_gen;
35647 case IX86_BUILTIN_GATHER3SIV8DF:
35648 icode = CODE_FOR_avx512f_gathersiv8df;
35649 goto gather_gen;
35650 case IX86_BUILTIN_GATHER3DIV16SF:
35651 icode = CODE_FOR_avx512f_gatherdiv16sf;
35652 goto gather_gen;
35653 case IX86_BUILTIN_GATHER3DIV8DF:
35654 icode = CODE_FOR_avx512f_gatherdiv8df;
35655 goto gather_gen;
35656 case IX86_BUILTIN_GATHER3SIV16SI:
35657 icode = CODE_FOR_avx512f_gathersiv16si;
35658 goto gather_gen;
35659 case IX86_BUILTIN_GATHER3SIV8DI:
35660 icode = CODE_FOR_avx512f_gathersiv8di;
35661 goto gather_gen;
35662 case IX86_BUILTIN_GATHER3DIV16SI:
35663 icode = CODE_FOR_avx512f_gatherdiv16si;
35664 goto gather_gen;
35665 case IX86_BUILTIN_GATHER3DIV8DI:
35666 icode = CODE_FOR_avx512f_gatherdiv8di;
35667 goto gather_gen;
35668 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35669 icode = CODE_FOR_avx512f_gathersiv8df;
35670 goto gather_gen;
35671 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35672 icode = CODE_FOR_avx512f_gatherdiv16sf;
35673 goto gather_gen;
35674 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35675 icode = CODE_FOR_avx512f_gathersiv8di;
35676 goto gather_gen;
35677 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35678 icode = CODE_FOR_avx512f_gatherdiv16si;
35679 goto gather_gen;
35680 case IX86_BUILTIN_SCATTERSIV16SF:
35681 icode = CODE_FOR_avx512f_scattersiv16sf;
35682 goto scatter_gen;
35683 case IX86_BUILTIN_SCATTERSIV8DF:
35684 icode = CODE_FOR_avx512f_scattersiv8df;
35685 goto scatter_gen;
35686 case IX86_BUILTIN_SCATTERDIV16SF:
35687 icode = CODE_FOR_avx512f_scatterdiv16sf;
35688 goto scatter_gen;
35689 case IX86_BUILTIN_SCATTERDIV8DF:
35690 icode = CODE_FOR_avx512f_scatterdiv8df;
35691 goto scatter_gen;
35692 case IX86_BUILTIN_SCATTERSIV16SI:
35693 icode = CODE_FOR_avx512f_scattersiv16si;
35694 goto scatter_gen;
35695 case IX86_BUILTIN_SCATTERSIV8DI:
35696 icode = CODE_FOR_avx512f_scattersiv8di;
35697 goto scatter_gen;
35698 case IX86_BUILTIN_SCATTERDIV16SI:
35699 icode = CODE_FOR_avx512f_scatterdiv16si;
35700 goto scatter_gen;
35701 case IX86_BUILTIN_SCATTERDIV8DI:
35702 icode = CODE_FOR_avx512f_scatterdiv8di;
35703 goto scatter_gen;
35705 case IX86_BUILTIN_GATHERPFDPD:
35706 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
35707 goto vec_prefetch_gen;
35708 case IX86_BUILTIN_GATHERPFDPS:
35709 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
35710 goto vec_prefetch_gen;
35711 case IX86_BUILTIN_GATHERPFQPD:
35712 icode = CODE_FOR_avx512pf_gatherpfv8didf;
35713 goto vec_prefetch_gen;
35714 case IX86_BUILTIN_GATHERPFQPS:
35715 icode = CODE_FOR_avx512pf_gatherpfv8disf;
35716 goto vec_prefetch_gen;
35717 case IX86_BUILTIN_SCATTERPFDPD:
35718 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
35719 goto vec_prefetch_gen;
35720 case IX86_BUILTIN_SCATTERPFDPS:
35721 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
35722 goto vec_prefetch_gen;
35723 case IX86_BUILTIN_SCATTERPFQPD:
35724 icode = CODE_FOR_avx512pf_scatterpfv8didf;
35725 goto vec_prefetch_gen;
35726 case IX86_BUILTIN_SCATTERPFQPS:
35727 icode = CODE_FOR_avx512pf_scatterpfv8disf;
35728 goto vec_prefetch_gen;
35730 gather_gen:
35731 rtx half;
35732 rtx (*gen) (rtx, rtx);
35734 arg0 = CALL_EXPR_ARG (exp, 0);
35735 arg1 = CALL_EXPR_ARG (exp, 1);
35736 arg2 = CALL_EXPR_ARG (exp, 2);
35737 arg3 = CALL_EXPR_ARG (exp, 3);
35738 arg4 = CALL_EXPR_ARG (exp, 4);
35739 op0 = expand_normal (arg0);
35740 op1 = expand_normal (arg1);
35741 op2 = expand_normal (arg2);
35742 op3 = expand_normal (arg3);
35743 op4 = expand_normal (arg4);
35744 /* Note the arg order is different from the operand order. */
35745 mode0 = insn_data[icode].operand[1].mode;
35746 mode2 = insn_data[icode].operand[3].mode;
35747 mode3 = insn_data[icode].operand[4].mode;
35748 mode4 = insn_data[icode].operand[5].mode;
35750 if (target == NULL_RTX
35751 || GET_MODE (target) != insn_data[icode].operand[0].mode
35752 || !insn_data[icode].operand[0].predicate (target,
35753 GET_MODE (target)))
35754 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
35755 else
35756 subtarget = target;
35758 switch (fcode)
35760 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35761 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35762 half = gen_reg_rtx (V8SImode);
35763 if (!nonimmediate_operand (op2, V16SImode))
35764 op2 = copy_to_mode_reg (V16SImode, op2);
35765 emit_insn (gen_vec_extract_lo_v16si (half, op2));
35766 op2 = half;
35767 break;
35768 case IX86_BUILTIN_GATHERALTSIV4DF:
35769 case IX86_BUILTIN_GATHERALTSIV4DI:
35770 half = gen_reg_rtx (V4SImode);
35771 if (!nonimmediate_operand (op2, V8SImode))
35772 op2 = copy_to_mode_reg (V8SImode, op2);
35773 emit_insn (gen_vec_extract_lo_v8si (half, op2));
35774 op2 = half;
35775 break;
35776 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35777 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35778 half = gen_reg_rtx (mode0);
35779 if (mode0 == V8SFmode)
35780 gen = gen_vec_extract_lo_v16sf;
35781 else
35782 gen = gen_vec_extract_lo_v16si;
35783 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35784 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35785 emit_insn (gen (half, op0));
35786 op0 = half;
35787 if (GET_MODE (op3) != VOIDmode)
35789 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35790 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35791 emit_insn (gen (half, op3));
35792 op3 = half;
35794 break;
35795 case IX86_BUILTIN_GATHERALTDIV8SF:
35796 case IX86_BUILTIN_GATHERALTDIV8SI:
35797 half = gen_reg_rtx (mode0);
35798 if (mode0 == V4SFmode)
35799 gen = gen_vec_extract_lo_v8sf;
35800 else
35801 gen = gen_vec_extract_lo_v8si;
35802 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35803 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35804 emit_insn (gen (half, op0));
35805 op0 = half;
35806 if (GET_MODE (op3) != VOIDmode)
35808 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35809 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35810 emit_insn (gen (half, op3));
35811 op3 = half;
35813 break;
35814 default:
35815 break;
35818 /* Force memory operand only with base register here. But we
35819 don't want to do it on memory operand for other builtin
35820 functions. */
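/* The gather patterns address memory as base + index * scale, so the
   base pointer must end up in a general register of Pmode width. */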
35821 op1 = ix86_zero_extend_to_Pmode (op1);
35823 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35824 op0 = copy_to_mode_reg (mode0, op0);
35825 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
35826 op1 = copy_to_mode_reg (Pmode, op1);
35827 if (!insn_data[icode].operand[3].predicate (op2, mode2))
35828 op2 = copy_to_mode_reg (mode2, op2);
35829 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
35831 if (!insn_data[icode].operand[4].predicate (op3, mode3))
35832 op3 = copy_to_mode_reg (mode3, op3);
35834 else
35836 op3 = copy_to_reg (op3);
35837 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
35839 if (!insn_data[icode].operand[5].predicate (op4, mode4))
35841 error ("the last argument must be scale 1, 2, 4 or 8");
35842 return const0_rtx;
35845 /* Optimize. If mask is known to have all high bits set,
35846 replace op0 with pc_rtx to signal that the instruction
35847 overwrites the whole destination and doesn't use its
35848 previous contents. */
35849 if (optimize)
35851 if (TREE_CODE (arg3) == INTEGER_CST)
35853 if (integer_all_onesp (arg3))
35854 op0 = pc_rtx;
35856 else if (TREE_CODE (arg3) == VECTOR_CST)
35858 unsigned int negative = 0;
35859 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
35861 tree cst = VECTOR_CST_ELT (arg3, i);
35862 if (TREE_CODE (cst) == INTEGER_CST
35863 && tree_int_cst_sign_bit (cst))
35864 negative++;
35865 else if (TREE_CODE (cst) == REAL_CST
35866 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
35867 negative++;
35869 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
35870 op0 = pc_rtx;
35872 else if (TREE_CODE (arg3) == SSA_NAME
35873 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
35875 /* Also recognize when the mask is like:
35876 __v2df src = _mm_setzero_pd ();
35877 __v2df mask = _mm_cmpeq_pd (src, src);
35879 __v8sf src = _mm256_setzero_ps ();
35880 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
35881 as that is a cheaper way to load all ones into
35882 a register than having to load a constant from
35883 memory. */
35884 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
35885 if (is_gimple_call (def_stmt))
35887 tree fndecl = gimple_call_fndecl (def_stmt);
35888 if (fndecl
35889 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35890 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
35892 case IX86_BUILTIN_CMPPD:
35893 case IX86_BUILTIN_CMPPS:
35894 case IX86_BUILTIN_CMPPD256:
35895 case IX86_BUILTIN_CMPPS256:
35896 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
35897 break;
35898 /* FALLTHRU */
35899 case IX86_BUILTIN_CMPEQPD:
35900 case IX86_BUILTIN_CMPEQPS:
35901 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
35902 && initializer_zerop (gimple_call_arg (def_stmt,
35903 1)))
35904 op0 = pc_rtx;
35905 break;
35906 default:
35907 break;
35913 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
35914 if (! pat)
35915 return const0_rtx;
35916 emit_insn (pat);
35918 switch (fcode)
35920 case IX86_BUILTIN_GATHER3DIV16SF:
35921 if (target == NULL_RTX)
35922 target = gen_reg_rtx (V8SFmode);
35923 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
35924 break;
35925 case IX86_BUILTIN_GATHER3DIV16SI:
35926 if (target == NULL_RTX)
35927 target = gen_reg_rtx (V8SImode);
35928 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
35929 break;
35930 case IX86_BUILTIN_GATHERDIV8SF:
35931 if (target == NULL_RTX)
35932 target = gen_reg_rtx (V4SFmode);
35933 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
35934 break;
35935 case IX86_BUILTIN_GATHERDIV8SI:
35936 if (target == NULL_RTX)
35937 target = gen_reg_rtx (V4SImode);
35938 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
35939 break;
35940 default:
35941 target = subtarget;
35942 break;
35944 return target;
35946 scatter_gen:
35947 arg0 = CALL_EXPR_ARG (exp, 0);
35948 arg1 = CALL_EXPR_ARG (exp, 1);
35949 arg2 = CALL_EXPR_ARG (exp, 2);
35950 arg3 = CALL_EXPR_ARG (exp, 3);
35951 arg4 = CALL_EXPR_ARG (exp, 4);
35952 op0 = expand_normal (arg0);
35953 op1 = expand_normal (arg1);
35954 op2 = expand_normal (arg2);
35955 op3 = expand_normal (arg3);
35956 op4 = expand_normal (arg4);
35957 mode1 = insn_data[icode].operand[1].mode;
35958 mode2 = insn_data[icode].operand[2].mode;
35959 mode3 = insn_data[icode].operand[3].mode;
35960 mode4 = insn_data[icode].operand[4].mode;
35962 /* Force memory operand only with base register here. But we
35963 don't want to do it on memory operand for other builtin
35964 functions. */
35965 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
35967 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35968 op0 = copy_to_mode_reg (Pmode, op0);
35970 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
35972 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35973 op1 = copy_to_mode_reg (mode1, op1);
35975 else
35977 op1 = copy_to_reg (op1);
35978 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
35981 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35982 op2 = copy_to_mode_reg (mode2, op2);
35984 if (!insn_data[icode].operand[3].predicate (op3, mode3))
35985 op3 = copy_to_mode_reg (mode3, op3);
35987 if (!insn_data[icode].operand[4].predicate (op4, mode4))
35989 error ("the last argument must be scale 1, 2, 4, 8");
35990 return const0_rtx;
35993 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
35994 if (! pat)
35995 return const0_rtx;
35997 emit_insn (pat);
35998 return 0;
36000 vec_prefetch_gen:
36001 arg0 = CALL_EXPR_ARG (exp, 0);
36002 arg1 = CALL_EXPR_ARG (exp, 1);
36003 arg2 = CALL_EXPR_ARG (exp, 2);
36004 arg3 = CALL_EXPR_ARG (exp, 3);
36005 arg4 = CALL_EXPR_ARG (exp, 4);
36006 op0 = expand_normal (arg0);
36007 op1 = expand_normal (arg1);
36008 op2 = expand_normal (arg2);
36009 op3 = expand_normal (arg3);
36010 op4 = expand_normal (arg4);
36011 mode0 = insn_data[icode].operand[0].mode;
36012 mode1 = insn_data[icode].operand[1].mode;
36013 mode3 = insn_data[icode].operand[3].mode;
36014 mode4 = insn_data[icode].operand[4].mode;
36016 if (GET_MODE (op0) == mode0
36017 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
36019 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36020 op0 = copy_to_mode_reg (mode0, op0);
36022 else if (op0 != constm1_rtx)
36024 op0 = copy_to_reg (op0);
36025 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
36028 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36029 op1 = copy_to_mode_reg (mode1, op1);
36031 /* Force the memory operand to be addressed with only a base register
36032 here, but we don't want to do this for the memory operands of other
36033 builtin functions. */
36034 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36036 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36037 op2 = copy_to_mode_reg (Pmode, op2);
36039 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36041 error ("the forth argument must be scale 1, 2, 4, 8");
36042 return const0_rtx;
36045 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36047 error ("incorrect hint operand");
36048 return const0_rtx;
36051 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36052 if (! pat)
36053 return const0_rtx;
36055 emit_insn (pat);
36057 return 0;
36059 case IX86_BUILTIN_XABORT:
36060 icode = CODE_FOR_xabort;
36061 arg0 = CALL_EXPR_ARG (exp, 0);
36062 op0 = expand_normal (arg0);
36063 mode0 = insn_data[icode].operand[0].mode;
36064 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36066 error ("the xabort's argument must be an 8-bit immediate");
36067 return const0_rtx;
36069 emit_insn (gen_xabort (op0));
36070 return 0;
36072 default:
36073 break;
36076 for (i = 0, d = bdesc_special_args;
36077 i < ARRAY_SIZE (bdesc_special_args);
36078 i++, d++)
36079 if (d->code == fcode)
36080 return ix86_expand_special_args_builtin (d, exp, target);
36082 for (i = 0, d = bdesc_args;
36083 i < ARRAY_SIZE (bdesc_args);
36084 i++, d++)
36085 if (d->code == fcode)
36086 switch (fcode)
36088 case IX86_BUILTIN_FABSQ:
36089 case IX86_BUILTIN_COPYSIGNQ:
36090 if (!TARGET_SSE)
36091 /* Emit a normal call if SSE isn't available. */
36092 return expand_call (exp, target, ignore);
36093 default:
36094 return ix86_expand_args_builtin (d, exp, target);
36097 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
36098 if (d->code == fcode)
36099 return ix86_expand_sse_comi (d, exp, target);
36101 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
36102 if (d->code == fcode)
36103 return ix86_expand_round_builtin (d, exp, target);
36105 for (i = 0, d = bdesc_pcmpestr;
36106 i < ARRAY_SIZE (bdesc_pcmpestr);
36107 i++, d++)
36108 if (d->code == fcode)
36109 return ix86_expand_sse_pcmpestr (d, exp, target);
36111 for (i = 0, d = bdesc_pcmpistr;
36112 i < ARRAY_SIZE (bdesc_pcmpistr);
36113 i++, d++)
36114 if (d->code == fcode)
36115 return ix86_expand_sse_pcmpistr (d, exp, target);
36117 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36118 if (d->code == fcode)
36119 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
36120 (enum ix86_builtin_func_type)
36121 d->flag, d->comparison);
36123 gcc_unreachable ();
36126 /* This returns the target-specific builtin with code CODE if
36127 current_function_decl has visibility on this builtin, which is checked
36128 using isa flags. Returns NULL_TREE otherwise. */
36130 static tree ix86_get_builtin (enum ix86_builtins code)
36132 struct cl_target_option *opts;
36133 tree target_tree = NULL_TREE;
36135 /* Determine the isa flags of current_function_decl. */
36137 if (current_function_decl)
36138 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
36140 if (target_tree == NULL)
36141 target_tree = target_option_default_node;
36143 opts = TREE_TARGET_OPTION (target_tree);
36145 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
36146 return ix86_builtin_decl (code, true);
36147 else
36148 return NULL_TREE;
36151 /* Returns a function decl for a vectorized version of the builtin function
36152 FNDECL with result vector type TYPE_OUT and argument vector type TYPE_IN,
36153 or NULL_TREE if it is not available. */
36155 static tree
36156 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
36157 tree type_in)
36159 enum machine_mode in_mode, out_mode;
36160 int in_n, out_n;
36161 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
36163 if (TREE_CODE (type_out) != VECTOR_TYPE
36164 || TREE_CODE (type_in) != VECTOR_TYPE
36165 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
36166 return NULL_TREE;
36168 out_mode = TYPE_MODE (TREE_TYPE (type_out));
36169 out_n = TYPE_VECTOR_SUBPARTS (type_out);
36170 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36171 in_n = TYPE_VECTOR_SUBPARTS (type_in);
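/* A sketch of the mapping below: a BUILT_IN_SQRT vectorized with a
   V2DF result and V2DF argument selects IX86_BUILTIN_SQRTPD, while a
   BUILT_IN_SQRTF with V8SF result and argument selects
   IX86_BUILTIN_SQRTPS_NR256.  */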
36173 switch (fn)
36175 case BUILT_IN_SQRT:
36176 if (out_mode == DFmode && in_mode == DFmode)
36178 if (out_n == 2 && in_n == 2)
36179 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
36180 else if (out_n == 4 && in_n == 4)
36181 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
36182 else if (out_n == 8 && in_n == 8)
36183 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
36185 break;
36187 case BUILT_IN_EXP2F:
36188 if (out_mode == SFmode && in_mode == SFmode)
36190 if (out_n == 16 && in_n == 16)
36191 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
36193 break;
36195 case BUILT_IN_SQRTF:
36196 if (out_mode == SFmode && in_mode == SFmode)
36198 if (out_n == 4 && in_n == 4)
36199 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
36200 else if (out_n == 8 && in_n == 8)
36201 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
36202 else if (out_n == 16 && in_n == 16)
36203 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
36205 break;
36207 case BUILT_IN_IFLOOR:
36208 case BUILT_IN_LFLOOR:
36209 case BUILT_IN_LLFLOOR:
36210 /* The round insn does not trap on denormals. */
36211 if (flag_trapping_math || !TARGET_ROUND)
36212 break;
36214 if (out_mode == SImode && in_mode == DFmode)
36216 if (out_n == 4 && in_n == 2)
36217 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
36218 else if (out_n == 8 && in_n == 4)
36219 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
36220 else if (out_n == 16 && in_n == 8)
36221 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
36223 break;
36225 case BUILT_IN_IFLOORF:
36226 case BUILT_IN_LFLOORF:
36227 case BUILT_IN_LLFLOORF:
36228 /* The round insn does not trap on denormals. */
36229 if (flag_trapping_math || !TARGET_ROUND)
36230 break;
36232 if (out_mode == SImode && in_mode == SFmode)
36234 if (out_n == 4 && in_n == 4)
36235 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
36236 else if (out_n == 8 && in_n == 8)
36237 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
36239 break;
36241 case BUILT_IN_ICEIL:
36242 case BUILT_IN_LCEIL:
36243 case BUILT_IN_LLCEIL:
36244 /* The round insn does not trap on denormals. */
36245 if (flag_trapping_math || !TARGET_ROUND)
36246 break;
36248 if (out_mode == SImode && in_mode == DFmode)
36250 if (out_n == 4 && in_n == 2)
36251 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
36252 else if (out_n == 8 && in_n == 4)
36253 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
36254 else if (out_n == 16 && in_n == 8)
36255 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
36257 break;
36259 case BUILT_IN_ICEILF:
36260 case BUILT_IN_LCEILF:
36261 case BUILT_IN_LLCEILF:
36262 /* The round insn does not trap on denormals. */
36263 if (flag_trapping_math || !TARGET_ROUND)
36264 break;
36266 if (out_mode == SImode && in_mode == SFmode)
36268 if (out_n == 4 && in_n == 4)
36269 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
36270 else if (out_n == 8 && in_n == 8)
36271 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
36273 break;
36275 case BUILT_IN_IRINT:
36276 case BUILT_IN_LRINT:
36277 case BUILT_IN_LLRINT:
36278 if (out_mode == SImode && in_mode == DFmode)
36280 if (out_n == 4 && in_n == 2)
36281 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
36282 else if (out_n == 8 && in_n == 4)
36283 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
36285 break;
36287 case BUILT_IN_IRINTF:
36288 case BUILT_IN_LRINTF:
36289 case BUILT_IN_LLRINTF:
36290 if (out_mode == SImode && in_mode == SFmode)
36292 if (out_n == 4 && in_n == 4)
36293 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
36294 else if (out_n == 8 && in_n == 8)
36295 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
36297 break;
36299 case BUILT_IN_IROUND:
36300 case BUILT_IN_LROUND:
36301 case BUILT_IN_LLROUND:
36302 /* The round insn does not trap on denormals. */
36303 if (flag_trapping_math || !TARGET_ROUND)
36304 break;
36306 if (out_mode == SImode && in_mode == DFmode)
36308 if (out_n == 4 && in_n == 2)
36309 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
36310 else if (out_n == 8 && in_n == 4)
36311 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
36312 else if (out_n == 16 && in_n == 8)
36313 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
36315 break;
36317 case BUILT_IN_IROUNDF:
36318 case BUILT_IN_LROUNDF:
36319 case BUILT_IN_LLROUNDF:
36320 /* The round insn does not trap on denormals. */
36321 if (flag_trapping_math || !TARGET_ROUND)
36322 break;
36324 if (out_mode == SImode && in_mode == SFmode)
36326 if (out_n == 4 && in_n == 4)
36327 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
36328 else if (out_n == 8 && in_n == 8)
36329 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
36331 break;
36333 case BUILT_IN_COPYSIGN:
36334 if (out_mode == DFmode && in_mode == DFmode)
36336 if (out_n == 2 && in_n == 2)
36337 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
36338 else if (out_n == 4 && in_n == 4)
36339 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
36340 else if (out_n == 8 && in_n == 8)
36341 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
36343 break;
36345 case BUILT_IN_COPYSIGNF:
36346 if (out_mode == SFmode && in_mode == SFmode)
36348 if (out_n == 4 && in_n == 4)
36349 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
36350 else if (out_n == 8 && in_n == 8)
36351 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
36352 else if (out_n == 16 && in_n == 16)
36353 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
36355 break;
36357 case BUILT_IN_FLOOR:
36358 /* The round insn does not trap on denormals. */
36359 if (flag_trapping_math || !TARGET_ROUND)
36360 break;
36362 if (out_mode == DFmode && in_mode == DFmode)
36364 if (out_n == 2 && in_n == 2)
36365 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
36366 else if (out_n == 4 && in_n == 4)
36367 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
36369 break;
36371 case BUILT_IN_FLOORF:
36372 /* The round insn does not trap on denormals. */
36373 if (flag_trapping_math || !TARGET_ROUND)
36374 break;
36376 if (out_mode == SFmode && in_mode == SFmode)
36378 if (out_n == 4 && in_n == 4)
36379 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
36380 else if (out_n == 8 && in_n == 8)
36381 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
36383 break;
36385 case BUILT_IN_CEIL:
36386 /* The round insn does not trap on denormals. */
36387 if (flag_trapping_math || !TARGET_ROUND)
36388 break;
36390 if (out_mode == DFmode && in_mode == DFmode)
36392 if (out_n == 2 && in_n == 2)
36393 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
36394 else if (out_n == 4 && in_n == 4)
36395 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
36397 break;
36399 case BUILT_IN_CEILF:
36400 /* The round insn does not trap on denormals. */
36401 if (flag_trapping_math || !TARGET_ROUND)
36402 break;
36404 if (out_mode == SFmode && in_mode == SFmode)
36406 if (out_n == 4 && in_n == 4)
36407 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
36408 else if (out_n == 8 && in_n == 8)
36409 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
36411 break;
36413 case BUILT_IN_TRUNC:
36414 /* The round insn does not trap on denormals. */
36415 if (flag_trapping_math || !TARGET_ROUND)
36416 break;
36418 if (out_mode == DFmode && in_mode == DFmode)
36420 if (out_n == 2 && in_n == 2)
36421 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
36422 else if (out_n == 4 && in_n == 4)
36423 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
36425 break;
36427 case BUILT_IN_TRUNCF:
36428 /* The round insn does not trap on denormals. */
36429 if (flag_trapping_math || !TARGET_ROUND)
36430 break;
36432 if (out_mode == SFmode && in_mode == SFmode)
36434 if (out_n == 4 && in_n == 4)
36435 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
36436 else if (out_n == 8 && in_n == 8)
36437 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
36439 break;
36441 case BUILT_IN_RINT:
36442 /* The round insn does not trap on denormals. */
36443 if (flag_trapping_math || !TARGET_ROUND)
36444 break;
36446 if (out_mode == DFmode && in_mode == DFmode)
36448 if (out_n == 2 && in_n == 2)
36449 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
36450 else if (out_n == 4 && in_n == 4)
36451 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
36453 break;
36455 case BUILT_IN_RINTF:
36456 /* The round insn does not trap on denormals. */
36457 if (flag_trapping_math || !TARGET_ROUND)
36458 break;
36460 if (out_mode == SFmode && in_mode == SFmode)
36462 if (out_n == 4 && in_n == 4)
36463 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
36464 else if (out_n == 8 && in_n == 8)
36465 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
36467 break;
36469 case BUILT_IN_ROUND:
36470 /* The round insn does not trap on denormals. */
36471 if (flag_trapping_math || !TARGET_ROUND)
36472 break;
36474 if (out_mode == DFmode && in_mode == DFmode)
36476 if (out_n == 2 && in_n == 2)
36477 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
36478 else if (out_n == 4 && in_n == 4)
36479 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
36481 break;
36483 case BUILT_IN_ROUNDF:
36484 /* The round insn does not trap on denormals. */
36485 if (flag_trapping_math || !TARGET_ROUND)
36486 break;
36488 if (out_mode == SFmode && in_mode == SFmode)
36490 if (out_n == 4 && in_n == 4)
36491 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
36492 else if (out_n == 8 && in_n == 8)
36493 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
36495 break;
36497 case BUILT_IN_FMA:
36498 if (out_mode == DFmode && in_mode == DFmode)
36500 if (out_n == 2 && in_n == 2)
36501 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
36502 if (out_n == 4 && in_n == 4)
36503 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
36505 break;
36507 case BUILT_IN_FMAF:
36508 if (out_mode == SFmode && in_mode == SFmode)
36510 if (out_n == 4 && in_n == 4)
36511 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
36512 if (out_n == 8 && in_n == 8)
36513 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
36515 break;
36517 default:
36518 break;
36521 /* Dispatch to a handler for a vectorization library. */
36522 if (ix86_veclib_handler)
36523 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
36524 type_in);
36526 return NULL_TREE;
36529 /* Handler for an SVML-style interface to
36530 a library with vectorized intrinsics. */
36532 static tree
36533 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
36535 char name[20];
36536 tree fntype, new_fndecl, args;
36537 unsigned arity;
36538 const char *bname;
36539 enum machine_mode el_mode, in_mode;
36540 int n, in_n;
36542 /* The SVML is suitable for unsafe math only. */
36543 if (!flag_unsafe_math_optimizations)
36544 return NULL_TREE;
36546 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36547 n = TYPE_VECTOR_SUBPARTS (type_out);
36548 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36549 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36550 if (el_mode != in_mode
36551 || n != in_n)
36552 return NULL_TREE;
36554 switch (fn)
36556 case BUILT_IN_EXP:
36557 case BUILT_IN_LOG:
36558 case BUILT_IN_LOG10:
36559 case BUILT_IN_POW:
36560 case BUILT_IN_TANH:
36561 case BUILT_IN_TAN:
36562 case BUILT_IN_ATAN:
36563 case BUILT_IN_ATAN2:
36564 case BUILT_IN_ATANH:
36565 case BUILT_IN_CBRT:
36566 case BUILT_IN_SINH:
36567 case BUILT_IN_SIN:
36568 case BUILT_IN_ASINH:
36569 case BUILT_IN_ASIN:
36570 case BUILT_IN_COSH:
36571 case BUILT_IN_COS:
36572 case BUILT_IN_ACOSH:
36573 case BUILT_IN_ACOS:
36574 if (el_mode != DFmode || n != 2)
36575 return NULL_TREE;
36576 break;
36578 case BUILT_IN_EXPF:
36579 case BUILT_IN_LOGF:
36580 case BUILT_IN_LOG10F:
36581 case BUILT_IN_POWF:
36582 case BUILT_IN_TANHF:
36583 case BUILT_IN_TANF:
36584 case BUILT_IN_ATANF:
36585 case BUILT_IN_ATAN2F:
36586 case BUILT_IN_ATANHF:
36587 case BUILT_IN_CBRTF:
36588 case BUILT_IN_SINHF:
36589 case BUILT_IN_SINF:
36590 case BUILT_IN_ASINHF:
36591 case BUILT_IN_ASINF:
36592 case BUILT_IN_COSHF:
36593 case BUILT_IN_COSF:
36594 case BUILT_IN_ACOSHF:
36595 case BUILT_IN_ACOSF:
36596 if (el_mode != SFmode || n != 4)
36597 return NULL_TREE;
36598 break;
36600 default:
36601 return NULL_TREE;
36604 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36606 if (fn == BUILT_IN_LOGF)
36607 strcpy (name, "vmlsLn4");
36608 else if (fn == BUILT_IN_LOG)
36609 strcpy (name, "vmldLn2");
36610 else if (n == 4)
36612 sprintf (name, "vmls%s", bname+10);
36613 name[strlen (name)-1] = '4';
36615 else
36616 sprintf (name, "vmld%s2", bname+10);
36618 /* Convert to uppercase. */
36619 name[4] &= ~0x20;
36621 arity = 0;
36622 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36623 args;
36624 args = TREE_CHAIN (args))
36625 arity++;
36627 if (arity == 1)
36628 fntype = build_function_type_list (type_out, type_in, NULL);
36629 else
36630 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36632 /* Build a function declaration for the vectorized function. */
36633 new_fndecl = build_decl (BUILTINS_LOCATION,
36634 FUNCTION_DECL, get_identifier (name), fntype);
36635 TREE_PUBLIC (new_fndecl) = 1;
36636 DECL_EXTERNAL (new_fndecl) = 1;
36637 DECL_IS_NOVOPS (new_fndecl) = 1;
36638 TREE_READONLY (new_fndecl) = 1;
36640 return new_fndecl;
36643 /* Handler for an ACML-style interface to
36644 a library with vectorized intrinsics. */
36646 static tree
36647 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
36649 char name[20] = "__vr.._";
36650 tree fntype, new_fndecl, args;
36651 unsigned arity;
36652 const char *bname;
36653 enum machine_mode el_mode, in_mode;
36654 int n, in_n;
36656 /* The ACML is 64-bit only and suitable for unsafe math only, as
36657 it does not correctly support parts of IEEE with the required
36658 precision, such as denormals. */
36659 if (!TARGET_64BIT
36660 || !flag_unsafe_math_optimizations)
36661 return NULL_TREE;
36663 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36664 n = TYPE_VECTOR_SUBPARTS (type_out);
36665 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36666 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36667 if (el_mode != in_mode
36668 || n != in_n)
36669 return NULL_TREE;
36671 switch (fn)
36673 case BUILT_IN_SIN:
36674 case BUILT_IN_COS:
36675 case BUILT_IN_EXP:
36676 case BUILT_IN_LOG:
36677 case BUILT_IN_LOG2:
36678 case BUILT_IN_LOG10:
36679 name[4] = 'd';
36680 name[5] = '2';
36681 if (el_mode != DFmode
36682 || n != 2)
36683 return NULL_TREE;
36684 break;
36686 case BUILT_IN_SINF:
36687 case BUILT_IN_COSF:
36688 case BUILT_IN_EXPF:
36689 case BUILT_IN_POWF:
36690 case BUILT_IN_LOGF:
36691 case BUILT_IN_LOG2F:
36692 case BUILT_IN_LOG10F:
36693 name[4] = 's';
36694 name[5] = '4';
36695 if (el_mode != SFmode
36696 || n != 4)
36697 return NULL_TREE;
36698 break;
36700 default:
36701 return NULL_TREE;
36704 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36705 sprintf (name + 7, "%s", bname+10);
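/* A sketch of the resulting names (assuming the usual "__builtin_"
   prefix in BNAME): BUILT_IN_SINF becomes "__vrs4_sinf" and
   BUILT_IN_SIN becomes "__vrd2_sin".  */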
36707 arity = 0;
36708 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36709 args;
36710 args = TREE_CHAIN (args))
36711 arity++;
36713 if (arity == 1)
36714 fntype = build_function_type_list (type_out, type_in, NULL);
36715 else
36716 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36718 /* Build a function declaration for the vectorized function. */
36719 new_fndecl = build_decl (BUILTINS_LOCATION,
36720 FUNCTION_DECL, get_identifier (name), fntype);
36721 TREE_PUBLIC (new_fndecl) = 1;
36722 DECL_EXTERNAL (new_fndecl) = 1;
36723 DECL_IS_NOVOPS (new_fndecl) = 1;
36724 TREE_READONLY (new_fndecl) = 1;
36726 return new_fndecl;
36729 /* Returns a decl of a function that implements a gather load with
36730 memory type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
36731 Return NULL_TREE if it is not available. */
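/* For instance (a sketch of the table below): a V2DF gather with
   SImode indexes maps to IX86_BUILTIN_GATHERSIV2DF, while a V16SF
   gather with DImode indexes maps to IX86_BUILTIN_GATHER3ALTDIV16SF
   when TARGET_AVX512F is set.  */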
36733 static tree
36734 ix86_vectorize_builtin_gather (const_tree mem_vectype,
36735 const_tree index_type, int scale)
36737 bool si;
36738 enum ix86_builtins code;
36740 if (! TARGET_AVX2)
36741 return NULL_TREE;
36743 if ((TREE_CODE (index_type) != INTEGER_TYPE
36744 && !POINTER_TYPE_P (index_type))
36745 || (TYPE_MODE (index_type) != SImode
36746 && TYPE_MODE (index_type) != DImode))
36747 return NULL_TREE;
36749 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
36750 return NULL_TREE;
36752 /* v*gather* insn sign extends index to pointer mode. */
36753 if (TYPE_PRECISION (index_type) < POINTER_SIZE
36754 && TYPE_UNSIGNED (index_type))
36755 return NULL_TREE;
36757 if (scale <= 0
36758 || scale > 8
36759 || (scale & (scale - 1)) != 0)
36760 return NULL_TREE;
36762 si = TYPE_MODE (index_type) == SImode;
36763 switch (TYPE_MODE (mem_vectype))
36765 case V2DFmode:
36766 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
36767 break;
36768 case V4DFmode:
36769 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
36770 break;
36771 case V2DImode:
36772 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
36773 break;
36774 case V4DImode:
36775 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
36776 break;
36777 case V4SFmode:
36778 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
36779 break;
36780 case V8SFmode:
36781 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
36782 break;
36783 case V4SImode:
36784 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
36785 break;
36786 case V8SImode:
36787 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
36788 break;
36789 case V8DFmode:
36790 if (TARGET_AVX512F)
36791 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
36792 else
36793 return NULL_TREE;
36794 break;
36795 case V8DImode:
36796 if (TARGET_AVX512F)
36797 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
36798 else
36799 return NULL_TREE;
36800 break;
36801 case V16SFmode:
36802 if (TARGET_AVX512F)
36803 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
36804 else
36805 return NULL_TREE;
36806 break;
36807 case V16SImode:
36808 if (TARGET_AVX512F)
36809 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
36810 else
36811 return NULL_TREE;
36812 break;
36813 default:
36814 return NULL_TREE;
36817 return ix86_get_builtin (code);
36820 /* Returns a decl for a target-specific builtin that implements the
36821 reciprocal of the function, or NULL_TREE if not available. */
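/* For example (a sketch, assuming the unsafe/finite-math flags checked
   below are in effect): BUILT_IN_SQRTF maps to IX86_BUILTIN_RSQRTF and
   IX86_BUILTIN_SQRTPS_NR maps to IX86_BUILTIN_RSQRTPS_NR.  */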
36823 static tree
36824 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
36825 bool sqrt ATTRIBUTE_UNUSED)
36827 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
36828 && flag_finite_math_only && !flag_trapping_math
36829 && flag_unsafe_math_optimizations))
36830 return NULL_TREE;
36832 if (md_fn)
36833 /* Machine dependent builtins. */
36834 switch (fn)
36836 /* Vectorized version of sqrt to rsqrt conversion. */
36837 case IX86_BUILTIN_SQRTPS_NR:
36838 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
36840 case IX86_BUILTIN_SQRTPS_NR256:
36841 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
36843 default:
36844 return NULL_TREE;
36846 else
36847 /* Normal builtins. */
36848 switch (fn)
36850 /* Sqrt to rsqrt conversion. */
36851 case BUILT_IN_SQRTF:
36852 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
36854 default:
36855 return NULL_TREE;
36859 /* Helper for avx_vpermilps256_operand et al. This is also used by
36860 the expansion functions to turn the parallel back into a mask.
36861 The return value is 0 for no match and the imm8+1 for a match. */
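/* For example (a sketch): for V4SFmode the parallel (3 2 1 0) encodes
   two bits per element, giving mask 3 | 2<<2 | 1<<4 | 0<<6 = 0x1b,
   so the function returns 0x1b + 1.  */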
36864 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
36866 unsigned i, nelt = GET_MODE_NUNITS (mode);
36867 unsigned mask = 0;
36868 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
36870 if (XVECLEN (par, 0) != (int) nelt)
36871 return 0;
36873 /* Validate that all of the elements are constants, and not totally
36874 out of range. Copy the data into an integral array to make the
36875 subsequent checks easier. */
36876 for (i = 0; i < nelt; ++i)
36878 rtx er = XVECEXP (par, 0, i);
36879 unsigned HOST_WIDE_INT ei;
36881 if (!CONST_INT_P (er))
36882 return 0;
36883 ei = INTVAL (er);
36884 if (ei >= nelt)
36885 return 0;
36886 ipar[i] = ei;
36889 switch (mode)
36891 case V8DFmode:
36892 /* In the 512-bit DFmode case, we can only move elements within
36893 a 128-bit lane. First fill the second part of the mask,
36894 then fallthru. */
36895 for (i = 4; i < 6; ++i)
36897 if (ipar[i] < 4 || ipar[i] >= 6)
36898 return 0;
36899 mask |= (ipar[i] - 4) << i;
36901 for (i = 6; i < 8; ++i)
36903 if (ipar[i] < 6)
36904 return 0;
36905 mask |= (ipar[i] - 6) << i;
36907 /* FALLTHRU */
36909 case V4DFmode:
36910 /* In the 256-bit DFmode case, we can only move elements within
36911 a 128-bit lane. */
36912 for (i = 0; i < 2; ++i)
36914 if (ipar[i] >= 2)
36915 return 0;
36916 mask |= ipar[i] << i;
36918 for (i = 2; i < 4; ++i)
36920 if (ipar[i] < 2)
36921 return 0;
36922 mask |= (ipar[i] - 2) << i;
36924 break;
36926 case V16SFmode:
36927 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
36928 must mirror the permutation in the lower 256 bits. */
36929 for (i = 0; i < 8; ++i)
36930 if (ipar[i] + 8 != ipar[i + 8])
36931 return 0;
36932 /* FALLTHRU */
36934 case V8SFmode:
36935 /* In the 256-bit SFmode case, we have full freedom of
36936 movement within the low 128-bit lane, but the high 128-bit
36937 lane must mirror the exact same pattern. */
36938 for (i = 0; i < 4; ++i)
36939 if (ipar[i] + 4 != ipar[i + 4])
36940 return 0;
36941 nelt = 4;
36942 /* FALLTHRU */
36944 case V2DFmode:
36945 case V4SFmode:
36946 /* In the 128-bit case, we've full freedom in the placement of
36947 the elements from the source operand. */
36948 for (i = 0; i < nelt; ++i)
36949 mask |= ipar[i] << (i * (nelt / 2));
36950 break;
36952 default:
36953 gcc_unreachable ();
36956 /* Make sure success has a non-zero value by adding one. */
36957 return mask + 1;
36960 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
36961 the expansion functions to turn the parallel back into a mask.
36962 The return value is 0 for no match and the imm8+1 for a match. */
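/* For example (a sketch): for V8SFmode a parallel selecting elements
   8..11 followed by elements 4..7 reconstructs mask (8/4) | (4/4) << 4
   = 0x12, so the function returns 0x13.  */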
36965 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
36967 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
36968 unsigned mask = 0;
36969 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
36971 if (XVECLEN (par, 0) != (int) nelt)
36972 return 0;
36974 /* Validate that all of the elements are constants, and not totally
36975 out of range. Copy the data into an integral array to make the
36976 subsequent checks easier. */
36977 for (i = 0; i < nelt; ++i)
36979 rtx er = XVECEXP (par, 0, i);
36980 unsigned HOST_WIDE_INT ei;
36982 if (!CONST_INT_P (er))
36983 return 0;
36984 ei = INTVAL (er);
36985 if (ei >= 2 * nelt)
36986 return 0;
36987 ipar[i] = ei;
36990 /* Validate that each half of the permute selects a contiguous block of elements. */
36991 for (i = 0; i < nelt2 - 1; ++i)
36992 if (ipar[i] + 1 != ipar[i + 1])
36993 return 0;
36994 for (i = nelt2; i < nelt - 1; ++i)
36995 if (ipar[i] + 1 != ipar[i + 1])
36996 return 0;
36998 /* Reconstruct the mask. */
36999 for (i = 0; i < 2; ++i)
37001 unsigned e = ipar[i * nelt2];
37002 if (e % nelt2)
37003 return 0;
37004 e /= nelt2;
37005 mask |= e << (i * 4);
37008 /* Make sure success has a non-zero value by adding one. */
37009 return mask + 1;
37012 /* Store OPERAND to memory after reload is completed. This means
37013 that we can't easily use assign_stack_local. */
37015 ix86_force_to_memory (enum machine_mode mode, rtx operand)
37017 rtx result;
37019 gcc_assert (reload_completed);
37020 if (ix86_using_red_zone ())
37022 result = gen_rtx_MEM (mode,
37023 gen_rtx_PLUS (Pmode,
37024 stack_pointer_rtx,
37025 GEN_INT (-RED_ZONE_SIZE)));
37026 emit_move_insn (result, operand);
37028 else if (TARGET_64BIT)
37030 switch (mode)
37032 case HImode:
37033 case SImode:
37034 operand = gen_lowpart (DImode, operand);
37035 /* FALLTHRU */
37036 case DImode:
37037 emit_insn (
37038 gen_rtx_SET (VOIDmode,
37039 gen_rtx_MEM (DImode,
37040 gen_rtx_PRE_DEC (DImode,
37041 stack_pointer_rtx)),
37042 operand));
37043 break;
37044 default:
37045 gcc_unreachable ();
37047 result = gen_rtx_MEM (mode, stack_pointer_rtx);
37049 else
37051 switch (mode)
37053 case DImode:
37055 rtx operands[2];
37056 split_double_mode (mode, &operand, 1, operands, operands + 1);
37057 emit_insn (
37058 gen_rtx_SET (VOIDmode,
37059 gen_rtx_MEM (SImode,
37060 gen_rtx_PRE_DEC (Pmode,
37061 stack_pointer_rtx)),
37062 operands[1]));
37063 emit_insn (
37064 gen_rtx_SET (VOIDmode,
37065 gen_rtx_MEM (SImode,
37066 gen_rtx_PRE_DEC (Pmode,
37067 stack_pointer_rtx)),
37068 operands[0]));
37070 break;
37071 case HImode:
37072 /* Store HImodes as SImodes. */
37073 operand = gen_lowpart (SImode, operand);
37074 /* FALLTHRU */
37075 case SImode:
37076 emit_insn (
37077 gen_rtx_SET (VOIDmode,
37078 gen_rtx_MEM (GET_MODE (operand),
37079 gen_rtx_PRE_DEC (SImode,
37080 stack_pointer_rtx)),
37081 operand));
37082 break;
37083 default:
37084 gcc_unreachable ();
37086 result = gen_rtx_MEM (mode, stack_pointer_rtx);
37088 return result;
37091 /* Free operand from the memory. */
37092 void
37093 ix86_free_from_memory (enum machine_mode mode)
37095 if (!ix86_using_red_zone ())
37097 int size;
37099 if (mode == DImode || TARGET_64BIT)
37100 size = 8;
37101 else
37102 size = 4;
37103 /* Use LEA to deallocate stack space. In peephole2 it will be converted
37104 to a pop or add instruction if registers are available. */
37105 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
37106 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
37107 GEN_INT (size))));
37111 /* Return a register priority for hard reg REGNO. */
37112 static int
37113 ix86_register_priority (int hard_regno)
37115 /* ebp and r13 as the base always want a displacement, and r12 as the
37116 base always wants an index. So discourage their usage in an
37117 address. */
37118 if (hard_regno == R12_REG || hard_regno == R13_REG)
37119 return 0;
37120 if (hard_regno == BP_REG)
37121 return 1;
37122 /* New x86-64 int registers result in bigger code size. Discourage
37123 them. */
37124 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
37125 return 2;
37126 /* New x86-64 SSE registers result in bigger code size. Discourage
37127 them. */
37128 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
37129 return 2;
37130 /* Usage of AX register results in smaller code. Prefer it. */
37131 if (hard_regno == 0)
37132 return 4;
37133 return 3;
37136 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
37138 Put float CONST_DOUBLE in the constant pool instead of fp regs.
37139 QImode must go into class Q_REGS.
37140 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
37141 movdf to do mem-to-mem moves through integer regs. */
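/* For example (a sketch): asking for an SSE class to hold a nonzero
   floating-point CONST_DOUBLE yields NO_REGS below, which forces the
   constant into the constant pool.  */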
37143 static reg_class_t
37144 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
37146 enum machine_mode mode = GET_MODE (x);
37148 /* We're only allowed to return a subclass of CLASS. Many of the
37149 following checks fail for NO_REGS, so eliminate that early. */
37150 if (regclass == NO_REGS)
37151 return NO_REGS;
37153 /* All classes can load zeros. */
37154 if (x == CONST0_RTX (mode))
37155 return regclass;
37157 /* Force constants into memory if we are loading a (nonzero) constant into
37158 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
37159 instructions to load from a constant. */
37160 if (CONSTANT_P (x)
37161 && (MAYBE_MMX_CLASS_P (regclass)
37162 || MAYBE_SSE_CLASS_P (regclass)
37163 || MAYBE_MASK_CLASS_P (regclass)))
37164 return NO_REGS;
37166 /* Prefer SSE regs only, if we can use them for math. */
37167 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
37168 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
37170 /* Floating-point constants need more complex checks. */
37171 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
37173 /* General regs can load everything. */
37174 if (reg_class_subset_p (regclass, GENERAL_REGS))
37175 return regclass;
37177 /* Floats can load 0 and 1 plus some others. Note that we eliminated
37178 zero above. We only want to wind up preferring 80387 registers if
37179 we plan on doing computation with them. */
37180 if (TARGET_80387
37181 && standard_80387_constant_p (x) > 0)
37183 /* Limit class to non-sse. */
37184 if (regclass == FLOAT_SSE_REGS)
37185 return FLOAT_REGS;
37186 if (regclass == FP_TOP_SSE_REGS)
37187 return FP_TOP_REG;
37188 if (regclass == FP_SECOND_SSE_REGS)
37189 return FP_SECOND_REG;
37190 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
37191 return regclass;
37194 return NO_REGS;
37197 /* Generally when we see PLUS here, it's the function invariant
37198 (plus soft-fp const_int). Which can only be computed into general
37199 regs. */
37200 if (GET_CODE (x) == PLUS)
37201 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
37203 /* QImode constants are easy to load, but non-constant QImode data
37204 must go into Q_REGS. */
37205 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
37207 if (reg_class_subset_p (regclass, Q_REGS))
37208 return regclass;
37209 if (reg_class_subset_p (Q_REGS, regclass))
37210 return Q_REGS;
37211 return NO_REGS;
37214 return regclass;
37217 /* Discourage putting floating-point values in SSE registers unless
37218 SSE math is being used, and likewise for the 387 registers. */
37219 static reg_class_t
37220 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
37222 enum machine_mode mode = GET_MODE (x);
37224 /* Restrict the output reload class to the register bank that we are doing
37225 math on. If we would like not to return a subset of CLASS, reject this
37226 alternative: if reload cannot do this, it will still use its choice. */
37227 mode = GET_MODE (x);
37228 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
37229 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
37231 if (X87_FLOAT_MODE_P (mode))
37233 if (regclass == FP_TOP_SSE_REGS)
37234 return FP_TOP_REG;
37235 else if (regclass == FP_SECOND_SSE_REGS)
37236 return FP_SECOND_REG;
37237 else
37238 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
37241 return regclass;
37244 static reg_class_t
37245 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
37246 enum machine_mode mode, secondary_reload_info *sri)
37248 /* Double-word spills from general registers to non-offsettable memory
37249 references (zero-extended addresses) require special handling. */
37250 if (TARGET_64BIT
37251 && MEM_P (x)
37252 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
37253 && INTEGER_CLASS_P (rclass)
37254 && !offsettable_memref_p (x))
37256 sri->icode = (in_p
37257 ? CODE_FOR_reload_noff_load
37258 : CODE_FOR_reload_noff_store);
37259 /* Add the cost of moving address to a temporary. */
37260 sri->extra_cost = 1;
37262 return NO_REGS;
37265 /* QImode spills from non-QI registers require
37266 intermediate register on 32bit targets. */
37267 if (mode == QImode
37268 && (MAYBE_MASK_CLASS_P (rclass)
37269 || (!TARGET_64BIT && !in_p
37270 && INTEGER_CLASS_P (rclass)
37271 && MAYBE_NON_Q_CLASS_P (rclass))))
37273 int regno;
37275 if (REG_P (x))
37276 regno = REGNO (x);
37277 else
37278 regno = -1;
37280 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
37281 regno = true_regnum (x);
37283 /* Return Q_REGS if the operand is in memory. */
37284 if (regno == -1)
37285 return Q_REGS;
37288 /* This condition handles corner case where an expression involving
37289 pointers gets vectorized. We're trying to use the address of a
37290 stack slot as a vector initializer.
37292 (set (reg:V2DI 74 [ vect_cst_.2 ])
37293 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
37295 Eventually frame gets turned into sp+offset like this:
37297 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37298 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37299 (const_int 392 [0x188]))))
37301 That later gets turned into:
37303 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37304 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37305 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
37307 We'll have the following reload recorded:
37309 Reload 0: reload_in (DI) =
37310 (plus:DI (reg/f:DI 7 sp)
37311 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
37312 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37313 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
37314 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
37315 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37316 reload_reg_rtx: (reg:V2DI 22 xmm1)
37318 Which isn't going to work since SSE instructions can't handle scalar
37319 additions. Returning GENERAL_REGS forces the addition into an integer
37320 register, and reload can handle subsequent reloads without problems. */
37322 if (in_p && GET_CODE (x) == PLUS
37323 && SSE_CLASS_P (rclass)
37324 && SCALAR_INT_MODE_P (mode))
37325 return GENERAL_REGS;
37327 return NO_REGS;
37330 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
37332 static bool
37333 ix86_class_likely_spilled_p (reg_class_t rclass)
37335 switch (rclass)
37337 case AREG:
37338 case DREG:
37339 case CREG:
37340 case BREG:
37341 case AD_REGS:
37342 case SIREG:
37343 case DIREG:
37344 case SSE_FIRST_REG:
37345 case FP_TOP_REG:
37346 case FP_SECOND_REG:
37347 return true;
37349 default:
37350 break;
37353 return false;
37356 /* If we are copying between general and FP registers, we need a memory
37357 location. The same is true for SSE and MMX registers.
37359 To optimize register_move_cost performance, allow inline variant.
37361 The macro can't work reliably when one of the CLASSES is class containing
37362 registers from multiple units (SSE, MMX, integer). We avoid this by never
37363 combining those units in single alternative in the machine description.
37364 Ensure that this constraint holds to avoid unexpected surprises.
37366 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
37367 enforce these sanity checks. */
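/* For example (a sketch, assuming SSE2 and inter-unit moves are
   enabled): a DImode copy between GENERAL_REGS and SSE_REGS on a
   32-bit target needs memory because the move is wider than
   UNITS_PER_WORD, while an SImode copy does not.  */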
37369 static inline bool
37370 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37371 enum machine_mode mode, int strict)
37373 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
37374 return false;
37375 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
37376 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
37377 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
37378 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
37379 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
37380 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
37382 gcc_assert (!strict || lra_in_progress);
37383 return true;
37386 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
37387 return true;
37389 /* ??? This is a lie. We do have moves between mmx/general, and for
37390 mmx/sse2. But by saying we need secondary memory we discourage the
37391 register allocator from using the mmx registers unless needed. */
37392 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
37393 return true;
37395 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37397 /* SSE1 doesn't have any direct moves from other classes. */
37398 if (!TARGET_SSE2)
37399 return true;
37401 /* If the target says that inter-unit moves are more expensive
37402 than moving through memory, then don't generate them. */
37403 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
37404 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
37405 return true;
37407 /* Between SSE and general, we have moves no larger than word size. */
37408 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37409 return true;
37412 return false;
37415 bool
37416 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37417 enum machine_mode mode, int strict)
37419 return inline_secondary_memory_needed (class1, class2, mode, strict);
37422 /* Implement the TARGET_CLASS_MAX_NREGS hook.
37424 On the 80386, this is the size of MODE in words,
37425 except in the FP regs, where a single reg is always enough. */
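/* E.g. (a sketch): XFmode needs three words in the integer classes on
   32-bit targets (two on 64-bit), while a single register is enough in
   FLOAT_REGS.  */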
37427 static unsigned char
37428 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
37430 if (MAYBE_INTEGER_CLASS_P (rclass))
37432 if (mode == XFmode)
37433 return (TARGET_64BIT ? 2 : 3);
37434 else if (mode == XCmode)
37435 return (TARGET_64BIT ? 4 : 6);
37436 else
37437 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
37439 else
37441 if (COMPLEX_MODE_P (mode))
37442 return 2;
37443 else
37444 return 1;
37448 /* Return true if the registers in CLASS cannot represent the change from
37449 modes FROM to TO. */
37451 bool
37452 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
37453 enum reg_class regclass)
37455 if (from == to)
37456 return false;
37458 /* x87 registers can't do subreg at all, as all values are reformatted
37459 to extended precision. */
37460 if (MAYBE_FLOAT_CLASS_P (regclass))
37461 return true;
37463 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
37465 /* Vector registers do not support QI or HImode loads. If we don't
37466 disallow a change to these modes, reload will assume it's ok to
37467 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
37468 the vec_dupv4hi pattern. */
37469 if (GET_MODE_SIZE (from) < 4)
37470 return true;
37472 /* Vector registers do not support subreg with nonzero offsets, which
37473 are otherwise valid for integer registers. Since we can't see
37474 whether we have a nonzero offset from here, prohibit all
37475 nonparadoxical subregs changing size. */
37476 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
37477 return true;
37480 return false;
37483 /* Return the cost of moving data of mode M between a
37484 register and memory. A value of 2 is the default; this cost is
37485 relative to those in `REGISTER_MOVE_COST'.
37487 This function is used extensively by register_move_cost that is used to
37488 build tables at startup. Make it inline in this case.
37489 When IN is 2, return maximum of in and out move cost.
37491 If moving between registers and memory is more expensive than
37492 between two registers, you should define this macro to express the
37493 relative cost.
37495 Also model the increased cost of moving QImode values in non
37496 Q_REGS classes.
37498 static inline int
37499 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
37500 int in)
37502 int cost;
37503 if (FLOAT_CLASS_P (regclass))
37505 int index;
37506 switch (mode)
37508 case SFmode:
37509 index = 0;
37510 break;
37511 case DFmode:
37512 index = 1;
37513 break;
37514 case XFmode:
37515 index = 2;
37516 break;
37517 default:
37518 return 100;
37520 if (in == 2)
37521 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
37522 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
37524 if (SSE_CLASS_P (regclass))
37526 int index;
37527 switch (GET_MODE_SIZE (mode))
37529 case 4:
37530 index = 0;
37531 break;
37532 case 8:
37533 index = 1;
37534 break;
37535 case 16:
37536 index = 2;
37537 break;
37538 default:
37539 return 100;
37541 if (in == 2)
37542 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
37543 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
37545 if (MMX_CLASS_P (regclass))
37547 int index;
37548 switch (GET_MODE_SIZE (mode))
37550 case 4:
37551 index = 0;
37552 break;
37553 case 8:
37554 index = 1;
37555 break;
37556 default:
37557 return 100;
37559 if (in == 2)
37560 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
37561 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
37563 switch (GET_MODE_SIZE (mode))
37565 case 1:
37566 if (Q_CLASS_P (regclass) || TARGET_64BIT)
37568 if (!in)
37569 return ix86_cost->int_store[0];
37570 if (TARGET_PARTIAL_REG_DEPENDENCY
37571 && optimize_function_for_speed_p (cfun))
37572 cost = ix86_cost->movzbl_load;
37573 else
37574 cost = ix86_cost->int_load[0];
37575 if (in == 2)
37576 return MAX (cost, ix86_cost->int_store[0]);
37577 return cost;
37579 else
37581 if (in == 2)
37582 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
37583 if (in)
37584 return ix86_cost->movzbl_load;
37585 else
37586 return ix86_cost->int_store[0] + 4;
37588 break;
37589 case 2:
37590 if (in == 2)
37591 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
37592 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
37593 default:
37594 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
37595 if (mode == TFmode)
37596 mode = XFmode;
37597 if (in == 2)
37598 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
37599 else if (in)
37600 cost = ix86_cost->int_load[2];
37601 else
37602 cost = ix86_cost->int_store[2];
37603 return (cost * (((int) GET_MODE_SIZE (mode)
37604 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
37608 static int
37609 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
37610 bool in)
37612 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
37616 /* Return the cost of moving data from a register in class CLASS1 to
37617 one in class CLASS2.
37619 It is not required that the cost always equal 2 when FROM is the same as TO;
37620 on some machines it is expensive to move between registers if they are not
37621 general registers. */
37623 static int
37624 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
37625 reg_class_t class2_i)
37627 enum reg_class class1 = (enum reg_class) class1_i;
37628 enum reg_class class2 = (enum reg_class) class2_i;
37630 /* In case we require secondary memory, compute cost of the store followed
37631 by load. In order to avoid bad register allocation choices, we need
37632 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
37634 if (inline_secondary_memory_needed (class1, class2, mode, 0))
37636 int cost = 1;
37638 cost += inline_memory_move_cost (mode, class1, 2);
37639 cost += inline_memory_move_cost (mode, class2, 2);
37641 /* In the case of copying from a general purpose register we may emit
37642 multiple stores followed by a single load, causing a memory-size-mismatch
37643 stall. Count this as an arbitrarily high cost of 20. */
37644 if (targetm.class_max_nregs (class1, mode)
37645 > targetm.class_max_nregs (class2, mode))
37646 cost += 20;
37648 /* In the case of FP/MMX moves, the registers actually overlap, and we
37649 have to switch modes in order to treat them differently. */
37650 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
37651 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
37652 cost += 20;
37654 return cost;
37657 /* Moves between SSE/MMX and integer unit are expensive. */
37658 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
37659 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37661 /* ??? By keeping the returned value relatively high, we limit the number
37662 of moves between integer and MMX/SSE registers for all targets.
37663 Additionally, a high value prevents problems with x86_modes_tieable_p(),
37664 where integer modes in MMX/SSE registers are not tieable
37665 because of missing QImode and HImode moves to, from or between
37666 MMX/SSE registers. */
37667 return MAX (8, ix86_cost->mmxsse_to_integer);
37669 if (MAYBE_FLOAT_CLASS_P (class1))
37670 return ix86_cost->fp_move;
37671 if (MAYBE_SSE_CLASS_P (class1))
37672 return ix86_cost->sse_move;
37673 if (MAYBE_MMX_CLASS_P (class1))
37674 return ix86_cost->mmx_move;
37675 return 2;
37678 /* Return TRUE if hard register REGNO can hold a value of machine-mode
37679 MODE. */
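/* For instance (a sketch): xmm16, an EXT_REX SSE register, accepts
   V16SFmode only when TARGET_AVX512F is set, while the flags register
   accepts nothing but MODE_CC values.  */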
37681 bool
37682 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
37684 /* Flags and only flags can only hold CCmode values. */
37685 if (CC_REGNO_P (regno))
37686 return GET_MODE_CLASS (mode) == MODE_CC;
37687 if (GET_MODE_CLASS (mode) == MODE_CC
37688 || GET_MODE_CLASS (mode) == MODE_RANDOM
37689 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
37690 return false;
37691 if (STACK_REGNO_P (regno))
37692 return VALID_FP_MODE_P (mode);
37693 if (MASK_REGNO_P (regno))
37694 return VALID_MASK_REG_MODE (mode);
37695 if (SSE_REGNO_P (regno))
37697 /* We implement the move patterns for all vector modes into and
37698 out of SSE registers, even when no operation instructions
37699 are available. */
37701 /* For AVX-512 we allow, regardless of regno:
37702 - XI mode
37703 - any of 512-bit wide vector mode
37704 - any scalar mode. */
37705 if (TARGET_AVX512F
37706 && (mode == XImode
37707 || VALID_AVX512F_REG_MODE (mode)
37708 || VALID_AVX512F_SCALAR_MODE (mode)))
37709 return true;
37711 /* xmm16-xmm31 are only available for AVX-512. */
37712 if (EXT_REX_SSE_REGNO_P (regno))
37713 return false;
37715 /* OImode and AVX modes are available only when AVX is enabled. */
37716 return ((TARGET_AVX
37717 && VALID_AVX256_REG_OR_OI_MODE (mode))
37718 || VALID_SSE_REG_MODE (mode)
37719 || VALID_SSE2_REG_MODE (mode)
37720 || VALID_MMX_REG_MODE (mode)
37721 || VALID_MMX_REG_MODE_3DNOW (mode));
37723 if (MMX_REGNO_P (regno))
37725 /* We implement the move patterns for 3DNOW modes even in MMX mode,
37726 so if the register is available at all, then we can move data of
37727 the given mode into or out of it. */
37728 return (VALID_MMX_REG_MODE (mode)
37729 || VALID_MMX_REG_MODE_3DNOW (mode));
37732 if (mode == QImode)
37734 /* Take care for QImode values - they can be in non-QI regs,
37735 but then they do cause partial register stalls. */
37736 if (ANY_QI_REGNO_P (regno))
37737 return true;
37738 if (!TARGET_PARTIAL_REG_STALL)
37739 return true;
37740 /* LRA checks if the hard register is OK for the given mode.
37741 QImode values can live in non-QI regs, so we allow all
37742 registers here. */
37743 if (lra_in_progress)
37744 return true;
37745 return !can_create_pseudo_p ();
37747 /* We handle both integer and floats in the general purpose registers. */
37748 else if (VALID_INT_MODE_P (mode))
37749 return true;
37750 else if (VALID_FP_MODE_P (mode))
37751 return true;
37752 else if (VALID_DFP_MODE_P (mode))
37753 return true;
37754 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
37755 on to use that value in smaller contexts, this can easily force a
37756 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
37757 supporting DImode, allow it. */
37758 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
37759 return true;
37761 return false;
37764 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
37765 tieable integer mode. */
37767 static bool
37768 ix86_tieable_integer_mode_p (enum machine_mode mode)
37770 switch (mode)
37772 case HImode:
37773 case SImode:
37774 return true;
37776 case QImode:
37777 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
37779 case DImode:
37780 return TARGET_64BIT;
37782 default:
37783 return false;
37787 /* Return true if MODE1 is accessible in a register that can hold MODE2
37788 without copying. That is, all register classes that can hold MODE2
37789 can also hold MODE1. */
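/* E.g. (a sketch): SImode and HImode tie with each other, a DFmode
   MODE2 ties only with SFmode, and two 32-byte vector modes tie when
   they are both valid in SSE registers.  */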
37791 bool
37792 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
37794 if (mode1 == mode2)
37795 return true;
37797 if (ix86_tieable_integer_mode_p (mode1)
37798 && ix86_tieable_integer_mode_p (mode2))
37799 return true;
37801 /* MODE2 being XFmode implies fp stack or general regs, which means we
37802 can tie any smaller floating point modes to it. Note that we do not
37803 tie this with TFmode. */
37804 if (mode2 == XFmode)
37805 return mode1 == SFmode || mode1 == DFmode;
37807 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
37808 that we can tie it with SFmode. */
37809 if (mode2 == DFmode)
37810 return mode1 == SFmode;
37812 /* If MODE2 is only appropriate for an SSE register, then tie with
37813 any other mode acceptable to SSE registers. */
37814 if (GET_MODE_SIZE (mode2) == 32
37815 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37816 return (GET_MODE_SIZE (mode1) == 32
37817 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37818 if (GET_MODE_SIZE (mode2) == 16
37819 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37820 return (GET_MODE_SIZE (mode1) == 16
37821 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37823 /* If MODE2 is appropriate for an MMX register, then tie
37824 with any other mode acceptable to MMX registers. */
37825 if (GET_MODE_SIZE (mode2) == 8
37826 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
37827 return (GET_MODE_SIZE (mode1) == 8
37828 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
37830 return false;
37833 /* Return the cost of moving between two registers of mode MODE. */
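/* For instance (a sketch, assuming a 64-bit UNITS_PER_WORD of eight
   bytes): a TImode move that does not match any of the vector or FP
   cases below costs COSTS_N_INSNS (2), i.e. two word-sized pieces.  */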
37835 static int
37836 ix86_set_reg_reg_cost (enum machine_mode mode)
37838 unsigned int units = UNITS_PER_WORD;
37840 switch (GET_MODE_CLASS (mode))
37842 default:
37843 break;
37845 case MODE_CC:
37846 units = GET_MODE_SIZE (CCmode);
37847 break;
37849 case MODE_FLOAT:
37850 if ((TARGET_SSE && mode == TFmode)
37851 || (TARGET_80387 && mode == XFmode)
37852 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
37853 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
37854 units = GET_MODE_SIZE (mode);
37855 break;
37857 case MODE_COMPLEX_FLOAT:
37858 if ((TARGET_SSE && mode == TCmode)
37859 || (TARGET_80387 && mode == XCmode)
37860 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
37861 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
37862 units = GET_MODE_SIZE (mode);
37863 break;
37865 case MODE_VECTOR_INT:
37866 case MODE_VECTOR_FLOAT:
37867 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
37868 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
37869 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
37870 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
37871 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
37872 units = GET_MODE_SIZE (mode);
37875 /* Return the cost of moving between two registers of mode MODE,
37876 assuming that the move will be in pieces of at most UNITS bytes. */
37877 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
37880 /* Compute a (partial) cost for rtx X. Return true if the complete
37881 cost has been computed, and false if subexpressions should be
37882 scanned. In either case, *TOTAL contains the cost result. */
37884 static bool
37885 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
37886 bool speed)
37888 rtx mask;
37889 enum rtx_code code = (enum rtx_code) code_i;
37890 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
37891 enum machine_mode mode = GET_MODE (x);
37892 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
37894 switch (code)
37896 case SET:
37897 if (register_operand (SET_DEST (x), VOIDmode)
37898 && reg_or_0_operand (SET_SRC (x), VOIDmode))
37900 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
37901 return true;
37903 return false;
37905 case CONST_INT:
37906 case CONST:
37907 case LABEL_REF:
37908 case SYMBOL_REF:
37909 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
37910 *total = 3;
37911 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
37912 *total = 2;
37913 else if (flag_pic && SYMBOLIC_CONST (x)
37914 && (!TARGET_64BIT
37915 || (GET_CODE (x) != LABEL_REF
37916 && (GET_CODE (x) != SYMBOL_REF
37917 || !SYMBOL_REF_LOCAL_P (x)))))
37918 *total = 1;
37919 else
37920 *total = 0;
37921 return true;
37923 case CONST_DOUBLE:
37924 if (mode == VOIDmode)
37926 *total = 0;
37927 return true;
37929 switch (standard_80387_constant_p (x))
37931 case 1: /* 0.0 */
37932 *total = 1;
37933 return true;
37934 default: /* Other constants */
37935 *total = 2;
37936 return true;
37937 case 0:
37938 case -1:
37939 break;
37941 if (SSE_FLOAT_MODE_P (mode))
37943 case CONST_VECTOR:
37944 switch (standard_sse_constant_p (x))
37946 case 0:
37947 break;
37948 case 1: /* 0: xor eliminates false dependency */
37949 *total = 0;
37950 return true;
37951 default: /* -1: cmp contains false dependency */
37952 *total = 1;
37953 return true;
37956 /* Fall back to (MEM (SYMBOL_REF)), since that's where
37957 it'll probably end up. Add a penalty for size. */
37958 *total = (COSTS_N_INSNS (1)
37959 + (flag_pic != 0 && !TARGET_64BIT)
37960 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
37961 return true;
37963 case ZERO_EXTEND:
37964 /* The zero extension is often completely free on x86_64, so make
37965 it as cheap as possible. */
37966 if (TARGET_64BIT && mode == DImode
37967 && GET_MODE (XEXP (x, 0)) == SImode)
37968 *total = 1;
37969 else if (TARGET_ZERO_EXTEND_WITH_AND)
37970 *total = cost->add;
37971 else
37972 *total = cost->movzx;
37973 return false;
37975 case SIGN_EXTEND:
37976 *total = cost->movsx;
37977 return false;
37979 case ASHIFT:
37980 if (SCALAR_INT_MODE_P (mode)
37981 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
37982 && CONST_INT_P (XEXP (x, 1)))
37984 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
37985 if (value == 1)
37987 *total = cost->add;
37988 return false;
37990 if ((value == 2 || value == 3)
37991 && cost->lea <= cost->shift_const)
37993 *total = cost->lea;
37994 return false;
37997 /* FALLTHRU */
37999 case ROTATE:
38000 case ASHIFTRT:
38001 case LSHIFTRT:
38002 case ROTATERT:
38003 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38005 /* ??? Should be SSE vector operation cost. */
38006 /* At least for published AMD latencies, this really is the same
38007 as the latency for a simple fpu operation like fabs. */
38008 /* V*QImode is emulated with 1-11 insns. */
38009 if (mode == V16QImode || mode == V32QImode)
38011 int count = 11;
38012 if (TARGET_XOP && mode == V16QImode)
38014 /* For XOP we use vpshab, which requires a broadcast of the
38015 value to the variable shift insn. For constants this
38016 means a V16Q const in mem; even when we can perform the
38017 shift with one insn, set the cost to prefer paddb. */
38018 if (CONSTANT_P (XEXP (x, 1)))
38020 *total = (cost->fabs
38021 + rtx_cost (XEXP (x, 0), code, 0, speed)
38022 + (speed ? 2 : COSTS_N_BYTES (16)));
38023 return true;
38025 count = 3;
38027 else if (TARGET_SSSE3)
38028 count = 7;
38029 *total = cost->fabs * count;
38031 else
38032 *total = cost->fabs;
38034 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38036 if (CONST_INT_P (XEXP (x, 1)))
38038 if (INTVAL (XEXP (x, 1)) > 32)
38039 *total = cost->shift_const + COSTS_N_INSNS (2);
38040 else
38041 *total = cost->shift_const * 2;
38043 else
38045 if (GET_CODE (XEXP (x, 1)) == AND)
38046 *total = cost->shift_var * 2;
38047 else
38048 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
38051 else
38053 if (CONST_INT_P (XEXP (x, 1)))
38054 *total = cost->shift_const;
38055 else if (GET_CODE (XEXP (x, 1)) == SUBREG
38056 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
38058 /* Return the cost after shift-and truncation. */
38059 *total = cost->shift_var;
38060 return true;
38062 else
38063 *total = cost->shift_var;
38065 return false;
38067 case FMA:
38069 rtx sub;
38071 gcc_assert (FLOAT_MODE_P (mode));
38072 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
38074 /* ??? SSE scalar/vector cost should be used here. */
38075 /* ??? Bald assumption that fma has the same cost as fmul. */
38076 *total = cost->fmul;
38077 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
38079 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
38080 sub = XEXP (x, 0);
38081 if (GET_CODE (sub) == NEG)
38082 sub = XEXP (sub, 0);
38083 *total += rtx_cost (sub, FMA, 0, speed);
38085 sub = XEXP (x, 2);
38086 if (GET_CODE (sub) == NEG)
38087 sub = XEXP (sub, 0);
38088 *total += rtx_cost (sub, FMA, 2, speed);
38089 return true;
38092 case MULT:
38093 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38095 /* ??? SSE scalar cost should be used here. */
38096 *total = cost->fmul;
38097 return false;
38099 else if (X87_FLOAT_MODE_P (mode))
38101 *total = cost->fmul;
38102 return false;
38104 else if (FLOAT_MODE_P (mode))
38106 /* ??? SSE vector cost should be used here. */
38107 *total = cost->fmul;
38108 return false;
38110 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38112 /* V*QImode is emulated with 7-13 insns. */
38113 if (mode == V16QImode || mode == V32QImode)
38115 int extra = 11;
38116 if (TARGET_XOP && mode == V16QImode)
38117 extra = 5;
38118 else if (TARGET_SSSE3)
38119 extra = 6;
38120 *total = cost->fmul * 2 + cost->fabs * extra;
38122 /* V*DImode is emulated with 5-8 insns. */
38123 else if (mode == V2DImode || mode == V4DImode)
38125 if (TARGET_XOP && mode == V2DImode)
38126 *total = cost->fmul * 2 + cost->fabs * 3;
38127 else
38128 *total = cost->fmul * 3 + cost->fabs * 5;
38130 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
38131 insns, including two PMULUDQ. */
38132 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
38133 *total = cost->fmul * 2 + cost->fabs * 5;
38134 else
38135 *total = cost->fmul;
38136 return false;
38138 else
38140 rtx op0 = XEXP (x, 0);
38141 rtx op1 = XEXP (x, 1);
38142 int nbits;
38143 if (CONST_INT_P (XEXP (x, 1)))
38145 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38146 for (nbits = 0; value != 0; value &= value - 1)
38147 nbits++;
38149 else
38150 /* This is arbitrary. */
38151 nbits = 7;
38153 /* Compute costs correctly for widening multiplication. */
38154 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
38155 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
38156 == GET_MODE_SIZE (mode))
38158 int is_mulwiden = 0;
38159 enum machine_mode inner_mode = GET_MODE (op0);
38161 if (GET_CODE (op0) == GET_CODE (op1))
38162 is_mulwiden = 1, op1 = XEXP (op1, 0);
38163 else if (CONST_INT_P (op1))
38165 if (GET_CODE (op0) == SIGN_EXTEND)
38166 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
38167 == INTVAL (op1);
38168 else
38169 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
38172 if (is_mulwiden)
38173 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
38176 *total = (cost->mult_init[MODE_INDEX (mode)]
38177 + nbits * cost->mult_bit
38178 + rtx_cost (op0, outer_code, opno, speed)
38179 + rtx_cost (op1, outer_code, opno, speed));
38181 return true;
38184 case DIV:
38185 case UDIV:
38186 case MOD:
38187 case UMOD:
38188 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38189 /* ??? SSE cost should be used here. */
38190 *total = cost->fdiv;
38191 else if (X87_FLOAT_MODE_P (mode))
38192 *total = cost->fdiv;
38193 else if (FLOAT_MODE_P (mode))
38194 /* ??? SSE vector cost should be used here. */
38195 *total = cost->fdiv;
38196 else
38197 *total = cost->divide[MODE_INDEX (mode)];
38198 return false;
38200 case PLUS:
38201 if (GET_MODE_CLASS (mode) == MODE_INT
38202 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
38204 if (GET_CODE (XEXP (x, 0)) == PLUS
38205 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
38206 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
38207 && CONSTANT_P (XEXP (x, 1)))
38209 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
38210 if (val == 2 || val == 4 || val == 8)
38212 *total = cost->lea;
38213 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38214 outer_code, opno, speed);
38215 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
38216 outer_code, opno, speed);
38217 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38218 return true;
38221 else if (GET_CODE (XEXP (x, 0)) == MULT
38222 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
38224 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
38225 if (val == 2 || val == 4 || val == 8)
38227 *total = cost->lea;
38228 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38229 outer_code, opno, speed);
38230 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38231 return true;
38234 else if (GET_CODE (XEXP (x, 0)) == PLUS)
38236 *total = cost->lea;
38237 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38238 outer_code, opno, speed);
38239 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38240 outer_code, opno, speed);
38241 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38242 return true;
38245 /* FALLTHRU */
38247 case MINUS:
38248 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38250 /* ??? SSE cost should be used here. */
38251 *total = cost->fadd;
38252 return false;
38254 else if (X87_FLOAT_MODE_P (mode))
38256 *total = cost->fadd;
38257 return false;
38259 else if (FLOAT_MODE_P (mode))
38261 /* ??? SSE vector cost should be used here. */
38262 *total = cost->fadd;
38263 return false;
38265 /* FALLTHRU */
38267 case AND:
38268 case IOR:
38269 case XOR:
38270 if (GET_MODE_CLASS (mode) == MODE_INT
38271 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38273 *total = (cost->add * 2
38274 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
38275 << (GET_MODE (XEXP (x, 0)) != DImode))
38276 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
38277 << (GET_MODE (XEXP (x, 1)) != DImode)));
38278 return true;
38280 /* FALLTHRU */
38282 case NEG:
38283 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38285 /* ??? SSE cost should be used here. */
38286 *total = cost->fchs;
38287 return false;
38289 else if (X87_FLOAT_MODE_P (mode))
38291 *total = cost->fchs;
38292 return false;
38294 else if (FLOAT_MODE_P (mode))
38296 /* ??? SSE vector cost should be used here. */
38297 *total = cost->fchs;
38298 return false;
38300 /* FALLTHRU */
38302 case NOT:
38303 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38305 /* ??? Should be SSE vector operation cost. */
38306 /* At least for published AMD latencies, this really is the same
38307 as the latency for a simple fpu operation like fabs. */
38308 *total = cost->fabs;
38310 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38311 *total = cost->add * 2;
38312 else
38313 *total = cost->add;
38314 return false;
38316 case COMPARE:
38317 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
38318 && XEXP (XEXP (x, 0), 1) == const1_rtx
38319 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
38320 && XEXP (x, 1) == const0_rtx)
38322 /* This kind of construct is implemented using test[bwl].
38323 Treat it as if we had an AND. */
38324 *total = (cost->add
38325 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
38326 + rtx_cost (const1_rtx, outer_code, opno, speed));
38327 return true;
38329 return false;
38331 case FLOAT_EXTEND:
38332 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
38333 *total = 0;
38334 return false;
38336 case ABS:
38337 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38338 /* ??? SSE cost should be used here. */
38339 *total = cost->fabs;
38340 else if (X87_FLOAT_MODE_P (mode))
38341 *total = cost->fabs;
38342 else if (FLOAT_MODE_P (mode))
38343 /* ??? SSE vector cost should be used here. */
38344 *total = cost->fabs;
38345 return false;
38347 case SQRT:
38348 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38349 /* ??? SSE cost should be used here. */
38350 *total = cost->fsqrt;
38351 else if (X87_FLOAT_MODE_P (mode))
38352 *total = cost->fsqrt;
38353 else if (FLOAT_MODE_P (mode))
38354 /* ??? SSE vector cost should be used here. */
38355 *total = cost->fsqrt;
38356 return false;
38358 case UNSPEC:
38359 if (XINT (x, 1) == UNSPEC_TP)
38360 *total = 0;
38361 return false;
38363 case VEC_SELECT:
38364 case VEC_CONCAT:
38365 case VEC_DUPLICATE:
38366 /* ??? Assume all of these vector manipulation patterns are
38367 recognizable, in which case they all pretty much have the
38368 same cost. */
38369 *total = cost->fabs;
38370 return true;
38371 case VEC_MERGE:
38372 mask = XEXP (x, 2);
38373 /* This is a masked instruction; assume the same cost
38374 as the nonmasked variant. */
38375 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
38376 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
38377 else
38378 *total = cost->fabs;
38379 return true;
38381 default:
38382 return false;
38386 #if TARGET_MACHO
38388 static int current_machopic_label_num;
38390 /* Given a symbol name and its associated stub, write out the
38391 definition of the stub. */
38393 void
38394 machopic_output_stub (FILE *file, const char *symb, const char *stub)
38396 unsigned int length;
38397 char *binder_name, *symbol_name, lazy_ptr_name[32];
38398 int label = ++current_machopic_label_num;
38400 /* For 64-bit we shouldn't get here. */
38401 gcc_assert (!TARGET_64BIT);
38403 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
38404 symb = targetm.strip_name_encoding (symb);
38406 length = strlen (stub);
38407 binder_name = XALLOCAVEC (char, length + 32);
38408 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
38410 length = strlen (symb);
38411 symbol_name = XALLOCAVEC (char, length + 32);
38412 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
38414 sprintf (lazy_ptr_name, "L%d$lz", label);
38416 if (MACHOPIC_ATT_STUB)
38417 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
38418 else if (MACHOPIC_PURE)
38419 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
38420 else
38421 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
38423 fprintf (file, "%s:\n", stub);
38424 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38426 if (MACHOPIC_ATT_STUB)
38428 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
38430 else if (MACHOPIC_PURE)
38432 /* PIC stub. */
38433 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38434 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
38435 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
38436 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
38437 label, lazy_ptr_name, label);
38438 fprintf (file, "\tjmp\t*%%ecx\n");
38440 else
38441 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
38443 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
38444 it needs no stub-binding-helper. */
38445 if (MACHOPIC_ATT_STUB)
38446 return;
38448 fprintf (file, "%s:\n", binder_name);
38450 if (MACHOPIC_PURE)
38452 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
38453 fprintf (file, "\tpushl\t%%ecx\n");
38455 else
38456 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
38458 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
38460 /* N.B. Keep the correspondence of these
38461 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
38462 old-pic/new-pic/non-pic stubs; altering this will break
38463 compatibility with existing dylibs. */
38464 if (MACHOPIC_PURE)
38466 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38467 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
38469 else
38470 /* 16-byte -mdynamic-no-pic stub. */
38471 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
38473 fprintf (file, "%s:\n", lazy_ptr_name);
38474 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38475 fprintf (file, ASM_LONG "%s\n", binder_name);
38477 #endif /* TARGET_MACHO */
38479 /* Order the registers for register allocator. */
38481 void
38482 x86_order_regs_for_local_alloc (void)
38484 int pos = 0;
38485 int i;
38487 /* First allocate the local general purpose registers. */
38488 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38489 if (GENERAL_REGNO_P (i) && call_used_regs[i])
38490 reg_alloc_order [pos++] = i;
38492 /* Global general purpose registers. */
38493 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38494 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
38495 reg_alloc_order [pos++] = i;
38497 /* x87 registers come first in case we are doing FP math
38498 using them. */
38499 if (!TARGET_SSE_MATH)
38500 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38501 reg_alloc_order [pos++] = i;
38503 /* SSE registers. */
38504 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
38505 reg_alloc_order [pos++] = i;
38506 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
38507 reg_alloc_order [pos++] = i;
38509 /* Extended REX SSE registers. */
38510 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
38511 reg_alloc_order [pos++] = i;
38513 /* Mask registers. */
38514 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
38515 reg_alloc_order [pos++] = i;
38517 /* x87 registers. */
38518 if (TARGET_SSE_MATH)
38519 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38520 reg_alloc_order [pos++] = i;
38522 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
38523 reg_alloc_order [pos++] = i;
38525 /* Initialize the rest of the array, as we do not allocate some registers
38526 at all. */
38527 while (pos < FIRST_PSEUDO_REGISTER)
38528 reg_alloc_order [pos++] = 0;
38531 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
38532 in struct attribute_spec.handler. */
38533 static tree
38534 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
38535 tree args,
38536 int flags ATTRIBUTE_UNUSED,
38537 bool *no_add_attrs)
38539 if (TREE_CODE (*node) != FUNCTION_TYPE
38540 && TREE_CODE (*node) != METHOD_TYPE
38541 && TREE_CODE (*node) != FIELD_DECL
38542 && TREE_CODE (*node) != TYPE_DECL)
38544 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38545 name);
38546 *no_add_attrs = true;
38547 return NULL_TREE;
38549 if (TARGET_64BIT)
38551 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
38552 name);
38553 *no_add_attrs = true;
38554 return NULL_TREE;
38556 if (is_attribute_p ("callee_pop_aggregate_return", name))
38558 tree cst;
38560 cst = TREE_VALUE (args);
38561 if (TREE_CODE (cst) != INTEGER_CST)
38563 warning (OPT_Wattributes,
38564 "%qE attribute requires an integer constant argument",
38565 name);
38566 *no_add_attrs = true;
38568 else if (compare_tree_int (cst, 0) != 0
38569 && compare_tree_int (cst, 1) != 0)
38571 warning (OPT_Wattributes,
38572 "argument to %qE attribute is neither zero, nor one",
38573 name);
38574 *no_add_attrs = true;
38577 return NULL_TREE;
38580 return NULL_TREE;
38583 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
38584 struct attribute_spec.handler. */
38585 static tree
38586 ix86_handle_abi_attribute (tree *node, tree name,
38587 tree args ATTRIBUTE_UNUSED,
38588 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38590 if (TREE_CODE (*node) != FUNCTION_TYPE
38591 && TREE_CODE (*node) != METHOD_TYPE
38592 && TREE_CODE (*node) != FIELD_DECL
38593 && TREE_CODE (*node) != TYPE_DECL)
38595 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38596 name);
38597 *no_add_attrs = true;
38598 return NULL_TREE;
38601 /* Can combine regparm with all attributes but fastcall. */
38602 if (is_attribute_p ("ms_abi", name))
38604 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
38606 error ("ms_abi and sysv_abi attributes are not compatible");
38609 return NULL_TREE;
38611 else if (is_attribute_p ("sysv_abi", name))
38613 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
38615 error ("ms_abi and sysv_abi attributes are not compatible");
38618 return NULL_TREE;
38621 return NULL_TREE;
38624 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
38625 struct attribute_spec.handler. */
38626 static tree
38627 ix86_handle_struct_attribute (tree *node, tree name,
38628 tree args ATTRIBUTE_UNUSED,
38629 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38631 tree *type = NULL;
38632 if (DECL_P (*node))
38634 if (TREE_CODE (*node) == TYPE_DECL)
38635 type = &TREE_TYPE (*node);
38637 else
38638 type = node;
38640 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
38642 warning (OPT_Wattributes, "%qE attribute ignored",
38643 name);
38644 *no_add_attrs = true;
38647 else if ((is_attribute_p ("ms_struct", name)
38648 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
38649 || ((is_attribute_p ("gcc_struct", name)
38650 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
38652 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
38653 name);
38654 *no_add_attrs = true;
38657 return NULL_TREE;
38660 static tree
38661 ix86_handle_fndecl_attribute (tree *node, tree name,
38662 tree args ATTRIBUTE_UNUSED,
38663 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38665 if (TREE_CODE (*node) != FUNCTION_DECL)
38667 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38668 name);
38669 *no_add_attrs = true;
38671 return NULL_TREE;
38674 static bool
38675 ix86_ms_bitfield_layout_p (const_tree record_type)
38677 return ((TARGET_MS_BITFIELD_LAYOUT
38678 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
38679 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
38682 /* Returns an expression indicating where the this parameter is
38683 located on entry to the FUNCTION. */
38685 static rtx
38686 x86_this_parameter (tree function)
38688 tree type = TREE_TYPE (function);
38689 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
38690 int nregs;
38692 if (TARGET_64BIT)
38694 const int *parm_regs;
38696 if (ix86_function_type_abi (type) == MS_ABI)
38697 parm_regs = x86_64_ms_abi_int_parameter_registers;
38698 else
38699 parm_regs = x86_64_int_parameter_registers;
38700 return gen_rtx_REG (Pmode, parm_regs[aggr]);
38703 nregs = ix86_function_regparm (type, function);
38705 if (nregs > 0 && !stdarg_p (type))
38707 int regno;
38708 unsigned int ccvt = ix86_get_callcvt (type);
38710 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38711 regno = aggr ? DX_REG : CX_REG;
38712 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38714 regno = CX_REG;
38715 if (aggr)
38716 return gen_rtx_MEM (SImode,
38717 plus_constant (Pmode, stack_pointer_rtx, 4));
38719 else
38721 regno = AX_REG;
38722 if (aggr)
38724 regno = DX_REG;
38725 if (nregs == 1)
38726 return gen_rtx_MEM (SImode,
38727 plus_constant (Pmode,
38728 stack_pointer_rtx, 4));
38731 return gen_rtx_REG (SImode, regno);
38734 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
38735 aggr ? 8 : 4));
38738 /* Determine whether x86_output_mi_thunk can succeed. */
38740 static bool
38741 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
38742 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
38743 HOST_WIDE_INT vcall_offset, const_tree function)
38745 /* 64-bit can handle anything. */
38746 if (TARGET_64BIT)
38747 return true;
38749 /* For 32-bit, everything's fine if we have one free register. */
38750 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
38751 return true;
38753 /* Need a free register for vcall_offset. */
38754 if (vcall_offset)
38755 return false;
38757 /* Need a free register for GOT references. */
38758 if (flag_pic && !targetm.binds_local_p (function))
38759 return false;
38761 /* Otherwise ok. */
38762 return true;
38765 /* Output the assembler code for a thunk function. THUNK_DECL is the
38766 declaration for the thunk function itself, FUNCTION is the decl for
38767 the target function. DELTA is an immediate constant offset to be
38768 added to THIS. If VCALL_OFFSET is nonzero, the word at
38769 *(*this + vcall_offset) should be added to THIS. */
38771 static void
38772 x86_output_mi_thunk (FILE *file,
38773 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
38774 HOST_WIDE_INT vcall_offset, tree function)
38776 rtx this_param = x86_this_parameter (function);
38777 rtx this_reg, tmp, fnaddr;
38778 unsigned int tmp_regno;
38780 if (TARGET_64BIT)
38781 tmp_regno = R10_REG;
38782 else
38784 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
38785 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38786 tmp_regno = AX_REG;
38787 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38788 tmp_regno = DX_REG;
38789 else
38790 tmp_regno = CX_REG;
38793 emit_note (NOTE_INSN_PROLOGUE_END);
38795 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
38796 pull it in now and let DELTA benefit. */
38797 if (REG_P (this_param))
38798 this_reg = this_param;
38799 else if (vcall_offset)
38801 /* Put the this parameter into %eax. */
38802 this_reg = gen_rtx_REG (Pmode, AX_REG);
38803 emit_move_insn (this_reg, this_param);
38805 else
38806 this_reg = NULL_RTX;
38808 /* Adjust the this parameter by a fixed constant. */
38809 if (delta)
38811 rtx delta_rtx = GEN_INT (delta);
38812 rtx delta_dst = this_reg ? this_reg : this_param;
38814 if (TARGET_64BIT)
38816 if (!x86_64_general_operand (delta_rtx, Pmode))
38818 tmp = gen_rtx_REG (Pmode, tmp_regno);
38819 emit_move_insn (tmp, delta_rtx);
38820 delta_rtx = tmp;
38824 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
38827 /* Adjust the this parameter by a value stored in the vtable. */
38828 if (vcall_offset)
38830 rtx vcall_addr, vcall_mem, this_mem;
38832 tmp = gen_rtx_REG (Pmode, tmp_regno);
38834 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
38835 if (Pmode != ptr_mode)
38836 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
38837 emit_move_insn (tmp, this_mem);
38839 /* Adjust the this parameter. */
38840 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
38841 if (TARGET_64BIT
38842 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
38844 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
38845 emit_move_insn (tmp2, GEN_INT (vcall_offset));
38846 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
38849 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
38850 if (Pmode != ptr_mode)
38851 emit_insn (gen_addsi_1_zext (this_reg,
38852 gen_rtx_REG (ptr_mode,
38853 REGNO (this_reg)),
38854 vcall_mem));
38855 else
38856 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
38859 /* If necessary, drop THIS back to its stack slot. */
38860 if (this_reg && this_reg != this_param)
38861 emit_move_insn (this_param, this_reg);
38863 fnaddr = XEXP (DECL_RTL (function), 0);
38864 if (TARGET_64BIT)
38866 if (!flag_pic || targetm.binds_local_p (function)
38867 || TARGET_PECOFF)
38869 else
38871 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
38872 tmp = gen_rtx_CONST (Pmode, tmp);
38873 fnaddr = gen_rtx_MEM (Pmode, tmp);
38876 else
38878 if (!flag_pic || targetm.binds_local_p (function))
38880 #if TARGET_MACHO
38881 else if (TARGET_MACHO)
38883 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
38884 fnaddr = XEXP (fnaddr, 0);
38886 #endif /* TARGET_MACHO */
38887 else
38889 tmp = gen_rtx_REG (Pmode, CX_REG);
38890 output_set_got (tmp, NULL_RTX);
38892 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
38893 fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
38894 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
38898 /* Our sibling call patterns do not allow memories, because we have no
38899 predicate that can distinguish between frame and non-frame memory.
38900 For our purposes here, we can get away with (ab)using a jump pattern,
38901 because we're going to do no optimization. */
38902 if (MEM_P (fnaddr))
38903 emit_jump_insn (gen_indirect_jump (fnaddr));
38904 else
38906 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
38907 fnaddr = legitimize_pic_address (fnaddr,
38908 gen_rtx_REG (Pmode, tmp_regno));
38910 if (!sibcall_insn_operand (fnaddr, word_mode))
38912 tmp = gen_rtx_REG (word_mode, tmp_regno);
38913 if (GET_MODE (fnaddr) != word_mode)
38914 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
38915 emit_move_insn (tmp, fnaddr);
38916 fnaddr = tmp;
38919 tmp = gen_rtx_MEM (QImode, fnaddr);
38920 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
38921 tmp = emit_call_insn (tmp);
38922 SIBLING_CALL_P (tmp) = 1;
38924 emit_barrier ();
38926 /* Emit just enough of rest_of_compilation to get the insns emitted.
38927 Note that use_thunk calls assemble_start_function et al. */
38928 tmp = get_insns ();
38929 shorten_branches (tmp);
38930 final_start_function (tmp, file, 1);
38931 final (tmp, file, 1);
38932 final_end_function ();
38935 static void
38936 x86_file_start (void)
38938 default_file_start ();
38939 if (TARGET_16BIT)
38940 fputs ("\t.code16gcc\n", asm_out_file);
38941 #if TARGET_MACHO
38942 darwin_file_start ();
38943 #endif
38944 if (X86_FILE_START_VERSION_DIRECTIVE)
38945 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
38946 if (X86_FILE_START_FLTUSED)
38947 fputs ("\t.global\t__fltused\n", asm_out_file);
38948 if (ix86_asm_dialect == ASM_INTEL)
38949 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
38953 x86_field_alignment (tree field, int computed)
38955 enum machine_mode mode;
38956 tree type = TREE_TYPE (field);
38958 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
38959 return computed;
38960 mode = TYPE_MODE (strip_array_types (type));
38961 if (mode == DFmode || mode == DCmode
38962 || GET_MODE_CLASS (mode) == MODE_INT
38963 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
38964 return MIN (32, computed);
38965 return computed;
38968 /* Output assembler code to FILE to increment profiler label # LABELNO
38969 for profiling a function entry. */
38970 void
38971 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
38973 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
38974 : MCOUNT_NAME);
38976 if (TARGET_64BIT)
38978 #ifndef NO_PROFILE_COUNTERS
38979 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
38980 #endif
38982 if (!TARGET_PECOFF && flag_pic)
38983 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
38984 else
38985 fprintf (file, "\tcall\t%s\n", mcount_name);
38987 else if (flag_pic)
38989 #ifndef NO_PROFILE_COUNTERS
38990 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
38991 LPREFIX, labelno);
38992 #endif
38993 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
38995 else
38997 #ifndef NO_PROFILE_COUNTERS
38998 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
38999 LPREFIX, labelno);
39000 #endif
39001 fprintf (file, "\tcall\t%s\n", mcount_name);
39005 /* We don't have exact information about the insn sizes, but we may assume
39006 quite safely that we are informed about all 1 byte insns and memory
39007 address sizes. This is enough to eliminate unnecessary padding in
39008 99% of cases. */
39010 static int
39011 min_insn_size (rtx insn)
39013 int l = 0, len;
39015 if (!INSN_P (insn) || !active_insn_p (insn))
39016 return 0;
39018 /* Discard alignments we've emitted and jump instructions. */
39019 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
39020 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
39021 return 0;
39023 /* Important case - calls are always 5 bytes.
39024 It is common to have many calls in a row. */
39025 if (CALL_P (insn)
39026 && symbolic_reference_mentioned_p (PATTERN (insn))
39027 && !SIBLING_CALL_P (insn))
39028 return 5;
39029 len = get_attr_length (insn);
39030 if (len <= 1)
39031 return 1;
39033 /* For normal instructions we rely on get_attr_length being exact,
39034 with a few exceptions. */
39035 if (!JUMP_P (insn))
39037 enum attr_type type = get_attr_type (insn);
39039 switch (type)
39041 case TYPE_MULTI:
39042 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
39043 || asm_noperands (PATTERN (insn)) >= 0)
39044 return 0;
39045 break;
39046 case TYPE_OTHER:
39047 case TYPE_FCMP:
39048 break;
39049 default:
39050 /* Otherwise trust get_attr_length. */
39051 return len;
39054 l = get_attr_length_address (insn);
39055 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
39056 l = 4;
39058 if (l)
39059 return 1+l;
39060 else
39061 return 2;
39064 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39066 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
39067 window. */
39069 static void
39070 ix86_avoid_jump_mispredicts (void)
39072 rtx insn, start = get_insns ();
39073 int nbytes = 0, njumps = 0;
39074 int isjump = 0;
39076 /* Look for all minimal intervals of instructions containing 4 jumps.
39077 The intervals are bounded by START and INSN. NBYTES is the total
39078 size of instructions in the interval including INSN and not including
39079 START. When NBYTES is smaller than 16 bytes, it is possible
39080 that START and INSN end up in the same 16-byte window.
39082 The smallest offset in the window at which INSN can start is the case
39083 where START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
39084 We add a p2align to the 16-byte window with max skip 15 - NBYTES + sizeof (INSN).
39086 Don't consider an asm goto as a jump: while it can contain a jump, it doesn't
39087 have to, since control transfer to its label(s) can be done through other
39088 means; also, we estimate the minimum length of all asm stmts as 0. */
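/* Illustrative sketch of the loop below: START..INSN is a sliding window;
   NJUMPS counts the jumps and calls inside it, and whenever NJUMPS exceeds 3
   the window is shrunk from the front, so padding is only emitted when a
   fourth jump would land within 16 bytes of the previous three. */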
39089 for (insn = start; insn; insn = NEXT_INSN (insn))
39091 int min_size;
39093 if (LABEL_P (insn))
39095 int align = label_to_alignment (insn);
39096 int max_skip = label_to_max_skip (insn);
39098 if (max_skip > 15)
39099 max_skip = 15;
39100 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
39101 already in the current 16 byte page, because otherwise
39102 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
39103 bytes to reach 16 byte boundary. */
39104 if (align <= 0
39105 || (align <= 3 && max_skip != (1 << align) - 1))
39106 max_skip = 0;
39107 if (dump_file)
39108 fprintf (dump_file, "Label %i with max_skip %i\n",
39109 INSN_UID (insn), max_skip);
39110 if (max_skip)
39112 while (nbytes + max_skip >= 16)
39114 start = NEXT_INSN (start);
39115 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39116 || CALL_P (start))
39117 njumps--, isjump = 1;
39118 else
39119 isjump = 0;
39120 nbytes -= min_insn_size (start);
39123 continue;
39126 min_size = min_insn_size (insn);
39127 nbytes += min_size;
39128 if (dump_file)
39129 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
39130 INSN_UID (insn), min_size);
39131 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
39132 || CALL_P (insn))
39133 njumps++;
39134 else
39135 continue;
39137 while (njumps > 3)
39139 start = NEXT_INSN (start);
39140 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39141 || CALL_P (start))
39142 njumps--, isjump = 1;
39143 else
39144 isjump = 0;
39145 nbytes -= min_insn_size (start);
39147 gcc_assert (njumps >= 0);
39148 if (dump_file)
39149 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
39150 INSN_UID (start), INSN_UID (insn), nbytes);
39152 if (njumps == 3 && isjump && nbytes < 16)
39154 int padsize = 15 - nbytes + min_insn_size (insn);
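/* Illustrative numbers: if the window currently holds nbytes == 12 bytes and
   INSN itself is 2 bytes, padsize is 15 - 12 + 2 = 5, pushing INSN past the
   16-byte boundary shared with the three earlier jumps. */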
39156 if (dump_file)
39157 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
39158 INSN_UID (insn), padsize);
39159 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
39163 #endif
39165 /* AMD Athlon works faster
39166 when RET is not the destination of a conditional jump or directly preceded
39167 by another jump instruction. We avoid the penalty by inserting a NOP just
39168 before the RET instruction in such cases. */
39169 static void
39170 ix86_pad_returns (void)
39172 edge e;
39173 edge_iterator ei;
39175 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39177 basic_block bb = e->src;
39178 rtx ret = BB_END (bb);
39179 rtx prev;
39180 bool replace = false;
39182 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
39183 || optimize_bb_for_size_p (bb))
39184 continue;
39185 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
39186 if (active_insn_p (prev) || LABEL_P (prev))
39187 break;
39188 if (prev && LABEL_P (prev))
39190 edge e;
39191 edge_iterator ei;
39193 FOR_EACH_EDGE (e, ei, bb->preds)
39194 if (EDGE_FREQUENCY (e) && e->src->index >= 0
39195 && !(e->flags & EDGE_FALLTHRU))
39197 replace = true;
39198 break;
39201 if (!replace)
39203 prev = prev_active_insn (ret);
39204 if (prev
39205 && ((JUMP_P (prev) && any_condjump_p (prev))
39206 || CALL_P (prev)))
39207 replace = true;
39208 /* Empty functions get a branch mispredict even when
39209 the jump destination is not visible to us. */
39210 if (!prev && !optimize_function_for_size_p (cfun))
39211 replace = true;
39213 if (replace)
39215 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
39216 delete_insn (ret);
39221 /* Count the minimum number of instructions in BB. Return 4 if the
39222 number of instructions >= 4. */
39224 static int
39225 ix86_count_insn_bb (basic_block bb)
39227 rtx insn;
39228 int insn_count = 0;
39230 /* Count number of instructions in this block. Return 4 if the number
39231 of instructions >= 4. */
39232 FOR_BB_INSNS (bb, insn)
39234 /* This only happens in exit blocks. */
39235 if (JUMP_P (insn)
39236 && ANY_RETURN_P (PATTERN (insn)))
39237 break;
39239 if (NONDEBUG_INSN_P (insn)
39240 && GET_CODE (PATTERN (insn)) != USE
39241 && GET_CODE (PATTERN (insn)) != CLOBBER)
39243 insn_count++;
39244 if (insn_count >= 4)
39245 return insn_count;
39249 return insn_count;
39253 /* Count the minimum number of instructions in code path in BB.
39254 Return 4 if the number of instructions >= 4. */
39256 static int
39257 ix86_count_insn (basic_block bb)
39259 edge e;
39260 edge_iterator ei;
39261 int min_prev_count;
39263 /* Only bother counting instructions along paths with no
39264 more than 2 basic blocks between entry and exit. Given
39265 that BB has an edge to exit, determine if a predecessor
39266 of BB has an edge from entry. If so, compute the number
39267 of instructions in the predecessor block. If there
39268 happen to be multiple such blocks, compute the minimum. */
39269 min_prev_count = 4;
39270 FOR_EACH_EDGE (e, ei, bb->preds)
39272 edge prev_e;
39273 edge_iterator prev_ei;
39275 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39277 min_prev_count = 0;
39278 break;
39280 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
39282 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39284 int count = ix86_count_insn_bb (e->src);
39285 if (count < min_prev_count)
39286 min_prev_count = count;
39287 break;
39292 if (min_prev_count < 4)
39293 min_prev_count += ix86_count_insn_bb (bb);
39295 return min_prev_count;
39298 /* Pad short function to 4 instructions. */
39300 static void
39301 ix86_pad_short_function (void)
39303 edge e;
39304 edge_iterator ei;
39306 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39308 rtx ret = BB_END (e->src);
39309 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
39311 int insn_count = ix86_count_insn (e->src);
39313 /* Pad short function. */
39314 if (insn_count < 4)
39316 rtx insn = ret;
39318 /* Find epilogue. */
39319 while (insn
39320 && (!NOTE_P (insn)
39321 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
39322 insn = PREV_INSN (insn);
39324 if (!insn)
39325 insn = ret;
39327 /* Two NOPs count as one instruction. */
39328 insn_count = 2 * (4 - insn_count);
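/* Illustrative: a path with a single real insn gets 2 * (4 - 1) = 6 NOPs;
   counting two NOPs as one instruction, that brings the path up to four. */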
39329 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
39335 /* Fix up a Windows system unwinder issue. If an EH region falls through into
39336 the epilogue, the Windows system unwinder will apply epilogue logic and
39337 produce incorrect offsets. This can be avoided by adding a nop between
39338 the last insn that can throw and the first insn of the epilogue. */
39340 static void
39341 ix86_seh_fixup_eh_fallthru (void)
39343 edge e;
39344 edge_iterator ei;
39346 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39348 rtx insn, next;
39350 /* Find the beginning of the epilogue. */
39351 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
39352 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
39353 break;
39354 if (insn == NULL)
39355 continue;
39357 /* We only care about preceding insns that can throw. */
39358 insn = prev_active_insn (insn);
39359 if (insn == NULL || !can_throw_internal (insn))
39360 continue;
39362 /* Do not separate calls from their debug information. */
39363 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
39364 if (NOTE_P (next)
39365 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
39366 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
39367 insn = next;
39368 else
39369 break;
39371 emit_insn_after (gen_nops (const1_rtx), insn);
39375 /* Implement machine specific optimizations. We implement padding of returns
39376 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
39377 static void
39378 ix86_reorg (void)
39380 /* We are freeing block_for_insn in the toplev to keep compatibility
39381 with old MDEP_REORGS that are not CFG based. Recompute it now. */
39382 compute_bb_for_insn ();
39384 if (TARGET_SEH && current_function_has_exception_handlers ())
39385 ix86_seh_fixup_eh_fallthru ();
39387 if (optimize && optimize_function_for_speed_p (cfun))
39389 if (TARGET_PAD_SHORT_FUNCTION)
39390 ix86_pad_short_function ();
39391 else if (TARGET_PAD_RETURNS)
39392 ix86_pad_returns ();
39393 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39394 if (TARGET_FOUR_JUMP_LIMIT)
39395 ix86_avoid_jump_mispredicts ();
39396 #endif
39400 /* Return nonzero when QImode register that must be represented via REX prefix
39401 is used. */
39402 bool
39403 x86_extended_QIreg_mentioned_p (rtx insn)
39405 int i;
39406 extract_insn_cached (insn);
39407 for (i = 0; i < recog_data.n_operands; i++)
39408 if (GENERAL_REG_P (recog_data.operand[i])
39409 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
39410 return true;
39411 return false;
39414 /* Return nonzero when P points to register encoded via REX prefix.
39415 Called via for_each_rtx. */
39416 static int
39417 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
39419 unsigned int regno;
39420 if (!REG_P (*p))
39421 return 0;
39422 regno = REGNO (*p);
39423 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
39426 /* Return true when INSN mentions register that must be encoded using REX
39427 prefix. */
39428 bool
39429 x86_extended_reg_mentioned_p (rtx insn)
39431 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
39432 extended_reg_mentioned_1, NULL);
39435 /* If profitable, negate (without causing overflow) integer constant
39436 of mode MODE at location LOC. Return true in this case. */
39437 bool
39438 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
39440 HOST_WIDE_INT val;
39442 if (!CONST_INT_P (*loc))
39443 return false;
39445 switch (mode)
39447 case DImode:
39448 /* DImode x86_64 constants must fit in 32 bits. */
39449 gcc_assert (x86_64_immediate_operand (*loc, mode));
39451 mode = SImode;
39452 break;
39454 case SImode:
39455 case HImode:
39456 case QImode:
39457 break;
39459 default:
39460 gcc_unreachable ();
39463 /* Avoid overflows. */
39464 if (mode_signbit_p (mode, *loc))
39465 return false;
39467 val = INTVAL (*loc);
39469 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
39470 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
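/* For instance, addl $-4,%eax is rewritten as subl $4,%eax; -128 itself is
   left alone (it fits in an imm8 while +128 would not), and +128 is negated
   to -128 for the same encoding reason. */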
39471 if ((val < 0 && val != -128)
39472 || val == 128)
39474 *loc = GEN_INT (-val);
39475 return true;
39478 return false;
39481 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
39482 optabs would emit if we didn't have TFmode patterns. */
39484 void
39485 x86_emit_floatuns (rtx operands[2])
39487 rtx neglab, donelab, i0, i1, f0, in, out;
39488 enum machine_mode mode, inmode;
39490 inmode = GET_MODE (operands[1]);
39491 gcc_assert (inmode == SImode || inmode == DImode);
39493 out = operands[0];
39494 in = force_reg (inmode, operands[1]);
39495 mode = GET_MODE (out);
39496 neglab = gen_label_rtx ();
39497 donelab = gen_label_rtx ();
39498 f0 = gen_reg_rtx (mode);
39500 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
39502 expand_float (out, in, 0);
39504 emit_jump_insn (gen_jump (donelab));
39505 emit_barrier ();
39507 emit_label (neglab);
39509 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
39510 1, OPTAB_DIRECT);
39511 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
39512 1, OPTAB_DIRECT);
39513 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
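/* I0 = (IN >> 1) | (IN & 1) halves the value while the OR preserves the
   dropped bit for correct rounding; converting I0 and doubling it below
   (F0 + F0) recovers the unsigned value. */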
39515 expand_float (f0, i0, 0);
39517 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
39519 emit_label (donelab);
39522 /* AVX512F does support 64-byte integer vector operations,
39523 thus the longest vector we are faced with is V64QImode. */
39524 #define MAX_VECT_LEN 64
39526 struct expand_vec_perm_d
39528 rtx target, op0, op1;
39529 unsigned char perm[MAX_VECT_LEN];
39530 enum machine_mode vmode;
39531 unsigned char nelt;
39532 bool one_operand_p;
39533 bool testing_p;
39536 static bool canonicalize_perm (struct expand_vec_perm_d *d);
39537 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
39538 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
39540 /* Get a vector mode of the same size as the original but with elements
39541 twice as wide. This is only guaranteed to apply to integral vectors. */
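/* For example, V16QImode maps to V8HImode: the same 16 bytes, but half as
   many elements, each twice as wide.  (Illustrative.) */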
39543 static inline enum machine_mode
39544 get_mode_wider_vector (enum machine_mode o)
39546 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
39547 enum machine_mode n = GET_MODE_WIDER_MODE (o);
39548 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
39549 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
39550 return n;
39553 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
39554 fill target with val via vec_duplicate. */
39556 static bool
39557 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
39559 bool ok;
39560 rtx insn, dup;
39562 /* First attempt to recognize VAL as-is. */
39563 dup = gen_rtx_VEC_DUPLICATE (mode, val);
39564 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
39565 if (recog_memoized (insn) < 0)
39567 rtx seq;
39568 /* If that fails, force VAL into a register. */
39570 start_sequence ();
39571 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
39572 seq = get_insns ();
39573 end_sequence ();
39574 if (seq)
39575 emit_insn_before (seq, insn);
39577 ok = recog_memoized (insn) >= 0;
39578 gcc_assert (ok);
39580 return true;
39583 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39584 with all elements equal to VAR. Return true if successful. */
39586 static bool
39587 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
39588 rtx target, rtx val)
39590 bool ok;
39592 switch (mode)
39594 case V2SImode:
39595 case V2SFmode:
39596 if (!mmx_ok)
39597 return false;
39598 /* FALLTHRU */
39600 case V4DFmode:
39601 case V4DImode:
39602 case V8SFmode:
39603 case V8SImode:
39604 case V2DFmode:
39605 case V2DImode:
39606 case V4SFmode:
39607 case V4SImode:
39608 case V16SImode:
39609 case V8DImode:
39610 case V16SFmode:
39611 case V8DFmode:
39612 return ix86_vector_duplicate_value (mode, target, val);
39614 case V4HImode:
39615 if (!mmx_ok)
39616 return false;
39617 if (TARGET_SSE || TARGET_3DNOW_A)
39619 rtx x;
39621 val = gen_lowpart (SImode, val);
39622 x = gen_rtx_TRUNCATE (HImode, val);
39623 x = gen_rtx_VEC_DUPLICATE (mode, x);
39624 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39625 return true;
39627 goto widen;
39629 case V8QImode:
39630 if (!mmx_ok)
39631 return false;
39632 goto widen;
39634 case V8HImode:
39635 if (TARGET_SSE2)
39637 struct expand_vec_perm_d dperm;
39638 rtx tmp1, tmp2;
39640 permute:
39641 memset (&dperm, 0, sizeof (dperm));
39642 dperm.target = target;
39643 dperm.vmode = mode;
39644 dperm.nelt = GET_MODE_NUNITS (mode);
39645 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
39646 dperm.one_operand_p = true;
39648 /* Extend to SImode using a paradoxical SUBREG. */
39649 tmp1 = gen_reg_rtx (SImode);
39650 emit_move_insn (tmp1, gen_lowpart (SImode, val));
39652 /* Insert the SImode value as low element of a V4SImode vector. */
39653 tmp2 = gen_reg_rtx (V4SImode);
39654 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
39655 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
39657 ok = (expand_vec_perm_1 (&dperm)
39658 || expand_vec_perm_broadcast_1 (&dperm));
39659 gcc_assert (ok);
39660 return ok;
39662 goto widen;
39664 case V16QImode:
39665 if (TARGET_SSE2)
39666 goto permute;
39667 goto widen;
39669 widen:
39670 /* Replicate the value once into the next wider mode and recurse. */
39672 enum machine_mode smode, wsmode, wvmode;
39673 rtx x;
39675 smode = GET_MODE_INNER (mode);
39676 wvmode = get_mode_wider_vector (mode);
39677 wsmode = GET_MODE_INNER (wvmode);
39679 val = convert_modes (wsmode, smode, val, true);
39680 x = expand_simple_binop (wsmode, ASHIFT, val,
39681 GEN_INT (GET_MODE_BITSIZE (smode)),
39682 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39683 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
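/* Illustrative: duplicating a QImode value 0xab first builds the HImode
   value 0xabab (val | (val << 8)); the recursive call below broadcasts that
   in the wider vector mode, and the result is reinterpreted as MODE. */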
39685 x = gen_reg_rtx (wvmode);
39686 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
39687 gcc_assert (ok);
39688 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
39689 return ok;
39692 case V16HImode:
39693 case V32QImode:
39695 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
39696 rtx x = gen_reg_rtx (hvmode);
39698 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39699 gcc_assert (ok);
39701 x = gen_rtx_VEC_CONCAT (mode, x, x);
39702 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39704 return true;
39706 default:
39707 return false;
39711 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39712 whose ONE_VAR element is VAR, and other elements are zero. Return true
39713 if successful. */
39715 static bool
39716 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
39717 rtx target, rtx var, int one_var)
39719 enum machine_mode vsimode;
39720 rtx new_target;
39721 rtx x, tmp;
39722 bool use_vector_set = false;
39724 switch (mode)
39726 case V2DImode:
39727 /* For SSE4.1, we normally use vector set. But if the second
39728 element is zero and inter-unit moves are OK, we use movq
39729 instead. */
39730 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
39731 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
39732 && one_var == 0));
39733 break;
39734 case V16QImode:
39735 case V4SImode:
39736 case V4SFmode:
39737 use_vector_set = TARGET_SSE4_1;
39738 break;
39739 case V8HImode:
39740 use_vector_set = TARGET_SSE2;
39741 break;
39742 case V4HImode:
39743 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
39744 break;
39745 case V32QImode:
39746 case V16HImode:
39747 case V8SImode:
39748 case V8SFmode:
39749 case V4DFmode:
39750 use_vector_set = TARGET_AVX;
39751 break;
39752 case V4DImode:
39753 /* Use ix86_expand_vector_set in 64bit mode only. */
39754 use_vector_set = TARGET_AVX && TARGET_64BIT;
39755 break;
39756 default:
39757 break;
39760 if (use_vector_set)
39762 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
39763 var = force_reg (GET_MODE_INNER (mode), var);
39764 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39765 return true;
39768 switch (mode)
39770 case V2SFmode:
39771 case V2SImode:
39772 if (!mmx_ok)
39773 return false;
39774 /* FALLTHRU */
39776 case V2DFmode:
39777 case V2DImode:
39778 if (one_var != 0)
39779 return false;
39780 var = force_reg (GET_MODE_INNER (mode), var);
39781 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
39782 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39783 return true;
39785 case V4SFmode:
39786 case V4SImode:
39787 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
39788 new_target = gen_reg_rtx (mode);
39789 else
39790 new_target = target;
39791 var = force_reg (GET_MODE_INNER (mode), var);
39792 x = gen_rtx_VEC_DUPLICATE (mode, var);
39793 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
39794 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
39795 if (one_var != 0)
39797 /* We need to shuffle the value to the correct position, so
39798 create a new pseudo to store the intermediate result. */
39800 /* With SSE2, we can use the integer shuffle insns. */
39801 if (mode != V4SFmode && TARGET_SSE2)
39803 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
39804 const1_rtx,
39805 GEN_INT (one_var == 1 ? 0 : 1),
39806 GEN_INT (one_var == 2 ? 0 : 1),
39807 GEN_INT (one_var == 3 ? 0 : 1)));
39808 if (target != new_target)
39809 emit_move_insn (target, new_target);
39810 return true;
39813 /* Otherwise convert the intermediate result to V4SFmode and
39814 use the SSE1 shuffle instructions. */
39815 if (mode != V4SFmode)
39817 tmp = gen_reg_rtx (V4SFmode);
39818 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
39820 else
39821 tmp = new_target;
39823 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
39824 const1_rtx,
39825 GEN_INT (one_var == 1 ? 0 : 1),
39826 GEN_INT (one_var == 2 ? 0+4 : 1+4),
39827 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
39829 if (mode != V4SFmode)
39830 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
39831 else if (tmp != target)
39832 emit_move_insn (target, tmp);
39834 else if (target != new_target)
39835 emit_move_insn (target, new_target);
39836 return true;
39838 case V8HImode:
39839 case V16QImode:
39840 vsimode = V4SImode;
39841 goto widen;
39842 case V4HImode:
39843 case V8QImode:
39844 if (!mmx_ok)
39845 return false;
39846 vsimode = V2SImode;
39847 goto widen;
39848 widen:
39849 if (one_var != 0)
39850 return false;
39852 /* Zero extend the variable element to SImode and recurse. */
39853 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
39855 x = gen_reg_rtx (vsimode);
39856 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
39857 var, one_var))
39858 gcc_unreachable ();
39860 emit_move_insn (target, gen_lowpart (mode, x));
39861 return true;
39863 default:
39864 return false;
39868 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39869 consisting of the values in VALS. It is known that all elements
39870 except ONE_VAR are constants. Return true if successful. */
39872 static bool
39873 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
39874 rtx target, rtx vals, int one_var)
39876 rtx var = XVECEXP (vals, 0, one_var);
39877 enum machine_mode wmode;
39878 rtx const_vec, x;
39880 const_vec = copy_rtx (vals);
39881 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
39882 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
39884 switch (mode)
39886 case V2DFmode:
39887 case V2DImode:
39888 case V2SFmode:
39889 case V2SImode:
39890 /* For the two element vectors, it's just as easy to use
39891 the general case. */
39892 return false;
39894 case V4DImode:
39895 /* Use ix86_expand_vector_set in 64bit mode only. */
39896 if (!TARGET_64BIT)
39897 return false;
39898 case V4DFmode:
39899 case V8SFmode:
39900 case V8SImode:
39901 case V16HImode:
39902 case V32QImode:
39903 case V4SFmode:
39904 case V4SImode:
39905 case V8HImode:
39906 case V4HImode:
39907 break;
39909 case V16QImode:
39910 if (TARGET_SSE4_1)
39911 break;
39912 wmode = V8HImode;
39913 goto widen;
39914 case V8QImode:
39915 wmode = V4HImode;
39916 goto widen;
39917 widen:
39918 /* There's no way to set one QImode entry easily. Combine
39919 the variable value with its adjacent constant value, and
39920 promote to an HImode set. */
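/* Illustrative: in a V8QImode vector with ONE_VAR == 2, the variable byte
   and the constant byte at index 3 are packed into one HImode value, which
   is then inserted as element 1 (ONE_VAR >> 1) of the V4HImode vector. */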
39921 x = XVECEXP (vals, 0, one_var ^ 1);
39922 if (one_var & 1)
39924 var = convert_modes (HImode, QImode, var, true);
39925 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
39926 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39927 x = GEN_INT (INTVAL (x) & 0xff);
39929 else
39931 var = convert_modes (HImode, QImode, var, true);
39932 x = gen_int_mode (INTVAL (x) << 8, HImode);
39934 if (x != const0_rtx)
39935 var = expand_simple_binop (HImode, IOR, var, x, var,
39936 1, OPTAB_LIB_WIDEN);
39938 x = gen_reg_rtx (wmode);
39939 emit_move_insn (x, gen_lowpart (wmode, const_vec));
39940 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
39942 emit_move_insn (target, gen_lowpart (mode, x));
39943 return true;
39945 default:
39946 return false;
39949 emit_move_insn (target, const_vec);
39950 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39951 return true;
39954 /* A subroutine of ix86_expand_vector_init_general. Use vector
39955 concatenate to handle the most general case: all values variable,
39956 and none identical. */
39958 static void
39959 ix86_expand_vector_init_concat (enum machine_mode mode,
39960 rtx target, rtx *ops, int n)
39962 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
39963 rtx first[16], second[8], third[4];
39964 rtvec v;
39965 int i, j;
39967 switch (n)
39969 case 2:
39970 switch (mode)
39972 case V16SImode:
39973 cmode = V8SImode;
39974 break;
39975 case V16SFmode:
39976 cmode = V8SFmode;
39977 break;
39978 case V8DImode:
39979 cmode = V4DImode;
39980 break;
39981 case V8DFmode:
39982 cmode = V4DFmode;
39983 break;
39984 case V8SImode:
39985 cmode = V4SImode;
39986 break;
39987 case V8SFmode:
39988 cmode = V4SFmode;
39989 break;
39990 case V4DImode:
39991 cmode = V2DImode;
39992 break;
39993 case V4DFmode:
39994 cmode = V2DFmode;
39995 break;
39996 case V4SImode:
39997 cmode = V2SImode;
39998 break;
39999 case V4SFmode:
40000 cmode = V2SFmode;
40001 break;
40002 case V2DImode:
40003 cmode = DImode;
40004 break;
40005 case V2SImode:
40006 cmode = SImode;
40007 break;
40008 case V2DFmode:
40009 cmode = DFmode;
40010 break;
40011 case V2SFmode:
40012 cmode = SFmode;
40013 break;
40014 default:
40015 gcc_unreachable ();
40018 if (!register_operand (ops[1], cmode))
40019 ops[1] = force_reg (cmode, ops[1]);
40020 if (!register_operand (ops[0], cmode))
40021 ops[0] = force_reg (cmode, ops[0]);
40022 emit_insn (gen_rtx_SET (VOIDmode, target,
40023 gen_rtx_VEC_CONCAT (mode, ops[0],
40024 ops[1])));
40025 break;
40027 case 4:
40028 switch (mode)
40030 case V4DImode:
40031 cmode = V2DImode;
40032 break;
40033 case V4DFmode:
40034 cmode = V2DFmode;
40035 break;
40036 case V4SImode:
40037 cmode = V2SImode;
40038 break;
40039 case V4SFmode:
40040 cmode = V2SFmode;
40041 break;
40042 default:
40043 gcc_unreachable ();
40045 goto half;
40047 case 8:
40048 switch (mode)
40050 case V8DImode:
40051 cmode = V2DImode;
40052 hmode = V4DImode;
40053 break;
40054 case V8DFmode:
40055 cmode = V2DFmode;
40056 hmode = V4DFmode;
40057 break;
40058 case V8SImode:
40059 cmode = V2SImode;
40060 hmode = V4SImode;
40061 break;
40062 case V8SFmode:
40063 cmode = V2SFmode;
40064 hmode = V4SFmode;
40065 break;
40066 default:
40067 gcc_unreachable ();
40069 goto half;
40071 case 16:
40072 switch (mode)
40074 case V16SImode:
40075 cmode = V2SImode;
40076 hmode = V4SImode;
40077 gmode = V8SImode;
40078 break;
40079 case V16SFmode:
40080 cmode = V2SFmode;
40081 hmode = V4SFmode;
40082 gmode = V8SFmode;
40083 break;
40084 default:
40085 gcc_unreachable ();
40087 goto half;
40089 half:
40090 /* FIXME: We process inputs backward to help RA. PR 36222. */
40091 i = n - 1;
40092 j = (n >> 1) - 1;
40093 for (; i > 0; i -= 2, j--)
40095 first[j] = gen_reg_rtx (cmode);
40096 v = gen_rtvec (2, ops[i - 1], ops[i]);
40097 ix86_expand_vector_init (false, first[j],
40098 gen_rtx_PARALLEL (cmode, v));
40101 n >>= 1;
40102 if (n > 4)
40104 gcc_assert (hmode != VOIDmode);
40105 gcc_assert (gmode != VOIDmode);
40106 for (i = j = 0; i < n; i += 2, j++)
40108 second[j] = gen_reg_rtx (hmode);
40109 ix86_expand_vector_init_concat (hmode, second [j],
40110 &first [i], 2);
40112 n >>= 1;
40113 for (i = j = 0; i < n; i += 2, j++)
40115 third[j] = gen_reg_rtx (gmode);
40116 ix86_expand_vector_init_concat (gmode, third[j],
40117 &second[i], 2);
40119 n >>= 1;
40120 ix86_expand_vector_init_concat (mode, target, third, n);
40122 else if (n > 2)
40124 gcc_assert (hmode != VOIDmode);
40125 for (i = j = 0; i < n; i += 2, j++)
40127 second[j] = gen_reg_rtx (hmode);
40128 ix86_expand_vector_init_concat (hmode, second [j],
40129 &first [i], 2);
40131 n >>= 1;
40132 ix86_expand_vector_init_concat (mode, target, second, n);
40134 else
40135 ix86_expand_vector_init_concat (mode, target, first, n);
40136 break;
40138 default:
40139 gcc_unreachable ();
40143 /* A subroutine of ix86_expand_vector_init_general. Use vector
40144 interleave to handle the most general case: all values variable,
40145 and none identical. */
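/* Illustrative outline (not the literal emitted RTL), for V8HImode with
   elements e0..e7 and N == 4 pairs:

     v[i]   = vector holding the pair { e[2*i], e[2*i+1] } in its low part
     w[j]   = interleave_low_v4si (v[2*j], v[2*j+1])
     target = interleave_low_v2di (w[0], w[1])

   i.e. a log2(N) tree of "interleave low" operations at widening element
   sizes glues the partial vectors together.  */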
40147 static void
40148 ix86_expand_vector_init_interleave (enum machine_mode mode,
40149 rtx target, rtx *ops, int n)
40151 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
40152 int i, j;
40153 rtx op0, op1;
40154 rtx (*gen_load_even) (rtx, rtx, rtx);
40155 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
40156 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
40158 switch (mode)
40160 case V8HImode:
40161 gen_load_even = gen_vec_setv8hi;
40162 gen_interleave_first_low = gen_vec_interleave_lowv4si;
40163 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40164 inner_mode = HImode;
40165 first_imode = V4SImode;
40166 second_imode = V2DImode;
40167 third_imode = VOIDmode;
40168 break;
40169 case V16QImode:
40170 gen_load_even = gen_vec_setv16qi;
40171 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
40172 gen_interleave_second_low = gen_vec_interleave_lowv4si;
40173 inner_mode = QImode;
40174 first_imode = V8HImode;
40175 second_imode = V4SImode;
40176 third_imode = V2DImode;
40177 break;
40178 default:
40179 gcc_unreachable ();
40182 for (i = 0; i < n; i++)
40184 /* Extend the odd element to SImode using a paradoxical SUBREG. */
40185 op0 = gen_reg_rtx (SImode);
40186 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
40188 /* Insert the SImode value as low element of V4SImode vector. */
40189 op1 = gen_reg_rtx (V4SImode);
40190 op0 = gen_rtx_VEC_MERGE (V4SImode,
40191 gen_rtx_VEC_DUPLICATE (V4SImode,
40192 op0),
40193 CONST0_RTX (V4SImode),
40194 const1_rtx);
40195 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
40197 /* Cast the V4SImode vector back to a vector in original mode. */
40198 op0 = gen_reg_rtx (mode);
40199 emit_move_insn (op0, gen_lowpart (mode, op1));
40201 /* Load even elements into the second position. */
40202 emit_insn (gen_load_even (op0,
40203 force_reg (inner_mode,
40204 ops [i + i + 1]),
40205 const1_rtx));
40207 /* Cast vector to FIRST_IMODE vector. */
40208 ops[i] = gen_reg_rtx (first_imode);
40209 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
40212 /* Interleave low FIRST_IMODE vectors. */
40213 for (i = j = 0; i < n; i += 2, j++)
40215 op0 = gen_reg_rtx (first_imode);
40216 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
40218 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
40219 ops[j] = gen_reg_rtx (second_imode);
40220 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
40223 /* Interleave low SECOND_IMODE vectors. */
40224 switch (second_imode)
40226 case V4SImode:
40227 for (i = j = 0; i < n / 2; i += 2, j++)
40229 op0 = gen_reg_rtx (second_imode);
40230 emit_insn (gen_interleave_second_low (op0, ops[i],
40231 ops[i + 1]));
40233 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
40234 vector. */
40235 ops[j] = gen_reg_rtx (third_imode);
40236 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
40238 second_imode = V2DImode;
40239 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40240 /* FALLTHRU */
40242 case V2DImode:
40243 op0 = gen_reg_rtx (second_imode);
40244 emit_insn (gen_interleave_second_low (op0, ops[0],
40245 ops[1]));
40247 /* Cast the SECOND_IMODE vector back to a vector in the original
40248 mode. */
40249 emit_insn (gen_rtx_SET (VOIDmode, target,
40250 gen_lowpart (mode, op0)));
40251 break;
40253 default:
40254 gcc_unreachable ();
40258 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
40259 all values variable, and none identical. */
40261 static void
40262 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
40263 rtx target, rtx vals)
40265 rtx ops[64], op0, op1;
40266 enum machine_mode half_mode = VOIDmode;
40267 int n, i;
40269 switch (mode)
40271 case V2SFmode:
40272 case V2SImode:
40273 if (!mmx_ok && !TARGET_SSE)
40274 break;
40275 /* FALLTHRU */
40277 case V16SImode:
40278 case V16SFmode:
40279 case V8DFmode:
40280 case V8DImode:
40281 case V8SFmode:
40282 case V8SImode:
40283 case V4DFmode:
40284 case V4DImode:
40285 case V4SFmode:
40286 case V4SImode:
40287 case V2DFmode:
40288 case V2DImode:
40289 n = GET_MODE_NUNITS (mode);
40290 for (i = 0; i < n; i++)
40291 ops[i] = XVECEXP (vals, 0, i);
40292 ix86_expand_vector_init_concat (mode, target, ops, n);
40293 return;
40295 case V32QImode:
40296 half_mode = V16QImode;
40297 goto half;
40299 case V16HImode:
40300 half_mode = V8HImode;
40301 goto half;
40303 half:
40304 n = GET_MODE_NUNITS (mode);
40305 for (i = 0; i < n; i++)
40306 ops[i] = XVECEXP (vals, 0, i);
40307 op0 = gen_reg_rtx (half_mode);
40308 op1 = gen_reg_rtx (half_mode);
40309 ix86_expand_vector_init_interleave (half_mode, op0, ops,
40310 n >> 2);
40311 ix86_expand_vector_init_interleave (half_mode, op1,
40312 &ops [n >> 1], n >> 2);
40313 emit_insn (gen_rtx_SET (VOIDmode, target,
40314 gen_rtx_VEC_CONCAT (mode, op0, op1)));
40315 return;
40317 case V16QImode:
40318 if (!TARGET_SSE4_1)
40319 break;
40320 /* FALLTHRU */
40322 case V8HImode:
40323 if (!TARGET_SSE2)
40324 break;
40326 /* Don't use ix86_expand_vector_init_interleave if we can't
40327 move from GPR to SSE register directly. */
40328 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
40329 break;
40331 n = GET_MODE_NUNITS (mode);
40332 for (i = 0; i < n; i++)
40333 ops[i] = XVECEXP (vals, 0, i);
40334 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
40335 return;
40337 case V4HImode:
40338 case V8QImode:
40339 break;
40341 default:
40342 gcc_unreachable ();
40346 int i, j, n_elts, n_words, n_elt_per_word;
40347 enum machine_mode inner_mode;
40348 rtx words[4], shift;
40350 inner_mode = GET_MODE_INNER (mode);
40351 n_elts = GET_MODE_NUNITS (mode);
40352 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
40353 n_elt_per_word = n_elts / n_words;
40354 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
40356 for (i = 0; i < n_words; ++i)
40358 rtx word = NULL_RTX;
40360 for (j = 0; j < n_elt_per_word; ++j)
40362 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
40363 elt = convert_modes (word_mode, inner_mode, elt, true);
40365 if (j == 0)
40366 word = elt;
40367 else
40369 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
40370 word, 1, OPTAB_LIB_WIDEN);
40371 word = expand_simple_binop (word_mode, IOR, word, elt,
40372 word, 1, OPTAB_LIB_WIDEN);
40376 words[i] = word;
40379 if (n_words == 1)
40380 emit_move_insn (target, gen_lowpart (mode, words[0]));
40381 else if (n_words == 2)
40383 rtx tmp = gen_reg_rtx (mode);
40384 emit_clobber (tmp);
40385 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
40386 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
40387 emit_move_insn (target, tmp);
40389 else if (n_words == 4)
40391 rtx tmp = gen_reg_rtx (V4SImode);
40392 gcc_assert (word_mode == SImode);
40393 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
40394 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
40395 emit_move_insn (target, gen_lowpart (mode, tmp));
40397 else
40398 gcc_unreachable ();
40402 /* Initialize vector TARGET via VALS. Suppress the use of MMX
40403 instructions unless MMX_OK is true. */
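/* Rough C sketch of the dispatch below (illustrative only):

     if (n_var == 0)
       load the whole vector from the constant pool;
     else if (all elements are identical and a broadcast pattern exists)
       broadcast element 0;
     else if (n_var == 1)
       load the constant part, then overwrite the one variable element;
     else
       ix86_expand_vector_init_general (...);  */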
40405 void
40406 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
40408 enum machine_mode mode = GET_MODE (target);
40409 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40410 int n_elts = GET_MODE_NUNITS (mode);
40411 int n_var = 0, one_var = -1;
40412 bool all_same = true, all_const_zero = true;
40413 int i;
40414 rtx x;
40416 for (i = 0; i < n_elts; ++i)
40418 x = XVECEXP (vals, 0, i);
40419 if (!(CONST_INT_P (x)
40420 || GET_CODE (x) == CONST_DOUBLE
40421 || GET_CODE (x) == CONST_FIXED))
40422 n_var++, one_var = i;
40423 else if (x != CONST0_RTX (inner_mode))
40424 all_const_zero = false;
40425 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
40426 all_same = false;
40429 /* Constants are best loaded from the constant pool. */
40430 if (n_var == 0)
40432 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
40433 return;
40436 /* If all values are identical, broadcast the value. */
40437 if (all_same
40438 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
40439 XVECEXP (vals, 0, 0)))
40440 return;
40442 /* Values where only one field is non-constant are best loaded from
40443 the pool and overwritten via move later. */
40444 if (n_var == 1)
40446 if (all_const_zero
40447 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
40448 XVECEXP (vals, 0, one_var),
40449 one_var))
40450 return;
40452 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
40453 return;
40456 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
40459 void
40460 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
40462 enum machine_mode mode = GET_MODE (target);
40463 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40464 enum machine_mode half_mode;
40465 bool use_vec_merge = false;
40466 rtx tmp;
40467 static rtx (*gen_extract[6][2]) (rtx, rtx)
40469 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
40470 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
40471 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
40472 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
40473 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
40474 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
40476 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
40478 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
40479 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
40480 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
40481 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
40482 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
40483 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
40485 int i, j, n;
40487 switch (mode)
40489 case V2SFmode:
40490 case V2SImode:
40491 if (mmx_ok)
40493 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40494 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
40495 if (elt == 0)
40496 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40497 else
40498 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40499 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40500 return;
40502 break;
40504 case V2DImode:
40505 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
40506 if (use_vec_merge)
40507 break;
40509 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40510 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
40511 if (elt == 0)
40512 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40513 else
40514 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40515 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40516 return;
40518 case V2DFmode:
40520 rtx op0, op1;
40522 /* For the two element vectors, we implement a VEC_CONCAT with
40523 the extraction of the other element. */
40525 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
40526 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
40528 if (elt == 0)
40529 op0 = val, op1 = tmp;
40530 else
40531 op0 = tmp, op1 = val;
40533 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
40534 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40536 return;
40538 case V4SFmode:
40539 use_vec_merge = TARGET_SSE4_1;
40540 if (use_vec_merge)
40541 break;
40543 switch (elt)
40545 case 0:
40546 use_vec_merge = true;
40547 break;
40549 case 1:
40550 /* tmp = target = A B C D */
40551 tmp = copy_to_reg (target);
40552 /* target = A A B B */
40553 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
40554 /* target = X A B B */
40555 ix86_expand_vector_set (false, target, val, 0);
40556 /* target = A X C D */
40557 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40558 const1_rtx, const0_rtx,
40559 GEN_INT (2+4), GEN_INT (3+4)));
40560 return;
40562 case 2:
40563 /* tmp = target = A B C D */
40564 tmp = copy_to_reg (target);
40565 /* tmp = X B C D */
40566 ix86_expand_vector_set (false, tmp, val, 0);
40567 /* target = A B X D */
40568 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40569 const0_rtx, const1_rtx,
40570 GEN_INT (0+4), GEN_INT (3+4)));
40571 return;
40573 case 3:
40574 /* tmp = target = A B C D */
40575 tmp = copy_to_reg (target);
40576 /* tmp = X B C D */
40577 ix86_expand_vector_set (false, tmp, val, 0);
40578 /* target = A B X D */
40579 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40580 const0_rtx, const1_rtx,
40581 GEN_INT (2+4), GEN_INT (0+4)));
40582 return;
40584 default:
40585 gcc_unreachable ();
40587 break;
40589 case V4SImode:
40590 use_vec_merge = TARGET_SSE4_1;
40591 if (use_vec_merge)
40592 break;
40594 /* Element 0 handled by vec_merge below. */
40595 if (elt == 0)
40597 use_vec_merge = true;
40598 break;
40601 if (TARGET_SSE2)
40603 /* With SSE2, use integer shuffles to swap element 0 and ELT,
40604 store into element 0, then shuffle them back. */
40606 rtx order[4];
40608 order[0] = GEN_INT (elt);
40609 order[1] = const1_rtx;
40610 order[2] = const2_rtx;
40611 order[3] = GEN_INT (3);
40612 order[elt] = const0_rtx;
40614 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40615 order[1], order[2], order[3]));
40617 ix86_expand_vector_set (false, target, val, 0);
40619 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40620 order[1], order[2], order[3]));
40622 else
40624 /* For SSE1, we have to reuse the V4SF code. */
40625 rtx t = gen_reg_rtx (V4SFmode);
40626 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
40627 emit_move_insn (target, gen_lowpart (mode, t));
40629 return;
40631 case V8HImode:
40632 use_vec_merge = TARGET_SSE2;
40633 break;
40634 case V4HImode:
40635 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40636 break;
40638 case V16QImode:
40639 use_vec_merge = TARGET_SSE4_1;
40640 break;
40642 case V8QImode:
40643 break;
40645 case V32QImode:
40646 half_mode = V16QImode;
40647 j = 0;
40648 n = 16;
40649 goto half;
40651 case V16HImode:
40652 half_mode = V8HImode;
40653 j = 1;
40654 n = 8;
40655 goto half;
40657 case V8SImode:
40658 half_mode = V4SImode;
40659 j = 2;
40660 n = 4;
40661 goto half;
40663 case V4DImode:
40664 half_mode = V2DImode;
40665 j = 3;
40666 n = 2;
40667 goto half;
40669 case V8SFmode:
40670 half_mode = V4SFmode;
40671 j = 4;
40672 n = 4;
40673 goto half;
40675 case V4DFmode:
40676 half_mode = V2DFmode;
40677 j = 5;
40678 n = 2;
40679 goto half;
40681 half:
40682 /* Compute offset. */
40683 i = elt / n;
40684 elt %= n;
40686 gcc_assert (i <= 1);
40688 /* Extract the half. */
40689 tmp = gen_reg_rtx (half_mode);
40690 emit_insn (gen_extract[j][i] (tmp, target));
40692 /* Put val in tmp at elt. */
40693 ix86_expand_vector_set (false, tmp, val, elt);
40695 /* Put it back. */
40696 emit_insn (gen_insert[j][i] (target, target, tmp));
40697 return;
40699 default:
40700 break;
40703 if (use_vec_merge)
40705 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
40706 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
40707 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40709 else
40711 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40713 emit_move_insn (mem, target);
40715 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40716 emit_move_insn (tmp, val);
40718 emit_move_insn (target, mem);
40722 void
40723 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
40725 enum machine_mode mode = GET_MODE (vec);
40726 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40727 bool use_vec_extr = false;
40728 rtx tmp;
40730 switch (mode)
40732 case V2SImode:
40733 case V2SFmode:
40734 if (!mmx_ok)
40735 break;
40736 /* FALLTHRU */
40738 case V2DFmode:
40739 case V2DImode:
40740 use_vec_extr = true;
40741 break;
40743 case V4SFmode:
40744 use_vec_extr = TARGET_SSE4_1;
40745 if (use_vec_extr)
40746 break;
40748 switch (elt)
40750 case 0:
40751 tmp = vec;
40752 break;
40754 case 1:
40755 case 3:
40756 tmp = gen_reg_rtx (mode);
40757 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
40758 GEN_INT (elt), GEN_INT (elt),
40759 GEN_INT (elt+4), GEN_INT (elt+4)));
40760 break;
40762 case 2:
40763 tmp = gen_reg_rtx (mode);
40764 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
40765 break;
40767 default:
40768 gcc_unreachable ();
40770 vec = tmp;
40771 use_vec_extr = true;
40772 elt = 0;
40773 break;
40775 case V4SImode:
40776 use_vec_extr = TARGET_SSE4_1;
40777 if (use_vec_extr)
40778 break;
40780 if (TARGET_SSE2)
40782 switch (elt)
40784 case 0:
40785 tmp = vec;
40786 break;
40788 case 1:
40789 case 3:
40790 tmp = gen_reg_rtx (mode);
40791 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
40792 GEN_INT (elt), GEN_INT (elt),
40793 GEN_INT (elt), GEN_INT (elt)));
40794 break;
40796 case 2:
40797 tmp = gen_reg_rtx (mode);
40798 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
40799 break;
40801 default:
40802 gcc_unreachable ();
40804 vec = tmp;
40805 use_vec_extr = true;
40806 elt = 0;
40808 else
40810 /* For SSE1, we have to reuse the V4SF code. */
40811 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
40812 gen_lowpart (V4SFmode, vec), elt);
40813 return;
40815 break;
40817 case V8HImode:
40818 use_vec_extr = TARGET_SSE2;
40819 break;
40820 case V4HImode:
40821 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40822 break;
40824 case V16QImode:
40825 use_vec_extr = TARGET_SSE4_1;
40826 break;
40828 case V8SFmode:
40829 if (TARGET_AVX)
40831 tmp = gen_reg_rtx (V4SFmode);
40832 if (elt < 4)
40833 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
40834 else
40835 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
40836 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40837 return;
40839 break;
40841 case V4DFmode:
40842 if (TARGET_AVX)
40844 tmp = gen_reg_rtx (V2DFmode);
40845 if (elt < 2)
40846 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
40847 else
40848 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
40849 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40850 return;
40852 break;
40854 case V32QImode:
40855 if (TARGET_AVX)
40857 tmp = gen_reg_rtx (V16QImode);
40858 if (elt < 16)
40859 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
40860 else
40861 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
40862 ix86_expand_vector_extract (false, target, tmp, elt & 15);
40863 return;
40865 break;
40867 case V16HImode:
40868 if (TARGET_AVX)
40870 tmp = gen_reg_rtx (V8HImode);
40871 if (elt < 8)
40872 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
40873 else
40874 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
40875 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40876 return;
40878 break;
40880 case V8SImode:
40881 if (TARGET_AVX)
40883 tmp = gen_reg_rtx (V4SImode);
40884 if (elt < 4)
40885 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
40886 else
40887 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
40888 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40889 return;
40891 break;
40893 case V4DImode:
40894 if (TARGET_AVX)
40896 tmp = gen_reg_rtx (V2DImode);
40897 if (elt < 2)
40898 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
40899 else
40900 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
40901 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40902 return;
40904 break;
40906 case V16SFmode:
40907 tmp = gen_reg_rtx (V8SFmode);
40908 if (elt < 8)
40909 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
40910 else
40911 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
40912 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40913 return;
40915 case V8DFmode:
40916 tmp = gen_reg_rtx (V4DFmode);
40917 if (elt < 4)
40918 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
40919 else
40920 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
40921 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40922 return;
40924 case V16SImode:
40925 tmp = gen_reg_rtx (V8SImode);
40926 if (elt < 8)
40927 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
40928 else
40929 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
40930 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40931 return;
40933 case V8DImode:
40934 tmp = gen_reg_rtx (V4DImode);
40935 if (elt < 4)
40936 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
40937 else
40938 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
40939 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40940 return;
40942 case V8QImode:
40943 /* ??? Could extract the appropriate HImode element and shift. */
40944 default:
40945 break;
40948 if (use_vec_extr)
40950 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
40951 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
40953 /* Let the rtl optimizers know about the zero extension performed. */
40954 if (inner_mode == QImode || inner_mode == HImode)
40956 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
40957 target = gen_lowpart (SImode, target);
40960 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40962 else
40964 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40966 emit_move_insn (mem, vec);
40968 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40969 emit_move_insn (target, tmp);
40973 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
40974 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
40975 The upper bits of DEST are undefined, though they shouldn't cause
40976 exceptions (some bits from src or all zeros are ok). */
40978 static void
40979 emit_reduc_half (rtx dest, rtx src, int i)
40981 rtx tem, d = dest;
40982 switch (GET_MODE (src))
40984 case V4SFmode:
40985 if (i == 128)
40986 tem = gen_sse_movhlps (dest, src, src);
40987 else
40988 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
40989 GEN_INT (1 + 4), GEN_INT (1 + 4));
40990 break;
40991 case V2DFmode:
40992 tem = gen_vec_interleave_highv2df (dest, src, src);
40993 break;
40994 case V16QImode:
40995 case V8HImode:
40996 case V4SImode:
40997 case V2DImode:
40998 d = gen_reg_rtx (V1TImode);
40999 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
41000 GEN_INT (i / 2));
41001 break;
41002 case V8SFmode:
41003 if (i == 256)
41004 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
41005 else
41006 tem = gen_avx_shufps256 (dest, src, src,
41007 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
41008 break;
41009 case V4DFmode:
41010 if (i == 256)
41011 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
41012 else
41013 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
41014 break;
41015 case V32QImode:
41016 case V16HImode:
41017 case V8SImode:
41018 case V4DImode:
41019 if (i == 256)
41021 if (GET_MODE (dest) != V4DImode)
41022 d = gen_reg_rtx (V4DImode);
41023 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
41024 gen_lowpart (V4DImode, src),
41025 const1_rtx);
41027 else
41029 d = gen_reg_rtx (V2TImode);
41030 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
41031 GEN_INT (i / 2));
41033 break;
41034 case V16SImode:
41035 case V16SFmode:
41036 case V8DImode:
41037 case V8DFmode:
41038 if (i > 128)
41039 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
41040 gen_lowpart (V16SImode, src),
41041 gen_lowpart (V16SImode, src),
41042 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
41043 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
41044 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
41045 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
41046 GEN_INT (0xC), GEN_INT (0xD),
41047 GEN_INT (0xE), GEN_INT (0xF),
41048 GEN_INT (0x10), GEN_INT (0x11),
41049 GEN_INT (0x12), GEN_INT (0x13),
41050 GEN_INT (0x14), GEN_INT (0x15),
41051 GEN_INT (0x16), GEN_INT (0x17));
41052 else
41053 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
41054 gen_lowpart (V16SImode, src),
41055 GEN_INT (i == 128 ? 0x2 : 0x1),
41056 GEN_INT (0x3),
41057 GEN_INT (0x3),
41058 GEN_INT (0x3),
41059 GEN_INT (i == 128 ? 0x6 : 0x5),
41060 GEN_INT (0x7),
41061 GEN_INT (0x7),
41062 GEN_INT (0x7),
41063 GEN_INT (i == 128 ? 0xA : 0x9),
41064 GEN_INT (0xB),
41065 GEN_INT (0xB),
41066 GEN_INT (0xB),
41067 GEN_INT (i == 128 ? 0xE : 0xD),
41068 GEN_INT (0xF),
41069 GEN_INT (0xF),
41070 GEN_INT (0xF));
41071 break;
41072 default:
41073 gcc_unreachable ();
41075 emit_insn (tem);
41076 if (d != dest)
41077 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
41080 /* Expand a vector reduction. FN is the binary pattern to reduce;
41081 DEST is the destination; IN is the input vector. */
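/* Illustrative example (not emitted code): reducing a V4SImode vector
   { a, b, c, d } with FN == plus proceeds as

     half = { c, d, _, _ };     tmp  = FN (half, vec);   -> { a+c, b+d, ... }
     half = { b+d, _, _, _ };   dest = FN (half, tmp);   -> { a+b+c+d, ... }

   so the low element of DEST ends up holding the reduced value; the "_"
   entries are don't-cares produced by emit_reduc_half.  */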
41083 void
41084 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
41086 rtx half, dst, vec = in;
41087 enum machine_mode mode = GET_MODE (in);
41088 int i;
41090 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
41091 if (TARGET_SSE4_1
41092 && mode == V8HImode
41093 && fn == gen_uminv8hi3)
41095 emit_insn (gen_sse4_1_phminposuw (dest, in));
41096 return;
41099 for (i = GET_MODE_BITSIZE (mode);
41100 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
41101 i >>= 1)
41103 half = gen_reg_rtx (mode);
41104 emit_reduc_half (half, vec, i);
41105 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
41106 dst = dest;
41107 else
41108 dst = gen_reg_rtx (mode);
41109 emit_insn (fn (dst, half, vec));
41110 vec = dst;
41114 /* Target hook for scalar_mode_supported_p. */
41115 static bool
41116 ix86_scalar_mode_supported_p (enum machine_mode mode)
41118 if (DECIMAL_FLOAT_MODE_P (mode))
41119 return default_decimal_float_supported_p ();
41120 else if (mode == TFmode)
41121 return true;
41122 else
41123 return default_scalar_mode_supported_p (mode);
41126 /* Implements target hook vector_mode_supported_p. */
41127 static bool
41128 ix86_vector_mode_supported_p (enum machine_mode mode)
41130 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41131 return true;
41132 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41133 return true;
41134 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41135 return true;
41136 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41137 return true;
41138 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
41139 return true;
41140 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
41141 return true;
41142 return false;
41145 /* Target hook for c_mode_for_suffix. */
41146 static enum machine_mode
41147 ix86_c_mode_for_suffix (char suffix)
41149 if (suffix == 'q')
41150 return TFmode;
41151 if (suffix == 'w')
41152 return XFmode;
41154 return VOIDmode;
41157 /* Worker function for TARGET_MD_ASM_CLOBBERS.
41159 We do this in the new i386 backend to maintain source compatibility
41160 with the old cc0-based compiler. */
41162 static tree
41163 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
41164 tree inputs ATTRIBUTE_UNUSED,
41165 tree clobbers)
41167 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
41168 clobbers);
41169 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
41170 clobbers);
41171 return clobbers;
41174 /* Implements the target hook targetm.asm.encode_section_info. */
41176 static void ATTRIBUTE_UNUSED
41177 ix86_encode_section_info (tree decl, rtx rtl, int first)
41179 default_encode_section_info (decl, rtl, first);
41181 if (TREE_CODE (decl) == VAR_DECL
41182 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
41183 && ix86_in_large_data_p (decl))
41184 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
41187 /* Worker function for REVERSE_CONDITION. */
41189 enum rtx_code
41190 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
41192 return (mode != CCFPmode && mode != CCFPUmode
41193 ? reverse_condition (code)
41194 : reverse_condition_maybe_unordered (code));
41197 /* Output code to perform an x87 FP register move, from OPERANDS[1]
41198 to OPERANDS[0]. */
41200 const char *
41201 output_387_reg_move (rtx insn, rtx *operands)
41203 if (REG_P (operands[0]))
41205 if (REG_P (operands[1])
41206 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41208 if (REGNO (operands[0]) == FIRST_STACK_REG)
41209 return output_387_ffreep (operands, 0);
41210 return "fstp\t%y0";
41212 if (STACK_TOP_P (operands[0]))
41213 return "fld%Z1\t%y1";
41214 return "fst\t%y0";
41216 else if (MEM_P (operands[0]))
41218 gcc_assert (REG_P (operands[1]));
41219 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41220 return "fstp%Z0\t%y0";
41221 else
41223 /* There is no non-popping store to memory for XFmode.
41224 So if we need one, follow the store with a load. */
41225 if (GET_MODE (operands[0]) == XFmode)
41226 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
41227 else
41228 return "fst%Z0\t%y0";
41231 else
41232 gcc_unreachable();
41235 /* Output code to perform a conditional jump to LABEL, if C2 flag in
41236 FP status register is set. */
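/* Roughly, the emitted sequence is one of the following (illustrative;
   the actual choice depends on TARGET_SAHF / TARGET_USE_SAHF and size
   optimization):

     fnstsw  %ax
     sahf                     C2 lands in PF
     jp      label

   or, without SAHF:

     fnstsw  %ax
     testb   $0x04, %ah       bit 2 of AH is C2
     jne     label
*/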
41238 void
41239 ix86_emit_fp_unordered_jump (rtx label)
41241 rtx reg = gen_reg_rtx (HImode);
41242 rtx temp;
41244 emit_insn (gen_x86_fnstsw_1 (reg));
41246 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
41248 emit_insn (gen_x86_sahf_1 (reg));
41250 temp = gen_rtx_REG (CCmode, FLAGS_REG);
41251 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
41253 else
41255 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
41257 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
41258 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
41261 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
41262 gen_rtx_LABEL_REF (VOIDmode, label),
41263 pc_rtx);
41264 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
41266 emit_jump_insn (temp);
41267 predict_jump (REG_BR_PROB_BASE * 10 / 100);
41270 /* Output code to perform a log1p XFmode calculation. */
41272 void ix86_emit_i387_log1p (rtx op0, rtx op1)
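  /* Illustrative sketch of the sequence below: fyl2xp1 computes
     y * log2 (x + 1) but is only valid for |x| < 1 - sqrt(2)/2, so

       if (fabs (op1) < 0.29289321881...)
         op0 = fyl2xp1 (op1, ln2);        ln2 * log2 (op1 + 1.0)
       else
         op0 = fyl2x (1.0 + op1, ln2);    ln2 * log2 (1.0 + op1)

     where ln2 is loaded with fldln2; either branch yields log1p (op1).  */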
41274 rtx label1 = gen_label_rtx ();
41275 rtx label2 = gen_label_rtx ();
41277 rtx tmp = gen_reg_rtx (XFmode);
41278 rtx tmp2 = gen_reg_rtx (XFmode);
41279 rtx test;
41281 emit_insn (gen_absxf2 (tmp, op1));
41282 test = gen_rtx_GE (VOIDmode, tmp,
41283 CONST_DOUBLE_FROM_REAL_VALUE (
41284 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
41285 XFmode));
41286 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
41288 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41289 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
41290 emit_jump (label2);
41292 emit_label (label1);
41293 emit_move_insn (tmp, CONST1_RTX (XFmode));
41294 emit_insn (gen_addxf3 (tmp, op1, tmp));
41295 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41296 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
41298 emit_label (label2);
41301 /* Emit code for round calculation. */
41302 void ix86_emit_i387_round (rtx op0, rtx op1)
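  /* C sketch of the expansion below (illustrative only):

       e1  = fabs (op1);                 sign tested separately via fxam
       e2  = e1 + 0.5;
       res = floor (e2);                 frndint or lfloor, depending on outmode
       if (signbit (op1))
         res = -res;
       op0 = res;

     i.e. round (a) = sgn (a) * floor (fabs (a) + 0.5), as noted below.  */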
41304 enum machine_mode inmode = GET_MODE (op1);
41305 enum machine_mode outmode = GET_MODE (op0);
41306 rtx e1, e2, res, tmp, tmp1, half;
41307 rtx scratch = gen_reg_rtx (HImode);
41308 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
41309 rtx jump_label = gen_label_rtx ();
41310 rtx insn;
41311 rtx (*gen_abs) (rtx, rtx);
41312 rtx (*gen_neg) (rtx, rtx);
41314 switch (inmode)
41316 case SFmode:
41317 gen_abs = gen_abssf2;
41318 break;
41319 case DFmode:
41320 gen_abs = gen_absdf2;
41321 break;
41322 case XFmode:
41323 gen_abs = gen_absxf2;
41324 break;
41325 default:
41326 gcc_unreachable ();
41329 switch (outmode)
41331 case SFmode:
41332 gen_neg = gen_negsf2;
41333 break;
41334 case DFmode:
41335 gen_neg = gen_negdf2;
41336 break;
41337 case XFmode:
41338 gen_neg = gen_negxf2;
41339 break;
41340 case HImode:
41341 gen_neg = gen_neghi2;
41342 break;
41343 case SImode:
41344 gen_neg = gen_negsi2;
41345 break;
41346 case DImode:
41347 gen_neg = gen_negdi2;
41348 break;
41349 default:
41350 gcc_unreachable ();
41353 e1 = gen_reg_rtx (inmode);
41354 e2 = gen_reg_rtx (inmode);
41355 res = gen_reg_rtx (outmode);
41357 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
41359 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
41361 /* scratch = fxam(op1) */
41362 emit_insn (gen_rtx_SET (VOIDmode, scratch,
41363 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
41364 UNSPEC_FXAM)));
41365 /* e1 = fabs(op1) */
41366 emit_insn (gen_abs (e1, op1));
41368 /* e2 = e1 + 0.5 */
41369 half = force_reg (inmode, half);
41370 emit_insn (gen_rtx_SET (VOIDmode, e2,
41371 gen_rtx_PLUS (inmode, e1, half)));
41373 /* res = floor(e2) */
41374 if (inmode != XFmode)
41376 tmp1 = gen_reg_rtx (XFmode);
41378 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
41379 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
41381 else
41382 tmp1 = e2;
41384 switch (outmode)
41386 case SFmode:
41387 case DFmode:
41389 rtx tmp0 = gen_reg_rtx (XFmode);
41391 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
41393 emit_insn (gen_rtx_SET (VOIDmode, res,
41394 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
41395 UNSPEC_TRUNC_NOOP)));
41397 break;
41398 case XFmode:
41399 emit_insn (gen_frndintxf2_floor (res, tmp1));
41400 break;
41401 case HImode:
41402 emit_insn (gen_lfloorxfhi2 (res, tmp1));
41403 break;
41404 case SImode:
41405 emit_insn (gen_lfloorxfsi2 (res, tmp1));
41406 break;
41407 case DImode:
41408 emit_insn (gen_lfloorxfdi2 (res, tmp1));
41409 break;
41410 default:
41411 gcc_unreachable ();
41414 /* flags = signbit(a) */
41415 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
41417 /* if (flags) then res = -res */
41418 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
41419 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
41420 gen_rtx_LABEL_REF (VOIDmode, jump_label),
41421 pc_rtx);
41422 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41423 predict_jump (REG_BR_PROB_BASE * 50 / 100);
41424 JUMP_LABEL (insn) = jump_label;
41426 emit_insn (gen_neg (res, res));
41428 emit_label (jump_label);
41429 LABEL_NUSES (jump_label) = 1;
41431 emit_move_insn (op0, res);
41434 /* Output code to perform a Newton-Raphson approximation of a single precision
41435 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
41437 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
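  /* Scalar sketch of the sequence below (illustrative): a single
     Newton-Raphson step refining the hardware reciprocal estimate,

       x0  = rcp (b);                     rcpps / rcp14 estimate
       x1  = (x0 + x0) - b * x0 * x0;     refined 1/b
       res = a * x1;

     matching the identity a / b = a * (2*rcp(b) - b*rcp(b)^2).  */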
41439 rtx x0, x1, e0, e1;
41441 x0 = gen_reg_rtx (mode);
41442 e0 = gen_reg_rtx (mode);
41443 e1 = gen_reg_rtx (mode);
41444 x1 = gen_reg_rtx (mode);
41446 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
41448 b = force_reg (mode, b);
41450 /* x0 = rcp(b) estimate */
41451 if (mode == V16SFmode || mode == V8DFmode)
41452 emit_insn (gen_rtx_SET (VOIDmode, x0,
41453 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41454 UNSPEC_RCP14)));
41455 else
41456 emit_insn (gen_rtx_SET (VOIDmode, x0,
41457 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41458 UNSPEC_RCP)));
41460 /* e0 = x0 * b */
41461 emit_insn (gen_rtx_SET (VOIDmode, e0,
41462 gen_rtx_MULT (mode, x0, b)));
41464 /* e0 = x0 * e0 */
41465 emit_insn (gen_rtx_SET (VOIDmode, e0,
41466 gen_rtx_MULT (mode, x0, e0)));
41468 /* e1 = x0 + x0 */
41469 emit_insn (gen_rtx_SET (VOIDmode, e1,
41470 gen_rtx_PLUS (mode, x0, x0)));
41472 /* x1 = e1 - e0 */
41473 emit_insn (gen_rtx_SET (VOIDmode, x1,
41474 gen_rtx_MINUS (mode, e1, e0)));
41476 /* res = a * x1 */
41477 emit_insn (gen_rtx_SET (VOIDmode, res,
41478 gen_rtx_MULT (mode, a, x1)));
41481 /* Output code to perform a Newton-Raphson approximation of a
41482 single precision floating point [reciprocal] square root. */
41484 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
41485 bool recip)
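  /* Scalar sketch of the sequence below (illustrative): one Newton-Raphson
     step on the hardware rsqrt estimate,

       x0 = rsqrt (a);                    rsqrtps / rsqrt14 estimate
       e  = a * x0 * x0 - 3.0;
       rsqrt (a) ~ -0.5 * x0 * e
       sqrt (a)  ~ -0.5 * (a * x0) * e

     as also spelled out in the comment further down; for sqrt, a == 0 is
     filtered so that the infinite rsqrt estimate does not yield a NaN.  */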
41487 rtx x0, e0, e1, e2, e3, mthree, mhalf;
41488 REAL_VALUE_TYPE r;
41489 int unspec;
41491 x0 = gen_reg_rtx (mode);
41492 e0 = gen_reg_rtx (mode);
41493 e1 = gen_reg_rtx (mode);
41494 e2 = gen_reg_rtx (mode);
41495 e3 = gen_reg_rtx (mode);
41497 real_from_integer (&r, VOIDmode, -3, -1, 0);
41498 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41500 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
41501 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41502 unspec = UNSPEC_RSQRT;
41504 if (VECTOR_MODE_P (mode))
41506 mthree = ix86_build_const_vector (mode, true, mthree);
41507 mhalf = ix86_build_const_vector (mode, true, mhalf);
41508 /* There is no 512-bit rsqrt. There is however rsqrt14. */
41509 if (GET_MODE_SIZE (mode) == 64)
41510 unspec = UNSPEC_RSQRT14;
41513 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
41514 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
41516 a = force_reg (mode, a);
41518 /* x0 = rsqrt(a) estimate */
41519 emit_insn (gen_rtx_SET (VOIDmode, x0,
41520 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
41521 unspec)));
41523 /* If a == 0.0, zero the rsqrt estimate (which is +inf) to avoid a NaN for sqrt (0.0). */
41524 if (!recip)
41526 rtx zero, mask;
41528 zero = gen_reg_rtx (mode);
41529 mask = gen_reg_rtx (mode);
41531 zero = force_reg (mode, CONST0_RTX(mode));
41533 /* Handle masked compare. */
41534 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
41536 mask = gen_reg_rtx (HImode);
41537 /* Imm value 0x4 corresponds to not-equal comparison. */
41538 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
41539 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
41541 else
41543 emit_insn (gen_rtx_SET (VOIDmode, mask,
41544 gen_rtx_NE (mode, zero, a)));
41546 emit_insn (gen_rtx_SET (VOIDmode, x0,
41547 gen_rtx_AND (mode, x0, mask)));
41551 /* e0 = x0 * a */
41552 emit_insn (gen_rtx_SET (VOIDmode, e0,
41553 gen_rtx_MULT (mode, x0, a)));
41554 /* e1 = e0 * x0 */
41555 emit_insn (gen_rtx_SET (VOIDmode, e1,
41556 gen_rtx_MULT (mode, e0, x0)));
41558 /* e2 = e1 - 3. */
41559 mthree = force_reg (mode, mthree);
41560 emit_insn (gen_rtx_SET (VOIDmode, e2,
41561 gen_rtx_PLUS (mode, e1, mthree)));
41563 mhalf = force_reg (mode, mhalf);
41564 if (recip)
41565 /* e3 = -.5 * x0 */
41566 emit_insn (gen_rtx_SET (VOIDmode, e3,
41567 gen_rtx_MULT (mode, x0, mhalf)));
41568 else
41569 /* e3 = -.5 * e0 */
41570 emit_insn (gen_rtx_SET (VOIDmode, e3,
41571 gen_rtx_MULT (mode, e0, mhalf)));
41572 /* ret = e2 * e3 */
41573 emit_insn (gen_rtx_SET (VOIDmode, res,
41574 gen_rtx_MULT (mode, e2, e3)));
41577 #ifdef TARGET_SOLARIS
41578 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
41580 static void
41581 i386_solaris_elf_named_section (const char *name, unsigned int flags,
41582 tree decl)
41584 /* With Binutils 2.15, the "@unwind" marker must be specified on
41585 every occurrence of the ".eh_frame" section, not just the first
41586 one. */
41587 if (TARGET_64BIT
41588 && strcmp (name, ".eh_frame") == 0)
41590 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
41591 flags & SECTION_WRITE ? "aw" : "a");
41592 return;
41595 #ifndef USE_GAS
41596 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
41598 solaris_elf_asm_comdat_section (name, flags, decl);
41599 return;
41601 #endif
41603 default_elf_asm_named_section (name, flags, decl);
41605 #endif /* TARGET_SOLARIS */
41607 /* Return the mangling of TYPE if it is an extended fundamental type. */
41609 static const char *
41610 ix86_mangle_type (const_tree type)
41612 type = TYPE_MAIN_VARIANT (type);
41614 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
41615 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
41616 return NULL;
41618 switch (TYPE_MODE (type))
41620 case TFmode:
41621 /* __float128 is "g". */
41622 return "g";
41623 case XFmode:
41624 /* "long double" or __float80 is "e". */
41625 return "e";
41626 default:
41627 return NULL;
41631 /* For 32-bit code we can save PIC register setup by using
41632 __stack_chk_fail_local hidden function instead of calling
41633 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
41634 register, so it is better to call __stack_chk_fail directly. */
41636 static tree ATTRIBUTE_UNUSED
41637 ix86_stack_protect_fail (void)
41639 return TARGET_64BIT
41640 ? default_external_stack_protect_fail ()
41641 : default_hidden_stack_protect_fail ();
41644 /* Select a format to encode pointers in exception handling data. CODE
41645 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
41646 true if the symbol may be affected by dynamic relocations.
41648 ??? All x86 object file formats are capable of representing this.
41649 After all, the relocation needed is the same as for the call insn.
41650 Whether or not a particular assembler allows us to enter such, I
41651 guess we'll have to see. */
41653 asm_preferred_eh_data_format (int code, int global)
41655 if (flag_pic)
41657 int type = DW_EH_PE_sdata8;
41658 if (!TARGET_64BIT
41659 || ix86_cmodel == CM_SMALL_PIC
41660 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
41661 type = DW_EH_PE_sdata4;
41662 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
41664 if (ix86_cmodel == CM_SMALL
41665 || (ix86_cmodel == CM_MEDIUM && code))
41666 return DW_EH_PE_udata4;
41667 return DW_EH_PE_absptr;
41670 /* Expand copysign from SIGN to the positive value ABS_VALUE
41671 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
41672 the sign-bit. */
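/* Illustrative sketch: with S the sign-bit mask of the mode (e.g.
   0x80000000 for SFmode), the expansion below computes

     result = abs_value | (sign & S);

   A non-null MASK is expected to be the inverted mask ~S produced by
   ix86_expand_sse_fabs and is re-inverted before the AND.  */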
41673 static void
41674 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
41676 enum machine_mode mode = GET_MODE (sign);
41677 rtx sgn = gen_reg_rtx (mode);
41678 if (mask == NULL_RTX)
41680 enum machine_mode vmode;
41682 if (mode == SFmode)
41683 vmode = V4SFmode;
41684 else if (mode == DFmode)
41685 vmode = V2DFmode;
41686 else
41687 vmode = mode;
41689 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
41690 if (!VECTOR_MODE_P (mode))
41692 /* We need to generate a scalar mode mask in this case. */
41693 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41694 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41695 mask = gen_reg_rtx (mode);
41696 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41699 else
41700 mask = gen_rtx_NOT (mode, mask);
41701 emit_insn (gen_rtx_SET (VOIDmode, sgn,
41702 gen_rtx_AND (mode, mask, sign)));
41703 emit_insn (gen_rtx_SET (VOIDmode, result,
41704 gen_rtx_IOR (mode, abs_value, sgn)));
41707 /* Expand fabs (OP0) and return a new rtx that holds the result. The
41708 mask for masking out the sign-bit is stored in *SMASK, if that is
41709 non-null. */
41710 static rtx
41711 ix86_expand_sse_fabs (rtx op0, rtx *smask)
41713 enum machine_mode vmode, mode = GET_MODE (op0);
41714 rtx xa, mask;
41716 xa = gen_reg_rtx (mode);
41717 if (mode == SFmode)
41718 vmode = V4SFmode;
41719 else if (mode == DFmode)
41720 vmode = V2DFmode;
41721 else
41722 vmode = mode;
41723 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
41724 if (!VECTOR_MODE_P (mode))
41726 /* We need to generate a scalar mode mask in this case. */
41727 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41728 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41729 mask = gen_reg_rtx (mode);
41730 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41732 emit_insn (gen_rtx_SET (VOIDmode, xa,
41733 gen_rtx_AND (mode, op0, mask)));
41735 if (smask)
41736 *smask = mask;
41738 return xa;
41741 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
41742 swapping the operands if SWAP_OPERANDS is true. The expanded
41743 code is a forward jump to a newly created label in case the
41744 comparison is true. The generated label rtx is returned. */
41745 static rtx
41746 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
41747 bool swap_operands)
41749 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
41750 rtx label, tmp;
41752 if (swap_operands)
41754 tmp = op0;
41755 op0 = op1;
41756 op1 = tmp;
41759 label = gen_label_rtx ();
41760 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
41761 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41762 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
41763 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
41764 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
41765 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
41766 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41767 JUMP_LABEL (tmp) = label;
41769 return label;
41772 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
41773 using comparison code CODE. Operands are swapped for the comparison if
41774 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
41775 static rtx
41776 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
41777 bool swap_operands)
41779 rtx (*insn)(rtx, rtx, rtx, rtx);
41780 enum machine_mode mode = GET_MODE (op0);
41781 rtx mask = gen_reg_rtx (mode);
41783 if (swap_operands)
41785 rtx tmp = op0;
41786 op0 = op1;
41787 op1 = tmp;
41790 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
41792 emit_insn (insn (mask, op0, op1,
41793 gen_rtx_fmt_ee (code, mode, op0, op1)));
41794 return mask;
41797 /* Generate and return a rtx of mode MODE for 2**n where n is the number
41798 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
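/* The callers below rely on the classic trick that, for any |x| smaller
   than 2**52 (DFmode; 2**23 for SFmode), the sum x + 2**52 has no
   fractional bits, so (x + 2**52) - 2**52 yields x rounded to an integer
   in the current rounding mode.  E.g. 3.7 + 2**52 rounds to 4.0 + 2**52
   under round-to-nearest, and subtracting 2**52 leaves 4.0.  */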
41799 static rtx
41800 ix86_gen_TWO52 (enum machine_mode mode)
41802 REAL_VALUE_TYPE TWO52r;
41803 rtx TWO52;
41805 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
41806 TWO52 = const_double_from_real_value (TWO52r, mode);
41807 TWO52 = force_reg (mode, TWO52);
41809 return TWO52;
41812 /* Expand SSE sequence for computing lround from OP1 storing
41813 into OP0. */
41814 void
41815 ix86_expand_lround (rtx op0, rtx op1)
41817 /* C code for the stuff we're doing below:
41818 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
41819 return (long)tmp;
41821 enum machine_mode mode = GET_MODE (op1);
41822 const struct real_format *fmt;
41823 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
41824 rtx adj;
41826 /* load nextafter (0.5, 0.0) */
41827 fmt = REAL_MODE_FORMAT (mode);
41828 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
41829 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
41831 /* adj = copysign (0.5, op1) */
41832 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
41833 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
41835 /* adj = op1 + adj */
41836 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
41838 /* op0 = (imode)adj */
41839 expand_fix (op0, adj, 0);
41842 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
41843 storing into OPERAND0. */
41844 void
41845 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
41847 /* C code for the stuff we're doing below (for do_floor):
41848 xi = (long)op1;
41849 xi -= (double)xi > op1 ? 1 : 0;
41850 return xi;
41852 enum machine_mode fmode = GET_MODE (op1);
41853 enum machine_mode imode = GET_MODE (op0);
41854 rtx ireg, freg, label, tmp;
41856 /* reg = (long)op1 */
41857 ireg = gen_reg_rtx (imode);
41858 expand_fix (ireg, op1, 0);
41860 /* freg = (double)reg */
41861 freg = gen_reg_rtx (fmode);
41862 expand_float (freg, ireg, 0);
41864 /* ireg = (freg > op1) ? ireg - 1 : ireg */
41865 label = ix86_expand_sse_compare_and_jump (UNLE,
41866 freg, op1, !do_floor);
41867 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
41868 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
41869 emit_move_insn (ireg, tmp);
41871 emit_label (label);
41872 LABEL_NUSES (label) = 1;
41874 emit_move_insn (op0, ireg);
41877 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
41878 result in OPERAND0. */
41879 void
41880 ix86_expand_rint (rtx operand0, rtx operand1)
41882 /* C code for the stuff we're doing below:
41883 xa = fabs (operand1);
41884 if (!isless (xa, 2**52))
41885 return operand1;
41886 xa = xa + 2**52 - 2**52;
41887 return copysign (xa, operand1);
41889 enum machine_mode mode = GET_MODE (operand0);
41890 rtx res, xa, label, TWO52, mask;
41892 res = gen_reg_rtx (mode);
41893 emit_move_insn (res, operand1);
41895 /* xa = abs (operand1) */
41896 xa = ix86_expand_sse_fabs (res, &mask);
41898 /* if (!isless (xa, TWO52)) goto label; */
41899 TWO52 = ix86_gen_TWO52 (mode);
41900 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41902 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41903 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41905 ix86_sse_copysign_to_positive (res, xa, res, mask);
41907 emit_label (label);
41908 LABEL_NUSES (label) = 1;
41910 emit_move_insn (operand0, res);
41913 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41914 into OPERAND0. */
41915 void
41916 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
41918 /* C code for the stuff we expand below.
41919 double xa = fabs (x), x2;
41920 if (!isless (xa, TWO52))
41921 return x;
41922 xa = xa + TWO52 - TWO52;
41923 x2 = copysign (xa, x);
41924 Compensate. Floor:
41925 if (x2 > x)
41926 x2 -= 1;
41927 Compensate. Ceil:
41928 if (x2 < x)
41929 x2 -= -1;
41930 return x2;
41932 enum machine_mode mode = GET_MODE (operand0);
41933 rtx xa, TWO52, tmp, label, one, res, mask;
41935 TWO52 = ix86_gen_TWO52 (mode);
41937 /* Temporary for holding the result, initialized to the input
41938 operand to ease control flow. */
41939 res = gen_reg_rtx (mode);
41940 emit_move_insn (res, operand1);
41942 /* xa = abs (operand1) */
41943 xa = ix86_expand_sse_fabs (res, &mask);
41945 /* if (!isless (xa, TWO52)) goto label; */
41946 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41948 /* xa = xa + TWO52 - TWO52; */
41949 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41950 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41952 /* xa = copysign (xa, operand1) */
41953 ix86_sse_copysign_to_positive (xa, xa, res, mask);
41955 /* generate 1.0 or -1.0 */
41956 one = force_reg (mode,
41957 const_double_from_real_value (do_floor
41958 ? dconst1 : dconstm1, mode));
41960 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
41961 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
41962 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41963 gen_rtx_AND (mode, one, tmp)));
41964 /* We always need to subtract here to preserve signed zero. */
41965 tmp = expand_simple_binop (mode, MINUS,
41966 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41967 emit_move_insn (res, tmp);
41969 emit_label (label);
41970 LABEL_NUSES (label) = 1;
41972 emit_move_insn (operand0, res);
41975 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41976 into OPERAND0. */
41977 void
41978 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
41980 /* C code for the stuff we expand below.
41981 double xa = fabs (x), x2;
41982 if (!isless (xa, TWO52))
41983 return x;
41984 x2 = (double)(long)x;
41985 Compensate. Floor:
41986 if (x2 > x)
41987 x2 -= 1;
41988 Compensate. Ceil:
41989 if (x2 < x)
41990 x2 += 1;
41991 if (HONOR_SIGNED_ZEROS (mode))
41992 return copysign (x2, x);
41993 return x2;
41995 enum machine_mode mode = GET_MODE (operand0);
41996 rtx xa, xi, TWO52, tmp, label, one, res, mask;
41998 TWO52 = ix86_gen_TWO52 (mode);
42000 /* Temporary for holding the result, initialized to the input
42001 operand to ease control flow. */
42002 res = gen_reg_rtx (mode);
42003 emit_move_insn (res, operand1);
42005 /* xa = abs (operand1) */
42006 xa = ix86_expand_sse_fabs (res, &mask);
42008 /* if (!isless (xa, TWO52)) goto label; */
42009 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42011 /* xa = (double)(long)x */
42012 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42013 expand_fix (xi, res, 0);
42014 expand_float (xa, xi, 0);
42016 /* generate 1.0 */
42017 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42019 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42020 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42021 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42022 gen_rtx_AND (mode, one, tmp)));
42023 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
42024 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42025 emit_move_insn (res, tmp);
42027 if (HONOR_SIGNED_ZEROS (mode))
42028 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42030 emit_label (label);
42031 LABEL_NUSES (label) = 1;
42033 emit_move_insn (operand0, res);
42036 /* Expand SSE sequence for computing round from OPERAND1 storing
42037 into OPERAND0. Sequence that works without relying on DImode truncation
42038 via cvttsd2siq that is only available on 64bit targets. */
42039 void
42040 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
42042 /* C code for the stuff we expand below.
42043 double xa = fabs (x), xa2, x2;
42044 if (!isless (xa, TWO52))
42045 return x;
42046 Using the absolute value and copying back sign makes
42047 -0.0 -> -0.0 correct.
42048 xa2 = xa + TWO52 - TWO52;
42049 Compensate.
42050 dxa = xa2 - xa;
42051 if (dxa <= -0.5)
42052 xa2 += 1;
42053 else if (dxa > 0.5)
42054 xa2 -= 1;
42055 x2 = copysign (xa2, x);
42056 return x2;
42058 enum machine_mode mode = GET_MODE (operand0);
42059 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
42061 TWO52 = ix86_gen_TWO52 (mode);
42063 /* Temporary for holding the result, initialized to the input
42064 operand to ease control flow. */
42065 res = gen_reg_rtx (mode);
42066 emit_move_insn (res, operand1);
42068 /* xa = abs (operand1) */
42069 xa = ix86_expand_sse_fabs (res, &mask);
42071 /* if (!isless (xa, TWO52)) goto label; */
42072 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42074 /* xa2 = xa + TWO52 - TWO52; */
42075 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42076 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
42078 /* dxa = xa2 - xa; */
42079 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
42081 /* generate 0.5, 1.0 and -0.5 */
42082 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
42083 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
42084 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
42085 0, OPTAB_DIRECT);
42087 /* Compensate. */
42088 tmp = gen_reg_rtx (mode);
42089 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
42090 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
42091 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42092 gen_rtx_AND (mode, one, tmp)));
42093 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42094 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
42095 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
42096 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42097 gen_rtx_AND (mode, one, tmp)));
42098 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42100 /* res = copysign (xa2, operand1) */
42101 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
42103 emit_label (label);
42104 LABEL_NUSES (label) = 1;
42106 emit_move_insn (operand0, res);
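/* Illustrative worked example of the compensation above (hypothetical
   inputs, not part of the expansion): for x = 2.3, xa2 = 2.3 + TWO52
   - TWO52 = 2.0 and dxa = -0.3, so no adjustment is made and
   copysign (2.0, 2.3) = 2.0.  For x = 2.5 the addition rounds to even,
   giving xa2 = 2.0 and dxa = -0.5; since dxa <= -0.5, 1.0 is added and
   the result is 3.0, matching the round-half-away-from-zero behaviour
   of round ().  */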
42109 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42110 into OPERAND0. */
42111 void
42112 ix86_expand_trunc (rtx operand0, rtx operand1)
42114 /* C code for the SSE variant we expand below.
42115 double xa = fabs (x), x2;
42116 if (!isless (xa, TWO52))
42117 return x;
42118 x2 = (double)(long)x;
42119 if (HONOR_SIGNED_ZEROS (mode))
42120 return copysign (x2, x);
42121 return x2;
42123 enum machine_mode mode = GET_MODE (operand0);
42124 rtx xa, xi, TWO52, label, res, mask;
42126 TWO52 = ix86_gen_TWO52 (mode);
42128 /* Temporary for holding the result, initialized to the input
42129 operand to ease control flow. */
42130 res = gen_reg_rtx (mode);
42131 emit_move_insn (res, operand1);
42133 /* xa = abs (operand1) */
42134 xa = ix86_expand_sse_fabs (res, &mask);
42136 /* if (!isless (xa, TWO52)) goto label; */
42137 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42139 /* x = (double)(long)x */
42140 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42141 expand_fix (xi, res, 0);
42142 expand_float (res, xi, 0);
42144 if (HONOR_SIGNED_ZEROS (mode))
42145 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42147 emit_label (label);
42148 LABEL_NUSES (label) = 1;
42150 emit_move_insn (operand0, res);
42153 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42154 into OPERAND0. */
42155 void
42156 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
42158 enum machine_mode mode = GET_MODE (operand0);
42159 rtx xa, mask, TWO52, label, one, res, smask, tmp;
42161 /* C code for the SSE variant we expand below.
42162 double xa = fabs (x), xa2, x2;
42163 if (!isless (xa, TWO52))
42164 return x;
42165 xa2 = xa + TWO52 - TWO52;
42166 Compensate:
42167 if (xa2 > xa)
42168 xa2 -= 1.0;
42169 x2 = copysign (xa2, x);
42170 return x2;
42173 TWO52 = ix86_gen_TWO52 (mode);
42175 /* Temporary for holding the result, initialized to the input
42176 operand to ease control flow. */
42177 res = gen_reg_rtx (mode);
42178 emit_move_insn (res, operand1);
42180 /* xa = abs (operand1) */
42181 xa = ix86_expand_sse_fabs (res, &smask);
42183 /* if (!isless (xa, TWO52)) goto label; */
42184 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42186 /* res = xa + TWO52 - TWO52; */
42187 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42188 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
42189 emit_move_insn (res, tmp);
42191 /* generate 1.0 */
42192 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42194 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
42195 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
42196 emit_insn (gen_rtx_SET (VOIDmode, mask,
42197 gen_rtx_AND (mode, mask, one)));
42198 tmp = expand_simple_binop (mode, MINUS,
42199 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
42200 emit_move_insn (res, tmp);
42202 /* res = copysign (res, operand1) */
42203 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
42205 emit_label (label);
42206 LABEL_NUSES (label) = 1;
42208 emit_move_insn (operand0, res);
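/* Illustrative worked example of the compensation above (hypothetical
   input, not part of the expansion): for x = 3.7, xa = 3.7 and
   res = xa + TWO52 - TWO52 = 4.0; since res > xa, 1.0 is subtracted
   and copysign (3.0, 3.7) = 3.0 = trunc (3.7).  For x = 3.2 the
   addition already yields 3.0, which is not greater than xa, so no
   adjustment is needed.  */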
42211 /* Expand SSE sequence for computing round from OPERAND1 storing
42212 into OPERAND0. */
42213 void
42214 ix86_expand_round (rtx operand0, rtx operand1)
42216 /* C code for the stuff we're doing below:
42217 double xa = fabs (x);
42218 if (!isless (xa, TWO52))
42219 return x;
42220 xa = (double)(long)(xa + nextafter (0.5, 0.0));
42221 return copysign (xa, x);
42223 enum machine_mode mode = GET_MODE (operand0);
42224 rtx res, TWO52, xa, label, xi, half, mask;
42225 const struct real_format *fmt;
42226 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42228 /* Temporary for holding the result, initialized to the input
42229 operand to ease control flow. */
42230 res = gen_reg_rtx (mode);
42231 emit_move_insn (res, operand1);
42233 TWO52 = ix86_gen_TWO52 (mode);
42234 xa = ix86_expand_sse_fabs (res, &mask);
42235 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42237 /* load nextafter (0.5, 0.0) */
42238 fmt = REAL_MODE_FORMAT (mode);
42239 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42240 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42242 /* xa = xa + 0.5 */
42243 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
42244 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
42246 /* xa = (double)(int64_t)xa */
42247 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42248 expand_fix (xi, xa, 0);
42249 expand_float (xa, xi, 0);
42251 /* res = copysign (xa, operand1) */
42252 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
42254 emit_label (label);
42255 LABEL_NUSES (label) = 1;
42257 emit_move_insn (operand0, res);
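/* Why the pseudo-C above adds nextafter (0.5, 0.0) rather than 0.5
   (illustrative note): for SFmode, p = 24, so pred_half computed above
   is 0.5 - 2**-25.  The largest float below 0.5 is also 0.5 - 2**-25;
   adding plain 0.5 to it would give 1 - 2**-25, which rounds (ties to
   even) to exactly 1.0 and would truncate to 1 instead of 0.  With
   pred_half the sum is 1 - 2**-24, which truncates to 0 as required.  */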
42260 /* Expand SSE sequence for computing round
42261 from OP1 storing into OP0 using sse4 round insn. */
42262 void
42263 ix86_expand_round_sse4 (rtx op0, rtx op1)
42265 enum machine_mode mode = GET_MODE (op0);
42266 rtx e1, e2, res, half;
42267 const struct real_format *fmt;
42268 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42269 rtx (*gen_copysign) (rtx, rtx, rtx);
42270 rtx (*gen_round) (rtx, rtx, rtx);
42272 switch (mode)
42274 case SFmode:
42275 gen_copysign = gen_copysignsf3;
42276 gen_round = gen_sse4_1_roundsf2;
42277 break;
42278 case DFmode:
42279 gen_copysign = gen_copysigndf3;
42280 gen_round = gen_sse4_1_rounddf2;
42281 break;
42282 default:
42283 gcc_unreachable ();
42286 /* round (a) = trunc (a + copysign (0.5, a)) */
42288 /* load nextafter (0.5, 0.0) */
42289 fmt = REAL_MODE_FORMAT (mode);
42290 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42291 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42292 half = const_double_from_real_value (pred_half, mode);
42294 /* e1 = copysign (0.5, op1) */
42295 e1 = gen_reg_rtx (mode);
42296 emit_insn (gen_copysign (e1, half, op1));
42298 /* e2 = op1 + e1 */
42299 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
42301 /* res = trunc (e2) */
42302 res = gen_reg_rtx (mode);
42303 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
42305 emit_move_insn (op0, res);
42309 /* Table of valid machine attributes. */
42310 static const struct attribute_spec ix86_attribute_table[] =
42312 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
42313 affects_type_identity } */
42314 /* Stdcall attribute says callee is responsible for popping arguments
42315 if they are not variable. */
42316 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42317 true },
42318 /* Fastcall attribute says callee is responsible for popping arguments
42319 if they are not variable. */
42320 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42321 true },
42322 /* Thiscall attribute says callee is responsible for popping arguments
42323 if they are not variable. */
42324 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42325 true },
42326 /* Cdecl attribute says the callee is a normal C declaration */
42327 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42328 true },
42329 /* Regparm attribute specifies how many integer arguments are to be
42330 passed in registers. */
42331 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
42332 true },
42333 /* Sseregparm attribute says we are using x86_64 calling conventions
42334 for FP arguments. */
42335 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42336 true },
42337 /* The transactional memory builtins are implicitly regparm or fastcall
42338 depending on the ABI. Override the generic do-nothing attribute that
42339 these builtins were declared with. */
42340 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
42341 true },
42342 /* force_align_arg_pointer says this function realigns the stack at entry. */
42343 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
42344 false, true, true, ix86_handle_cconv_attribute, false },
42345 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42346 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
42347 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
42348 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
42349 false },
42350 #endif
42351 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42352 false },
42353 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42354 false },
42355 #ifdef SUBTARGET_ATTRIBUTE_TABLE
42356 SUBTARGET_ATTRIBUTE_TABLE,
42357 #endif
42358 /* ms_abi and sysv_abi calling convention function attributes. */
42359 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42360 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42361 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
42362 false },
42363 { "callee_pop_aggregate_return", 1, 1, false, true, true,
42364 ix86_handle_callee_pop_aggregate_return, true },
42365 /* End element. */
42366 { NULL, 0, 0, false, false, false, NULL, false }
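/* Illustrative (hypothetical) user-level declarations exercising some
   of the calling-convention attributes accepted by the table above;
   shown only as an example of what the handlers validate, not used
   anywhere in this file:

     int  __attribute__ ((stdcall))     cb (int a, int b);
     int  __attribute__ ((fastcall))    f1 (int a, int b);
     int  __attribute__ ((regparm (3))) f2 (int a, int b, int c);
     void __attribute__ ((ms_abi))      f3 (void);

   Each attribute name is looked up in ix86_attribute_table and the
   corresponding handler (e.g. ix86_handle_cconv_attribute) checks the
   argument count and the kind of declaration it is applied to.  */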
42369 /* Implement targetm.vectorize.builtin_vectorization_cost. */
42370 static int
42371 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
42372 tree vectype,
42373 int misalign ATTRIBUTE_UNUSED)
42375 unsigned elements;
42377 switch (type_of_cost)
42379 case scalar_stmt:
42380 return ix86_cost->scalar_stmt_cost;
42382 case scalar_load:
42383 return ix86_cost->scalar_load_cost;
42385 case scalar_store:
42386 return ix86_cost->scalar_store_cost;
42388 case vector_stmt:
42389 return ix86_cost->vec_stmt_cost;
42391 case vector_load:
42392 return ix86_cost->vec_align_load_cost;
42394 case vector_store:
42395 return ix86_cost->vec_store_cost;
42397 case vec_to_scalar:
42398 return ix86_cost->vec_to_scalar_cost;
42400 case scalar_to_vec:
42401 return ix86_cost->scalar_to_vec_cost;
42403 case unaligned_load:
42404 case unaligned_store:
42405 return ix86_cost->vec_unalign_load_cost;
42407 case cond_branch_taken:
42408 return ix86_cost->cond_taken_branch_cost;
42410 case cond_branch_not_taken:
42411 return ix86_cost->cond_not_taken_branch_cost;
42413 case vec_perm:
42414 case vec_promote_demote:
42415 return ix86_cost->vec_stmt_cost;
42417 case vec_construct:
42418 elements = TYPE_VECTOR_SUBPARTS (vectype);
42419 return elements / 2 + 1;
42421 default:
42422 gcc_unreachable ();
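/* Worked example for the vec_construct case above (illustrative only):
   building a V4SFmode vector from four scalars is costed as
   4 / 2 + 1 = 3 units, and a V8SImode construction as 8 / 2 + 1 = 5,
   independently of the per-processor ix86_cost tables consulted by the
   other cases.  */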
42426 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
42427 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
42428 insn every time. */
42430 static GTY(()) rtx vselect_insn;
42432 /* Initialize vselect_insn. */
42434 static void
42435 init_vselect_insn (void)
42437 unsigned i;
42438 rtx x;
42440 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
42441 for (i = 0; i < MAX_VECT_LEN; ++i)
42442 XVECEXP (x, 0, i) = const0_rtx;
42443 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
42444 const0_rtx), x);
42445 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
42446 start_sequence ();
42447 vselect_insn = emit_insn (x);
42448 end_sequence ();
42451 /* Construct (set target (vec_select op0 (parallel perm))) and
42452 return true if that's a valid instruction in the active ISA. */
42454 static bool
42455 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
42456 unsigned nelt, bool testing_p)
42458 unsigned int i;
42459 rtx x, save_vconcat;
42460 int icode;
42462 if (vselect_insn == NULL_RTX)
42463 init_vselect_insn ();
42465 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
42466 PUT_NUM_ELEM (XVEC (x, 0), nelt);
42467 for (i = 0; i < nelt; ++i)
42468 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
42469 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42470 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
42471 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
42472 SET_DEST (PATTERN (vselect_insn)) = target;
42473 icode = recog_memoized (vselect_insn);
42475 if (icode >= 0 && !testing_p)
42476 emit_insn (copy_rtx (PATTERN (vselect_insn)));
42478 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
42479 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
42480 INSN_CODE (vselect_insn) = -1;
42482 return icode >= 0;
42485 /* Similar, but generate a vec_concat from op0 and op1 as well. */
42487 static bool
42488 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
42489 const unsigned char *perm, unsigned nelt,
42490 bool testing_p)
42492 enum machine_mode v2mode;
42493 rtx x;
42494 bool ok;
42496 if (vselect_insn == NULL_RTX)
42497 init_vselect_insn ();
42499 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
42500 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42501 PUT_MODE (x, v2mode);
42502 XEXP (x, 0) = op0;
42503 XEXP (x, 1) = op1;
42504 ok = expand_vselect (target, x, perm, nelt, testing_p);
42505 XEXP (x, 0) = const0_rtx;
42506 XEXP (x, 1) = const0_rtx;
42507 return ok;
42510 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42511 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
42513 static bool
42514 expand_vec_perm_blend (struct expand_vec_perm_d *d)
42516 enum machine_mode vmode = d->vmode;
42517 unsigned i, mask, nelt = d->nelt;
42518 rtx target, op0, op1, x;
42519 rtx rperm[32], vperm;
42521 if (d->one_operand_p)
42522 return false;
42523 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
42525 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
42527 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
42529 else
42530 return false;
42532 /* This is a blend, not a permute. Elements must stay in their
42533 respective lanes. */
42534 for (i = 0; i < nelt; ++i)
42536 unsigned e = d->perm[i];
42537 if (!(e == i || e == i + nelt))
42538 return false;
42541 if (d->testing_p)
42542 return true;
42544 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
42545 decision should be extracted elsewhere, so that we only try that
42546 sequence once all budget==3 options have been tried. */
42547 target = d->target;
42548 op0 = d->op0;
42549 op1 = d->op1;
42550 mask = 0;
42552 switch (vmode)
42554 case V4DFmode:
42555 case V8SFmode:
42556 case V2DFmode:
42557 case V4SFmode:
42558 case V8HImode:
42559 case V8SImode:
42560 for (i = 0; i < nelt; ++i)
42561 mask |= (d->perm[i] >= nelt) << i;
42562 break;
42564 case V2DImode:
42565 for (i = 0; i < 2; ++i)
42566 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
42567 vmode = V8HImode;
42568 goto do_subreg;
42570 case V4SImode:
42571 for (i = 0; i < 4; ++i)
42572 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42573 vmode = V8HImode;
42574 goto do_subreg;
42576 case V16QImode:
42577 /* See if bytes move in pairs so we can use pblendw with
42578 an immediate argument, rather than pblendvb with a vector
42579 argument. */
42580 for (i = 0; i < 16; i += 2)
42581 if (d->perm[i] + 1 != d->perm[i + 1])
42583 use_pblendvb:
42584 for (i = 0; i < nelt; ++i)
42585 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
42587 finish_pblendvb:
42588 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
42589 vperm = force_reg (vmode, vperm);
42591 if (GET_MODE_SIZE (vmode) == 16)
42592 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
42593 else
42594 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
42595 if (target != d->target)
42596 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42597 return true;
42600 for (i = 0; i < 8; ++i)
42601 mask |= (d->perm[i * 2] >= 16) << i;
42602 vmode = V8HImode;
42603 /* FALLTHRU */
42605 do_subreg:
42606 target = gen_reg_rtx (vmode);
42607 op0 = gen_lowpart (vmode, op0);
42608 op1 = gen_lowpart (vmode, op1);
42609 break;
42611 case V32QImode:
42612 /* See if bytes move in pairs. If not, vpblendvb must be used. */
42613 for (i = 0; i < 32; i += 2)
42614 if (d->perm[i] + 1 != d->perm[i + 1])
42615 goto use_pblendvb;
42616 /* See if bytes move in quadruplets. If yes, vpblendd
42617 with immediate can be used. */
42618 for (i = 0; i < 32; i += 4)
42619 if (d->perm[i] + 2 != d->perm[i + 2])
42620 break;
42621 if (i < 32)
42623 /* See if bytes move the same in both lanes. If yes,
42624 vpblendw with immediate can be used. */
42625 for (i = 0; i < 16; i += 2)
42626 if (d->perm[i] + 16 != d->perm[i + 16])
42627 goto use_pblendvb;
42629 /* Use vpblendw. */
42630 for (i = 0; i < 16; ++i)
42631 mask |= (d->perm[i * 2] >= 32) << i;
42632 vmode = V16HImode;
42633 goto do_subreg;
42636 /* Use vpblendd. */
42637 for (i = 0; i < 8; ++i)
42638 mask |= (d->perm[i * 4] >= 32) << i;
42639 vmode = V8SImode;
42640 goto do_subreg;
42642 case V16HImode:
42643 /* See if words move in pairs. If yes, vpblendd can be used. */
42644 for (i = 0; i < 16; i += 2)
42645 if (d->perm[i] + 1 != d->perm[i + 1])
42646 break;
42647 if (i < 16)
42649 /* See if words move the same in both lanes. If not,
42650 vpblendvb must be used. */
42651 for (i = 0; i < 8; i++)
42652 if (d->perm[i] + 8 != d->perm[i + 8])
42654 /* Use vpblendvb. */
42655 for (i = 0; i < 32; ++i)
42656 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
42658 vmode = V32QImode;
42659 nelt = 32;
42660 target = gen_reg_rtx (vmode);
42661 op0 = gen_lowpart (vmode, op0);
42662 op1 = gen_lowpart (vmode, op1);
42663 goto finish_pblendvb;
42666 /* Use vpblendw. */
42667 for (i = 0; i < 16; ++i)
42668 mask |= (d->perm[i] >= 16) << i;
42669 break;
42672 /* Use vpblendd. */
42673 for (i = 0; i < 8; ++i)
42674 mask |= (d->perm[i * 2] >= 16) << i;
42675 vmode = V8SImode;
42676 goto do_subreg;
42678 case V4DImode:
42679 /* Use vpblendd. */
42680 for (i = 0; i < 4; ++i)
42681 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42682 vmode = V8SImode;
42683 goto do_subreg;
42685 default:
42686 gcc_unreachable ();
42689 /* This matches five different patterns with the different modes. */
42690 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
42691 x = gen_rtx_SET (VOIDmode, target, x);
42692 emit_insn (x);
42693 if (target != d->target)
42694 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42696 return true;
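/* Illustrative worked example of the mask computation above
   (hypothetical permutation): for V8SFmode with nelt = 8 and
   d->perm = { 0, 9, 2, 11, 4, 13, 6, 15 }, every element stays in its
   lane (perm[i] is either i or i + 8), so the blend test passes and
   the loop builds mask = 0xaa (bits 1, 3, 5 and 7 set), which becomes
   the immediate of a single vblendps.  */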
42699 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42700 in terms of the variable form of vpermilps.
42702 Note that we will have already failed the immediate input vpermilps,
42703 which requires that the high and low part shuffle be identical; the
42704 variable form doesn't require that. */
42706 static bool
42707 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
42709 rtx rperm[8], vperm;
42710 unsigned i;
42712 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
42713 return false;
42715 /* We can only permute within the 128-bit lane. */
42716 for (i = 0; i < 8; ++i)
42718 unsigned e = d->perm[i];
42719 if (i < 4 ? e >= 4 : e < 4)
42720 return false;
42723 if (d->testing_p)
42724 return true;
42726 for (i = 0; i < 8; ++i)
42728 unsigned e = d->perm[i];
42730 /* Within each 128-bit lane, the elements of op0 are numbered
42731 from 0 and the elements of op1 are numbered from 4. */
42732 if (e >= 8 + 4)
42733 e -= 8;
42734 else if (e >= 4)
42735 e -= 4;
42737 rperm[i] = GEN_INT (e);
42740 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
42741 vperm = force_reg (V8SImode, vperm);
42742 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
42744 return true;
42747 /* Return true if permutation D can be performed as VMODE permutation
42748 instead. */
42750 static bool
42751 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
42753 unsigned int i, j, chunk;
42755 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
42756 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
42757 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
42758 return false;
42760 if (GET_MODE_NUNITS (vmode) >= d->nelt)
42761 return true;
42763 chunk = d->nelt / GET_MODE_NUNITS (vmode);
42764 for (i = 0; i < d->nelt; i += chunk)
42765 if (d->perm[i] & (chunk - 1))
42766 return false;
42767 else
42768 for (j = 1; j < chunk; ++j)
42769 if (d->perm[i] + j != d->perm[i + j])
42770 return false;
42772 return true;
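/* Illustrative worked example (hypothetical permutation): asking
   whether a V16QImode permutation can be performed as a V4SImode one.
   Here chunk = 16 / 4 = 4, so each group of four byte indexes must
   start at a multiple of four and be consecutive:

     { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 }

   passes (it is the V4SImode permutation { 1, 0, 3, 2 }), while
   { 1, 2, 3, 4, ... } fails because perm[0] & 3 is nonzero.  */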
42775 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42776 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
42778 static bool
42779 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
42781 unsigned i, nelt, eltsz, mask;
42782 unsigned char perm[32];
42783 enum machine_mode vmode = V16QImode;
42784 rtx rperm[32], vperm, target, op0, op1;
42786 nelt = d->nelt;
42788 if (!d->one_operand_p)
42790 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
42792 if (TARGET_AVX2
42793 && valid_perm_using_mode_p (V2TImode, d))
42795 if (d->testing_p)
42796 return true;
42798 /* Use vperm2i128 insn. The pattern uses
42799 V4DImode instead of V2TImode. */
42800 target = d->target;
42801 if (d->vmode != V4DImode)
42802 target = gen_reg_rtx (V4DImode);
42803 op0 = gen_lowpart (V4DImode, d->op0);
42804 op1 = gen_lowpart (V4DImode, d->op1);
42805 rperm[0]
42806 = GEN_INT ((d->perm[0] / (nelt / 2))
42807 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
42808 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
42809 if (target != d->target)
42810 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42811 return true;
42813 return false;
42816 else
42818 if (GET_MODE_SIZE (d->vmode) == 16)
42820 if (!TARGET_SSSE3)
42821 return false;
42823 else if (GET_MODE_SIZE (d->vmode) == 32)
42825 if (!TARGET_AVX2)
42826 return false;
42828 /* V4DImode should already be handled through
42829 expand_vselect by the vpermq instruction. */
42830 gcc_assert (d->vmode != V4DImode);
42832 vmode = V32QImode;
42833 if (d->vmode == V8SImode
42834 || d->vmode == V16HImode
42835 || d->vmode == V32QImode)
42837 /* First see if vpermq can be used for
42838 V8SImode/V16HImode/V32QImode. */
42839 if (valid_perm_using_mode_p (V4DImode, d))
42841 for (i = 0; i < 4; i++)
42842 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
42843 if (d->testing_p)
42844 return true;
42845 target = gen_reg_rtx (V4DImode);
42846 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
42847 perm, 4, false))
42849 emit_move_insn (d->target,
42850 gen_lowpart (d->vmode, target));
42851 return true;
42853 return false;
42856 /* Next see if vpermd can be used. */
42857 if (valid_perm_using_mode_p (V8SImode, d))
42858 vmode = V8SImode;
42860 /* Or if vpermps can be used. */
42861 else if (d->vmode == V8SFmode)
42862 vmode = V8SImode;
42864 if (vmode == V32QImode)
42866 /* vpshufb only works intra-lane; it is not
42867 possible to shuffle bytes between the lanes. */
42868 for (i = 0; i < nelt; ++i)
42869 if ((d->perm[i] ^ i) & (nelt / 2))
42870 return false;
42873 else
42874 return false;
42877 if (d->testing_p)
42878 return true;
42880 if (vmode == V8SImode)
42881 for (i = 0; i < 8; ++i)
42882 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
42883 else
42885 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
42886 if (!d->one_operand_p)
42887 mask = 2 * nelt - 1;
42888 else if (vmode == V16QImode)
42889 mask = nelt - 1;
42890 else
42891 mask = nelt / 2 - 1;
42893 for (i = 0; i < nelt; ++i)
42895 unsigned j, e = d->perm[i] & mask;
42896 for (j = 0; j < eltsz; ++j)
42897 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
42901 vperm = gen_rtx_CONST_VECTOR (vmode,
42902 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
42903 vperm = force_reg (vmode, vperm);
42905 target = d->target;
42906 if (d->vmode != vmode)
42907 target = gen_reg_rtx (vmode);
42908 op0 = gen_lowpart (vmode, d->op0);
42909 if (d->one_operand_p)
42911 if (vmode == V16QImode)
42912 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
42913 else if (vmode == V32QImode)
42914 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
42915 else if (vmode == V8SFmode)
42916 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
42917 else
42918 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
42920 else
42922 op1 = gen_lowpart (vmode, d->op1);
42923 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
42925 if (target != d->target)
42926 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42928 return true;
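/* Illustrative worked example of the byte-mask construction above
   (hypothetical permutation): the word swap { 1, 0, 3, 2, 5, 4, 7, 6 }
   of a single V8HImode operand expanded as a V16QImode pshufb.  With
   eltsz = 2, each word index e is widened into the byte indexes e * 2
   and e * 2 + 1, giving the selector

     { 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13 }

   which is loaded as a constant vector and used by the pshufb insn.  */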
42931 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
42932 in a single instruction. */
42934 static bool
42935 expand_vec_perm_1 (struct expand_vec_perm_d *d)
42937 unsigned i, nelt = d->nelt;
42938 unsigned char perm2[MAX_VECT_LEN];
42940 /* Check plain VEC_SELECT first, because AVX has instructions that could
42941 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
42942 input where SEL+CONCAT may not. */
42943 if (d->one_operand_p)
42945 int mask = nelt - 1;
42946 bool identity_perm = true;
42947 bool broadcast_perm = true;
42949 for (i = 0; i < nelt; i++)
42951 perm2[i] = d->perm[i] & mask;
42952 if (perm2[i] != i)
42953 identity_perm = false;
42954 if (perm2[i])
42955 broadcast_perm = false;
42958 if (identity_perm)
42960 if (!d->testing_p)
42961 emit_move_insn (d->target, d->op0);
42962 return true;
42964 else if (broadcast_perm && TARGET_AVX2)
42966 /* Use vpbroadcast{b,w,d}. */
42967 rtx (*gen) (rtx, rtx) = NULL;
42968 switch (d->vmode)
42970 case V32QImode:
42971 gen = gen_avx2_pbroadcastv32qi_1;
42972 break;
42973 case V16HImode:
42974 gen = gen_avx2_pbroadcastv16hi_1;
42975 break;
42976 case V8SImode:
42977 gen = gen_avx2_pbroadcastv8si_1;
42978 break;
42979 case V16QImode:
42980 gen = gen_avx2_pbroadcastv16qi;
42981 break;
42982 case V8HImode:
42983 gen = gen_avx2_pbroadcastv8hi;
42984 break;
42985 case V8SFmode:
42986 gen = gen_avx2_vec_dupv8sf_1;
42987 break;
42988 /* For other modes prefer other shuffles this function creates. */
42989 default: break;
42991 if (gen != NULL)
42993 if (!d->testing_p)
42994 emit_insn (gen (d->target, d->op0));
42995 return true;
42999 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
43000 return true;
43002 /* There are plenty of patterns in sse.md that are written for
43003 SEL+CONCAT and are not replicated for a single op. Perhaps
43004 that should be changed, to avoid the nastiness here. */
43006 /* Recognize interleave style patterns, which means incrementing
43007 every other permutation operand. */
43008 for (i = 0; i < nelt; i += 2)
43010 perm2[i] = d->perm[i] & mask;
43011 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
43013 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43014 d->testing_p))
43015 return true;
43017 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
43018 if (nelt >= 4)
43020 for (i = 0; i < nelt; i += 4)
43022 perm2[i + 0] = d->perm[i + 0] & mask;
43023 perm2[i + 1] = d->perm[i + 1] & mask;
43024 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
43025 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
43028 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43029 d->testing_p))
43030 return true;
43034 /* Finally, try the fully general two operand permute. */
43035 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
43036 d->testing_p))
43037 return true;
43039 /* Recognize interleave style patterns with reversed operands. */
43040 if (!d->one_operand_p)
43042 for (i = 0; i < nelt; ++i)
43044 unsigned e = d->perm[i];
43045 if (e >= nelt)
43046 e -= nelt;
43047 else
43048 e += nelt;
43049 perm2[i] = e;
43052 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
43053 d->testing_p))
43054 return true;
43057 /* Try the SSE4.1 blend variable merge instructions. */
43058 if (expand_vec_perm_blend (d))
43059 return true;
43061 /* Try one of the AVX vpermil variable permutations. */
43062 if (expand_vec_perm_vpermil (d))
43063 return true;
43065 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
43066 vpshufb, vpermd, vpermps or vpermq variable permutation. */
43067 if (expand_vec_perm_pshufb (d))
43068 return true;
43070 /* Try the AVX512F vpermi2 instructions. */
43071 rtx vec[64];
43072 enum machine_mode mode = d->vmode;
43073 if (mode == V8DFmode)
43074 mode = V8DImode;
43075 else if (mode == V16SFmode)
43076 mode = V16SImode;
43077 for (i = 0; i < nelt; ++i)
43078 vec[i] = GEN_INT (d->perm[i]);
43079 rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
43080 if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
43081 return true;
43083 return false;
43086 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43087 in terms of a pair of pshuflw + pshufhw instructions. */
43089 static bool
43090 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
43092 unsigned char perm2[MAX_VECT_LEN];
43093 unsigned i;
43094 bool ok;
43096 if (d->vmode != V8HImode || !d->one_operand_p)
43097 return false;
43099 /* The two permutations only operate in 64-bit lanes. */
43100 for (i = 0; i < 4; ++i)
43101 if (d->perm[i] >= 4)
43102 return false;
43103 for (i = 4; i < 8; ++i)
43104 if (d->perm[i] < 4)
43105 return false;
43107 if (d->testing_p)
43108 return true;
43110 /* Emit the pshuflw. */
43111 memcpy (perm2, d->perm, 4);
43112 for (i = 4; i < 8; ++i)
43113 perm2[i] = i;
43114 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
43115 gcc_assert (ok);
43117 /* Emit the pshufhw. */
43118 memcpy (perm2 + 4, d->perm + 4, 4);
43119 for (i = 0; i < 4; ++i)
43120 perm2[i] = i;
43121 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
43122 gcc_assert (ok);
43124 return true;
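/* Illustrative worked example (hypothetical permutation): for V8HImode
   d->perm = { 3, 2, 1, 0, 7, 6, 5, 4 }, the low four indexes are all
   < 4 and the high four are all >= 4, so the permutation splits into a
   pshuflw with selector { 3, 2, 1, 0 } (high words left in place)
   followed by a pshufhw with selector { 7, 6, 5, 4 } (low words left
   in place).  */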
43127 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43128 the permutation using the SSSE3 palignr instruction. This succeeds
43129 when all of the elements in PERM fit within one vector and we merely
43130 need to shift them down so that a single vector permutation has a
43131 chance to succeed. */
43133 static bool
43134 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
43136 unsigned i, nelt = d->nelt;
43137 unsigned min, max;
43138 bool in_order, ok;
43139 rtx shift, target;
43140 struct expand_vec_perm_d dcopy;
43142 /* Even with AVX, palignr only operates on 128-bit vectors. */
43143 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43144 return false;
43146 min = nelt, max = 0;
43147 for (i = 0; i < nelt; ++i)
43149 unsigned e = d->perm[i];
43150 if (e < min)
43151 min = e;
43152 if (e > max)
43153 max = e;
43155 if (min == 0 || max - min >= nelt)
43156 return false;
43158 /* Given that we have SSSE3, we know we'll be able to implement the
43159 single operand permutation after the palignr with pshufb. */
43160 if (d->testing_p)
43161 return true;
43163 dcopy = *d;
43164 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
43165 target = gen_reg_rtx (TImode);
43166 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
43167 gen_lowpart (TImode, d->op0), shift));
43169 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
43170 dcopy.one_operand_p = true;
43172 in_order = true;
43173 for (i = 0; i < nelt; ++i)
43175 unsigned e = dcopy.perm[i] - min;
43176 if (e != i)
43177 in_order = false;
43178 dcopy.perm[i] = e;
43181 /* Test for the degenerate case where the alignment by itself
43182 produces the desired permutation. */
43183 if (in_order)
43185 emit_move_insn (d->target, dcopy.op0);
43186 return true;
43189 ok = expand_vec_perm_1 (&dcopy);
43190 gcc_assert (ok);
43192 return ok;
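/* Illustrative worked example (hypothetical permutation): for V4SImode
   d->perm = { 3, 4, 5, 6 }, min = 3 and max = 6, so a single palignr
   shifts the op1:op0 concatenation down by three elements (96 bits).
   The remaining single-operand permutation is then the identity
   { 0, 1, 2, 3 }, so the in_order early exit applies and no pshufb is
   needed.  */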
43195 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
43197 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43198 a two vector permutation into a single vector permutation by using
43199 an interleave operation to merge the vectors. */
43201 static bool
43202 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
43204 struct expand_vec_perm_d dremap, dfinal;
43205 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
43206 unsigned HOST_WIDE_INT contents;
43207 unsigned char remap[2 * MAX_VECT_LEN];
43208 rtx seq;
43209 bool ok, same_halves = false;
43211 if (GET_MODE_SIZE (d->vmode) == 16)
43213 if (d->one_operand_p)
43214 return false;
43216 else if (GET_MODE_SIZE (d->vmode) == 32)
43218 if (!TARGET_AVX)
43219 return false;
43220 /* For 32-byte modes allow even d->one_operand_p.
43221 The lack of cross-lane shuffling in some instructions
43222 might prevent a single insn shuffle. */
43223 dfinal = *d;
43224 dfinal.testing_p = true;
43225 /* If expand_vec_perm_interleave3 can expand this into
43226 a 3-insn sequence, give up and let it be expanded that
43227 way. While that is one insn longer, it doesn't need a
43228 memory operand, and in the common case where both the
43229 interleave-low and interleave-high permutations with the
43230 same operands are adjacent, CSE leaves only 4 insns
43231 for both. */
43232 if (expand_vec_perm_interleave3 (&dfinal))
43233 return false;
43235 else
43236 return false;
43238 /* Examine from whence the elements come. */
43239 contents = 0;
43240 for (i = 0; i < nelt; ++i)
43241 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
43243 memset (remap, 0xff, sizeof (remap));
43244 dremap = *d;
43246 if (GET_MODE_SIZE (d->vmode) == 16)
43248 unsigned HOST_WIDE_INT h1, h2, h3, h4;
43250 /* Split the two input vectors into 4 halves. */
43251 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
43252 h2 = h1 << nelt2;
43253 h3 = h2 << nelt2;
43254 h4 = h3 << nelt2;
43256 /* If the elements come from the low halves, use interleave low; similarly
43257 for interleave high. If the elements are from mismatched halves, we
43258 can use shufps for V4SF/V4SI or do a DImode shuffle. */
43259 if ((contents & (h1 | h3)) == contents)
43261 /* punpckl* */
43262 for (i = 0; i < nelt2; ++i)
43264 remap[i] = i * 2;
43265 remap[i + nelt] = i * 2 + 1;
43266 dremap.perm[i * 2] = i;
43267 dremap.perm[i * 2 + 1] = i + nelt;
43269 if (!TARGET_SSE2 && d->vmode == V4SImode)
43270 dremap.vmode = V4SFmode;
43272 else if ((contents & (h2 | h4)) == contents)
43274 /* punpckh* */
43275 for (i = 0; i < nelt2; ++i)
43277 remap[i + nelt2] = i * 2;
43278 remap[i + nelt + nelt2] = i * 2 + 1;
43279 dremap.perm[i * 2] = i + nelt2;
43280 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
43282 if (!TARGET_SSE2 && d->vmode == V4SImode)
43283 dremap.vmode = V4SFmode;
43285 else if ((contents & (h1 | h4)) == contents)
43287 /* shufps */
43288 for (i = 0; i < nelt2; ++i)
43290 remap[i] = i;
43291 remap[i + nelt + nelt2] = i + nelt2;
43292 dremap.perm[i] = i;
43293 dremap.perm[i + nelt2] = i + nelt + nelt2;
43295 if (nelt != 4)
43297 /* shufpd */
43298 dremap.vmode = V2DImode;
43299 dremap.nelt = 2;
43300 dremap.perm[0] = 0;
43301 dremap.perm[1] = 3;
43304 else if ((contents & (h2 | h3)) == contents)
43306 /* shufps */
43307 for (i = 0; i < nelt2; ++i)
43309 remap[i + nelt2] = i;
43310 remap[i + nelt] = i + nelt2;
43311 dremap.perm[i] = i + nelt2;
43312 dremap.perm[i + nelt2] = i + nelt;
43314 if (nelt != 4)
43316 /* shufpd */
43317 dremap.vmode = V2DImode;
43318 dremap.nelt = 2;
43319 dremap.perm[0] = 1;
43320 dremap.perm[1] = 2;
43323 else
43324 return false;
43326 else
43328 unsigned int nelt4 = nelt / 4, nzcnt = 0;
43329 unsigned HOST_WIDE_INT q[8];
43330 unsigned int nonzero_halves[4];
43332 /* Split the two input vectors into 8 quarters. */
43333 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
43334 for (i = 1; i < 8; ++i)
43335 q[i] = q[0] << (nelt4 * i);
43336 for (i = 0; i < 4; ++i)
43337 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
43339 nonzero_halves[nzcnt] = i;
43340 ++nzcnt;
43343 if (nzcnt == 1)
43345 gcc_assert (d->one_operand_p);
43346 nonzero_halves[1] = nonzero_halves[0];
43347 same_halves = true;
43349 else if (d->one_operand_p)
43351 gcc_assert (nonzero_halves[0] == 0);
43352 gcc_assert (nonzero_halves[1] == 1);
43355 if (nzcnt <= 2)
43357 if (d->perm[0] / nelt2 == nonzero_halves[1])
43359 /* Attempt to increase the likelihood that dfinal
43360 shuffle will be intra-lane. */
43361 char tmph = nonzero_halves[0];
43362 nonzero_halves[0] = nonzero_halves[1];
43363 nonzero_halves[1] = tmph;
43366 /* vperm2f128 or vperm2i128. */
43367 for (i = 0; i < nelt2; ++i)
43369 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
43370 remap[i + nonzero_halves[0] * nelt2] = i;
43371 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
43372 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
43375 if (d->vmode != V8SFmode
43376 && d->vmode != V4DFmode
43377 && d->vmode != V8SImode)
43379 dremap.vmode = V8SImode;
43380 dremap.nelt = 8;
43381 for (i = 0; i < 4; ++i)
43383 dremap.perm[i] = i + nonzero_halves[0] * 4;
43384 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
43388 else if (d->one_operand_p)
43389 return false;
43390 else if (TARGET_AVX2
43391 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
43393 /* vpunpckl* */
43394 for (i = 0; i < nelt4; ++i)
43396 remap[i] = i * 2;
43397 remap[i + nelt] = i * 2 + 1;
43398 remap[i + nelt2] = i * 2 + nelt2;
43399 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
43400 dremap.perm[i * 2] = i;
43401 dremap.perm[i * 2 + 1] = i + nelt;
43402 dremap.perm[i * 2 + nelt2] = i + nelt2;
43403 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
43406 else if (TARGET_AVX2
43407 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
43409 /* vpunpckh* */
43410 for (i = 0; i < nelt4; ++i)
43412 remap[i + nelt4] = i * 2;
43413 remap[i + nelt + nelt4] = i * 2 + 1;
43414 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
43415 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
43416 dremap.perm[i * 2] = i + nelt4;
43417 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
43418 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
43419 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
43422 else
43423 return false;
43426 /* Use the remapping array set up above to move the elements from their
43427 swizzled locations into their final destinations. */
43428 dfinal = *d;
43429 for (i = 0; i < nelt; ++i)
43431 unsigned e = remap[d->perm[i]];
43432 gcc_assert (e < nelt);
43433 /* If same_halves is true, both halves of the remapped vector are the
43434 same. Avoid cross-lane accesses if possible. */
43435 if (same_halves && i >= nelt2)
43437 gcc_assert (e < nelt2);
43438 dfinal.perm[i] = e + nelt2;
43440 else
43441 dfinal.perm[i] = e;
43443 if (!d->testing_p)
43445 dremap.target = gen_reg_rtx (dremap.vmode);
43446 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43448 dfinal.op1 = dfinal.op0;
43449 dfinal.one_operand_p = true;
43451 /* Test if the final remap can be done with a single insn. For V4SFmode or
43452 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
43453 start_sequence ();
43454 ok = expand_vec_perm_1 (&dfinal);
43455 seq = get_insns ();
43456 end_sequence ();
43458 if (!ok)
43459 return false;
43461 if (d->testing_p)
43462 return true;
43464 if (dremap.vmode != dfinal.vmode)
43466 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
43467 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
43470 ok = expand_vec_perm_1 (&dremap);
43471 gcc_assert (ok);
43473 emit_insn (seq);
43474 return true;
43477 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43478 a single vector cross-lane permutation into vpermq followed
43479 by any of the single insn permutations. */
43481 static bool
43482 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
43484 struct expand_vec_perm_d dremap, dfinal;
43485 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
43486 unsigned contents[2];
43487 bool ok;
43489 if (!(TARGET_AVX2
43490 && (d->vmode == V32QImode || d->vmode == V16HImode)
43491 && d->one_operand_p))
43492 return false;
43494 contents[0] = 0;
43495 contents[1] = 0;
43496 for (i = 0; i < nelt2; ++i)
43498 contents[0] |= 1u << (d->perm[i] / nelt4);
43499 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
43502 for (i = 0; i < 2; ++i)
43504 unsigned int cnt = 0;
43505 for (j = 0; j < 4; ++j)
43506 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
43507 return false;
43510 if (d->testing_p)
43511 return true;
43513 dremap = *d;
43514 dremap.vmode = V4DImode;
43515 dremap.nelt = 4;
43516 dremap.target = gen_reg_rtx (V4DImode);
43517 dremap.op0 = gen_lowpart (V4DImode, d->op0);
43518 dremap.op1 = dremap.op0;
43519 dremap.one_operand_p = true;
43520 for (i = 0; i < 2; ++i)
43522 unsigned int cnt = 0;
43523 for (j = 0; j < 4; ++j)
43524 if ((contents[i] & (1u << j)) != 0)
43525 dremap.perm[2 * i + cnt++] = j;
43526 for (; cnt < 2; ++cnt)
43527 dremap.perm[2 * i + cnt] = 0;
43530 dfinal = *d;
43531 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43532 dfinal.op1 = dfinal.op0;
43533 dfinal.one_operand_p = true;
43534 for (i = 0, j = 0; i < nelt; ++i)
43536 if (i == nelt2)
43537 j = 2;
43538 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
43539 if ((d->perm[i] / nelt4) == dremap.perm[j])
43541 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
43542 dfinal.perm[i] |= nelt4;
43543 else
43544 gcc_unreachable ();
43547 ok = expand_vec_perm_1 (&dremap);
43548 gcc_assert (ok);
43550 ok = expand_vec_perm_1 (&dfinal);
43551 gcc_assert (ok);
43553 return true;
43556 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
43557 a vector permutation using two instructions, vperm2f128 or
43558 vperm2i128, followed by any single in-lane permutation. */
43560 static bool
43561 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
43563 struct expand_vec_perm_d dfirst, dsecond;
43564 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
43565 bool ok;
43567 if (!TARGET_AVX
43568 || GET_MODE_SIZE (d->vmode) != 32
43569 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
43570 return false;
43572 dsecond = *d;
43573 dsecond.one_operand_p = false;
43574 dsecond.testing_p = true;
43576 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
43577 immediate. For perm < 16 the second permutation uses
43578 d->op0 as first operand, for perm >= 16 it uses d->op1
43579 as first operand. The second operand is the result of
43580 vperm2[fi]128. */
43581 for (perm = 0; perm < 32; perm++)
43583 /* Ignore permutations which do not move anything cross-lane. */
43584 if (perm < 16)
43586 /* The second shuffle for e.g. V4DFmode has
43587 0123 and ABCD operands.
43588 Ignore AB23, as 23 is already in the second lane
43589 of the first operand. */
43590 if ((perm & 0xc) == (1 << 2)) continue;
43591 /* And 01CD, as 01 is in the first lane of the first
43592 operand. */
43593 if ((perm & 3) == 0) continue;
43594 /* And 4567, as then the vperm2[fi]128 doesn't change
43595 anything on the original 4567 second operand. */
43596 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
43598 else
43600 /* The second shuffle for e.g. V4DFmode has
43601 4567 and ABCD operands.
43602 Ignore AB67, as 67 is already in the second lane
43603 of the first operand. */
43604 if ((perm & 0xc) == (3 << 2)) continue;
43605 /* And 45CD, as 45 is in the first lane of the first
43606 operand. */
43607 if ((perm & 3) == 2) continue;
43608 /* And 0123, as then the vperm2[fi]128 doesn't change
43609 anything on the original 0123 first operand. */
43610 if ((perm & 0xf) == (1 << 2)) continue;
43613 for (i = 0; i < nelt; i++)
43615 j = d->perm[i] / nelt2;
43616 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
43617 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
43618 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
43619 dsecond.perm[i] = d->perm[i] & (nelt - 1);
43620 else
43621 break;
43624 if (i == nelt)
43626 start_sequence ();
43627 ok = expand_vec_perm_1 (&dsecond);
43628 end_sequence ();
43630 else
43631 ok = false;
43633 if (ok)
43635 if (d->testing_p)
43636 return true;
43638 /* Found a usable second shuffle. dfirst will be
43639 vperm2f128 on d->op0 and d->op1. */
43640 dsecond.testing_p = false;
43641 dfirst = *d;
43642 dfirst.target = gen_reg_rtx (d->vmode);
43643 for (i = 0; i < nelt; i++)
43644 dfirst.perm[i] = (i & (nelt2 - 1))
43645 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
43647 ok = expand_vec_perm_1 (&dfirst);
43648 gcc_assert (ok);
43650 /* And dsecond is some single insn shuffle, taking
43651 d->op0 and result of vperm2f128 (if perm < 16) or
43652 d->op1 and result of vperm2f128 (otherwise). */
43653 dsecond.op1 = dfirst.target;
43654 if (perm >= 16)
43655 dsecond.op0 = dfirst.op1;
43657 ok = expand_vec_perm_1 (&dsecond);
43658 gcc_assert (ok);
43660 return true;
43663 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
43664 if (d->one_operand_p)
43665 return false;
43668 return false;
43671 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43672 a two vector permutation using 2 intra-lane interleave insns
43673 and cross-lane shuffle for 32-byte vectors. */
43675 static bool
43676 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
43678 unsigned i, nelt;
43679 rtx (*gen) (rtx, rtx, rtx);
43681 if (d->one_operand_p)
43682 return false;
43683 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
43685 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
43687 else
43688 return false;
43690 nelt = d->nelt;
43691 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
43692 return false;
43693 for (i = 0; i < nelt; i += 2)
43694 if (d->perm[i] != d->perm[0] + i / 2
43695 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
43696 return false;
43698 if (d->testing_p)
43699 return true;
43701 switch (d->vmode)
43703 case V32QImode:
43704 if (d->perm[0])
43705 gen = gen_vec_interleave_highv32qi;
43706 else
43707 gen = gen_vec_interleave_lowv32qi;
43708 break;
43709 case V16HImode:
43710 if (d->perm[0])
43711 gen = gen_vec_interleave_highv16hi;
43712 else
43713 gen = gen_vec_interleave_lowv16hi;
43714 break;
43715 case V8SImode:
43716 if (d->perm[0])
43717 gen = gen_vec_interleave_highv8si;
43718 else
43719 gen = gen_vec_interleave_lowv8si;
43720 break;
43721 case V4DImode:
43722 if (d->perm[0])
43723 gen = gen_vec_interleave_highv4di;
43724 else
43725 gen = gen_vec_interleave_lowv4di;
43726 break;
43727 case V8SFmode:
43728 if (d->perm[0])
43729 gen = gen_vec_interleave_highv8sf;
43730 else
43731 gen = gen_vec_interleave_lowv8sf;
43732 break;
43733 case V4DFmode:
43734 if (d->perm[0])
43735 gen = gen_vec_interleave_highv4df;
43736 else
43737 gen = gen_vec_interleave_lowv4df;
43738 break;
43739 default:
43740 gcc_unreachable ();
43743 emit_insn (gen (d->target, d->op0, d->op1));
43744 return true;
43747 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
43748 a single vector permutation using a single intra-lane vector
43749 permutation, vperm2f128 swapping the lanes and vblend* insn blending
43750 the non-swapped and swapped vectors together. */
43752 static bool
43753 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
43755 struct expand_vec_perm_d dfirst, dsecond;
43756 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
43757 rtx seq;
43758 bool ok;
43759 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
43761 if (!TARGET_AVX
43762 || TARGET_AVX2
43763 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
43764 || !d->one_operand_p)
43765 return false;
43767 dfirst = *d;
43768 for (i = 0; i < nelt; i++)
43769 dfirst.perm[i] = 0xff;
43770 for (i = 0, msk = 0; i < nelt; i++)
43772 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
43773 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
43774 return false;
43775 dfirst.perm[j] = d->perm[i];
43776 if (j != i)
43777 msk |= (1 << i);
43779 for (i = 0; i < nelt; i++)
43780 if (dfirst.perm[i] == 0xff)
43781 dfirst.perm[i] = i;
43783 if (!d->testing_p)
43784 dfirst.target = gen_reg_rtx (dfirst.vmode);
43786 start_sequence ();
43787 ok = expand_vec_perm_1 (&dfirst);
43788 seq = get_insns ();
43789 end_sequence ();
43791 if (!ok)
43792 return false;
43794 if (d->testing_p)
43795 return true;
43797 emit_insn (seq);
43799 dsecond = *d;
43800 dsecond.op0 = dfirst.target;
43801 dsecond.op1 = dfirst.target;
43802 dsecond.one_operand_p = true;
43803 dsecond.target = gen_reg_rtx (dsecond.vmode);
43804 for (i = 0; i < nelt; i++)
43805 dsecond.perm[i] = i ^ nelt2;
43807 ok = expand_vec_perm_1 (&dsecond);
43808 gcc_assert (ok);
43810 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
43811 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
43812 return true;
43815 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
43816 permutation using two vperm2f128, followed by a vshufpd insn blending
43817 the two vectors together. */
43819 static bool
43820 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
43822 struct expand_vec_perm_d dfirst, dsecond, dthird;
43823 bool ok;
43825 if (!TARGET_AVX || (d->vmode != V4DFmode))
43826 return false;
43828 if (d->testing_p)
43829 return true;
43831 dfirst = *d;
43832 dsecond = *d;
43833 dthird = *d;
43835 dfirst.perm[0] = (d->perm[0] & ~1);
43836 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
43837 dfirst.perm[2] = (d->perm[2] & ~1);
43838 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
43839 dsecond.perm[0] = (d->perm[1] & ~1);
43840 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
43841 dsecond.perm[2] = (d->perm[3] & ~1);
43842 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
43843 dthird.perm[0] = (d->perm[0] % 2);
43844 dthird.perm[1] = (d->perm[1] % 2) + 4;
43845 dthird.perm[2] = (d->perm[2] % 2) + 2;
43846 dthird.perm[3] = (d->perm[3] % 2) + 6;
43848 dfirst.target = gen_reg_rtx (dfirst.vmode);
43849 dsecond.target = gen_reg_rtx (dsecond.vmode);
43850 dthird.op0 = dfirst.target;
43851 dthird.op1 = dsecond.target;
43852 dthird.one_operand_p = false;
43854 canonicalize_perm (&dfirst);
43855 canonicalize_perm (&dsecond);
43857 ok = expand_vec_perm_1 (&dfirst)
43858 && expand_vec_perm_1 (&dsecond)
43859 && expand_vec_perm_1 (&dthird);
43861 gcc_assert (ok);
43863 return true;
43866 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
43867 permutation with two pshufb insns and an ior. We should have already
43868 failed all two instruction sequences. */
43870 static bool
43871 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
43873 rtx rperm[2][16], vperm, l, h, op, m128;
43874 unsigned int i, nelt, eltsz;
43876 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43877 return false;
43878 gcc_assert (!d->one_operand_p);
43880 if (d->testing_p)
43881 return true;
43883 nelt = d->nelt;
43884 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43886 /* Generate two permutation masks. If the required element is within
43887 the given vector it is shuffled into the proper lane. If the required
43888 element is in the other vector, force a zero into the lane by setting
43889 bit 7 in the permutation mask. */
43890 m128 = GEN_INT (-128);
43891 for (i = 0; i < nelt; ++i)
43893 unsigned j, e = d->perm[i];
43894 unsigned which = (e >= nelt);
43895 if (e >= nelt)
43896 e -= nelt;
43898 for (j = 0; j < eltsz; ++j)
43900 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
43901 rperm[1-which][i*eltsz + j] = m128;
43905 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
43906 vperm = force_reg (V16QImode, vperm);
43908 l = gen_reg_rtx (V16QImode);
43909 op = gen_lowpart (V16QImode, d->op0);
43910 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
43912 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
43913 vperm = force_reg (V16QImode, vperm);
43915 h = gen_reg_rtx (V16QImode);
43916 op = gen_lowpart (V16QImode, d->op1);
43917 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
43919 op = d->target;
43920 if (d->vmode != V16QImode)
43921 op = gen_reg_rtx (V16QImode);
43922 emit_insn (gen_iorv16qi3 (op, l, h));
43923 if (op != d->target)
43924 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43926 return true;
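/* Illustrative sketch of the sequence above using intrinsics
   (hypothetical example, not part of this file): extracting the even
   bytes of two V16QImode operands, i.e. d->perm = { 0, 2, ..., 30 }:

     #include <tmmintrin.h>

     static __m128i
     even_bytes (__m128i op0, __m128i op1)
     {
       const __m128i m0 = _mm_setr_epi8 (0, 2, 4, 6, 8, 10, 12, 14,
                                         -128, -128, -128, -128,
                                         -128, -128, -128, -128);
       const __m128i m1 = _mm_setr_epi8 (-128, -128, -128, -128,
                                         -128, -128, -128, -128,
                                         0, 2, 4, 6, 8, 10, 12, 14);
       return _mm_or_si128 (_mm_shuffle_epi8 (op0, m0),
                            _mm_shuffle_epi8 (op1, m1));
     }

   Selector bytes with bit 7 set force a zero, so the two pshufb
   results can simply be combined with a single por.  */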
43929 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
43930 with two vpshufb insns, vpermq and vpor. We should have already failed
43931 all two or three instruction sequences. */
43933 static bool
43934 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
43936 rtx rperm[2][32], vperm, l, h, hp, op, m128;
43937 unsigned int i, nelt, eltsz;
43939 if (!TARGET_AVX2
43940 || !d->one_operand_p
43941 || (d->vmode != V32QImode && d->vmode != V16HImode))
43942 return false;
43944 if (d->testing_p)
43945 return true;
43947 nelt = d->nelt;
43948 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43950 /* Generate two permutation masks. If the required element is within
43951 the same lane, it is shuffled in. If the required element is from the
43952 other lane, force a zero by setting bit 7 in the permutation mask.
43953 The other mask has non-negative elements where an element is
43954 requested from the other lane, but those elements are also moved to
43955 the other lane, so that the result of vpshufb can have its two
43956 V2TImode halves swapped. */
43957 m128 = GEN_INT (-128);
43958 for (i = 0; i < nelt; ++i)
43960 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
43961 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
43963 for (j = 0; j < eltsz; ++j)
43965 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
43966 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
43970 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
43971 vperm = force_reg (V32QImode, vperm);
43973 h = gen_reg_rtx (V32QImode);
43974 op = gen_lowpart (V32QImode, d->op0);
43975 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
43977 /* Swap the 128-bit lanes of h into hp. */
43978 hp = gen_reg_rtx (V4DImode);
43979 op = gen_lowpart (V4DImode, h);
43980 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
43981 const1_rtx));
43983 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
43984 vperm = force_reg (V32QImode, vperm);
43986 l = gen_reg_rtx (V32QImode);
43987 op = gen_lowpart (V32QImode, d->op0);
43988 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
43990 op = d->target;
43991 if (d->vmode != V32QImode)
43992 op = gen_reg_rtx (V32QImode);
43993 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
43994 if (op != d->target)
43995 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43997 return true;
44000 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
44001 and extract-odd permutations of two V32QImode or V16HImode operands
44002 with two vpshufb insns, vpor and vpermq. We should have already
44003 failed all two or three instruction sequences. */
44005 static bool
44006 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
44008 rtx rperm[2][32], vperm, l, h, ior, op, m128;
44009 unsigned int i, nelt, eltsz;
44011 if (!TARGET_AVX2
44012 || d->one_operand_p
44013 || (d->vmode != V32QImode && d->vmode != V16HImode))
44014 return false;
44016 for (i = 0; i < d->nelt; ++i)
44017 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
44018 return false;
44020 if (d->testing_p)
44021 return true;
44023 nelt = d->nelt;
44024 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44026 /* Generate two permutation masks. In the first permutation mask
44027 the first quarter will contain indexes for the first half
44028 of op0, the second quarter will contain bit 7 set, the third quarter
44029 will contain indexes for the second half of op0 and the
44030 last quarter bit 7 set. In the second permutation mask
44031 the first quarter will contain bit 7 set, the second quarter
44032 indexes for the first half of op1, the third quarter bit 7 set
44033 and the last quarter indexes for the second half of op1.
44034 I.e. the first mask e.g. for V32QImode extract-even will be:
44035 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
44036 (all values masked with 0xf except for -128) and the second mask
44037 for extract-even will be
44038 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
44039 m128 = GEN_INT (-128);
44040 for (i = 0; i < nelt; ++i)
44042 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44043 unsigned which = d->perm[i] >= nelt;
44044 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
44046 for (j = 0; j < eltsz; ++j)
44048 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
44049 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
44053 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44054 vperm = force_reg (V32QImode, vperm);
44056 l = gen_reg_rtx (V32QImode);
44057 op = gen_lowpart (V32QImode, d->op0);
44058 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44060 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44061 vperm = force_reg (V32QImode, vperm);
44063 h = gen_reg_rtx (V32QImode);
44064 op = gen_lowpart (V32QImode, d->op1);
44065 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44067 ior = gen_reg_rtx (V32QImode);
44068 emit_insn (gen_iorv32qi3 (ior, l, h));
44070 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
44071 op = gen_reg_rtx (V4DImode);
44072 ior = gen_lowpart (V4DImode, ior);
44073 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
44074 const1_rtx, GEN_INT (3)));
44075 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44077 return true;
44080 /* A subroutine of ix86_expand_vec_perm_const_1. Implement extract-even
44081 and extract-odd permutations. */
44083 static bool
44084 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
44086 rtx t1, t2, t3, t4, t5;
44088 switch (d->vmode)
44090 case V4DFmode:
44091 if (d->testing_p)
44092 break;
44093 t1 = gen_reg_rtx (V4DFmode);
44094 t2 = gen_reg_rtx (V4DFmode);
44096 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44097 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
44098 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
44100 /* Now an unpck[lh]pd will produce the result required. */
44101 if (odd)
44102 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
44103 else
44104 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
44105 emit_insn (t3);
44106 break;
44108 case V8SFmode:
44110 int mask = odd ? 0xdd : 0x88;
44112 if (d->testing_p)
44113 break;
44114 t1 = gen_reg_rtx (V8SFmode);
44115 t2 = gen_reg_rtx (V8SFmode);
44116 t3 = gen_reg_rtx (V8SFmode);
44118 /* Shuffle within the 128-bit lanes to produce:
44119 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
44120 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
44121 GEN_INT (mask)));
44123 /* Shuffle the lanes around to produce:
44124 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
44125 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
44126 GEN_INT (0x3)));
44128 /* Shuffle within the 128-bit lanes to produce:
44129 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
44130 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
44132 /* Shuffle within the 128-bit lanes to produce:
44133 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
44134 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
44136 /* Shuffle the lanes around to produce:
44137 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
44138 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
44139 GEN_INT (0x20)));
44141 break;
44143 case V2DFmode:
44144 case V4SFmode:
44145 case V2DImode:
44146 case V4SImode:
44147 /* These are always directly implementable by expand_vec_perm_1. */
44148 gcc_unreachable ();
44150 case V8HImode:
44151 if (TARGET_SSSE3)
44152 return expand_vec_perm_pshufb2 (d);
44153 else
44155 if (d->testing_p)
44156 break;
44157 /* We need 2*log2(N)-1 operations to achieve odd/even
44158 with interleave. */
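	 /* For example, to extract the evens of { 0 1 2 3 4 5 6 7 } and
	    { 8 9 a b c d e f }: the first two interleaves give
	    t1 = { 4 c 5 d 6 e 7 f } and target = { 0 8 1 9 2 a 3 b },
	    the next two give t2 = { 2 6 a e 3 7 b f } and
	    target = { 0 4 8 c 1 5 9 d }, and the final interleave-low
	    yields { 0 2 4 6 8 a c e } (interleave-high yields the odds).  */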
44159 t1 = gen_reg_rtx (V8HImode);
44160 t2 = gen_reg_rtx (V8HImode);
44161 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
44162 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
44163 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
44164 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
44165 if (odd)
44166 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
44167 else
44168 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
44169 emit_insn (t3);
44171 break;
44173 case V16QImode:
44174 if (TARGET_SSSE3)
44175 return expand_vec_perm_pshufb2 (d);
44176 else
44178 if (d->testing_p)
44179 break;
44180 t1 = gen_reg_rtx (V16QImode);
44181 t2 = gen_reg_rtx (V16QImode);
44182 t3 = gen_reg_rtx (V16QImode);
44183 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
44184 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
44185 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
44186 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
44187 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
44188 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
44189 if (odd)
44190 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
44191 else
44192 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
44193 emit_insn (t3);
44195 break;
44197 case V16HImode:
44198 case V32QImode:
44199 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
44201 case V4DImode:
44202 if (!TARGET_AVX2)
44204 struct expand_vec_perm_d d_copy = *d;
44205 d_copy.vmode = V4DFmode;
44206 if (d->testing_p)
44207 d_copy.target = gen_lowpart (V4DFmode, d->target);
44208 else
44209 d_copy.target = gen_reg_rtx (V4DFmode);
44210 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
44211 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
44212 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44214 if (!d->testing_p)
44215 emit_move_insn (d->target,
44216 gen_lowpart (V4DImode, d_copy.target));
44217 return true;
44219 return false;
44222 if (d->testing_p)
44223 break;
44225 t1 = gen_reg_rtx (V4DImode);
44226 t2 = gen_reg_rtx (V4DImode);
44228 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44229 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
44230 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
44232 /* Now a vpunpck[lh]qdq will produce the result required. */
44233 if (odd)
44234 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
44235 else
44236 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
44237 emit_insn (t3);
44238 break;
44240 case V8SImode:
44241 if (!TARGET_AVX2)
44243 struct expand_vec_perm_d d_copy = *d;
44244 d_copy.vmode = V8SFmode;
44245 if (d->testing_p)
44246 d_copy.target = gen_lowpart (V8SFmode, d->target);
44247 else
44248 d_copy.target = gen_reg_rtx (V8SFmode);
44249 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
44250 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
44251 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44253 if (!d->testing_p)
44254 emit_move_insn (d->target,
44255 gen_lowpart (V8SImode, d_copy.target));
44256 return true;
44258 return false;
44261 if (d->testing_p)
44262 break;
44264 t1 = gen_reg_rtx (V8SImode);
44265 t2 = gen_reg_rtx (V8SImode);
44266 t3 = gen_reg_rtx (V4DImode);
44267 t4 = gen_reg_rtx (V4DImode);
44268 t5 = gen_reg_rtx (V4DImode);
44270 /* Shuffle the lanes around into
44271 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
44272 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
44273 gen_lowpart (V4DImode, d->op1),
44274 GEN_INT (0x20)));
44275 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
44276 gen_lowpart (V4DImode, d->op1),
44277 GEN_INT (0x31)));
44279 /* Swap the 2nd and 3rd position in each lane into
44280 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
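	 /* The pshufd immediate 2*4 + 1*16 + 3*64 encodes the in-lane element
	    order { 0, 2, 1, 3 }, two bits per destination element.  */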
44281 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
44282 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44283 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
44284 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44286 /* Now a vpunpck[lh]qdq will produce
44287 { 0 2 4 6 8 a c e } and { 1 3 5 7 9 b d f } respectively. */
44288 if (odd)
44289 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
44290 gen_lowpart (V4DImode, t2));
44291 else
44292 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
44293 gen_lowpart (V4DImode, t2));
44294 emit_insn (t3);
44295 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
44296 break;
44298 default:
44299 gcc_unreachable ();
44302 return true;
44305 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
44306 extract-even and extract-odd permutations. */
44308 static bool
44309 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
44311 unsigned i, odd, nelt = d->nelt;
44313 odd = d->perm[0];
44314 if (odd != 0 && odd != 1)
44315 return false;
44317 for (i = 1; i < nelt; ++i)
44318 if (d->perm[i] != 2 * i + odd)
44319 return false;
44321 return expand_vec_perm_even_odd_1 (d, odd);
44324 /* A subroutine of ix86_expand_vec_perm_const_1. Implement broadcast
44325 permutations. We assume that expand_vec_perm_1 has already failed. */
44327 static bool
44328 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
44330 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
44331 enum machine_mode vmode = d->vmode;
44332 unsigned char perm2[4];
44333 rtx op0 = d->op0, dest;
44334 bool ok;
44336 switch (vmode)
44338 case V4DFmode:
44339 case V8SFmode:
44340 /* These are special-cased in sse.md so that we can optionally
44341 use the vbroadcast instruction. They expand to two insns
44342 if the input happens to be in a register. */
44343 gcc_unreachable ();
44345 case V2DFmode:
44346 case V2DImode:
44347 case V4SFmode:
44348 case V4SImode:
44349 /* These are always implementable using standard shuffle patterns. */
44350 gcc_unreachable ();
44352 case V8HImode:
44353 case V16QImode:
44354 /* These can be implemented via interleave. We save one insn by
44355 stopping once we have promoted to V4SImode and then using pshufd. */
44356 if (d->testing_p)
44357 return true;
44360 rtx dest;
44361 rtx (*gen) (rtx, rtx, rtx)
44362 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
44363 : gen_vec_interleave_lowv8hi;
44365 if (elt >= nelt2)
44367 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
44368 : gen_vec_interleave_highv8hi;
44369 elt -= nelt2;
44371 nelt2 /= 2;
44373 dest = gen_reg_rtx (vmode);
44374 emit_insn (gen (dest, op0, op0));
44375 vmode = get_mode_wider_vector (vmode);
44376 op0 = gen_lowpart (vmode, dest);
44378 while (vmode != V4SImode);
44380 memset (perm2, elt, 4);
44381 dest = gen_reg_rtx (V4SImode);
44382 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
44383 gcc_assert (ok);
44384 if (!d->testing_p)
44385 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
44386 return true;
44388 case V32QImode:
44389 case V16HImode:
44390 case V8SImode:
44391 case V4DImode:
44392 /* For AVX2 broadcasts of the first element vpbroadcast* or
44393 vpermq should be used by expand_vec_perm_1. */
44394 gcc_assert (!TARGET_AVX2 || d->perm[0]);
44395 return false;
44397 default:
44398 gcc_unreachable ();
44402 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
44403 broadcast permutations. */
44405 static bool
44406 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
44408 unsigned i, elt, nelt = d->nelt;
44410 if (!d->one_operand_p)
44411 return false;
44413 elt = d->perm[0];
44414 for (i = 1; i < nelt; ++i)
44415 if (d->perm[i] != elt)
44416 return false;
44418 return expand_vec_perm_broadcast_1 (d);
44421 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
44422 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
44423 all the shorter instruction sequences. */
44425 static bool
44426 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
44428 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
44429 unsigned int i, nelt, eltsz;
44430 bool used[4];
44432 if (!TARGET_AVX2
44433 || d->one_operand_p
44434 || (d->vmode != V32QImode && d->vmode != V16HImode))
44435 return false;
44437 if (d->testing_p)
44438 return true;
44440 nelt = d->nelt;
44441 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44443 /* Generate 4 permutation masks. If the required element is within
44444 the same lane, it is shuffled in. If the required element is from
44445 the other lane, force a zero by setting bit 7 in the permutation mask.
44446 The other mask has non-negative elements where an element is
44447 requested from the other lane; those elements are also moved to the
44448 other lane, so that the result of vpshufb can have its two V2TImode
44449 halves swapped. */
44450 m128 = GEN_INT (-128);
44451 for (i = 0; i < 32; ++i)
44453 rperm[0][i] = m128;
44454 rperm[1][i] = m128;
44455 rperm[2][i] = m128;
44456 rperm[3][i] = m128;
44458 used[0] = false;
44459 used[1] = false;
44460 used[2] = false;
44461 used[3] = false;
44462 for (i = 0; i < nelt; ++i)
44464 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44465 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44466 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
44468 for (j = 0; j < eltsz; ++j)
44469 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
44470 used[which] = true;
44473 for (i = 0; i < 2; ++i)
44475 if (!used[2 * i + 1])
44477 h[i] = NULL_RTX;
44478 continue;
44480 vperm = gen_rtx_CONST_VECTOR (V32QImode,
44481 gen_rtvec_v (32, rperm[2 * i + 1]));
44482 vperm = force_reg (V32QImode, vperm);
44483 h[i] = gen_reg_rtx (V32QImode);
44484 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44485 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
44488 /* Swap the 128-bit lanes of h[X]. */
44489 for (i = 0; i < 2; ++i)
44491 if (h[i] == NULL_RTX)
44492 continue;
44493 op = gen_reg_rtx (V4DImode);
44494 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
44495 const2_rtx, GEN_INT (3), const0_rtx,
44496 const1_rtx));
44497 h[i] = gen_lowpart (V32QImode, op);
44500 for (i = 0; i < 2; ++i)
44502 if (!used[2 * i])
44504 l[i] = NULL_RTX;
44505 continue;
44507 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
44508 vperm = force_reg (V32QImode, vperm);
44509 l[i] = gen_reg_rtx (V32QImode);
44510 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44511 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
44514 for (i = 0; i < 2; ++i)
44516 if (h[i] && l[i])
44518 op = gen_reg_rtx (V32QImode);
44519 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
44520 l[i] = op;
44522 else if (h[i])
44523 l[i] = h[i];
44526 gcc_assert (l[0] && l[1]);
44527 op = d->target;
44528 if (d->vmode != V32QImode)
44529 op = gen_reg_rtx (V32QImode);
44530 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
44531 if (op != d->target)
44532 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44533 return true;
44536 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
44537 With all of the interface bits taken care of, perform the expansion
44538 in D and return true on success. */
44540 static bool
44541 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
44543 /* Try a single instruction expansion. */
44544 if (expand_vec_perm_1 (d))
44545 return true;
44547 /* Try sequences of two instructions. */
44549 if (expand_vec_perm_pshuflw_pshufhw (d))
44550 return true;
44552 if (expand_vec_perm_palignr (d))
44553 return true;
44555 if (expand_vec_perm_interleave2 (d))
44556 return true;
44558 if (expand_vec_perm_broadcast (d))
44559 return true;
44561 if (expand_vec_perm_vpermq_perm_1 (d))
44562 return true;
44564 if (expand_vec_perm_vperm2f128 (d))
44565 return true;
44567 /* Try sequences of three instructions. */
44569 if (expand_vec_perm_2vperm2f128_vshuf (d))
44570 return true;
44572 if (expand_vec_perm_pshufb2 (d))
44573 return true;
44575 if (expand_vec_perm_interleave3 (d))
44576 return true;
44578 if (expand_vec_perm_vperm2f128_vblend (d))
44579 return true;
44581 /* Try sequences of four instructions. */
44583 if (expand_vec_perm_vpshufb2_vpermq (d))
44584 return true;
44586 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
44587 return true;
44589 /* ??? Look for narrow permutations whose element orderings would
44590 allow the promotion to a wider mode. */
44592 /* ??? Look for sequences of interleave or a wider permute that place
44593 the data into the correct lanes for a half-vector shuffle like
44594 pshuf[lh]w or vpermilps. */
44596 /* ??? Look for sequences of interleave that produce the desired results.
44597 The combinatorics of punpck[lh] get pretty ugly... */
44599 if (expand_vec_perm_even_odd (d))
44600 return true;
44602 /* Even longer sequences. */
44603 if (expand_vec_perm_vpshufb4_vpermq2 (d))
44604 return true;
44606 return false;
44609 /* If a permutation only uses one operand, make it clear. Returns true
44610 if the permutation references both operands. */
44612 static bool
44613 canonicalize_perm (struct expand_vec_perm_d *d)
44615 int i, which, nelt = d->nelt;
44617 for (i = which = 0; i < nelt; ++i)
44618 which |= (d->perm[i] < nelt ? 1 : 2);
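	 /* Bit 0 of WHICH is set if any element comes from the first operand,
	    bit 1 if any element comes from the second.  */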
44620 d->one_operand_p = true;
44621 switch (which)
44623 default:
44624 gcc_unreachable();
44626 case 3:
44627 if (!rtx_equal_p (d->op0, d->op1))
44629 d->one_operand_p = false;
44630 break;
44632 /* The elements of PERM do not suggest that only the first operand
44633 is used, but both operands are identical. Allow easier matching
44634 of the permutation by folding the permutation into the single
44635 input vector. */
44636 /* FALLTHRU */
44638 case 2:
44639 for (i = 0; i < nelt; ++i)
44640 d->perm[i] &= nelt - 1;
44641 d->op0 = d->op1;
44642 break;
44644 case 1:
44645 d->op1 = d->op0;
44646 break;
44649 return (which == 3);
44652 bool
44653 ix86_expand_vec_perm_const (rtx operands[4])
44655 struct expand_vec_perm_d d;
44656 unsigned char perm[MAX_VECT_LEN];
44657 int i, nelt;
44658 bool two_args;
44659 rtx sel;
44661 d.target = operands[0];
44662 d.op0 = operands[1];
44663 d.op1 = operands[2];
44664 sel = operands[3];
44666 d.vmode = GET_MODE (d.target);
44667 gcc_assert (VECTOR_MODE_P (d.vmode));
44668 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44669 d.testing_p = false;
44671 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
44672 gcc_assert (XVECLEN (sel, 0) == nelt);
44673 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
44675 for (i = 0; i < nelt; ++i)
44677 rtx e = XVECEXP (sel, 0, i);
44678 int ei = INTVAL (e) & (2 * nelt - 1);
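	 /* Selector elements are reduced modulo 2*NELT, the total number of
	    elements in the two concatenated input vectors.  */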
44679 d.perm[i] = ei;
44680 perm[i] = ei;
44683 two_args = canonicalize_perm (&d);
44685 if (ix86_expand_vec_perm_const_1 (&d))
44686 return true;
44688 /* If the selector says both arguments are needed, but the operands are the
44689 same, the above tried to expand with one_operand_p and a flattened selector.
44690 If that didn't work, retry without one_operand_p; we succeeded with that
44691 during testing. */
44692 if (two_args && d.one_operand_p)
44694 d.one_operand_p = false;
44695 memcpy (d.perm, perm, sizeof (perm));
44696 return ix86_expand_vec_perm_const_1 (&d);
44699 return false;
44702 /* Implement targetm.vectorize.vec_perm_const_ok. */
44704 static bool
44705 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
44706 const unsigned char *sel)
44708 struct expand_vec_perm_d d;
44709 unsigned int i, nelt, which;
44710 bool ret;
44712 d.vmode = vmode;
44713 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44714 d.testing_p = true;
44716 /* Given sufficient ISA support we can just return true here
44717 for selected vector modes. */
44718 if (d.vmode == V16SImode || d.vmode == V16SFmode
44719 || d.vmode == V8DFmode || d.vmode == V8DImode)
44720 /* All implementable with a single vpermi2 insn. */
44721 return true;
44722 if (GET_MODE_SIZE (d.vmode) == 16)
44724 /* All implementable with a single vpperm insn. */
44725 if (TARGET_XOP)
44726 return true;
44727 /* All implementable with 2 pshufb + 1 ior. */
44728 if (TARGET_SSSE3)
44729 return true;
44730 /* All implementable with shufpd or unpck[lh]pd. */
44731 if (d.nelt == 2)
44732 return true;
44735 /* Extract the values from the vector CST into the permutation
44736 array in D. */
44737 memcpy (d.perm, sel, nelt);
44738 for (i = which = 0; i < nelt; ++i)
44740 unsigned char e = d.perm[i];
44741 gcc_assert (e < 2 * nelt);
44742 which |= (e < nelt ? 1 : 2);
44745 /* If all elements are from the second vector, fold them to the first. */
44746 if (which == 2)
44747 for (i = 0; i < nelt; ++i)
44748 d.perm[i] -= nelt;
44750 /* Check whether the mask can be applied to the vector type. */
44751 d.one_operand_p = (which != 3);
44753 /* Implementable with shufps or pshufd. */
44754 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
44755 return true;
44757 /* Otherwise we have to go through the motions and see if we can
44758 figure out how to generate the requested permutation. */
44759 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
44760 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
44761 if (!d.one_operand_p)
44762 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
44764 start_sequence ();
44765 ret = ix86_expand_vec_perm_const_1 (&d);
44766 end_sequence ();
44768 return ret;
44771 void
44772 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
44774 struct expand_vec_perm_d d;
44775 unsigned i, nelt;
44777 d.target = targ;
44778 d.op0 = op0;
44779 d.op1 = op1;
44780 d.vmode = GET_MODE (targ);
44781 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44782 d.one_operand_p = false;
44783 d.testing_p = false;
44785 for (i = 0; i < nelt; ++i)
44786 d.perm[i] = i * 2 + odd;
44788 /* We'll either be able to implement the permutation directly... */
44789 if (expand_vec_perm_1 (&d))
44790 return;
44792 /* ... or we use the special-case patterns. */
44793 expand_vec_perm_even_odd_1 (&d, odd);
44796 static void
44797 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
44799 struct expand_vec_perm_d d;
44800 unsigned i, nelt, base;
44801 bool ok;
44803 d.target = targ;
44804 d.op0 = op0;
44805 d.op1 = op1;
44806 d.vmode = GET_MODE (targ);
44807 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44808 d.one_operand_p = false;
44809 d.testing_p = false;
44811 base = high_p ? nelt / 2 : 0;
44812 for (i = 0; i < nelt / 2; ++i)
44814 d.perm[i * 2] = i + base;
44815 d.perm[i * 2 + 1] = i + base + nelt;
44818 /* Note that for AVX this isn't one instruction. */
44819 ok = ix86_expand_vec_perm_const_1 (&d);
44820 gcc_assert (ok);
44824 /* Expand a vector operation CODE for a V*QImode in terms of the
44825 same operation on V*HImode. */
44827 void
44828 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
44830 enum machine_mode qimode = GET_MODE (dest);
44831 enum machine_mode himode;
44832 rtx (*gen_il) (rtx, rtx, rtx);
44833 rtx (*gen_ih) (rtx, rtx, rtx);
44834 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
44835 struct expand_vec_perm_d d;
44836 bool ok, full_interleave;
44837 bool uns_p = false;
44838 int i;
44840 switch (qimode)
44842 case V16QImode:
44843 himode = V8HImode;
44844 gen_il = gen_vec_interleave_lowv16qi;
44845 gen_ih = gen_vec_interleave_highv16qi;
44846 break;
44847 case V32QImode:
44848 himode = V16HImode;
44849 gen_il = gen_avx2_interleave_lowv32qi;
44850 gen_ih = gen_avx2_interleave_highv32qi;
44851 break;
44852 default:
44853 gcc_unreachable ();
44856 op2_l = op2_h = op2;
44857 switch (code)
44859 case MULT:
44860 /* Unpack data such that we've got a source byte in each low byte of
44861 each word. We don't care what goes into the high byte of each word.
44862 Rather than trying to get zero in there, it is most convenient to
44863 let it be a copy of the low byte. */
44864 op2_l = gen_reg_rtx (qimode);
44865 op2_h = gen_reg_rtx (qimode);
44866 emit_insn (gen_il (op2_l, op2, op2));
44867 emit_insn (gen_ih (op2_h, op2, op2));
44868 /* FALLTHRU */
44870 op1_l = gen_reg_rtx (qimode);
44871 op1_h = gen_reg_rtx (qimode);
44872 emit_insn (gen_il (op1_l, op1, op1));
44873 emit_insn (gen_ih (op1_h, op1, op1));
44874 full_interleave = qimode == V16QImode;
44875 break;
44877 case ASHIFT:
44878 case LSHIFTRT:
44879 uns_p = true;
44880 /* FALLTHRU */
44881 case ASHIFTRT:
44882 op1_l = gen_reg_rtx (himode);
44883 op1_h = gen_reg_rtx (himode);
44884 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
44885 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
44886 full_interleave = true;
44887 break;
44888 default:
44889 gcc_unreachable ();
44892 /* Perform the operation. */
44893 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
44894 1, OPTAB_DIRECT);
44895 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
44896 1, OPTAB_DIRECT);
44897 gcc_assert (res_l && res_h);
44899 /* Merge the data back into the right place. */
44900 d.target = dest;
44901 d.op0 = gen_lowpart (qimode, res_l);
44902 d.op1 = gen_lowpart (qimode, res_h);
44903 d.vmode = qimode;
44904 d.nelt = GET_MODE_NUNITS (qimode);
44905 d.one_operand_p = false;
44906 d.testing_p = false;
44908 if (full_interleave)
44910 /* For SSE2, we used a full interleave, so the desired
44911 results are in the even elements. */
44912 for (i = 0; i < 32; ++i)
44913 d.perm[i] = i * 2;
44915 else
44917 /* For AVX, the interleave used above was not cross-lane. So the
44918 extraction takes the even elements, but with the second and third quarters
44919 swapped. Happily, that is even one insn shorter than a plain even extraction. */
44920 for (i = 0; i < 32; ++i)
44921 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
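	 /* I.e. take even byte 2*I, but exchange the second and third
	    eight-element quarters (I in [8,16) and [16,24)) to compensate
	    for the in-lane interleave used above.  */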
44924 ok = ix86_expand_vec_perm_const_1 (&d);
44925 gcc_assert (ok);
44927 set_unique_reg_note (get_last_insn (), REG_EQUAL,
44928 gen_rtx_fmt_ee (code, qimode, op1, op2));
44931 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
44932 if OP is a CONST_VECTOR with all odd elements equal to their
44933 preceding element. */
44935 static bool
44936 const_vector_equal_evenodd_p (rtx op)
44938 enum machine_mode mode = GET_MODE (op);
44939 int i, nunits = GET_MODE_NUNITS (mode);
44940 if (GET_CODE (op) != CONST_VECTOR
44941 || nunits != CONST_VECTOR_NUNITS (op))
44942 return false;
44943 for (i = 0; i < nunits; i += 2)
44944 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
44945 return false;
44946 return true;
44949 void
44950 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
44951 bool uns_p, bool odd_p)
44953 enum machine_mode mode = GET_MODE (op1);
44954 enum machine_mode wmode = GET_MODE (dest);
44955 rtx x;
44956 rtx orig_op1 = op1, orig_op2 = op2;
44958 if (!nonimmediate_operand (op1, mode))
44959 op1 = force_reg (mode, op1);
44960 if (!nonimmediate_operand (op2, mode))
44961 op2 = force_reg (mode, op2);
44963 /* We only play even/odd games with vectors of SImode. */
44964 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
44966 /* If we're looking for the odd results, shift those members down to
44967 the even slots. For some CPUs this is faster than a PSHUFD. */
44968 if (odd_p)
44970 /* For XOP use vpmacsdqh, but only for smult, as it is only
44971 signed. */
44972 if (TARGET_XOP && mode == V4SImode && !uns_p)
44974 x = force_reg (wmode, CONST0_RTX (wmode));
44975 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
44976 return;
44979 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
44980 if (!const_vector_equal_evenodd_p (orig_op1))
44981 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
44982 x, NULL, 1, OPTAB_DIRECT);
44983 if (!const_vector_equal_evenodd_p (orig_op2))
44984 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
44985 x, NULL, 1, OPTAB_DIRECT);
44986 op1 = gen_lowpart (mode, op1);
44987 op2 = gen_lowpart (mode, op2);
44990 if (mode == V16SImode)
44992 if (uns_p)
44993 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
44994 else
44995 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
44997 else if (mode == V8SImode)
44999 if (uns_p)
45000 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
45001 else
45002 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
45004 else if (uns_p)
45005 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
45006 else if (TARGET_SSE4_1)
45007 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
45008 else
45010 rtx s1, s2, t0, t1, t2;
45012 /* The easiest way to implement this without PMULDQ is to go through
45013 the motions as if we are performing a full 64-bit multiply, except
45014 that we need to do less shuffling of the elements. */
45016 /* Compute the sign-extension, aka highparts, of the two operands. */
45017 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45018 op1, pc_rtx, pc_rtx);
45019 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45020 op2, pc_rtx, pc_rtx);
45022 /* Multiply LO(A) * HI(B), and vice-versa. */
45023 t1 = gen_reg_rtx (wmode);
45024 t2 = gen_reg_rtx (wmode);
45025 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
45026 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
45028 /* Multiply LO(A) * LO(B). */
45029 t0 = gen_reg_rtx (wmode);
45030 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
45032 /* Combine and shift the highparts into place. */
45033 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
45034 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
45035 1, OPTAB_DIRECT);
45037 /* Combine high and low parts. */
45038 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
45039 return;
45041 emit_insn (x);
45044 void
45045 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
45046 bool uns_p, bool high_p)
45048 enum machine_mode wmode = GET_MODE (dest);
45049 enum machine_mode mode = GET_MODE (op1);
45050 rtx t1, t2, t3, t4, mask;
45052 switch (mode)
45054 case V4SImode:
45055 t1 = gen_reg_rtx (mode);
45056 t2 = gen_reg_rtx (mode);
45057 if (TARGET_XOP && !uns_p)
45059 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
45060 shuffle the elements once so that all elements are in the right
45061 place for immediate use: { A C B D }. */
45062 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
45063 const1_rtx, GEN_INT (3)));
45064 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
45065 const1_rtx, GEN_INT (3)));
45067 else
45069 /* Put the elements into place for the multiply. */
45070 ix86_expand_vec_interleave (t1, op1, op1, high_p);
45071 ix86_expand_vec_interleave (t2, op2, op2, high_p);
45072 high_p = false;
45074 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
45075 break;
45077 case V8SImode:
45078 /* Shuffle the elements between the lanes. After this we
45079 have { A B E F | C D G H } for each operand. */
45080 t1 = gen_reg_rtx (V4DImode);
45081 t2 = gen_reg_rtx (V4DImode);
45082 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
45083 const0_rtx, const2_rtx,
45084 const1_rtx, GEN_INT (3)));
45085 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
45086 const0_rtx, const2_rtx,
45087 const1_rtx, GEN_INT (3)));
45089 /* Shuffle the elements within the lanes. After this we
45090 have { A A B B | C C D D } or { E E F F | G G H H }. */
45091 t3 = gen_reg_rtx (V8SImode);
45092 t4 = gen_reg_rtx (V8SImode);
45093 mask = GEN_INT (high_p
45094 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
45095 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
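	 /* The pshufd selector duplicates each element within the lane:
	    { 2, 2, 3, 3 } when the high parts are wanted, { 0, 0, 1, 1 }
	    otherwise.  */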
45096 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
45097 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
45099 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
45100 break;
45102 case V8HImode:
45103 case V16HImode:
45104 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
45105 uns_p, OPTAB_DIRECT);
45106 t2 = expand_binop (mode,
45107 uns_p ? umul_highpart_optab : smul_highpart_optab,
45108 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
45109 gcc_assert (t1 && t2);
45111 t3 = gen_reg_rtx (mode);
45112 ix86_expand_vec_interleave (t3, t1, t2, high_p);
45113 emit_move_insn (dest, gen_lowpart (wmode, t3));
45114 break;
45116 case V16QImode:
45117 case V32QImode:
45118 t1 = gen_reg_rtx (wmode);
45119 t2 = gen_reg_rtx (wmode);
45120 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
45121 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
45123 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
45124 break;
45126 default:
45127 gcc_unreachable ();
45131 void
45132 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
45134 rtx res_1, res_2, res_3, res_4;
45136 res_1 = gen_reg_rtx (V4SImode);
45137 res_2 = gen_reg_rtx (V4SImode);
45138 res_3 = gen_reg_rtx (V2DImode);
45139 res_4 = gen_reg_rtx (V2DImode);
45140 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
45141 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
45143 /* Move the results in element 2 down to element 1; we don't care
45144 what goes in elements 2 and 3. Then we can merge the parts
45145 back together with an interleave.
45147 Note that two other sequences were tried:
45148 (1) Use interleaves at the start instead of psrldq, which allows
45149 us to use a single shufps to merge things back at the end.
45150 (2) Use shufps here to combine the two vectors, then pshufd to
45151 put the elements in the correct order.
45152 In both cases the cost of the reformatting stall was too high
45153 and the overall sequence slower. */
45155 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
45156 const0_rtx, const2_rtx,
45157 const0_rtx, const0_rtx));
45158 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
45159 const0_rtx, const2_rtx,
45160 const0_rtx, const0_rtx));
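	 /* Each pshufd uses the element order { 0, 2, 0, 0 }, moving the low
	    32 bits of both 64-bit products into elements 0 and 1; elements 2
	    and 3 are don't-cares.  */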
45161 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
45163 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
45166 void
45167 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
45169 enum machine_mode mode = GET_MODE (op0);
45170 rtx t1, t2, t3, t4, t5, t6;
45172 if (TARGET_XOP && mode == V2DImode)
45174 /* op1: A,B,C,D, op2: E,F,G,H */
45175 op1 = gen_lowpart (V4SImode, op1);
45176 op2 = gen_lowpart (V4SImode, op2);
45178 t1 = gen_reg_rtx (V4SImode);
45179 t2 = gen_reg_rtx (V4SImode);
45180 t3 = gen_reg_rtx (V2DImode);
45181 t4 = gen_reg_rtx (V2DImode);
45183 /* t1: B,A,D,C */
45184 emit_insn (gen_sse2_pshufd_1 (t1, op1,
45185 GEN_INT (1),
45186 GEN_INT (0),
45187 GEN_INT (3),
45188 GEN_INT (2)));
45190 /* t2: (B*E),(A*F),(D*G),(C*H) */
45191 emit_insn (gen_mulv4si3 (t2, t1, op2));
45193 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
45194 emit_insn (gen_xop_phadddq (t3, t2));
45196 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
45197 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
45199 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
45200 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
45202 else
45204 enum machine_mode nmode;
45205 rtx (*umul) (rtx, rtx, rtx);
45207 if (mode == V2DImode)
45209 umul = gen_vec_widen_umult_even_v4si;
45210 nmode = V4SImode;
45212 else if (mode == V4DImode)
45214 umul = gen_vec_widen_umult_even_v8si;
45215 nmode = V8SImode;
45217 else if (mode == V8DImode)
45219 umul = gen_vec_widen_umult_even_v16si;
45220 nmode = V16SImode;
45222 else
45223 gcc_unreachable ();
45226 /* Multiply low parts. */
45227 t1 = gen_reg_rtx (mode);
45228 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
45230 /* Shift input vectors right 32 bits so we can multiply high parts. */
45231 t6 = GEN_INT (32);
45232 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
45233 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
45235 /* Multiply high parts by low parts. */
45236 t4 = gen_reg_rtx (mode);
45237 t5 = gen_reg_rtx (mode);
45238 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
45239 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
45241 /* Combine and shift the highparts back. */
45242 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
45243 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
45245 /* Combine high and low parts. */
45246 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
45249 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45250 gen_rtx_MULT (mode, op1, op2));
45253 /* Calculate integer abs() using only SSE2 instructions. */
45255 void
45256 ix86_expand_sse2_abs (rtx target, rtx input)
45258 enum machine_mode mode = GET_MODE (target);
45259 rtx tmp0, tmp1, x;
45261 switch (mode)
45263 /* For 32-bit signed integer X, the best way to calculate the absolute
45264 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
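	 /* For example, for X = -5: X >> 31 = -1, -1 ^ -5 = 4,
	    and 4 - (-1) = 5.  */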
45265 case V4SImode:
45266 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
45267 GEN_INT (GET_MODE_BITSIZE
45268 (GET_MODE_INNER (mode)) - 1),
45269 NULL, 0, OPTAB_DIRECT);
45270 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
45271 NULL, 0, OPTAB_DIRECT);
45272 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
45273 target, 0, OPTAB_DIRECT);
45274 break;
45276 /* For 16-bit signed integer X, the best way to calculate the absolute
45277 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
45278 case V8HImode:
45279 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45281 x = expand_simple_binop (mode, SMAX, tmp0, input,
45282 target, 0, OPTAB_DIRECT);
45283 break;
45285 /* For 8-bit signed integer X, the best way to calculate the absolute
45286 value of X is min ((unsigned char) X, (unsigned char) (-X)),
45287 as SSE2 provides the PMINUB insn. */
45288 case V16QImode:
45289 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45291 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
45292 target, 0, OPTAB_DIRECT);
45293 break;
45295 default:
45296 gcc_unreachable ();
45299 if (x != target)
45300 emit_move_insn (target, x);
45303 /* Expand an insert into a vector register through pinsr insn.
45304 Return true if successful. */
45306 bool
45307 ix86_expand_pinsr (rtx *operands)
45309 rtx dst = operands[0];
45310 rtx src = operands[3];
45312 unsigned int size = INTVAL (operands[1]);
45313 unsigned int pos = INTVAL (operands[2]);
45315 if (GET_CODE (dst) == SUBREG)
45317 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
45318 dst = SUBREG_REG (dst);
45321 if (GET_CODE (src) == SUBREG)
45322 src = SUBREG_REG (src);
45324 switch (GET_MODE (dst))
45326 case V16QImode:
45327 case V8HImode:
45328 case V4SImode:
45329 case V2DImode:
45331 enum machine_mode srcmode, dstmode;
45332 rtx (*pinsr)(rtx, rtx, rtx, rtx);
45334 srcmode = mode_for_size (size, MODE_INT, 0);
45336 switch (srcmode)
45338 case QImode:
45339 if (!TARGET_SSE4_1)
45340 return false;
45341 dstmode = V16QImode;
45342 pinsr = gen_sse4_1_pinsrb;
45343 break;
45345 case HImode:
45346 if (!TARGET_SSE2)
45347 return false;
45348 dstmode = V8HImode;
45349 pinsr = gen_sse2_pinsrw;
45350 break;
45352 case SImode:
45353 if (!TARGET_SSE4_1)
45354 return false;
45355 dstmode = V4SImode;
45356 pinsr = gen_sse4_1_pinsrd;
45357 break;
45359 case DImode:
45360 gcc_assert (TARGET_64BIT);
45361 if (!TARGET_SSE4_1)
45362 return false;
45363 dstmode = V2DImode;
45364 pinsr = gen_sse4_1_pinsrq;
45365 break;
45367 default:
45368 return false;
45371 rtx d = dst;
45372 if (GET_MODE (dst) != dstmode)
45373 d = gen_reg_rtx (dstmode);
45374 src = gen_lowpart (srcmode, src);
45376 pos /= size;
45378 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
45379 GEN_INT (1 << pos)));
45380 if (d != dst)
45381 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
45382 return true;
45385 default:
45386 return false;
45390 /* This function returns the calling-ABI-specific va_list type node.
45391 It returns the FNDECL-specific va_list type. */
45393 static tree
45394 ix86_fn_abi_va_list (tree fndecl)
45396 if (!TARGET_64BIT)
45397 return va_list_type_node;
45398 gcc_assert (fndecl != NULL_TREE);
45400 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
45401 return ms_va_list_type_node;
45402 else
45403 return sysv_va_list_type_node;
45406 /* Returns the canonical va_list type specified by TYPE. If there
45407 is no valid TYPE provided, it returns NULL_TREE. */
45409 static tree
45410 ix86_canonical_va_list_type (tree type)
45412 tree wtype, htype;
45414 /* Resolve references and pointers to va_list type. */
45415 if (TREE_CODE (type) == MEM_REF)
45416 type = TREE_TYPE (type);
45417 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
45418 type = TREE_TYPE (type);
45419 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
45420 type = TREE_TYPE (type);
45422 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
45424 wtype = va_list_type_node;
45425 gcc_assert (wtype != NULL_TREE);
45426 htype = type;
45427 if (TREE_CODE (wtype) == ARRAY_TYPE)
45429 /* If va_list is an array type, the argument may have decayed
45430 to a pointer type, e.g. by being passed to another function.
45431 In that case, unwrap both types so that we can compare the
45432 underlying records. */
45433 if (TREE_CODE (htype) == ARRAY_TYPE
45434 || POINTER_TYPE_P (htype))
45436 wtype = TREE_TYPE (wtype);
45437 htype = TREE_TYPE (htype);
45440 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45441 return va_list_type_node;
45442 wtype = sysv_va_list_type_node;
45443 gcc_assert (wtype != NULL_TREE);
45444 htype = type;
45445 if (TREE_CODE (wtype) == ARRAY_TYPE)
45447 /* If va_list is an array type, the argument may have decayed
45448 to a pointer type, e.g. by being passed to another function.
45449 In that case, unwrap both types so that we can compare the
45450 underlying records. */
45451 if (TREE_CODE (htype) == ARRAY_TYPE
45452 || POINTER_TYPE_P (htype))
45454 wtype = TREE_TYPE (wtype);
45455 htype = TREE_TYPE (htype);
45458 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45459 return sysv_va_list_type_node;
45460 wtype = ms_va_list_type_node;
45461 gcc_assert (wtype != NULL_TREE);
45462 htype = type;
45463 if (TREE_CODE (wtype) == ARRAY_TYPE)
45465 /* If va_list is an array type, the argument may have decayed
45466 to a pointer type, e.g. by being passed to another function.
45467 In that case, unwrap both types so that we can compare the
45468 underlying records. */
45469 if (TREE_CODE (htype) == ARRAY_TYPE
45470 || POINTER_TYPE_P (htype))
45472 wtype = TREE_TYPE (wtype);
45473 htype = TREE_TYPE (htype);
45476 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45477 return ms_va_list_type_node;
45478 return NULL_TREE;
45480 return std_canonical_va_list_type (type);
45483 /* Iterate through the target-specific builtin types for va_list.
45484 IDX denotes the iterator, *PTREE is set to the result type of
45485 the va_list builtin, and *PNAME to its internal type.
45486 Returns zero if there is no element for this index, otherwise
45487 IDX should be increased upon the next call.
45488 Note, do not iterate a base builtin's name like __builtin_va_list.
45489 Used from c_common_nodes_and_builtins. */
45491 static int
45492 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
45494 if (TARGET_64BIT)
45496 switch (idx)
45498 default:
45499 break;
45501 case 0:
45502 *ptree = ms_va_list_type_node;
45503 *pname = "__builtin_ms_va_list";
45504 return 1;
45506 case 1:
45507 *ptree = sysv_va_list_type_node;
45508 *pname = "__builtin_sysv_va_list";
45509 return 1;
45513 return 0;
45516 #undef TARGET_SCHED_DISPATCH
45517 #define TARGET_SCHED_DISPATCH has_dispatch
45518 #undef TARGET_SCHED_DISPATCH_DO
45519 #define TARGET_SCHED_DISPATCH_DO do_dispatch
45520 #undef TARGET_SCHED_REASSOCIATION_WIDTH
45521 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
45522 #undef TARGET_SCHED_REORDER
45523 #define TARGET_SCHED_REORDER ix86_sched_reorder
45524 #undef TARGET_SCHED_ADJUST_PRIORITY
45525 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
45526 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
45527 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
45528 ix86_dependencies_evaluation_hook
45530 /* The size of the dispatch window is the total number of bytes of
45531 object code allowed in a window. */
45532 #define DISPATCH_WINDOW_SIZE 16
45534 /* Number of dispatch windows considered for scheduling. */
45535 #define MAX_DISPATCH_WINDOWS 3
45537 /* Maximum number of instructions in a window. */
45538 #define MAX_INSN 4
45540 /* Maximum number of immediate operands in a window. */
45541 #define MAX_IMM 4
45543 /* Maximum number of immediate bits allowed in a window. */
45544 #define MAX_IMM_SIZE 128
45546 /* Maximum number of 32 bit immediates allowed in a window. */
45547 #define MAX_IMM_32 4
45549 /* Maximum number of 64 bit immediates allowed in a window. */
45550 #define MAX_IMM_64 2
45552 /* Maximum total of loads or prefetches allowed in a window. */
45553 #define MAX_LOAD 2
45555 /* Maximum total of stores allowed in a window. */
45556 #define MAX_STORE 1
45558 #undef BIG
45559 #define BIG 100
45562 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
45563 enum dispatch_group {
45564 disp_no_group = 0,
45565 disp_load,
45566 disp_store,
45567 disp_load_store,
45568 disp_prefetch,
45569 disp_imm,
45570 disp_imm_32,
45571 disp_imm_64,
45572 disp_branch,
45573 disp_cmp,
45574 disp_jcc,
45575 disp_last
45578 /* Number of allowable groups in a dispatch window. It is an array
45579 indexed by the dispatch_group enum. 100 is used as a big number,
45580 because the number of these kinds of operations does not have any
45581 effect in a dispatch window, but we need them for other reasons in
45582 the table. */
45583 static unsigned int num_allowable_groups[disp_last] = {
45584 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
45587 char group_name[disp_last + 1][16] = {
45588 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
45589 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
45590 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
45593 /* Instruction path. */
45594 enum insn_path {
45595 no_path = 0,
45596 path_single, /* Single micro op. */
45597 path_double, /* Double micro op. */
45598 path_multi, /* Instructions with more than 2 micro ops. */
45599 last_path
45602 /* sched_insn_info defines a window to the instructions scheduled in
45603 the basic block. It contains a pointer to the insn_info table and
45604 the instruction scheduled.
45606 Windows are allocated for each basic block and are linked
45607 together. */
45608 typedef struct sched_insn_info_s {
45609 rtx insn;
45610 enum dispatch_group group;
45611 enum insn_path path;
45612 int byte_len;
45613 int imm_bytes;
45614 } sched_insn_info;
45616 /* Linked list of dispatch windows. This is a two way list of
45617 dispatch windows of a basic block. It contains information about
45618 the number of uops in the window and the total number of
45619 instructions and of bytes in the object code for this dispatch
45620 window. */
45621 typedef struct dispatch_windows_s {
45622 int num_insn; /* Number of insn in the window. */
45623 int num_uops; /* Number of uops in the window. */
45624 int window_size; /* Number of bytes in the window. */
45625 int window_num; /* Window number, either 0 or 1. */
45626 int num_imm; /* Number of immediates in the window. */
45627 int num_imm_32; /* Number of 32 bit immediates in the window. */
45628 int num_imm_64; /* Number of 64 bit immediates in the window. */
45629 int imm_size; /* Total size in bytes of immediates in the window. */
45630 int num_loads; /* Total memory loads in the window. */
45631 int num_stores; /* Total memory stores in the window. */
45632 int violation; /* Violation exists in window. */
45633 sched_insn_info *window; /* Pointer to the window. */
45634 struct dispatch_windows_s *next;
45635 struct dispatch_windows_s *prev;
45636 } dispatch_windows;
45638 /* Immediate values used in an insn. */
45639 typedef struct imm_info_s
45641 int imm;
45642 int imm32;
45643 int imm64;
45644 } imm_info;
45646 static dispatch_windows *dispatch_window_list;
45647 static dispatch_windows *dispatch_window_list1;
45649 /* Get dispatch group of insn. */
45651 static enum dispatch_group
45652 get_mem_group (rtx insn)
45654 enum attr_memory memory;
45656 if (INSN_CODE (insn) < 0)
45657 return disp_no_group;
45658 memory = get_attr_memory (insn);
45659 if (memory == MEMORY_STORE)
45660 return disp_store;
45662 if (memory == MEMORY_LOAD)
45663 return disp_load;
45665 if (memory == MEMORY_BOTH)
45666 return disp_load_store;
45668 return disp_no_group;
45671 /* Return true if insn is a compare instruction. */
45673 static bool
45674 is_cmp (rtx insn)
45676 enum attr_type type;
45678 type = get_attr_type (insn);
45679 return (type == TYPE_TEST
45680 || type == TYPE_ICMP
45681 || type == TYPE_FCMP
45682 || GET_CODE (PATTERN (insn)) == COMPARE);
45685 /* Return true if a dispatch violation was encountered. */
45687 static bool
45688 dispatch_violation (void)
45690 if (dispatch_window_list->next)
45691 return dispatch_window_list->next->violation;
45692 return dispatch_window_list->violation;
45695 /* Return true if insn is a branch instruction. */
45697 static bool
45698 is_branch (rtx insn)
45700 return (CALL_P (insn) || JUMP_P (insn));
45703 /* Return true if insn is a prefetch instruction. */
45705 static bool
45706 is_prefetch (rtx insn)
45708 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
45711 /* This function initializes a dispatch window and the list container holding a
45712 pointer to the window. */
45714 static void
45715 init_window (int window_num)
45717 int i;
45718 dispatch_windows *new_list;
45720 if (window_num == 0)
45721 new_list = dispatch_window_list;
45722 else
45723 new_list = dispatch_window_list1;
45725 new_list->num_insn = 0;
45726 new_list->num_uops = 0;
45727 new_list->window_size = 0;
45728 new_list->next = NULL;
45729 new_list->prev = NULL;
45730 new_list->window_num = window_num;
45731 new_list->num_imm = 0;
45732 new_list->num_imm_32 = 0;
45733 new_list->num_imm_64 = 0;
45734 new_list->imm_size = 0;
45735 new_list->num_loads = 0;
45736 new_list->num_stores = 0;
45737 new_list->violation = false;
45739 for (i = 0; i < MAX_INSN; i++)
45741 new_list->window[i].insn = NULL;
45742 new_list->window[i].group = disp_no_group;
45743 new_list->window[i].path = no_path;
45744 new_list->window[i].byte_len = 0;
45745 new_list->window[i].imm_bytes = 0;
45747 return;
45750 /* This function allocates and initializes a dispatch window and the
45751 list container holding a pointer to the window. */
45753 static dispatch_windows *
45754 allocate_window (void)
45756 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
45757 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
45759 return new_list;
45762 /* This routine initializes the dispatch scheduling information. It
45763 initiates building dispatch scheduler tables and constructs the
45764 first dispatch window. */
45766 static void
45767 init_dispatch_sched (void)
45769 /* Allocate a dispatch list and a window. */
45770 dispatch_window_list = allocate_window ();
45771 dispatch_window_list1 = allocate_window ();
45772 init_window (0);
45773 init_window (1);
45776 /* This function returns true if a branch is detected. End of a basic block
45777 does not have to be a branch, but here we assume only branches end a
45778 window. */
45780 static bool
45781 is_end_basic_block (enum dispatch_group group)
45783 return group == disp_branch;
45786 /* This function is called when the end of a window processing is reached. */
45788 static void
45789 process_end_window (void)
45791 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
45792 if (dispatch_window_list->next)
45794 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
45795 gcc_assert (dispatch_window_list->window_size
45796 + dispatch_window_list1->window_size <= 48);
45797 init_window (1);
45799 init_window (0);
45802 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
45803 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
45804 for 48 bytes of instructions. Note that these windows are not dispatch
45805 windows whose sizes are DISPATCH_WINDOW_SIZE. */
45807 static dispatch_windows *
45808 allocate_next_window (int window_num)
45810 if (window_num == 0)
45812 if (dispatch_window_list->next)
45813 init_window (1);
45814 init_window (0);
45815 return dispatch_window_list;
45818 dispatch_window_list->next = dispatch_window_list1;
45819 dispatch_window_list1->prev = dispatch_window_list;
45821 return dispatch_window_list1;
45824 /* Increment the number of immediate operands of an instruction. */
45826 static int
45827 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
45829 if (*in_rtx == 0)
45830 return 0;
45832 switch ( GET_CODE (*in_rtx))
45834 case CONST:
45835 case SYMBOL_REF:
45836 case CONST_INT:
45837 (imm_values->imm)++;
45838 if (x86_64_immediate_operand (*in_rtx, SImode))
45839 (imm_values->imm32)++;
45840 else
45841 (imm_values->imm64)++;
45842 break;
45844 case CONST_DOUBLE:
45845 (imm_values->imm)++;
45846 (imm_values->imm64)++;
45847 break;
45849 case CODE_LABEL:
45850 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
45852 (imm_values->imm)++;
45853 (imm_values->imm32)++;
45855 break;
45857 default:
45858 break;
45861 return 0;
45864 /* Compute number of immediate operands of an instruction. */
45866 static void
45867 find_constant (rtx in_rtx, imm_info *imm_values)
45869 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
45870 (rtx_function) find_constant_1, (void *) imm_values);
45873 /* Return the total size of the immediate operands of an instruction along
45874 with the number of corresponding immediate operands. It initializes its
45875 parameters to zero before calling FIND_CONSTANT.
45876 INSN is the input instruction. IMM is the total number of immediates.
45877 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
45878 bit immediates. */
45880 static int
45881 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
45883 imm_info imm_values = {0, 0, 0};
45885 find_constant (insn, &imm_values);
45886 *imm = imm_values.imm;
45887 *imm32 = imm_values.imm32;
45888 *imm64 = imm_values.imm64;
45889 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
45892 /* This function indicates if an operand of an instruction is an
45893 immediate. */
45895 static bool
45896 has_immediate (rtx insn)
45898 int num_imm_operand;
45899 int num_imm32_operand;
45900 int num_imm64_operand;
45902 if (insn)
45903 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45904 &num_imm64_operand);
45905 return false;
45908 /* Return single or double path for instructions. */
45910 static enum insn_path
45911 get_insn_path (rtx insn)
45913 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
45915 if ((int)path == 0)
45916 return path_single;
45918 if ((int)path == 1)
45919 return path_double;
45921 return path_multi;
45924 /* Return insn dispatch group. */
45926 static enum dispatch_group
45927 get_insn_group (rtx insn)
45929 enum dispatch_group group = get_mem_group (insn);
45930 if (group)
45931 return group;
45933 if (is_branch (insn))
45934 return disp_branch;
45936 if (is_cmp (insn))
45937 return disp_cmp;
45939 if (has_immediate (insn))
45940 return disp_imm;
45942 if (is_prefetch (insn))
45943 return disp_prefetch;
45945 return disp_no_group;
45948 /* Count number of GROUP restricted instructions in a dispatch
45949 window WINDOW_LIST. */
45951 static int
45952 count_num_restricted (rtx insn, dispatch_windows *window_list)
45954 enum dispatch_group group = get_insn_group (insn);
45955 int imm_size;
45956 int num_imm_operand;
45957 int num_imm32_operand;
45958 int num_imm64_operand;
45960 if (group == disp_no_group)
45961 return 0;
45963 if (group == disp_imm)
45965 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45966 &num_imm64_operand);
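	 /* The insn does not fit (counts as BIG) if adding its immediates
	    would overflow any per-window limit: total immediate bytes
	    (MAX_IMM_SIZE), immediate count (MAX_IMM), or the 32-bit/64-bit
	    immediate slots, where a 64-bit immediate also consumes two
	    32-bit slots; there is also a special case for a window whose
	    immediate bytes are already at the limit.  */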
45967 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
45968 || num_imm_operand + window_list->num_imm > MAX_IMM
45969 || (num_imm32_operand > 0
45970 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
45971 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
45972 || (num_imm64_operand > 0
45973 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
45974 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
45975 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
45976 && num_imm64_operand > 0
45977 && ((window_list->num_imm_64 > 0
45978 && window_list->num_insn >= 2)
45979 || window_list->num_insn >= 3)))
45980 return BIG;
45982 return 1;
45985 if ((group == disp_load_store
45986 && (window_list->num_loads >= MAX_LOAD
45987 || window_list->num_stores >= MAX_STORE))
45988 || ((group == disp_load
45989 || group == disp_prefetch)
45990 && window_list->num_loads >= MAX_LOAD)
45991 || (group == disp_store
45992 && window_list->num_stores >= MAX_STORE))
45993 return BIG;
45995 return 1;
45998 /* This function returns true if insn satisfies dispatch rules on the
45999 last window scheduled. */
46001 static bool
46002 fits_dispatch_window (rtx insn)
46004 dispatch_windows *window_list = dispatch_window_list;
46005 dispatch_windows *window_list_next = dispatch_window_list->next;
46006 unsigned int num_restrict;
46007 enum dispatch_group group = get_insn_group (insn);
46008 enum insn_path path = get_insn_path (insn);
46009 int sum;
46011 /* Make disp_cmp and disp_jcc get scheduled as late as possible. These
46012 instructions should be given the lowest priority in the
46013 scheduling process in the Haifa scheduler to make sure they will be
46014 scheduled in the same dispatch window as the instruction that references them. */
46015 if (group == disp_jcc || group == disp_cmp)
46016 return false;
46018 /* Check nonrestricted. */
46019 if (group == disp_no_group || group == disp_branch)
46020 return true;
46022 /* Get last dispatch window. */
46023 if (window_list_next)
46024 window_list = window_list_next;
46026 if (window_list->window_num == 1)
46028 sum = window_list->prev->window_size + window_list->window_size;
46030 if (sum == 32
46031 || (min_insn_size (insn) + sum) >= 48)
46032 /* Window 1 is full. Go for next window. */
46033 return true;
46036 num_restrict = count_num_restricted (insn, window_list);
46038 if (num_restrict > num_allowable_groups[group])
46039 return false;
46041 /* See if it fits in the first window. */
46042 if (window_list->window_num == 0)
46044 /* The first window should have only single and double path
46045 uops. */
46046 if (path == path_double
46047 && (window_list->num_uops + 2) > MAX_INSN)
46048 return false;
46049 else if (path != path_single)
46050 return false;
46052 return true;
46055 /* Add an instruction INSN with NUM_UOPS micro-operations to the
46056 dispatch window WINDOW_LIST. */
46058 static void
46059 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
46061 int byte_len = min_insn_size (insn);
46062 int num_insn = window_list->num_insn;
46063 int imm_size;
46064 sched_insn_info *window = window_list->window;
46065 enum dispatch_group group = get_insn_group (insn);
46066 enum insn_path path = get_insn_path (insn);
46067 int num_imm_operand;
46068 int num_imm32_operand;
46069 int num_imm64_operand;
46071 if (!window_list->violation && group != disp_cmp
46072 && !fits_dispatch_window (insn))
46073 window_list->violation = true;
46075 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46076 &num_imm64_operand);
46078 /* Initialize window with new instruction. */
46079 window[num_insn].insn = insn;
46080 window[num_insn].byte_len = byte_len;
46081 window[num_insn].group = group;
46082 window[num_insn].path = path;
46083 window[num_insn].imm_bytes = imm_size;
46085 window_list->window_size += byte_len;
46086 window_list->num_insn = num_insn + 1;
46087 window_list->num_uops = window_list->num_uops + num_uops;
46088 window_list->imm_size += imm_size;
46089 window_list->num_imm += num_imm_operand;
46090 window_list->num_imm_32 += num_imm32_operand;
46091 window_list->num_imm_64 += num_imm64_operand;
46093 if (group == disp_store)
46094 window_list->num_stores += 1;
46095 else if (group == disp_load
46096 || group == disp_prefetch)
46097 window_list->num_loads += 1;
46098 else if (group == disp_load_store)
46100 window_list->num_stores += 1;
46101 window_list->num_loads += 1;
46105 /* Add a scheduled instruction, INSN, to the current dispatch window.
46106 If the total byte count or the number of instructions in the window
46107 would exceed the allowed limit, allocate a new window. */
46109 static void
46110 add_to_dispatch_window (rtx insn)
46112 int byte_len;
46113 dispatch_windows *window_list;
46114 dispatch_windows *next_list;
46115 dispatch_windows *window0_list;
46116 enum insn_path path;
46117 enum dispatch_group insn_group;
46118 bool insn_fits;
46119 int num_insn;
46120 int num_uops;
46121 int window_num;
46122 int insn_num_uops;
46123 int sum;
46125 if (INSN_CODE (insn) < 0)
46126 return;
46128 byte_len = min_insn_size (insn);
46129 window_list = dispatch_window_list;
46130 next_list = window_list->next;
46131 path = get_insn_path (insn);
46132 insn_group = get_insn_group (insn);
46134 /* Get the last dispatch window. */
46135 if (next_list)
46136 window_list = dispatch_window_list->next;
46138 if (path == path_single)
46139 insn_num_uops = 1;
46140 else if (path == path_double)
46141 insn_num_uops = 2;
46142 else
46143 insn_num_uops = (int) path;
46145 /* If the current window is full, get a new window.
46146 Window number zero is full if MAX_INSN uops are scheduled in it.
46147 Window number one is full if the sum of window zero's bytes and
46148 window one's bytes is 32, if adding the new instruction's bytes
46149 to that total reaches 48 or more, or if it already holds MAX_INSN
46150 instructions. */
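/* For example, with 20 bytes already in window zero and 12 bytes in
   window one the sum is exactly 32 and the pair of windows is ended;
   likewise a total of 40 bytes plus an 8-byte instruction reaches 48
   and has the same effect.  */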
46151 num_insn = window_list->num_insn;
46152 num_uops = window_list->num_uops;
46153 window_num = window_list->window_num;
46154 insn_fits = fits_dispatch_window (insn);
46156 if (num_insn >= MAX_INSN
46157 || num_uops + insn_num_uops > MAX_INSN
46158 || !(insn_fits))
46160 window_num = ~window_num & 1;
46161 window_list = allocate_next_window (window_num);
46164 if (window_num == 0)
46166 add_insn_window (insn, window_list, insn_num_uops);
46167 if (window_list->num_insn >= MAX_INSN
46168 && insn_group == disp_branch)
46170 process_end_window ();
46171 return;
46174 else if (window_num == 1)
46176 window0_list = window_list->prev;
46177 sum = window0_list->window_size + window_list->window_size;
46178 if (sum == 32
46179 || (byte_len + sum) >= 48)
46181 process_end_window ();
46182 window_list = dispatch_window_list;
46185 add_insn_window (insn, window_list, insn_num_uops);
46187 else
46188 gcc_unreachable ();
46190 if (is_end_basic_block (insn_group))
46192 /* The end of the basic block is reached; do end-of-basic-block processing. */
46193 process_end_window ();
46194 return;
46198 /* Print the dispatch window, WINDOW_NUM, to FILE. */
46200 DEBUG_FUNCTION static void
46201 debug_dispatch_window_file (FILE *file, int window_num)
46203 dispatch_windows *list;
46204 int i;
46206 if (window_num == 0)
46207 list = dispatch_window_list;
46208 else
46209 list = dispatch_window_list1;
46211 fprintf (file, "Window #%d:\n", list->window_num);
46212 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
46213 list->num_insn, list->num_uops, list->window_size);
46214 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46215 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
46217 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
46218 list->num_stores);
46219 fprintf (file, " insn info:\n");
46221 for (i = 0; i < MAX_INSN; i++)
46223 if (!list->window[i].insn)
46224 break;
46225 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
46226 i, group_name[list->window[i].group],
46227 i, (void *)list->window[i].insn,
46228 i, list->window[i].path,
46229 i, list->window[i].byte_len,
46230 i, list->window[i].imm_bytes);
46234 /* Print to stdout a dispatch window. */
46236 DEBUG_FUNCTION void
46237 debug_dispatch_window (int window_num)
46239 debug_dispatch_window_file (stdout, window_num);
46242 /* Print INSN dispatch information to FILE. */
46244 DEBUG_FUNCTION static void
46245 debug_insn_dispatch_info_file (FILE *file, rtx insn)
46247 int byte_len;
46248 enum insn_path path;
46249 enum dispatch_group group;
46250 int imm_size;
46251 int num_imm_operand;
46252 int num_imm32_operand;
46253 int num_imm64_operand;
46255 if (INSN_CODE (insn) < 0)
46256 return;
46258 byte_len = min_insn_size (insn);
46259 path = get_insn_path (insn);
46260 group = get_insn_group (insn);
46261 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46262 &num_imm64_operand);
46264 fprintf (file, " insn info:\n");
46265 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
46266 group_name[group], path, byte_len);
46267 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46268 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
46271 /* Print to stdout the status of the ready list with respect to
46272 dispatch windows. */
46274 DEBUG_FUNCTION void
46275 debug_ready_dispatch (void)
46277 int i;
46278 int no_ready = number_in_ready ();
46280 fprintf (stdout, "Number of ready: %d\n", no_ready);
46282 for (i = 0; i < no_ready; i++)
46283 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
46286 /* This routine is the driver of the dispatch scheduler. */
46288 static void
46289 do_dispatch (rtx insn, int mode)
46291 if (mode == DISPATCH_INIT)
46292 init_dispatch_sched ();
46293 else if (mode == ADD_TO_DISPATCH_WINDOW)
46294 add_to_dispatch_window (insn);
46297 /* Return TRUE if Dispatch Scheduling is supported. */
46299 static bool
46300 has_dispatch (rtx insn, int action)
46302 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
46303 && flag_dispatch_scheduler)
46304 switch (action)
46306 default:
46307 return false;
46309 case IS_DISPATCH_ON:
46310 return true;
46311 break;
46313 case IS_CMP:
46314 return is_cmp (insn);
46316 case DISPATCH_VIOLATION:
46317 return dispatch_violation ();
46319 case FITS_DISPATCH_WINDOW:
46320 return fits_dispatch_window (insn);
46323 return false;
46326 /* Implementation of the reassociation_width target hook, used by
46327 the reassoc phase to identify the parallelism level in a
46328 reassociated tree. The statement's tree_code is passed in OPC.
46329 The arguments' type is passed in MODE.
46331 Currently parallel reassociation is enabled for Atom
46332 processors only, and the reassociation width is set to 2
46333 because Atom may issue up to 2 instructions per cycle.
46335 The return value should be adjusted if parallel reassociation is
46336 enabled for other processors. */
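/* With a width of 2 the reassociation pass can, for instance, rewrite
   ((a + b) + c) + d as (a + b) + (c + d), exposing two independent
   additions that can issue in parallel.  */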
46338 static int
46339 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
46340 enum machine_mode mode)
46342 int res = 1;
46344 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
46345 res = 2;
46346 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
46347 res = 2;
46349 return res;
46352 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
46353 place emms and femms instructions. */
46355 static enum machine_mode
46356 ix86_preferred_simd_mode (enum machine_mode mode)
46358 if (!TARGET_SSE)
46359 return word_mode;
46361 switch (mode)
46363 case QImode:
46364 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
46365 case HImode:
46366 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
46367 case SImode:
46368 return TARGET_AVX512F ? V16SImode :
46369 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
46370 case DImode:
46371 return TARGET_AVX512F ? V8DImode :
46372 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
46374 case SFmode:
46375 if (TARGET_AVX512F)
46376 return V16SFmode;
46377 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46378 return V8SFmode;
46379 else
46380 return V4SFmode;
46382 case DFmode:
46383 if (!TARGET_VECTORIZE_DOUBLE)
46384 return word_mode;
46385 else if (TARGET_AVX512F)
46386 return V8DFmode;
46387 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46388 return V4DFmode;
46389 else if (TARGET_SSE2)
46390 return V2DFmode;
46391 /* FALLTHRU */
46393 default:
46394 return word_mode;
46398 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
46399 vectors. If AVX512F is enabled then try vectorizing with 512bit,
46400 256bit and 128bit vectors. */
46402 static unsigned int
46403 ix86_autovectorize_vector_sizes (void)
46405 return TARGET_AVX512F ? 64 | 32 | 16 :
46406 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
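/* The result is a bitmask of vector sizes in bytes: 64 | 32 | 16 (0x70)
   with AVX-512F, 32 | 16 (0x30) with AVX unless 128-bit vectors are
   preferred, and 0 otherwise, which tells the vectorizer to use only
   the preferred SIMD mode.  */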
46411 /* Return the class of registers which could be used for a pseudo of MODE
46412 and of class RCLASS for spilling instead of memory. Return NO_REGS
46413 if it is not possible or not profitable. */
46414 static reg_class_t
46415 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
46417 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
46418 && (mode == SImode || (TARGET_64BIT && mode == DImode))
46419 && INTEGER_CLASS_P (rclass))
46420 return ALL_SSE_REGS;
46421 return NO_REGS;
46424 /* Implement targetm.vectorize.init_cost. */
46426 static void *
46427 ix86_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
46429 unsigned *cost = XNEWVEC (unsigned, 3);
46430 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
46431 return cost;
46434 /* Implement targetm.vectorize.add_stmt_cost. */
46436 static unsigned
46437 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
46438 struct _stmt_vec_info *stmt_info, int misalign,
46439 enum vect_cost_model_location where)
46441 unsigned *cost = (unsigned *) data;
46442 unsigned retval = 0;
46444 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
46445 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
46447 /* Statements in an inner loop relative to the loop being
46448 vectorized are weighted more heavily. The value here is
46449 arbitrary and could potentially be improved with analysis. */
46450 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
46451 count *= 50; /* FIXME. */
46453 retval = (unsigned) (count * stmt_cost);
46454 cost[where] += retval;
46456 return retval;
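/* For example, a statement with COUNT 2 and a per-statement cost of 3
   inside an inner loop is accounted as 2 * 50 * 3 = 300 units in
   cost[vect_body], while the same statement in the loop being
   vectorized itself contributes only 6.  */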
46459 /* Implement targetm.vectorize.finish_cost. */
46461 static void
46462 ix86_finish_cost (void *data, unsigned *prologue_cost,
46463 unsigned *body_cost, unsigned *epilogue_cost)
46465 unsigned *cost = (unsigned *) data;
46466 *prologue_cost = cost[vect_prologue];
46467 *body_cost = cost[vect_body];
46468 *epilogue_cost = cost[vect_epilogue];
46471 /* Implement targetm.vectorize.destroy_cost_data. */
46473 static void
46474 ix86_destroy_cost_data (void *data)
46476 free (data);
46479 /* Validate target specific memory model bits in VAL. */
46481 static unsigned HOST_WIDE_INT
46482 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
46484 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
46485 bool strong;
46487 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
46488 |MEMMODEL_MASK)
46489 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
46491 warning (OPT_Winvalid_memory_model,
46492 "Unknown architecture specific memory model");
46493 return MEMMODEL_SEQ_CST;
46495 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
46496 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
46498 warning (OPT_Winvalid_memory_model,
46499 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
46500 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
46502 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
46504 warning (OPT_Winvalid_memory_model,
46505 "HLE_RELEASE not used with RELEASE or stronger memory model");
46506 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
46508 return val;
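/* For instance, MEMMODEL_ACQUIRE combined with IX86_HLE_ACQUIRE is
   returned unchanged, whereas IX86_HLE_RELEASE paired with a plain
   MEMMODEL_ACQUIRE triggers the warning above and falls back to
   MEMMODEL_SEQ_CST | IX86_HLE_RELEASE.  */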
46511 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
46512 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
46513 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
46514 or the number of vecsize_mangle variants that should be emitted. */
46516 static int
46517 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
46518 struct cgraph_simd_clone *clonei,
46519 tree base_type, int num)
46521 int ret = 1;
46523 if (clonei->simdlen
46524 && (clonei->simdlen < 2
46525 || clonei->simdlen > 16
46526 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
46528 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46529 "unsupported simdlen %d", clonei->simdlen);
46530 return 0;
46533 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
46534 if (TREE_CODE (ret_type) != VOID_TYPE)
46535 switch (TYPE_MODE (ret_type))
46537 case QImode:
46538 case HImode:
46539 case SImode:
46540 case DImode:
46541 case SFmode:
46542 case DFmode:
46543 /* case SCmode: */
46544 /* case DCmode: */
46545 break;
46546 default:
46547 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46548 "unsupported return type %qT for simd\n", ret_type);
46549 return 0;
46552 tree t;
46553 int i;
46555 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
46556 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
46557 switch (TYPE_MODE (TREE_TYPE (t)))
46559 case QImode:
46560 case HImode:
46561 case SImode:
46562 case DImode:
46563 case SFmode:
46564 case DFmode:
46565 /* case SCmode: */
46566 /* case DCmode: */
46567 break;
46568 default:
46569 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46570 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
46571 return 0;
46574 if (clonei->cilk_elemental)
46576 /* Parse the processor clause here. If it is not present, default to 'b'. */
46577 clonei->vecsize_mangle = 'b';
46579 else if (!TREE_PUBLIC (node->decl))
46581 /* If the function isn't exported, we can pick just one ISA
46582 for the clones. */
46583 if (TARGET_AVX2)
46584 clonei->vecsize_mangle = 'd';
46585 else if (TARGET_AVX)
46586 clonei->vecsize_mangle = 'c';
46587 else
46588 clonei->vecsize_mangle = 'b';
46589 ret = 1;
46591 else
46593 clonei->vecsize_mangle = "bcd"[num];
46594 ret = 3;
46596 switch (clonei->vecsize_mangle)
46598 case 'b':
46599 clonei->vecsize_int = 128;
46600 clonei->vecsize_float = 128;
46601 break;
46602 case 'c':
46603 clonei->vecsize_int = 128;
46604 clonei->vecsize_float = 256;
46605 break;
46606 case 'd':
46607 clonei->vecsize_int = 256;
46608 clonei->vecsize_float = 256;
46609 break;
46611 if (clonei->simdlen == 0)
46613 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
46614 clonei->simdlen = clonei->vecsize_int;
46615 else
46616 clonei->simdlen = clonei->vecsize_float;
46617 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
46618 if (clonei->simdlen > 16)
46619 clonei->simdlen = 16;
46621 return ret;
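/* For example, for a float (SFmode, 32-bit) base type and the 'c'
   (AVX) variant, vecsize_float is 256, so an unspecified simdlen
   becomes 256 / 32 = 8; a char base type with vecsize_int of 128
   yields 16, which is already at the cap.  */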
46624 /* Add target attribute to SIMD clone NODE if needed. */
46626 static void
46627 ix86_simd_clone_adjust (struct cgraph_node *node)
46629 const char *str = NULL;
46630 gcc_assert (node->decl == cfun->decl);
46631 switch (node->simdclone->vecsize_mangle)
46633 case 'b':
46634 if (!TARGET_SSE2)
46635 str = "sse2";
46636 break;
46637 case 'c':
46638 if (!TARGET_AVX)
46639 str = "avx";
46640 break;
46641 case 'd':
46642 if (!TARGET_AVX2)
46643 str = "avx2";
46644 break;
46645 default:
46646 gcc_unreachable ();
46648 if (str == NULL)
46649 return;
46650 push_cfun (NULL);
46651 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
46652 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
46653 gcc_assert (ok);
46654 pop_cfun ();
46655 ix86_previous_fndecl = NULL_TREE;
46656 ix86_set_current_function (node->decl);
46659 /* If SIMD clone NODE can't be used in a vectorized loop in the
46660 current function, return -1; otherwise return the badness of using it
46661 (0 if it is the most desirable from the vecsize_mangle point of view,
46662 1 slightly less desirable, etc.). */
46664 static int
46665 ix86_simd_clone_usable (struct cgraph_node *node)
46667 switch (node->simdclone->vecsize_mangle)
46669 case 'b':
46670 if (!TARGET_SSE2)
46671 return -1;
46672 if (!TARGET_AVX)
46673 return 0;
46674 return TARGET_AVX2 ? 2 : 1;
46675 case 'c':
46676 if (!TARGET_AVX)
46677 return -1;
46678 return TARGET_AVX2 ? 1 : 0;
46679 break;
46680 case 'd':
46681 if (!TARGET_AVX2)
46682 return -1;
46683 return 0;
46684 default:
46685 gcc_unreachable ();
46689 /* This function counts the memory references in *X; a reference
46690 wider than four words counts as two. The resulting count helps
46691 determine the unrolling factor for the bdver3 and bdver4 architectures. */
46693 static int
46694 ix86_loop_memcount (rtx *x, unsigned *mem_count)
46696 if (*x != NULL_RTX && MEM_P (*x))
46698 enum machine_mode mode;
46699 unsigned int n_words;
46701 mode = GET_MODE (*x);
46702 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
46704 if (n_words > 4)
46705 (*mem_count)+=2;
46706 else
46707 (*mem_count)+=1;
46709 return 0;
46712 /* This function adjusts the unroll factor based on
46713 the hardware capabilities. For example, bdver3 has
46714 a loop buffer which makes unrolling of smaller
46715 loops less important. This function decides the
46716 unroll factor using the number of memory references
46717 (the value 32 is used) as a heuristic. */
46719 static unsigned
46720 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
46722 basic_block *bbs;
46723 rtx insn;
46724 unsigned i;
46725 unsigned mem_count = 0;
46727 if (!TARGET_ADJUST_UNROLL)
46728 return nunroll;
46730 /* Count the number of memory references within the loop body. */
46731 bbs = get_loop_body (loop);
46732 for (i = 0; i < loop->num_nodes; i++)
46734 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
46735 if (NONDEBUG_INSN_P (insn))
46736 for_each_rtx (&insn, (rtx_function) ix86_loop_memcount, &mem_count);
46738 free (bbs);
46740 if (mem_count && mem_count <= 32)
46741 return 32 / mem_count;
46743 return nunroll;
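/* For example, a loop body with 8 counted memory references gets an
   unroll factor of 32 / 8 = 4, while a loop with more than 32
   references (or none at all) keeps the factor NUNROLL chosen by the
   generic code.  */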
46747 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
46749 static bool
46750 ix86_float_exceptions_rounding_supported_p (void)
46752 /* For x87 floating point with standard excess precision handling,
46753 there is no adddf3 pattern (since x87 floating point only has
46754 XFmode operations) so the default hook implementation gets this
46755 wrong. */
46756 return TARGET_80387 || TARGET_SSE_MATH;
46759 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
46761 static void
46762 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
46764 if (!TARGET_80387 && !TARGET_SSE_MATH)
46765 return;
46766 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
46767 if (TARGET_80387)
46769 tree fenv_index_type = build_index_type (size_int (6));
46770 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
46771 tree fenv_var = create_tmp_var (fenv_type, NULL);
46772 mark_addressable (fenv_var);
46773 tree fenv_ptr = build_pointer_type (fenv_type);
46774 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
46775 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
46776 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
46777 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
46778 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
46779 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
46780 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
46781 tree hold_fnclex = build_call_expr (fnclex, 0);
46782 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
46783 hold_fnclex);
46784 *clear = build_call_expr (fnclex, 0);
46785 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
46786 mark_addressable (sw_var);
46787 tree su_ptr = build_pointer_type (short_unsigned_type_node);
46788 tree sw_addr = build1 (ADDR_EXPR, su_ptr, sw_var);
46789 tree fnstsw_call = build_call_expr (fnstsw, 1, sw_addr);
46790 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
46791 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
46792 exceptions_var, exceptions_x87);
46793 *update = build2 (COMPOUND_EXPR, integer_type_node,
46794 fnstsw_call, update_mod);
46795 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
46796 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
46798 if (TARGET_SSE_MATH)
46800 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
46801 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
46802 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
46803 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
46804 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
46805 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
46806 mxcsr_orig_var, stmxcsr_hold_call);
46807 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
46808 mxcsr_orig_var,
46809 build_int_cst (unsigned_type_node, 0x1f80));
46810 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
46811 build_int_cst (unsigned_type_node, 0xffffffc0));
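/* The OR with 0x1f80 sets the six MXCSR exception-mask bits (bits
   7-12) and the AND with 0xffffffc0 clears the six exception-flag
   bits (bits 0-5), so the held state masks all exceptions and starts
   with none raised.  */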
46812 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
46813 mxcsr_mod_var, hold_mod_val);
46814 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46815 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
46816 hold_assign_orig, hold_assign_mod);
46817 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
46818 ldmxcsr_hold_call);
46819 if (*hold)
46820 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
46821 else
46822 *hold = hold_all;
46823 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46824 if (*clear)
46825 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
46826 ldmxcsr_clear_call);
46827 else
46828 *clear = ldmxcsr_clear_call;
46829 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
46830 tree exceptions_sse = fold_convert (integer_type_node,
46831 stxmcsr_update_call);
46832 if (*update)
46834 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
46835 exceptions_var, exceptions_sse);
46836 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
46837 exceptions_var, exceptions_mod);
46838 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
46839 exceptions_assign);
46841 else
46842 *update = build2 (MODIFY_EXPR, integer_type_node,
46843 exceptions_var, exceptions_sse);
46844 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
46845 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46846 ldmxcsr_update_call);
46848 tree atomic_feraiseexcept
46849 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
46850 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
46851 1, exceptions_var);
46852 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46853 atomic_feraiseexcept_call);
46856 /* Initialize the GCC target structure. */
46857 #undef TARGET_RETURN_IN_MEMORY
46858 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
46860 #undef TARGET_LEGITIMIZE_ADDRESS
46861 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
46863 #undef TARGET_ATTRIBUTE_TABLE
46864 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
46865 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
46866 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
46867 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46868 # undef TARGET_MERGE_DECL_ATTRIBUTES
46869 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
46870 #endif
46872 #undef TARGET_COMP_TYPE_ATTRIBUTES
46873 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
46875 #undef TARGET_INIT_BUILTINS
46876 #define TARGET_INIT_BUILTINS ix86_init_builtins
46877 #undef TARGET_BUILTIN_DECL
46878 #define TARGET_BUILTIN_DECL ix86_builtin_decl
46879 #undef TARGET_EXPAND_BUILTIN
46880 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
46882 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
46883 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
46884 ix86_builtin_vectorized_function
46886 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
46887 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
46889 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
46890 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
46892 #undef TARGET_VECTORIZE_BUILTIN_GATHER
46893 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
46895 #undef TARGET_BUILTIN_RECIPROCAL
46896 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
46898 #undef TARGET_ASM_FUNCTION_EPILOGUE
46899 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
46901 #undef TARGET_ENCODE_SECTION_INFO
46902 #ifndef SUBTARGET_ENCODE_SECTION_INFO
46903 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
46904 #else
46905 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
46906 #endif
46908 #undef TARGET_ASM_OPEN_PAREN
46909 #define TARGET_ASM_OPEN_PAREN ""
46910 #undef TARGET_ASM_CLOSE_PAREN
46911 #define TARGET_ASM_CLOSE_PAREN ""
46913 #undef TARGET_ASM_BYTE_OP
46914 #define TARGET_ASM_BYTE_OP ASM_BYTE
46916 #undef TARGET_ASM_ALIGNED_HI_OP
46917 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
46918 #undef TARGET_ASM_ALIGNED_SI_OP
46919 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
46920 #ifdef ASM_QUAD
46921 #undef TARGET_ASM_ALIGNED_DI_OP
46922 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
46923 #endif
46925 #undef TARGET_PROFILE_BEFORE_PROLOGUE
46926 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
46928 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
46929 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
46931 #undef TARGET_ASM_UNALIGNED_HI_OP
46932 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
46933 #undef TARGET_ASM_UNALIGNED_SI_OP
46934 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
46935 #undef TARGET_ASM_UNALIGNED_DI_OP
46936 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
46938 #undef TARGET_PRINT_OPERAND
46939 #define TARGET_PRINT_OPERAND ix86_print_operand
46940 #undef TARGET_PRINT_OPERAND_ADDRESS
46941 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
46942 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
46943 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
46944 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
46945 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
46947 #undef TARGET_SCHED_INIT_GLOBAL
46948 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
46949 #undef TARGET_SCHED_ADJUST_COST
46950 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
46951 #undef TARGET_SCHED_ISSUE_RATE
46952 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
46953 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
46954 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
46955 ia32_multipass_dfa_lookahead
46956 #undef TARGET_SCHED_MACRO_FUSION_P
46957 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
46958 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
46959 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
46961 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
46962 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
46964 #undef TARGET_MEMMODEL_CHECK
46965 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
46967 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
46968 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
46970 #ifdef HAVE_AS_TLS
46971 #undef TARGET_HAVE_TLS
46972 #define TARGET_HAVE_TLS true
46973 #endif
46974 #undef TARGET_CANNOT_FORCE_CONST_MEM
46975 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
46976 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
46977 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
46979 #undef TARGET_DELEGITIMIZE_ADDRESS
46980 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
46982 #undef TARGET_MS_BITFIELD_LAYOUT_P
46983 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
46985 #if TARGET_MACHO
46986 #undef TARGET_BINDS_LOCAL_P
46987 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
46988 #endif
46989 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46990 #undef TARGET_BINDS_LOCAL_P
46991 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
46992 #endif
46994 #undef TARGET_ASM_OUTPUT_MI_THUNK
46995 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
46996 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
46997 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
46999 #undef TARGET_ASM_FILE_START
47000 #define TARGET_ASM_FILE_START x86_file_start
47002 #undef TARGET_OPTION_OVERRIDE
47003 #define TARGET_OPTION_OVERRIDE ix86_option_override
47005 #undef TARGET_REGISTER_MOVE_COST
47006 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
47007 #undef TARGET_MEMORY_MOVE_COST
47008 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
47009 #undef TARGET_RTX_COSTS
47010 #define TARGET_RTX_COSTS ix86_rtx_costs
47011 #undef TARGET_ADDRESS_COST
47012 #define TARGET_ADDRESS_COST ix86_address_cost
47014 #undef TARGET_FIXED_CONDITION_CODE_REGS
47015 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
47016 #undef TARGET_CC_MODES_COMPATIBLE
47017 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
47019 #undef TARGET_MACHINE_DEPENDENT_REORG
47020 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
47022 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
47023 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
47025 #undef TARGET_BUILD_BUILTIN_VA_LIST
47026 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
47028 #undef TARGET_FOLD_BUILTIN
47029 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
47031 #undef TARGET_COMPARE_VERSION_PRIORITY
47032 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
47034 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
47035 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
47036 ix86_generate_version_dispatcher_body
47038 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
47039 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
47040 ix86_get_function_versions_dispatcher
47042 #undef TARGET_ENUM_VA_LIST_P
47043 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
47045 #undef TARGET_FN_ABI_VA_LIST
47046 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
47048 #undef TARGET_CANONICAL_VA_LIST_TYPE
47049 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
47051 #undef TARGET_EXPAND_BUILTIN_VA_START
47052 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
47054 #undef TARGET_MD_ASM_CLOBBERS
47055 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
47057 #undef TARGET_PROMOTE_PROTOTYPES
47058 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
47059 #undef TARGET_SETUP_INCOMING_VARARGS
47060 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
47061 #undef TARGET_MUST_PASS_IN_STACK
47062 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
47063 #undef TARGET_FUNCTION_ARG_ADVANCE
47064 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
47065 #undef TARGET_FUNCTION_ARG
47066 #define TARGET_FUNCTION_ARG ix86_function_arg
47067 #undef TARGET_FUNCTION_ARG_BOUNDARY
47068 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
47069 #undef TARGET_PASS_BY_REFERENCE
47070 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
47071 #undef TARGET_INTERNAL_ARG_POINTER
47072 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
47073 #undef TARGET_UPDATE_STACK_BOUNDARY
47074 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
47075 #undef TARGET_GET_DRAP_RTX
47076 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
47077 #undef TARGET_STRICT_ARGUMENT_NAMING
47078 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
47079 #undef TARGET_STATIC_CHAIN
47080 #define TARGET_STATIC_CHAIN ix86_static_chain
47081 #undef TARGET_TRAMPOLINE_INIT
47082 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
47083 #undef TARGET_RETURN_POPS_ARGS
47084 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
47086 #undef TARGET_LEGITIMATE_COMBINED_INSN
47087 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
47089 #undef TARGET_ASAN_SHADOW_OFFSET
47090 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
47092 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
47093 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
47095 #undef TARGET_SCALAR_MODE_SUPPORTED_P
47096 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
47098 #undef TARGET_VECTOR_MODE_SUPPORTED_P
47099 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
47101 #undef TARGET_C_MODE_FOR_SUFFIX
47102 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
47104 #ifdef HAVE_AS_TLS
47105 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
47106 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
47107 #endif
47109 #ifdef SUBTARGET_INSERT_ATTRIBUTES
47110 #undef TARGET_INSERT_ATTRIBUTES
47111 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
47112 #endif
47114 #undef TARGET_MANGLE_TYPE
47115 #define TARGET_MANGLE_TYPE ix86_mangle_type
47117 #if !TARGET_MACHO
47118 #undef TARGET_STACK_PROTECT_FAIL
47119 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
47120 #endif
47122 #undef TARGET_FUNCTION_VALUE
47123 #define TARGET_FUNCTION_VALUE ix86_function_value
47125 #undef TARGET_FUNCTION_VALUE_REGNO_P
47126 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
47128 #undef TARGET_PROMOTE_FUNCTION_MODE
47129 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
47131 #undef TARGET_MEMBER_TYPE_FORCES_BLK
47132 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
47134 #undef TARGET_INSTANTIATE_DECLS
47135 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
47137 #undef TARGET_SECONDARY_RELOAD
47138 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
47140 #undef TARGET_CLASS_MAX_NREGS
47141 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
47143 #undef TARGET_PREFERRED_RELOAD_CLASS
47144 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
47145 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
47146 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
47147 #undef TARGET_CLASS_LIKELY_SPILLED_P
47148 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
47150 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
47151 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
47152 ix86_builtin_vectorization_cost
47153 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
47154 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
47155 ix86_vectorize_vec_perm_const_ok
47156 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
47157 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
47158 ix86_preferred_simd_mode
47159 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
47160 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
47161 ix86_autovectorize_vector_sizes
47162 #undef TARGET_VECTORIZE_INIT_COST
47163 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
47164 #undef TARGET_VECTORIZE_ADD_STMT_COST
47165 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
47166 #undef TARGET_VECTORIZE_FINISH_COST
47167 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
47168 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
47169 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
47171 #undef TARGET_SET_CURRENT_FUNCTION
47172 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
47174 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
47175 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
47177 #undef TARGET_OPTION_SAVE
47178 #define TARGET_OPTION_SAVE ix86_function_specific_save
47180 #undef TARGET_OPTION_RESTORE
47181 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
47183 #undef TARGET_OPTION_PRINT
47184 #define TARGET_OPTION_PRINT ix86_function_specific_print
47186 #undef TARGET_OPTION_FUNCTION_VERSIONS
47187 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
47189 #undef TARGET_CAN_INLINE_P
47190 #define TARGET_CAN_INLINE_P ix86_can_inline_p
47192 #undef TARGET_EXPAND_TO_RTL_HOOK
47193 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
47195 #undef TARGET_LEGITIMATE_ADDRESS_P
47196 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
47198 #undef TARGET_LRA_P
47199 #define TARGET_LRA_P hook_bool_void_true
47201 #undef TARGET_REGISTER_PRIORITY
47202 #define TARGET_REGISTER_PRIORITY ix86_register_priority
47204 #undef TARGET_REGISTER_USAGE_LEVELING_P
47205 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
47207 #undef TARGET_LEGITIMATE_CONSTANT_P
47208 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
47210 #undef TARGET_FRAME_POINTER_REQUIRED
47211 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
47213 #undef TARGET_CAN_ELIMINATE
47214 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
47216 #undef TARGET_EXTRA_LIVE_ON_ENTRY
47217 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
47219 #undef TARGET_ASM_CODE_END
47220 #define TARGET_ASM_CODE_END ix86_code_end
47222 #undef TARGET_CONDITIONAL_REGISTER_USAGE
47223 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
47225 #if TARGET_MACHO
47226 #undef TARGET_INIT_LIBFUNCS
47227 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
47228 #endif
47230 #undef TARGET_LOOP_UNROLL_ADJUST
47231 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
47233 #undef TARGET_SPILL_CLASS
47234 #define TARGET_SPILL_CLASS ix86_spill_class
47236 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
47237 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
47238 ix86_simd_clone_compute_vecsize_and_simdlen
47240 #undef TARGET_SIMD_CLONE_ADJUST
47241 #define TARGET_SIMD_CLONE_ADJUST \
47242 ix86_simd_clone_adjust
47244 #undef TARGET_SIMD_CLONE_USABLE
47245 #define TARGET_SIMD_CLONE_USABLE \
47246 ix86_simd_clone_usable
47248 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
47249 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
47250 ix86_float_exceptions_rounding_supported_p
47252 struct gcc_target targetm = TARGET_INITIALIZER;
47254 #include "gt-i386.h"