* config/i386/i386.c (decide_alg): Correctly handle
[official-gcc.git] / gcc / config / i386 / i386.c
blob77d54e5bcb72363c12f752caec064aebdcdfadeb
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "function.h"
42 #include "recog.h"
43 #include "expr.h"
44 #include "optabs.h"
45 #include "diagnostic-core.h"
46 #include "toplev.h"
47 #include "basic-block.h"
48 #include "ggc.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "reload.h"
54 #include "cgraph.h"
55 #include "pointer-set.h"
56 #include "hash-table.h"
57 #include "vec.h"
58 #include "basic-block.h"
59 #include "tree-ssa-alias.h"
60 #include "internal-fn.h"
61 #include "gimple-fold.h"
62 #include "tree-eh.h"
63 #include "gimple-expr.h"
64 #include "is-a.h"
65 #include "gimple.h"
66 #include "gimplify.h"
67 #include "cfgloop.h"
68 #include "dwarf2.h"
69 #include "df.h"
70 #include "tm-constrs.h"
71 #include "params.h"
72 #include "cselib.h"
73 #include "debug.h"
74 #include "sched-int.h"
75 #include "sbitmap.h"
76 #include "fibheap.h"
77 #include "opts.h"
78 #include "diagnostic.h"
79 #include "dumpfile.h"
80 #include "tree-pass.h"
81 #include "wide-int.h"
82 #include "context.h"
83 #include "pass_manager.h"
84 #include "target-globals.h"
85 #include "tree-vectorizer.h"
86 #include "shrink-wrap.h"
87 #include "builtins.h"
89 static rtx legitimize_dllimport_symbol (rtx, bool);
90 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
91 static rtx legitimize_pe_coff_symbol (rtx, bool);
93 #ifndef CHECK_STACK_LIMIT
94 #define CHECK_STACK_LIMIT (-1)
95 #endif
97 /* Return index of given mode in mult and division cost tables. */
98 #define MODE_INDEX(mode) \
99 ((mode) == QImode ? 0 \
100 : (mode) == HImode ? 1 \
101 : (mode) == SImode ? 2 \
102 : (mode) == DImode ? 3 \
103 : 4)
105 /* Processor costs (relative to an add) */
106 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
107 #define COSTS_N_BYTES(N) ((N) * 2)
109 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
111 static stringop_algs ix86_size_memcpy[2] = {
112 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
113 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
114 static stringop_algs ix86_size_memset[2] = {
115 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
116 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
118 const
119 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
120 COSTS_N_BYTES (2), /* cost of an add instruction */
121 COSTS_N_BYTES (3), /* cost of a lea instruction */
122 COSTS_N_BYTES (2), /* variable shift costs */
123 COSTS_N_BYTES (3), /* constant shift costs */
124 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
125 COSTS_N_BYTES (3), /* HI */
126 COSTS_N_BYTES (3), /* SI */
127 COSTS_N_BYTES (3), /* DI */
128 COSTS_N_BYTES (5)}, /* other */
129 0, /* cost of multiply per each bit set */
130 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
131 COSTS_N_BYTES (3), /* HI */
132 COSTS_N_BYTES (3), /* SI */
133 COSTS_N_BYTES (3), /* DI */
134 COSTS_N_BYTES (5)}, /* other */
135 COSTS_N_BYTES (3), /* cost of movsx */
136 COSTS_N_BYTES (3), /* cost of movzx */
137 0, /* "large" insn */
138 2, /* MOVE_RATIO */
139 2, /* cost for loading QImode using movzbl */
140 {2, 2, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
142 Relative to reg-reg move (2). */
143 {2, 2, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {2, 2, 2}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
147 {2, 2, 2}, /* cost of storing fp registers
148 in SFmode, DFmode and XFmode */
149 3, /* cost of moving MMX register */
150 {3, 3}, /* cost of loading MMX registers
151 in SImode and DImode */
152 {3, 3}, /* cost of storing MMX registers
153 in SImode and DImode */
154 3, /* cost of moving SSE register */
155 {3, 3, 3}, /* cost of loading SSE registers
156 in SImode, DImode and TImode */
157 {3, 3, 3}, /* cost of storing SSE registers
158 in SImode, DImode and TImode */
159 3, /* MMX or SSE register to integer */
160 0, /* size of l1 cache */
161 0, /* size of l2 cache */
162 0, /* size of prefetch block */
163 0, /* number of parallel prefetches */
164 2, /* Branch cost */
165 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
166 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
167 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
168 COSTS_N_BYTES (2), /* cost of FABS instruction. */
169 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
170 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
171 ix86_size_memcpy,
172 ix86_size_memset,
173 1, /* scalar_stmt_cost. */
174 1, /* scalar load_cost. */
175 1, /* scalar_store_cost. */
176 1, /* vec_stmt_cost. */
177 1, /* vec_to_scalar_cost. */
178 1, /* scalar_to_vec_cost. */
179 1, /* vec_align_load_cost. */
180 1, /* vec_unalign_load_cost. */
181 1, /* vec_store_cost. */
182 1, /* cond_taken_branch_cost. */
183 1, /* cond_not_taken_branch_cost. */
186 /* Processor costs (relative to an add) */
187 static stringop_algs i386_memcpy[2] = {
188 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
189 DUMMY_STRINGOP_ALGS};
190 static stringop_algs i386_memset[2] = {
191 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
192 DUMMY_STRINGOP_ALGS};
194 static const
195 struct processor_costs i386_cost = { /* 386 specific costs */
196 COSTS_N_INSNS (1), /* cost of an add instruction */
197 COSTS_N_INSNS (1), /* cost of a lea instruction */
198 COSTS_N_INSNS (3), /* variable shift costs */
199 COSTS_N_INSNS (2), /* constant shift costs */
200 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
201 COSTS_N_INSNS (6), /* HI */
202 COSTS_N_INSNS (6), /* SI */
203 COSTS_N_INSNS (6), /* DI */
204 COSTS_N_INSNS (6)}, /* other */
205 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
206 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
207 COSTS_N_INSNS (23), /* HI */
208 COSTS_N_INSNS (23), /* SI */
209 COSTS_N_INSNS (23), /* DI */
210 COSTS_N_INSNS (23)}, /* other */
211 COSTS_N_INSNS (3), /* cost of movsx */
212 COSTS_N_INSNS (2), /* cost of movzx */
213 15, /* "large" insn */
214 3, /* MOVE_RATIO */
215 4, /* cost for loading QImode using movzbl */
216 {2, 4, 2}, /* cost of loading integer registers
217 in QImode, HImode and SImode.
218 Relative to reg-reg move (2). */
219 {2, 4, 2}, /* cost of storing integer registers */
220 2, /* cost of reg,reg fld/fst */
221 {8, 8, 8}, /* cost of loading fp registers
222 in SFmode, DFmode and XFmode */
223 {8, 8, 8}, /* cost of storing fp registers
224 in SFmode, DFmode and XFmode */
225 2, /* cost of moving MMX register */
226 {4, 8}, /* cost of loading MMX registers
227 in SImode and DImode */
228 {4, 8}, /* cost of storing MMX registers
229 in SImode and DImode */
230 2, /* cost of moving SSE register */
231 {4, 8, 16}, /* cost of loading SSE registers
232 in SImode, DImode and TImode */
233 {4, 8, 16}, /* cost of storing SSE registers
234 in SImode, DImode and TImode */
235 3, /* MMX or SSE register to integer */
236 0, /* size of l1 cache */
237 0, /* size of l2 cache */
238 0, /* size of prefetch block */
239 0, /* number of parallel prefetches */
240 1, /* Branch cost */
241 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
242 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
243 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
244 COSTS_N_INSNS (22), /* cost of FABS instruction. */
245 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
246 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
247 i386_memcpy,
248 i386_memset,
249 1, /* scalar_stmt_cost. */
250 1, /* scalar load_cost. */
251 1, /* scalar_store_cost. */
252 1, /* vec_stmt_cost. */
253 1, /* vec_to_scalar_cost. */
254 1, /* scalar_to_vec_cost. */
255 1, /* vec_align_load_cost. */
256 2, /* vec_unalign_load_cost. */
257 1, /* vec_store_cost. */
258 3, /* cond_taken_branch_cost. */
259 1, /* cond_not_taken_branch_cost. */
262 static stringop_algs i486_memcpy[2] = {
263 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
264 DUMMY_STRINGOP_ALGS};
265 static stringop_algs i486_memset[2] = {
266 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
267 DUMMY_STRINGOP_ALGS};
269 static const
270 struct processor_costs i486_cost = { /* 486 specific costs */
271 COSTS_N_INSNS (1), /* cost of an add instruction */
272 COSTS_N_INSNS (1), /* cost of a lea instruction */
273 COSTS_N_INSNS (3), /* variable shift costs */
274 COSTS_N_INSNS (2), /* constant shift costs */
275 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
276 COSTS_N_INSNS (12), /* HI */
277 COSTS_N_INSNS (12), /* SI */
278 COSTS_N_INSNS (12), /* DI */
279 COSTS_N_INSNS (12)}, /* other */
280 1, /* cost of multiply per each bit set */
281 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
282 COSTS_N_INSNS (40), /* HI */
283 COSTS_N_INSNS (40), /* SI */
284 COSTS_N_INSNS (40), /* DI */
285 COSTS_N_INSNS (40)}, /* other */
286 COSTS_N_INSNS (3), /* cost of movsx */
287 COSTS_N_INSNS (2), /* cost of movzx */
288 15, /* "large" insn */
289 3, /* MOVE_RATIO */
290 4, /* cost for loading QImode using movzbl */
291 {2, 4, 2}, /* cost of loading integer registers
292 in QImode, HImode and SImode.
293 Relative to reg-reg move (2). */
294 {2, 4, 2}, /* cost of storing integer registers */
295 2, /* cost of reg,reg fld/fst */
296 {8, 8, 8}, /* cost of loading fp registers
297 in SFmode, DFmode and XFmode */
298 {8, 8, 8}, /* cost of storing fp registers
299 in SFmode, DFmode and XFmode */
300 2, /* cost of moving MMX register */
301 {4, 8}, /* cost of loading MMX registers
302 in SImode and DImode */
303 {4, 8}, /* cost of storing MMX registers
304 in SImode and DImode */
305 2, /* cost of moving SSE register */
306 {4, 8, 16}, /* cost of loading SSE registers
307 in SImode, DImode and TImode */
308 {4, 8, 16}, /* cost of storing SSE registers
309 in SImode, DImode and TImode */
310 3, /* MMX or SSE register to integer */
311 4, /* size of l1 cache. 486 has 8kB cache
312 shared for code and data, so 4kB is
313 not really precise. */
314 4, /* size of l2 cache */
315 0, /* size of prefetch block */
316 0, /* number of parallel prefetches */
317 1, /* Branch cost */
318 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
319 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
320 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
321 COSTS_N_INSNS (3), /* cost of FABS instruction. */
322 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
323 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
324 i486_memcpy,
325 i486_memset,
326 1, /* scalar_stmt_cost. */
327 1, /* scalar load_cost. */
328 1, /* scalar_store_cost. */
329 1, /* vec_stmt_cost. */
330 1, /* vec_to_scalar_cost. */
331 1, /* scalar_to_vec_cost. */
332 1, /* vec_align_load_cost. */
333 2, /* vec_unalign_load_cost. */
334 1, /* vec_store_cost. */
335 3, /* cond_taken_branch_cost. */
336 1, /* cond_not_taken_branch_cost. */
339 static stringop_algs pentium_memcpy[2] = {
340 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
341 DUMMY_STRINGOP_ALGS};
342 static stringop_algs pentium_memset[2] = {
343 {libcall, {{-1, rep_prefix_4_byte, false}}},
344 DUMMY_STRINGOP_ALGS};
346 static const
347 struct processor_costs pentium_cost = {
348 COSTS_N_INSNS (1), /* cost of an add instruction */
349 COSTS_N_INSNS (1), /* cost of a lea instruction */
350 COSTS_N_INSNS (4), /* variable shift costs */
351 COSTS_N_INSNS (1), /* constant shift costs */
352 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
353 COSTS_N_INSNS (11), /* HI */
354 COSTS_N_INSNS (11), /* SI */
355 COSTS_N_INSNS (11), /* DI */
356 COSTS_N_INSNS (11)}, /* other */
357 0, /* cost of multiply per each bit set */
358 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
359 COSTS_N_INSNS (25), /* HI */
360 COSTS_N_INSNS (25), /* SI */
361 COSTS_N_INSNS (25), /* DI */
362 COSTS_N_INSNS (25)}, /* other */
363 COSTS_N_INSNS (3), /* cost of movsx */
364 COSTS_N_INSNS (2), /* cost of movzx */
365 8, /* "large" insn */
366 6, /* MOVE_RATIO */
367 6, /* cost for loading QImode using movzbl */
368 {2, 4, 2}, /* cost of loading integer registers
369 in QImode, HImode and SImode.
370 Relative to reg-reg move (2). */
371 {2, 4, 2}, /* cost of storing integer registers */
372 2, /* cost of reg,reg fld/fst */
373 {2, 2, 6}, /* cost of loading fp registers
374 in SFmode, DFmode and XFmode */
375 {4, 4, 6}, /* cost of storing fp registers
376 in SFmode, DFmode and XFmode */
377 8, /* cost of moving MMX register */
378 {8, 8}, /* cost of loading MMX registers
379 in SImode and DImode */
380 {8, 8}, /* cost of storing MMX registers
381 in SImode and DImode */
382 2, /* cost of moving SSE register */
383 {4, 8, 16}, /* cost of loading SSE registers
384 in SImode, DImode and TImode */
385 {4, 8, 16}, /* cost of storing SSE registers
386 in SImode, DImode and TImode */
387 3, /* MMX or SSE register to integer */
388 8, /* size of l1 cache. */
389 8, /* size of l2 cache */
390 0, /* size of prefetch block */
391 0, /* number of parallel prefetches */
392 2, /* Branch cost */
393 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
394 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
395 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
396 COSTS_N_INSNS (1), /* cost of FABS instruction. */
397 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
398 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
399 pentium_memcpy,
400 pentium_memset,
401 1, /* scalar_stmt_cost. */
402 1, /* scalar load_cost. */
403 1, /* scalar_store_cost. */
404 1, /* vec_stmt_cost. */
405 1, /* vec_to_scalar_cost. */
406 1, /* scalar_to_vec_cost. */
407 1, /* vec_align_load_cost. */
408 2, /* vec_unalign_load_cost. */
409 1, /* vec_store_cost. */
410 3, /* cond_taken_branch_cost. */
411 1, /* cond_not_taken_branch_cost. */
414 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
415 (we ensure the alignment). For small blocks inline loop is still a
416 noticeable win, for bigger blocks either rep movsl or rep movsb is
417 way to go. Rep movsb has apparently more expensive startup time in CPU,
418 but after 4K the difference is down in the noise. */
419 static stringop_algs pentiumpro_memcpy[2] = {
420 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
421 {8192, rep_prefix_4_byte, false},
422 {-1, rep_prefix_1_byte, false}}},
423 DUMMY_STRINGOP_ALGS};
424 static stringop_algs pentiumpro_memset[2] = {
425 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
426 {8192, rep_prefix_4_byte, false},
427 {-1, libcall, false}}},
428 DUMMY_STRINGOP_ALGS};
429 static const
430 struct processor_costs pentiumpro_cost = {
431 COSTS_N_INSNS (1), /* cost of an add instruction */
432 COSTS_N_INSNS (1), /* cost of a lea instruction */
433 COSTS_N_INSNS (1), /* variable shift costs */
434 COSTS_N_INSNS (1), /* constant shift costs */
435 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
436 COSTS_N_INSNS (4), /* HI */
437 COSTS_N_INSNS (4), /* SI */
438 COSTS_N_INSNS (4), /* DI */
439 COSTS_N_INSNS (4)}, /* other */
440 0, /* cost of multiply per each bit set */
441 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
442 COSTS_N_INSNS (17), /* HI */
443 COSTS_N_INSNS (17), /* SI */
444 COSTS_N_INSNS (17), /* DI */
445 COSTS_N_INSNS (17)}, /* other */
446 COSTS_N_INSNS (1), /* cost of movsx */
447 COSTS_N_INSNS (1), /* cost of movzx */
448 8, /* "large" insn */
449 6, /* MOVE_RATIO */
450 2, /* cost for loading QImode using movzbl */
451 {4, 4, 4}, /* cost of loading integer registers
452 in QImode, HImode and SImode.
453 Relative to reg-reg move (2). */
454 {2, 2, 2}, /* cost of storing integer registers */
455 2, /* cost of reg,reg fld/fst */
456 {2, 2, 6}, /* cost of loading fp registers
457 in SFmode, DFmode and XFmode */
458 {4, 4, 6}, /* cost of storing fp registers
459 in SFmode, DFmode and XFmode */
460 2, /* cost of moving MMX register */
461 {2, 2}, /* cost of loading MMX registers
462 in SImode and DImode */
463 {2, 2}, /* cost of storing MMX registers
464 in SImode and DImode */
465 2, /* cost of moving SSE register */
466 {2, 2, 8}, /* cost of loading SSE registers
467 in SImode, DImode and TImode */
468 {2, 2, 8}, /* cost of storing SSE registers
469 in SImode, DImode and TImode */
470 3, /* MMX or SSE register to integer */
471 8, /* size of l1 cache. */
472 256, /* size of l2 cache */
473 32, /* size of prefetch block */
474 6, /* number of parallel prefetches */
475 2, /* Branch cost */
476 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
477 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
478 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
479 COSTS_N_INSNS (2), /* cost of FABS instruction. */
480 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
481 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
482 pentiumpro_memcpy,
483 pentiumpro_memset,
484 1, /* scalar_stmt_cost. */
485 1, /* scalar load_cost. */
486 1, /* scalar_store_cost. */
487 1, /* vec_stmt_cost. */
488 1, /* vec_to_scalar_cost. */
489 1, /* scalar_to_vec_cost. */
490 1, /* vec_align_load_cost. */
491 2, /* vec_unalign_load_cost. */
492 1, /* vec_store_cost. */
493 3, /* cond_taken_branch_cost. */
494 1, /* cond_not_taken_branch_cost. */
497 static stringop_algs geode_memcpy[2] = {
498 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
499 DUMMY_STRINGOP_ALGS};
500 static stringop_algs geode_memset[2] = {
501 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
502 DUMMY_STRINGOP_ALGS};
503 static const
504 struct processor_costs geode_cost = {
505 COSTS_N_INSNS (1), /* cost of an add instruction */
506 COSTS_N_INSNS (1), /* cost of a lea instruction */
507 COSTS_N_INSNS (2), /* variable shift costs */
508 COSTS_N_INSNS (1), /* constant shift costs */
509 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
510 COSTS_N_INSNS (4), /* HI */
511 COSTS_N_INSNS (7), /* SI */
512 COSTS_N_INSNS (7), /* DI */
513 COSTS_N_INSNS (7)}, /* other */
514 0, /* cost of multiply per each bit set */
515 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
516 COSTS_N_INSNS (23), /* HI */
517 COSTS_N_INSNS (39), /* SI */
518 COSTS_N_INSNS (39), /* DI */
519 COSTS_N_INSNS (39)}, /* other */
520 COSTS_N_INSNS (1), /* cost of movsx */
521 COSTS_N_INSNS (1), /* cost of movzx */
522 8, /* "large" insn */
523 4, /* MOVE_RATIO */
524 1, /* cost for loading QImode using movzbl */
525 {1, 1, 1}, /* cost of loading integer registers
526 in QImode, HImode and SImode.
527 Relative to reg-reg move (2). */
528 {1, 1, 1}, /* cost of storing integer registers */
529 1, /* cost of reg,reg fld/fst */
530 {1, 1, 1}, /* cost of loading fp registers
531 in SFmode, DFmode and XFmode */
532 {4, 6, 6}, /* cost of storing fp registers
533 in SFmode, DFmode and XFmode */
535 1, /* cost of moving MMX register */
536 {1, 1}, /* cost of loading MMX registers
537 in SImode and DImode */
538 {1, 1}, /* cost of storing MMX registers
539 in SImode and DImode */
540 1, /* cost of moving SSE register */
541 {1, 1, 1}, /* cost of loading SSE registers
542 in SImode, DImode and TImode */
543 {1, 1, 1}, /* cost of storing SSE registers
544 in SImode, DImode and TImode */
545 1, /* MMX or SSE register to integer */
546 64, /* size of l1 cache. */
547 128, /* size of l2 cache. */
548 32, /* size of prefetch block */
549 1, /* number of parallel prefetches */
550 1, /* Branch cost */
551 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
552 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
553 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
554 COSTS_N_INSNS (1), /* cost of FABS instruction. */
555 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
556 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
557 geode_memcpy,
558 geode_memset,
559 1, /* scalar_stmt_cost. */
560 1, /* scalar load_cost. */
561 1, /* scalar_store_cost. */
562 1, /* vec_stmt_cost. */
563 1, /* vec_to_scalar_cost. */
564 1, /* scalar_to_vec_cost. */
565 1, /* vec_align_load_cost. */
566 2, /* vec_unalign_load_cost. */
567 1, /* vec_store_cost. */
568 3, /* cond_taken_branch_cost. */
569 1, /* cond_not_taken_branch_cost. */
572 static stringop_algs k6_memcpy[2] = {
573 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
574 DUMMY_STRINGOP_ALGS};
575 static stringop_algs k6_memset[2] = {
576 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
577 DUMMY_STRINGOP_ALGS};
578 static const
579 struct processor_costs k6_cost = {
580 COSTS_N_INSNS (1), /* cost of an add instruction */
581 COSTS_N_INSNS (2), /* cost of a lea instruction */
582 COSTS_N_INSNS (1), /* variable shift costs */
583 COSTS_N_INSNS (1), /* constant shift costs */
584 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
585 COSTS_N_INSNS (3), /* HI */
586 COSTS_N_INSNS (3), /* SI */
587 COSTS_N_INSNS (3), /* DI */
588 COSTS_N_INSNS (3)}, /* other */
589 0, /* cost of multiply per each bit set */
590 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
591 COSTS_N_INSNS (18), /* HI */
592 COSTS_N_INSNS (18), /* SI */
593 COSTS_N_INSNS (18), /* DI */
594 COSTS_N_INSNS (18)}, /* other */
595 COSTS_N_INSNS (2), /* cost of movsx */
596 COSTS_N_INSNS (2), /* cost of movzx */
597 8, /* "large" insn */
598 4, /* MOVE_RATIO */
599 3, /* cost for loading QImode using movzbl */
600 {4, 5, 4}, /* cost of loading integer registers
601 in QImode, HImode and SImode.
602 Relative to reg-reg move (2). */
603 {2, 3, 2}, /* cost of storing integer registers */
604 4, /* cost of reg,reg fld/fst */
605 {6, 6, 6}, /* cost of loading fp registers
606 in SFmode, DFmode and XFmode */
607 {4, 4, 4}, /* cost of storing fp registers
608 in SFmode, DFmode and XFmode */
609 2, /* cost of moving MMX register */
610 {2, 2}, /* cost of loading MMX registers
611 in SImode and DImode */
612 {2, 2}, /* cost of storing MMX registers
613 in SImode and DImode */
614 2, /* cost of moving SSE register */
615 {2, 2, 8}, /* cost of loading SSE registers
616 in SImode, DImode and TImode */
617 {2, 2, 8}, /* cost of storing SSE registers
618 in SImode, DImode and TImode */
619 6, /* MMX or SSE register to integer */
620 32, /* size of l1 cache. */
621 32, /* size of l2 cache. Some models
622 have integrated l2 cache, but
623 optimizing for k6 is not important
624 enough to worry about that. */
625 32, /* size of prefetch block */
626 1, /* number of parallel prefetches */
627 1, /* Branch cost */
628 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
629 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
630 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
631 COSTS_N_INSNS (2), /* cost of FABS instruction. */
632 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
633 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
634 k6_memcpy,
635 k6_memset,
636 1, /* scalar_stmt_cost. */
637 1, /* scalar load_cost. */
638 1, /* scalar_store_cost. */
639 1, /* vec_stmt_cost. */
640 1, /* vec_to_scalar_cost. */
641 1, /* scalar_to_vec_cost. */
642 1, /* vec_align_load_cost. */
643 2, /* vec_unalign_load_cost. */
644 1, /* vec_store_cost. */
645 3, /* cond_taken_branch_cost. */
646 1, /* cond_not_taken_branch_cost. */
649 /* For some reason, Athlon deals better with REP prefix (relative to loops)
650 compared to K8. Alignment becomes important after 8 bytes for memcpy and
651 128 bytes for memset. */
652 static stringop_algs athlon_memcpy[2] = {
653 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
654 DUMMY_STRINGOP_ALGS};
655 static stringop_algs athlon_memset[2] = {
656 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
657 DUMMY_STRINGOP_ALGS};
658 static const
659 struct processor_costs athlon_cost = {
660 COSTS_N_INSNS (1), /* cost of an add instruction */
661 COSTS_N_INSNS (2), /* cost of a lea instruction */
662 COSTS_N_INSNS (1), /* variable shift costs */
663 COSTS_N_INSNS (1), /* constant shift costs */
664 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
665 COSTS_N_INSNS (5), /* HI */
666 COSTS_N_INSNS (5), /* SI */
667 COSTS_N_INSNS (5), /* DI */
668 COSTS_N_INSNS (5)}, /* other */
669 0, /* cost of multiply per each bit set */
670 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
671 COSTS_N_INSNS (26), /* HI */
672 COSTS_N_INSNS (42), /* SI */
673 COSTS_N_INSNS (74), /* DI */
674 COSTS_N_INSNS (74)}, /* other */
675 COSTS_N_INSNS (1), /* cost of movsx */
676 COSTS_N_INSNS (1), /* cost of movzx */
677 8, /* "large" insn */
678 9, /* MOVE_RATIO */
679 4, /* cost for loading QImode using movzbl */
680 {3, 4, 3}, /* cost of loading integer registers
681 in QImode, HImode and SImode.
682 Relative to reg-reg move (2). */
683 {3, 4, 3}, /* cost of storing integer registers */
684 4, /* cost of reg,reg fld/fst */
685 {4, 4, 12}, /* cost of loading fp registers
686 in SFmode, DFmode and XFmode */
687 {6, 6, 8}, /* cost of storing fp registers
688 in SFmode, DFmode and XFmode */
689 2, /* cost of moving MMX register */
690 {4, 4}, /* cost of loading MMX registers
691 in SImode and DImode */
692 {4, 4}, /* cost of storing MMX registers
693 in SImode and DImode */
694 2, /* cost of moving SSE register */
695 {4, 4, 6}, /* cost of loading SSE registers
696 in SImode, DImode and TImode */
697 {4, 4, 5}, /* cost of storing SSE registers
698 in SImode, DImode and TImode */
699 5, /* MMX or SSE register to integer */
700 64, /* size of l1 cache. */
701 256, /* size of l2 cache. */
702 64, /* size of prefetch block */
703 6, /* number of parallel prefetches */
704 5, /* Branch cost */
705 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
706 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
707 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
708 COSTS_N_INSNS (2), /* cost of FABS instruction. */
709 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
710 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
711 athlon_memcpy,
712 athlon_memset,
713 1, /* scalar_stmt_cost. */
714 1, /* scalar load_cost. */
715 1, /* scalar_store_cost. */
716 1, /* vec_stmt_cost. */
717 1, /* vec_to_scalar_cost. */
718 1, /* scalar_to_vec_cost. */
719 1, /* vec_align_load_cost. */
720 2, /* vec_unalign_load_cost. */
721 1, /* vec_store_cost. */
722 3, /* cond_taken_branch_cost. */
723 1, /* cond_not_taken_branch_cost. */
726 /* K8 has optimized REP instruction for medium sized blocks, but for very
727 small blocks it is better to use loop. For large blocks, libcall can
728 do nontemporary accesses and beat inline considerably. */
729 static stringop_algs k8_memcpy[2] = {
730 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
731 {-1, rep_prefix_4_byte, false}}},
732 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
733 {-1, libcall, false}}}};
734 static stringop_algs k8_memset[2] = {
735 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
736 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
737 {libcall, {{48, unrolled_loop, false},
738 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
739 static const
740 struct processor_costs k8_cost = {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (2), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (4), /* HI */
747 COSTS_N_INSNS (3), /* SI */
748 COSTS_N_INSNS (4), /* DI */
749 COSTS_N_INSNS (5)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (26), /* HI */
753 COSTS_N_INSNS (42), /* SI */
754 COSTS_N_INSNS (74), /* DI */
755 COSTS_N_INSNS (74)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 8, /* "large" insn */
759 9, /* MOVE_RATIO */
760 4, /* cost for loading QImode using movzbl */
761 {3, 4, 3}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {3, 4, 3}, /* cost of storing integer registers */
765 4, /* cost of reg,reg fld/fst */
766 {4, 4, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {6, 6, 8}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 2, /* cost of moving MMX register */
771 {3, 3}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {4, 4}, /* cost of storing MMX registers
774 in SImode and DImode */
775 2, /* cost of moving SSE register */
776 {4, 3, 6}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {4, 4, 5}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 5, /* MMX or SSE register to integer */
781 64, /* size of l1 cache. */
782 512, /* size of l2 cache. */
783 64, /* size of prefetch block */
784 /* New AMD processors never drop prefetches; if they cannot be performed
785 immediately, they are queued. We set number of simultaneous prefetches
786 to a large constant to reflect this (it probably is not a good idea not
787 to limit number of prefetches at all, as their execution also takes some
788 time). */
789 100, /* number of parallel prefetches */
790 3, /* Branch cost */
791 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
792 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
793 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
794 COSTS_N_INSNS (2), /* cost of FABS instruction. */
795 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
796 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
798 k8_memcpy,
799 k8_memset,
800 4, /* scalar_stmt_cost. */
801 2, /* scalar load_cost. */
802 2, /* scalar_store_cost. */
803 5, /* vec_stmt_cost. */
804 0, /* vec_to_scalar_cost. */
805 2, /* scalar_to_vec_cost. */
806 2, /* vec_align_load_cost. */
807 3, /* vec_unalign_load_cost. */
808 3, /* vec_store_cost. */
809 3, /* cond_taken_branch_cost. */
810 2, /* cond_not_taken_branch_cost. */
813 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
814 very small blocks it is better to use loop. For large blocks, libcall can
815 do nontemporary accesses and beat inline considerably. */
816 static stringop_algs amdfam10_memcpy[2] = {
817 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
818 {-1, rep_prefix_4_byte, false}}},
819 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
820 {-1, libcall, false}}}};
821 static stringop_algs amdfam10_memset[2] = {
822 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
823 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
824 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
825 {-1, libcall, false}}}};
826 struct processor_costs amdfam10_cost = {
827 COSTS_N_INSNS (1), /* cost of an add instruction */
828 COSTS_N_INSNS (2), /* cost of a lea instruction */
829 COSTS_N_INSNS (1), /* variable shift costs */
830 COSTS_N_INSNS (1), /* constant shift costs */
831 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
832 COSTS_N_INSNS (4), /* HI */
833 COSTS_N_INSNS (3), /* SI */
834 COSTS_N_INSNS (4), /* DI */
835 COSTS_N_INSNS (5)}, /* other */
836 0, /* cost of multiply per each bit set */
837 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
838 COSTS_N_INSNS (35), /* HI */
839 COSTS_N_INSNS (51), /* SI */
840 COSTS_N_INSNS (83), /* DI */
841 COSTS_N_INSNS (83)}, /* other */
842 COSTS_N_INSNS (1), /* cost of movsx */
843 COSTS_N_INSNS (1), /* cost of movzx */
844 8, /* "large" insn */
845 9, /* MOVE_RATIO */
846 4, /* cost for loading QImode using movzbl */
847 {3, 4, 3}, /* cost of loading integer registers
848 in QImode, HImode and SImode.
849 Relative to reg-reg move (2). */
850 {3, 4, 3}, /* cost of storing integer registers */
851 4, /* cost of reg,reg fld/fst */
852 {4, 4, 12}, /* cost of loading fp registers
853 in SFmode, DFmode and XFmode */
854 {6, 6, 8}, /* cost of storing fp registers
855 in SFmode, DFmode and XFmode */
856 2, /* cost of moving MMX register */
857 {3, 3}, /* cost of loading MMX registers
858 in SImode and DImode */
859 {4, 4}, /* cost of storing MMX registers
860 in SImode and DImode */
861 2, /* cost of moving SSE register */
862 {4, 4, 3}, /* cost of loading SSE registers
863 in SImode, DImode and TImode */
864 {4, 4, 5}, /* cost of storing SSE registers
865 in SImode, DImode and TImode */
866 3, /* MMX or SSE register to integer */
867 /* On K8:
868 MOVD reg64, xmmreg Double FSTORE 4
869 MOVD reg32, xmmreg Double FSTORE 4
870 On AMDFAM10:
871 MOVD reg64, xmmreg Double FADD 3
872 1/1 1/1
873 MOVD reg32, xmmreg Double FADD 3
874 1/1 1/1 */
875 64, /* size of l1 cache. */
876 512, /* size of l2 cache. */
877 64, /* size of prefetch block */
878 /* New AMD processors never drop prefetches; if they cannot be performed
879 immediately, they are queued. We set number of simultaneous prefetches
880 to a large constant to reflect this (it probably is not a good idea not
881 to limit number of prefetches at all, as their execution also takes some
882 time). */
883 100, /* number of parallel prefetches */
884 2, /* Branch cost */
885 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
886 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
887 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
888 COSTS_N_INSNS (2), /* cost of FABS instruction. */
889 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
890 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
892 amdfam10_memcpy,
893 amdfam10_memset,
894 4, /* scalar_stmt_cost. */
895 2, /* scalar load_cost. */
896 2, /* scalar_store_cost. */
897 6, /* vec_stmt_cost. */
898 0, /* vec_to_scalar_cost. */
899 2, /* scalar_to_vec_cost. */
900 2, /* vec_align_load_cost. */
901 2, /* vec_unalign_load_cost. */
902 2, /* vec_store_cost. */
903 2, /* cond_taken_branch_cost. */
904 1, /* cond_not_taken_branch_cost. */
907 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
908 very small blocks it is better to use loop. For large blocks, libcall
909 can do nontemporary accesses and beat inline considerably. */
910 static stringop_algs bdver1_memcpy[2] = {
911 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
912 {-1, rep_prefix_4_byte, false}}},
913 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
914 {-1, libcall, false}}}};
915 static stringop_algs bdver1_memset[2] = {
916 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
917 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
918 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
919 {-1, libcall, false}}}};
921 const struct processor_costs bdver1_cost = {
922 COSTS_N_INSNS (1), /* cost of an add instruction */
923 COSTS_N_INSNS (1), /* cost of a lea instruction */
924 COSTS_N_INSNS (1), /* variable shift costs */
925 COSTS_N_INSNS (1), /* constant shift costs */
926 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
927 COSTS_N_INSNS (4), /* HI */
928 COSTS_N_INSNS (4), /* SI */
929 COSTS_N_INSNS (6), /* DI */
930 COSTS_N_INSNS (6)}, /* other */
931 0, /* cost of multiply per each bit set */
932 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
933 COSTS_N_INSNS (35), /* HI */
934 COSTS_N_INSNS (51), /* SI */
935 COSTS_N_INSNS (83), /* DI */
936 COSTS_N_INSNS (83)}, /* other */
937 COSTS_N_INSNS (1), /* cost of movsx */
938 COSTS_N_INSNS (1), /* cost of movzx */
939 8, /* "large" insn */
940 9, /* MOVE_RATIO */
941 4, /* cost for loading QImode using movzbl */
942 {5, 5, 4}, /* cost of loading integer registers
943 in QImode, HImode and SImode.
944 Relative to reg-reg move (2). */
945 {4, 4, 4}, /* cost of storing integer registers */
946 2, /* cost of reg,reg fld/fst */
947 {5, 5, 12}, /* cost of loading fp registers
948 in SFmode, DFmode and XFmode */
949 {4, 4, 8}, /* cost of storing fp registers
950 in SFmode, DFmode and XFmode */
951 2, /* cost of moving MMX register */
952 {4, 4}, /* cost of loading MMX registers
953 in SImode and DImode */
954 {4, 4}, /* cost of storing MMX registers
955 in SImode and DImode */
956 2, /* cost of moving SSE register */
957 {4, 4, 4}, /* cost of loading SSE registers
958 in SImode, DImode and TImode */
959 {4, 4, 4}, /* cost of storing SSE registers
960 in SImode, DImode and TImode */
961 2, /* MMX or SSE register to integer */
962 /* On K8:
963 MOVD reg64, xmmreg Double FSTORE 4
964 MOVD reg32, xmmreg Double FSTORE 4
965 On AMDFAM10:
966 MOVD reg64, xmmreg Double FADD 3
967 1/1 1/1
968 MOVD reg32, xmmreg Double FADD 3
969 1/1 1/1 */
970 16, /* size of l1 cache. */
971 2048, /* size of l2 cache. */
972 64, /* size of prefetch block */
973 /* New AMD processors never drop prefetches; if they cannot be performed
974 immediately, they are queued. We set number of simultaneous prefetches
975 to a large constant to reflect this (it probably is not a good idea not
976 to limit number of prefetches at all, as their execution also takes some
977 time). */
978 100, /* number of parallel prefetches */
979 2, /* Branch cost */
980 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
981 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
982 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
983 COSTS_N_INSNS (2), /* cost of FABS instruction. */
984 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
985 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
987 bdver1_memcpy,
988 bdver1_memset,
989 6, /* scalar_stmt_cost. */
990 4, /* scalar load_cost. */
991 4, /* scalar_store_cost. */
992 6, /* vec_stmt_cost. */
993 0, /* vec_to_scalar_cost. */
994 2, /* scalar_to_vec_cost. */
995 4, /* vec_align_load_cost. */
996 4, /* vec_unalign_load_cost. */
997 4, /* vec_store_cost. */
998 2, /* cond_taken_branch_cost. */
999 1, /* cond_not_taken_branch_cost. */
1002 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
1003 very small blocks it is better to use loop. For large blocks, libcall
1004 can do nontemporary accesses and beat inline considerably. */
1006 static stringop_algs bdver2_memcpy[2] = {
1007 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1008 {-1, rep_prefix_4_byte, false}}},
1009 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1010 {-1, libcall, false}}}};
1011 static stringop_algs bdver2_memset[2] = {
1012 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1013 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1014 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1015 {-1, libcall, false}}}};
1017 const struct processor_costs bdver2_cost = {
1018 COSTS_N_INSNS (1), /* cost of an add instruction */
1019 COSTS_N_INSNS (1), /* cost of a lea instruction */
1020 COSTS_N_INSNS (1), /* variable shift costs */
1021 COSTS_N_INSNS (1), /* constant shift costs */
1022 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1023 COSTS_N_INSNS (4), /* HI */
1024 COSTS_N_INSNS (4), /* SI */
1025 COSTS_N_INSNS (6), /* DI */
1026 COSTS_N_INSNS (6)}, /* other */
1027 0, /* cost of multiply per each bit set */
1028 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1029 COSTS_N_INSNS (35), /* HI */
1030 COSTS_N_INSNS (51), /* SI */
1031 COSTS_N_INSNS (83), /* DI */
1032 COSTS_N_INSNS (83)}, /* other */
1033 COSTS_N_INSNS (1), /* cost of movsx */
1034 COSTS_N_INSNS (1), /* cost of movzx */
1035 8, /* "large" insn */
1036 9, /* MOVE_RATIO */
1037 4, /* cost for loading QImode using movzbl */
1038 {5, 5, 4}, /* cost of loading integer registers
1039 in QImode, HImode and SImode.
1040 Relative to reg-reg move (2). */
1041 {4, 4, 4}, /* cost of storing integer registers */
1042 2, /* cost of reg,reg fld/fst */
1043 {5, 5, 12}, /* cost of loading fp registers
1044 in SFmode, DFmode and XFmode */
1045 {4, 4, 8}, /* cost of storing fp registers
1046 in SFmode, DFmode and XFmode */
1047 2, /* cost of moving MMX register */
1048 {4, 4}, /* cost of loading MMX registers
1049 in SImode and DImode */
1050 {4, 4}, /* cost of storing MMX registers
1051 in SImode and DImode */
1052 2, /* cost of moving SSE register */
1053 {4, 4, 4}, /* cost of loading SSE registers
1054 in SImode, DImode and TImode */
1055 {4, 4, 4}, /* cost of storing SSE registers
1056 in SImode, DImode and TImode */
1057 2, /* MMX or SSE register to integer */
1058 /* On K8:
1059 MOVD reg64, xmmreg Double FSTORE 4
1060 MOVD reg32, xmmreg Double FSTORE 4
1061 On AMDFAM10:
1062 MOVD reg64, xmmreg Double FADD 3
1063 1/1 1/1
1064 MOVD reg32, xmmreg Double FADD 3
1065 1/1 1/1 */
1066 16, /* size of l1 cache. */
1067 2048, /* size of l2 cache. */
1068 64, /* size of prefetch block */
1069 /* New AMD processors never drop prefetches; if they cannot be performed
1070 immediately, they are queued. We set number of simultaneous prefetches
1071 to a large constant to reflect this (it probably is not a good idea not
1072 to limit number of prefetches at all, as their execution also takes some
1073 time). */
1074 100, /* number of parallel prefetches */
1075 2, /* Branch cost */
1076 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1077 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1078 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1079 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1080 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1081 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1083 bdver2_memcpy,
1084 bdver2_memset,
1085 6, /* scalar_stmt_cost. */
1086 4, /* scalar load_cost. */
1087 4, /* scalar_store_cost. */
1088 6, /* vec_stmt_cost. */
1089 0, /* vec_to_scalar_cost. */
1090 2, /* scalar_to_vec_cost. */
1091 4, /* vec_align_load_cost. */
1092 4, /* vec_unalign_load_cost. */
1093 4, /* vec_store_cost. */
1094 2, /* cond_taken_branch_cost. */
1095 1, /* cond_not_taken_branch_cost. */
1099 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1100 very small blocks it is better to use loop. For large blocks, libcall
1101 can do nontemporary accesses and beat inline considerably. */
1102 static stringop_algs bdver3_memcpy[2] = {
1103 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1104 {-1, rep_prefix_4_byte, false}}},
1105 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1106 {-1, libcall, false}}}};
1107 static stringop_algs bdver3_memset[2] = {
1108 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1109 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1110 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1111 {-1, libcall, false}}}};
1112 struct processor_costs bdver3_cost = {
1113 COSTS_N_INSNS (1), /* cost of an add instruction */
1114 COSTS_N_INSNS (1), /* cost of a lea instruction */
1115 COSTS_N_INSNS (1), /* variable shift costs */
1116 COSTS_N_INSNS (1), /* constant shift costs */
1117 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1118 COSTS_N_INSNS (4), /* HI */
1119 COSTS_N_INSNS (4), /* SI */
1120 COSTS_N_INSNS (6), /* DI */
1121 COSTS_N_INSNS (6)}, /* other */
1122 0, /* cost of multiply per each bit set */
1123 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1124 COSTS_N_INSNS (35), /* HI */
1125 COSTS_N_INSNS (51), /* SI */
1126 COSTS_N_INSNS (83), /* DI */
1127 COSTS_N_INSNS (83)}, /* other */
1128 COSTS_N_INSNS (1), /* cost of movsx */
1129 COSTS_N_INSNS (1), /* cost of movzx */
1130 8, /* "large" insn */
1131 9, /* MOVE_RATIO */
1132 4, /* cost for loading QImode using movzbl */
1133 {5, 5, 4}, /* cost of loading integer registers
1134 in QImode, HImode and SImode.
1135 Relative to reg-reg move (2). */
1136 {4, 4, 4}, /* cost of storing integer registers */
1137 2, /* cost of reg,reg fld/fst */
1138 {5, 5, 12}, /* cost of loading fp registers
1139 in SFmode, DFmode and XFmode */
1140 {4, 4, 8}, /* cost of storing fp registers
1141 in SFmode, DFmode and XFmode */
1142 2, /* cost of moving MMX register */
1143 {4, 4}, /* cost of loading MMX registers
1144 in SImode and DImode */
1145 {4, 4}, /* cost of storing MMX registers
1146 in SImode and DImode */
1147 2, /* cost of moving SSE register */
1148 {4, 4, 4}, /* cost of loading SSE registers
1149 in SImode, DImode and TImode */
1150 {4, 4, 4}, /* cost of storing SSE registers
1151 in SImode, DImode and TImode */
1152 2, /* MMX or SSE register to integer */
1153 16, /* size of l1 cache. */
1154 2048, /* size of l2 cache. */
1155 64, /* size of prefetch block */
1156 /* New AMD processors never drop prefetches; if they cannot be performed
1157 immediately, they are queued. We set number of simultaneous prefetches
1158 to a large constant to reflect this (it probably is not a good idea not
1159 to limit number of prefetches at all, as their execution also takes some
1160 time). */
1161 100, /* number of parallel prefetches */
1162 2, /* Branch cost */
1163 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1164 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1165 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1166 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1167 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1168 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1170 bdver3_memcpy,
1171 bdver3_memset,
1172 6, /* scalar_stmt_cost. */
1173 4, /* scalar load_cost. */
1174 4, /* scalar_store_cost. */
1175 6, /* vec_stmt_cost. */
1176 0, /* vec_to_scalar_cost. */
1177 2, /* scalar_to_vec_cost. */
1178 4, /* vec_align_load_cost. */
1179 4, /* vec_unalign_load_cost. */
1180 4, /* vec_store_cost. */
1181 2, /* cond_taken_branch_cost. */
1182 1, /* cond_not_taken_branch_cost. */
1185 /* BDVER4 has optimized REP instruction for medium sized blocks, but for
1186 very small blocks it is better to use loop. For large blocks, libcall
1187 can do nontemporary accesses and beat inline considerably. */
1188 static stringop_algs bdver4_memcpy[2] = {
1189 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1190 {-1, rep_prefix_4_byte, false}}},
1191 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1192 {-1, libcall, false}}}};
1193 static stringop_algs bdver4_memset[2] = {
1194 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1195 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1196 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1197 {-1, libcall, false}}}};
1198 struct processor_costs bdver4_cost = {
1199 COSTS_N_INSNS (1), /* cost of an add instruction */
1200 COSTS_N_INSNS (1), /* cost of a lea instruction */
1201 COSTS_N_INSNS (1), /* variable shift costs */
1202 COSTS_N_INSNS (1), /* constant shift costs */
1203 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1204 COSTS_N_INSNS (4), /* HI */
1205 COSTS_N_INSNS (4), /* SI */
1206 COSTS_N_INSNS (6), /* DI */
1207 COSTS_N_INSNS (6)}, /* other */
1208 0, /* cost of multiply per each bit set */
1209 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1210 COSTS_N_INSNS (35), /* HI */
1211 COSTS_N_INSNS (51), /* SI */
1212 COSTS_N_INSNS (83), /* DI */
1213 COSTS_N_INSNS (83)}, /* other */
1214 COSTS_N_INSNS (1), /* cost of movsx */
1215 COSTS_N_INSNS (1), /* cost of movzx */
1216 8, /* "large" insn */
1217 9, /* MOVE_RATIO */
1218 4, /* cost for loading QImode using movzbl */
1219 {5, 5, 4}, /* cost of loading integer registers
1220 in QImode, HImode and SImode.
1221 Relative to reg-reg move (2). */
1222 {4, 4, 4}, /* cost of storing integer registers */
1223 2, /* cost of reg,reg fld/fst */
1224 {5, 5, 12}, /* cost of loading fp registers
1225 in SFmode, DFmode and XFmode */
1226 {4, 4, 8}, /* cost of storing fp registers
1227 in SFmode, DFmode and XFmode */
1228 2, /* cost of moving MMX register */
1229 {4, 4}, /* cost of loading MMX registers
1230 in SImode and DImode */
1231 {4, 4}, /* cost of storing MMX registers
1232 in SImode and DImode */
1233 2, /* cost of moving SSE register */
1234 {4, 4, 4}, /* cost of loading SSE registers
1235 in SImode, DImode and TImode */
1236 {4, 4, 4}, /* cost of storing SSE registers
1237 in SImode, DImode and TImode */
1238 2, /* MMX or SSE register to integer */
1239 16, /* size of l1 cache. */
1240 2048, /* size of l2 cache. */
1241 64, /* size of prefetch block */
1242 /* New AMD processors never drop prefetches; if they cannot be performed
1243 immediately, they are queued. We set number of simultaneous prefetches
1244 to a large constant to reflect this (it probably is not a good idea not
1245 to limit number of prefetches at all, as their execution also takes some
1246 time). */
1247 100, /* number of parallel prefetches */
1248 2, /* Branch cost */
1249 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1250 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1251 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1252 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1253 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1254 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1256 bdver4_memcpy,
1257 bdver4_memset,
1258 6, /* scalar_stmt_cost. */
1259 4, /* scalar load_cost. */
1260 4, /* scalar_store_cost. */
1261 6, /* vec_stmt_cost. */
1262 0, /* vec_to_scalar_cost. */
1263 2, /* scalar_to_vec_cost. */
1264 4, /* vec_align_load_cost. */
1265 4, /* vec_unalign_load_cost. */
1266 4, /* vec_store_cost. */
1267 2, /* cond_taken_branch_cost. */
1268 1, /* cond_not_taken_branch_cost. */
1271 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1272 very small blocks it is better to use loop. For large blocks, libcall can
1273 do nontemporary accesses and beat inline considerably. */
1274 static stringop_algs btver1_memcpy[2] = {
1275 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1276 {-1, rep_prefix_4_byte, false}}},
1277 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1278 {-1, libcall, false}}}};
1279 static stringop_algs btver1_memset[2] = {
1280 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1281 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1282 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1283 {-1, libcall, false}}}};
1284 const struct processor_costs btver1_cost = {
1285 COSTS_N_INSNS (1), /* cost of an add instruction */
1286 COSTS_N_INSNS (2), /* cost of a lea instruction */
1287 COSTS_N_INSNS (1), /* variable shift costs */
1288 COSTS_N_INSNS (1), /* constant shift costs */
1289 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1290 COSTS_N_INSNS (4), /* HI */
1291 COSTS_N_INSNS (3), /* SI */
1292 COSTS_N_INSNS (4), /* DI */
1293 COSTS_N_INSNS (5)}, /* other */
1294 0, /* cost of multiply per each bit set */
1295 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1296 COSTS_N_INSNS (35), /* HI */
1297 COSTS_N_INSNS (51), /* SI */
1298 COSTS_N_INSNS (83), /* DI */
1299 COSTS_N_INSNS (83)}, /* other */
1300 COSTS_N_INSNS (1), /* cost of movsx */
1301 COSTS_N_INSNS (1), /* cost of movzx */
1302 8, /* "large" insn */
1303 9, /* MOVE_RATIO */
1304 4, /* cost for loading QImode using movzbl */
1305 {3, 4, 3}, /* cost of loading integer registers
1306 in QImode, HImode and SImode.
1307 Relative to reg-reg move (2). */
1308 {3, 4, 3}, /* cost of storing integer registers */
1309 4, /* cost of reg,reg fld/fst */
1310 {4, 4, 12}, /* cost of loading fp registers
1311 in SFmode, DFmode and XFmode */
1312 {6, 6, 8}, /* cost of storing fp registers
1313 in SFmode, DFmode and XFmode */
1314 2, /* cost of moving MMX register */
1315 {3, 3}, /* cost of loading MMX registers
1316 in SImode and DImode */
1317 {4, 4}, /* cost of storing MMX registers
1318 in SImode and DImode */
1319 2, /* cost of moving SSE register */
1320 {4, 4, 3}, /* cost of loading SSE registers
1321 in SImode, DImode and TImode */
1322 {4, 4, 5}, /* cost of storing SSE registers
1323 in SImode, DImode and TImode */
1324 3, /* MMX or SSE register to integer */
1325 /* On K8:
1326 MOVD reg64, xmmreg Double FSTORE 4
1327 MOVD reg32, xmmreg Double FSTORE 4
1328 On AMDFAM10:
1329 MOVD reg64, xmmreg Double FADD 3
1330 1/1 1/1
1331 MOVD reg32, xmmreg Double FADD 3
1332 1/1 1/1 */
1333 32, /* size of l1 cache. */
1334 512, /* size of l2 cache. */
1335 64, /* size of prefetch block */
1336 100, /* number of parallel prefetches */
1337 2, /* Branch cost */
1338 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1339 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1340 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1341 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1342 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1343 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1345 btver1_memcpy,
1346 btver1_memset,
1347 4, /* scalar_stmt_cost. */
1348 2, /* scalar load_cost. */
1349 2, /* scalar_store_cost. */
1350 6, /* vec_stmt_cost. */
1351 0, /* vec_to_scalar_cost. */
1352 2, /* scalar_to_vec_cost. */
1353 2, /* vec_align_load_cost. */
1354 2, /* vec_unalign_load_cost. */
1355 2, /* vec_store_cost. */
1356 2, /* cond_taken_branch_cost. */
1357 1, /* cond_not_taken_branch_cost. */
1360 static stringop_algs btver2_memcpy[2] = {
1361 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1362 {-1, rep_prefix_4_byte, false}}},
1363 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1364 {-1, libcall, false}}}};
1365 static stringop_algs btver2_memset[2] = {
1366 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1367 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1368 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1369 {-1, libcall, false}}}};
1370 const struct processor_costs btver2_cost = {
1371 COSTS_N_INSNS (1), /* cost of an add instruction */
1372 COSTS_N_INSNS (2), /* cost of a lea instruction */
1373 COSTS_N_INSNS (1), /* variable shift costs */
1374 COSTS_N_INSNS (1), /* constant shift costs */
1375 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1376 COSTS_N_INSNS (4), /* HI */
1377 COSTS_N_INSNS (3), /* SI */
1378 COSTS_N_INSNS (4), /* DI */
1379 COSTS_N_INSNS (5)}, /* other */
1380 0, /* cost of multiply per each bit set */
1381 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1382 COSTS_N_INSNS (35), /* HI */
1383 COSTS_N_INSNS (51), /* SI */
1384 COSTS_N_INSNS (83), /* DI */
1385 COSTS_N_INSNS (83)}, /* other */
1386 COSTS_N_INSNS (1), /* cost of movsx */
1387 COSTS_N_INSNS (1), /* cost of movzx */
1388 8, /* "large" insn */
1389 9, /* MOVE_RATIO */
1390 4, /* cost for loading QImode using movzbl */
1391 {3, 4, 3}, /* cost of loading integer registers
1392 in QImode, HImode and SImode.
1393 Relative to reg-reg move (2). */
1394 {3, 4, 3}, /* cost of storing integer registers */
1395 4, /* cost of reg,reg fld/fst */
1396 {4, 4, 12}, /* cost of loading fp registers
1397 in SFmode, DFmode and XFmode */
1398 {6, 6, 8}, /* cost of storing fp registers
1399 in SFmode, DFmode and XFmode */
1400 2, /* cost of moving MMX register */
1401 {3, 3}, /* cost of loading MMX registers
1402 in SImode and DImode */
1403 {4, 4}, /* cost of storing MMX registers
1404 in SImode and DImode */
1405 2, /* cost of moving SSE register */
1406 {4, 4, 3}, /* cost of loading SSE registers
1407 in SImode, DImode and TImode */
1408 {4, 4, 5}, /* cost of storing SSE registers
1409 in SImode, DImode and TImode */
1410 3, /* MMX or SSE register to integer */
1411 /* On K8:
1412 MOVD reg64, xmmreg Double FSTORE 4
1413 MOVD reg32, xmmreg Double FSTORE 4
1414 On AMDFAM10:
1415 MOVD reg64, xmmreg Double FADD 3
1416 1/1 1/1
1417 MOVD reg32, xmmreg Double FADD 3
1418 1/1 1/1 */
1419 32, /* size of l1 cache. */
1420 2048, /* size of l2 cache. */
1421 64, /* size of prefetch block */
1422 100, /* number of parallel prefetches */
1423 2, /* Branch cost */
1424 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1425 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1426 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1427 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1428 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1429 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1430 btver2_memcpy,
1431 btver2_memset,
1432 4, /* scalar_stmt_cost. */
1433 2, /* scalar load_cost. */
1434 2, /* scalar_store_cost. */
1435 6, /* vec_stmt_cost. */
1436 0, /* vec_to_scalar_cost. */
1437 2, /* scalar_to_vec_cost. */
1438 2, /* vec_align_load_cost. */
1439 2, /* vec_unalign_load_cost. */
1440 2, /* vec_store_cost. */
1441 2, /* cond_taken_branch_cost. */
1442 1, /* cond_not_taken_branch_cost. */
1445 static stringop_algs pentium4_memcpy[2] = {
1446 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1447 DUMMY_STRINGOP_ALGS};
1448 static stringop_algs pentium4_memset[2] = {
1449 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1450 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1451 DUMMY_STRINGOP_ALGS};
1453 static const
1454 struct processor_costs pentium4_cost = {
1455 COSTS_N_INSNS (1), /* cost of an add instruction */
1456 COSTS_N_INSNS (3), /* cost of a lea instruction */
1457 COSTS_N_INSNS (4), /* variable shift costs */
1458 COSTS_N_INSNS (4), /* constant shift costs */
1459 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1460 COSTS_N_INSNS (15), /* HI */
1461 COSTS_N_INSNS (15), /* SI */
1462 COSTS_N_INSNS (15), /* DI */
1463 COSTS_N_INSNS (15)}, /* other */
1464 0, /* cost of multiply per each bit set */
1465 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1466 COSTS_N_INSNS (56), /* HI */
1467 COSTS_N_INSNS (56), /* SI */
1468 COSTS_N_INSNS (56), /* DI */
1469 COSTS_N_INSNS (56)}, /* other */
1470 COSTS_N_INSNS (1), /* cost of movsx */
1471 COSTS_N_INSNS (1), /* cost of movzx */
1472 16, /* "large" insn */
1473 6, /* MOVE_RATIO */
1474 2, /* cost for loading QImode using movzbl */
1475 {4, 5, 4}, /* cost of loading integer registers
1476 in QImode, HImode and SImode.
1477 Relative to reg-reg move (2). */
1478 {2, 3, 2}, /* cost of storing integer registers */
1479 2, /* cost of reg,reg fld/fst */
1480 {2, 2, 6}, /* cost of loading fp registers
1481 in SFmode, DFmode and XFmode */
1482 {4, 4, 6}, /* cost of storing fp registers
1483 in SFmode, DFmode and XFmode */
1484 2, /* cost of moving MMX register */
1485 {2, 2}, /* cost of loading MMX registers
1486 in SImode and DImode */
1487 {2, 2}, /* cost of storing MMX registers
1488 in SImode and DImode */
1489 12, /* cost of moving SSE register */
1490 {12, 12, 12}, /* cost of loading SSE registers
1491 in SImode, DImode and TImode */
1492 {2, 2, 8}, /* cost of storing SSE registers
1493 in SImode, DImode and TImode */
1494 10, /* MMX or SSE register to integer */
1495 8, /* size of l1 cache. */
1496 256, /* size of l2 cache. */
1497 64, /* size of prefetch block */
1498 6, /* number of parallel prefetches */
1499 2, /* Branch cost */
1500 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1501 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1502 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1503 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1504 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1505 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1506 pentium4_memcpy,
1507 pentium4_memset,
1508 1, /* scalar_stmt_cost. */
1509 1, /* scalar load_cost. */
1510 1, /* scalar_store_cost. */
1511 1, /* vec_stmt_cost. */
1512 1, /* vec_to_scalar_cost. */
1513 1, /* scalar_to_vec_cost. */
1514 1, /* vec_align_load_cost. */
1515 2, /* vec_unalign_load_cost. */
1516 1, /* vec_store_cost. */
1517 3, /* cond_taken_branch_cost. */
1518 1, /* cond_not_taken_branch_cost. */
1521 static stringop_algs nocona_memcpy[2] = {
1522 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1523 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1524 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1526 static stringop_algs nocona_memset[2] = {
1527 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1528 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1529 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1530 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1532 static const
1533 struct processor_costs nocona_cost = {
1534 COSTS_N_INSNS (1), /* cost of an add instruction */
1535 COSTS_N_INSNS (1), /* cost of a lea instruction */
1536 COSTS_N_INSNS (1), /* variable shift costs */
1537 COSTS_N_INSNS (1), /* constant shift costs */
1538 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1539 COSTS_N_INSNS (10), /* HI */
1540 COSTS_N_INSNS (10), /* SI */
1541 COSTS_N_INSNS (10), /* DI */
1542 COSTS_N_INSNS (10)}, /* other */
1543 0, /* cost of multiply per each bit set */
1544 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1545 COSTS_N_INSNS (66), /* HI */
1546 COSTS_N_INSNS (66), /* SI */
1547 COSTS_N_INSNS (66), /* DI */
1548 COSTS_N_INSNS (66)}, /* other */
1549 COSTS_N_INSNS (1), /* cost of movsx */
1550 COSTS_N_INSNS (1), /* cost of movzx */
1551 16, /* "large" insn */
1552 17, /* MOVE_RATIO */
1553 4, /* cost for loading QImode using movzbl */
1554 {4, 4, 4}, /* cost of loading integer registers
1555 in QImode, HImode and SImode.
1556 Relative to reg-reg move (2). */
1557 {4, 4, 4}, /* cost of storing integer registers */
1558 3, /* cost of reg,reg fld/fst */
1559 {12, 12, 12}, /* cost of loading fp registers
1560 in SFmode, DFmode and XFmode */
1561 {4, 4, 4}, /* cost of storing fp registers
1562 in SFmode, DFmode and XFmode */
1563 6, /* cost of moving MMX register */
1564 {12, 12}, /* cost of loading MMX registers
1565 in SImode and DImode */
1566 {12, 12}, /* cost of storing MMX registers
1567 in SImode and DImode */
1568 6, /* cost of moving SSE register */
1569 {12, 12, 12}, /* cost of loading SSE registers
1570 in SImode, DImode and TImode */
1571 {12, 12, 12}, /* cost of storing SSE registers
1572 in SImode, DImode and TImode */
1573 8, /* MMX or SSE register to integer */
1574 8, /* size of l1 cache. */
1575 1024, /* size of l2 cache. */
1576 64, /* size of prefetch block */
1577 8, /* number of parallel prefetches */
1578 1, /* Branch cost */
1579 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1580 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1581 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1582 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1583 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1584 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1585 nocona_memcpy,
1586 nocona_memset,
1587 1, /* scalar_stmt_cost. */
1588 1, /* scalar load_cost. */
1589 1, /* scalar_store_cost. */
1590 1, /* vec_stmt_cost. */
1591 1, /* vec_to_scalar_cost. */
1592 1, /* scalar_to_vec_cost. */
1593 1, /* vec_align_load_cost. */
1594 2, /* vec_unalign_load_cost. */
1595 1, /* vec_store_cost. */
1596 3, /* cond_taken_branch_cost. */
1597 1, /* cond_not_taken_branch_cost. */
1600 static stringop_algs atom_memcpy[2] = {
1601 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1602 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1603 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1604 static stringop_algs atom_memset[2] = {
1605 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1606 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1607 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1608 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1609 static const
1610 struct processor_costs atom_cost = {
1611 COSTS_N_INSNS (1), /* cost of an add instruction */
1612 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1613 COSTS_N_INSNS (1), /* variable shift costs */
1614 COSTS_N_INSNS (1), /* constant shift costs */
1615 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1616 COSTS_N_INSNS (4), /* HI */
1617 COSTS_N_INSNS (3), /* SI */
1618 COSTS_N_INSNS (4), /* DI */
1619 COSTS_N_INSNS (2)}, /* other */
1620 0, /* cost of multiply per each bit set */
1621 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1622 COSTS_N_INSNS (26), /* HI */
1623 COSTS_N_INSNS (42), /* SI */
1624 COSTS_N_INSNS (74), /* DI */
1625 COSTS_N_INSNS (74)}, /* other */
1626 COSTS_N_INSNS (1), /* cost of movsx */
1627 COSTS_N_INSNS (1), /* cost of movzx */
1628 8, /* "large" insn */
1629 17, /* MOVE_RATIO */
1630 4, /* cost for loading QImode using movzbl */
1631 {4, 4, 4}, /* cost of loading integer registers
1632 in QImode, HImode and SImode.
1633 Relative to reg-reg move (2). */
1634 {4, 4, 4}, /* cost of storing integer registers */
1635 4, /* cost of reg,reg fld/fst */
1636 {12, 12, 12}, /* cost of loading fp registers
1637 in SFmode, DFmode and XFmode */
1638 {6, 6, 8}, /* cost of storing fp registers
1639 in SFmode, DFmode and XFmode */
1640 2, /* cost of moving MMX register */
1641 {8, 8}, /* cost of loading MMX registers
1642 in SImode and DImode */
1643 {8, 8}, /* cost of storing MMX registers
1644 in SImode and DImode */
1645 2, /* cost of moving SSE register */
1646 {8, 8, 8}, /* cost of loading SSE registers
1647 in SImode, DImode and TImode */
1648 {8, 8, 8}, /* cost of storing SSE registers
1649 in SImode, DImode and TImode */
1650 5, /* MMX or SSE register to integer */
1651 32, /* size of l1 cache. */
1652 256, /* size of l2 cache. */
1653 64, /* size of prefetch block */
1654 6, /* number of parallel prefetches */
1655 3, /* Branch cost */
1656 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1657 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1658 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1659 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1660 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1661 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1662 atom_memcpy,
1663 atom_memset,
1664 1, /* scalar_stmt_cost. */
1665 1, /* scalar load_cost. */
1666 1, /* scalar_store_cost. */
1667 1, /* vec_stmt_cost. */
1668 1, /* vec_to_scalar_cost. */
1669 1, /* scalar_to_vec_cost. */
1670 1, /* vec_align_load_cost. */
1671 2, /* vec_unalign_load_cost. */
1672 1, /* vec_store_cost. */
1673 3, /* cond_taken_branch_cost. */
1674 1, /* cond_not_taken_branch_cost. */
1677 static stringop_algs slm_memcpy[2] = {
1678 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1679 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1680 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1681 static stringop_algs slm_memset[2] = {
1682 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1683 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1684 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1685 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1686 static const
1687 struct processor_costs slm_cost = {
1688 COSTS_N_INSNS (1), /* cost of an add instruction */
1689 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1690 COSTS_N_INSNS (1), /* variable shift costs */
1691 COSTS_N_INSNS (1), /* constant shift costs */
1692 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1693 COSTS_N_INSNS (3), /* HI */
1694 COSTS_N_INSNS (3), /* SI */
1695 COSTS_N_INSNS (4), /* DI */
1696 COSTS_N_INSNS (2)}, /* other */
1697 0, /* cost of multiply per each bit set */
1698 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1699 COSTS_N_INSNS (26), /* HI */
1700 COSTS_N_INSNS (42), /* SI */
1701 COSTS_N_INSNS (74), /* DI */
1702 COSTS_N_INSNS (74)}, /* other */
1703 COSTS_N_INSNS (1), /* cost of movsx */
1704 COSTS_N_INSNS (1), /* cost of movzx */
1705 8, /* "large" insn */
1706 17, /* MOVE_RATIO */
1707 4, /* cost for loading QImode using movzbl */
1708 {4, 4, 4}, /* cost of loading integer registers
1709 in QImode, HImode and SImode.
1710 Relative to reg-reg move (2). */
1711 {4, 4, 4}, /* cost of storing integer registers */
1712 4, /* cost of reg,reg fld/fst */
1713 {12, 12, 12}, /* cost of loading fp registers
1714 in SFmode, DFmode and XFmode */
1715 {6, 6, 8}, /* cost of storing fp registers
1716 in SFmode, DFmode and XFmode */
1717 2, /* cost of moving MMX register */
1718 {8, 8}, /* cost of loading MMX registers
1719 in SImode and DImode */
1720 {8, 8}, /* cost of storing MMX registers
1721 in SImode and DImode */
1722 2, /* cost of moving SSE register */
1723 {8, 8, 8}, /* cost of loading SSE registers
1724 in SImode, DImode and TImode */
1725 {8, 8, 8}, /* cost of storing SSE registers
1726 in SImode, DImode and TImode */
1727 5, /* MMX or SSE register to integer */
1728 32, /* size of l1 cache. */
1729 256, /* size of l2 cache. */
1730 64, /* size of prefetch block */
1731 6, /* number of parallel prefetches */
1732 3, /* Branch cost */
1733 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1734 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1735 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1736 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1737 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1738 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1739 slm_memcpy,
1740 slm_memset,
1741 1, /* scalar_stmt_cost. */
1742 1, /* scalar load_cost. */
1743 1, /* scalar_store_cost. */
1744 1, /* vec_stmt_cost. */
1745 4, /* vec_to_scalar_cost. */
1746 1, /* scalar_to_vec_cost. */
1747 1, /* vec_align_load_cost. */
1748 2, /* vec_unalign_load_cost. */
1749 1, /* vec_store_cost. */
1750 3, /* cond_taken_branch_cost. */
1751 1, /* cond_not_taken_branch_cost. */
1754 static stringop_algs intel_memcpy[2] = {
1755 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1756 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1757 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1758 static stringop_algs intel_memset[2] = {
1759 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1760 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1761 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1762 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1763 static const
1764 struct processor_costs intel_cost = {
1765 COSTS_N_INSNS (1), /* cost of an add instruction */
1766 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1767 COSTS_N_INSNS (1), /* variable shift costs */
1768 COSTS_N_INSNS (1), /* constant shift costs */
1769 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1770 COSTS_N_INSNS (3), /* HI */
1771 COSTS_N_INSNS (3), /* SI */
1772 COSTS_N_INSNS (4), /* DI */
1773 COSTS_N_INSNS (2)}, /* other */
1774 0, /* cost of multiply per each bit set */
1775 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1776 COSTS_N_INSNS (26), /* HI */
1777 COSTS_N_INSNS (42), /* SI */
1778 COSTS_N_INSNS (74), /* DI */
1779 COSTS_N_INSNS (74)}, /* other */
1780 COSTS_N_INSNS (1), /* cost of movsx */
1781 COSTS_N_INSNS (1), /* cost of movzx */
1782 8, /* "large" insn */
1783 17, /* MOVE_RATIO */
1784 4, /* cost for loading QImode using movzbl */
1785 {4, 4, 4}, /* cost of loading integer registers
1786 in QImode, HImode and SImode.
1787 Relative to reg-reg move (2). */
1788 {4, 4, 4}, /* cost of storing integer registers */
1789 4, /* cost of reg,reg fld/fst */
1790 {12, 12, 12}, /* cost of loading fp registers
1791 in SFmode, DFmode and XFmode */
1792 {6, 6, 8}, /* cost of storing fp registers
1793 in SFmode, DFmode and XFmode */
1794 2, /* cost of moving MMX register */
1795 {8, 8}, /* cost of loading MMX registers
1796 in SImode and DImode */
1797 {8, 8}, /* cost of storing MMX registers
1798 in SImode and DImode */
1799 2, /* cost of moving SSE register */
1800 {8, 8, 8}, /* cost of loading SSE registers
1801 in SImode, DImode and TImode */
1802 {8, 8, 8}, /* cost of storing SSE registers
1803 in SImode, DImode and TImode */
1804 5, /* MMX or SSE register to integer */
1805 32, /* size of l1 cache. */
1806 256, /* size of l2 cache. */
1807 64, /* size of prefetch block */
1808 6, /* number of parallel prefetches */
1809 3, /* Branch cost */
1810 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1811 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1812 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1813 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1814 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1815 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1816 intel_memcpy,
1817 intel_memset,
1818 1, /* scalar_stmt_cost. */
1819 1, /* scalar load_cost. */
1820 1, /* scalar_store_cost. */
1821 1, /* vec_stmt_cost. */
1822 4, /* vec_to_scalar_cost. */
1823 1, /* scalar_to_vec_cost. */
1824 1, /* vec_align_load_cost. */
1825 2, /* vec_unalign_load_cost. */
1826 1, /* vec_store_cost. */
1827 3, /* cond_taken_branch_cost. */
1828 1, /* cond_not_taken_branch_cost. */
1831 /* Generic should produce code tuned for Core-i7 (and newer chips)
1832 and btver1 (and newer chips). */
1834 static stringop_algs generic_memcpy[2] = {
1835 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1836 {-1, libcall, false}}},
1837 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1838 {-1, libcall, false}}}};
1839 static stringop_algs generic_memset[2] = {
1840 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1841 {-1, libcall, false}}},
1842 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1843 {-1, libcall, false}}}};
1844 static const
1845 struct processor_costs generic_cost = {
1846 COSTS_N_INSNS (1), /* cost of an add instruction */
1847 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1848 this cost, however, our current implementation of synth_mult results in
1849 the use of unnecessary temporary registers, causing regressions on several
1850 SPECfp benchmarks. */
1851 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1852 COSTS_N_INSNS (1), /* variable shift costs */
1853 COSTS_N_INSNS (1), /* constant shift costs */
1854 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1855 COSTS_N_INSNS (4), /* HI */
1856 COSTS_N_INSNS (3), /* SI */
1857 COSTS_N_INSNS (4), /* DI */
1858 COSTS_N_INSNS (2)}, /* other */
1859 0, /* cost of multiply per each bit set */
1860 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1861 COSTS_N_INSNS (26), /* HI */
1862 COSTS_N_INSNS (42), /* SI */
1863 COSTS_N_INSNS (74), /* DI */
1864 COSTS_N_INSNS (74)}, /* other */
1865 COSTS_N_INSNS (1), /* cost of movsx */
1866 COSTS_N_INSNS (1), /* cost of movzx */
1867 8, /* "large" insn */
1868 17, /* MOVE_RATIO */
1869 4, /* cost for loading QImode using movzbl */
1870 {4, 4, 4}, /* cost of loading integer registers
1871 in QImode, HImode and SImode.
1872 Relative to reg-reg move (2). */
1873 {4, 4, 4}, /* cost of storing integer registers */
1874 4, /* cost of reg,reg fld/fst */
1875 {12, 12, 12}, /* cost of loading fp registers
1876 in SFmode, DFmode and XFmode */
1877 {6, 6, 8}, /* cost of storing fp registers
1878 in SFmode, DFmode and XFmode */
1879 2, /* cost of moving MMX register */
1880 {8, 8}, /* cost of loading MMX registers
1881 in SImode and DImode */
1882 {8, 8}, /* cost of storing MMX registers
1883 in SImode and DImode */
1884 2, /* cost of moving SSE register */
1885 {8, 8, 8}, /* cost of loading SSE registers
1886 in SImode, DImode and TImode */
1887 {8, 8, 8}, /* cost of storing SSE registers
1888 in SImode, DImode and TImode */
1889 5, /* MMX or SSE register to integer */
1890 32, /* size of l1 cache. */
1891 512, /* size of l2 cache. */
1892 64, /* size of prefetch block */
1893 6, /* number of parallel prefetches */
1894 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1895 value is increased to the perhaps more appropriate value of 5. */
1896 3, /* Branch cost */
1897 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1898 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1899 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1900 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1901 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1902 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1903 generic_memcpy,
1904 generic_memset,
1905 1, /* scalar_stmt_cost. */
1906 1, /* scalar load_cost. */
1907 1, /* scalar_store_cost. */
1908 1, /* vec_stmt_cost. */
1909 1, /* vec_to_scalar_cost. */
1910 1, /* scalar_to_vec_cost. */
1911 1, /* vec_align_load_cost. */
1912 2, /* vec_unalign_load_cost. */
1913 1, /* vec_store_cost. */
1914 3, /* cond_taken_branch_cost. */
1915 1, /* cond_not_taken_branch_cost. */
1918 /* core_cost should produce code tuned for the Core family of CPUs. */
1919 static stringop_algs core_memcpy[2] = {
1920 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1921 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1922 {-1, libcall, false}}}};
1923 static stringop_algs core_memset[2] = {
1924 {libcall, {{6, loop_1_byte, true},
1925 {24, loop, true},
1926 {8192, rep_prefix_4_byte, true},
1927 {-1, libcall, false}}},
1928 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1929 {-1, libcall, false}}}};
1931 static const
1932 struct processor_costs core_cost = {
1933 COSTS_N_INSNS (1), /* cost of an add instruction */
1934 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1935 this cost, however, our current implementation of synth_mult results in
1936 the use of unnecessary temporary registers, causing regressions on several
1937 SPECfp benchmarks. */
1938 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1939 COSTS_N_INSNS (1), /* variable shift costs */
1940 COSTS_N_INSNS (1), /* constant shift costs */
1941 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1942 COSTS_N_INSNS (4), /* HI */
1943 COSTS_N_INSNS (3), /* SI */
1944 COSTS_N_INSNS (4), /* DI */
1945 COSTS_N_INSNS (2)}, /* other */
1946 0, /* cost of multiply per each bit set */
1947 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1948 COSTS_N_INSNS (26), /* HI */
1949 COSTS_N_INSNS (42), /* SI */
1950 COSTS_N_INSNS (74), /* DI */
1951 COSTS_N_INSNS (74)}, /* other */
1952 COSTS_N_INSNS (1), /* cost of movsx */
1953 COSTS_N_INSNS (1), /* cost of movzx */
1954 8, /* "large" insn */
1955 17, /* MOVE_RATIO */
1956 4, /* cost for loading QImode using movzbl */
1957 {4, 4, 4}, /* cost of loading integer registers
1958 in QImode, HImode and SImode.
1959 Relative to reg-reg move (2). */
1960 {4, 4, 4}, /* cost of storing integer registers */
1961 4, /* cost of reg,reg fld/fst */
1962 {12, 12, 12}, /* cost of loading fp registers
1963 in SFmode, DFmode and XFmode */
1964 {6, 6, 8}, /* cost of storing fp registers
1965 in SFmode, DFmode and XFmode */
1966 2, /* cost of moving MMX register */
1967 {8, 8}, /* cost of loading MMX registers
1968 in SImode and DImode */
1969 {8, 8}, /* cost of storing MMX registers
1970 in SImode and DImode */
1971 2, /* cost of moving SSE register */
1972 {8, 8, 8}, /* cost of loading SSE registers
1973 in SImode, DImode and TImode */
1974 {8, 8, 8}, /* cost of storing SSE registers
1975 in SImode, DImode and TImode */
1976 5, /* MMX or SSE register to integer */
1977 64, /* size of l1 cache. */
1978 512, /* size of l2 cache. */
1979 64, /* size of prefetch block */
1980 6, /* number of parallel prefetches */
1981 /* FIXME: perhaps a more appropriate value is 5. */
1982 3, /* Branch cost */
1983 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1984 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1985 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1986 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1987 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1988 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1989 core_memcpy,
1990 core_memset,
1991 1, /* scalar_stmt_cost. */
1992 1, /* scalar load_cost. */
1993 1, /* scalar_store_cost. */
1994 1, /* vec_stmt_cost. */
1995 1, /* vec_to_scalar_cost. */
1996 1, /* scalar_to_vec_cost. */
1997 1, /* vec_align_load_cost. */
1998 2, /* vec_unalign_load_cost. */
1999 1, /* vec_store_cost. */
2000 3, /* cond_taken_branch_cost. */
2001 1, /* cond_not_taken_branch_cost. */
2005 /* Set by -mtune. */
2006 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2008 /* Set by -mtune or -Os. */
2009 const struct processor_costs *ix86_cost = &pentium_cost;
2011 /* Processor feature/optimization bitmasks. */
2012 #define m_386 (1<<PROCESSOR_I386)
2013 #define m_486 (1<<PROCESSOR_I486)
2014 #define m_PENT (1<<PROCESSOR_PENTIUM)
2015 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2016 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2017 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2018 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2019 #define m_CORE2 (1<<PROCESSOR_CORE2)
2020 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2021 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2022 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2023 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2024 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2025 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2026 #define m_INTEL (1<<PROCESSOR_INTEL)
2028 #define m_GEODE (1<<PROCESSOR_GEODE)
2029 #define m_K6 (1<<PROCESSOR_K6)
2030 #define m_K6_GEODE (m_K6 | m_GEODE)
2031 #define m_K8 (1<<PROCESSOR_K8)
2032 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2033 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2034 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2035 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2036 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2037 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2038 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2039 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2040 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2041 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2042 #define m_BTVER (m_BTVER1 | m_BTVER2)
2043 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2045 #define m_GENERIC (1<<PROCESSOR_GENERIC)
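
/* Illustration only (not part of the original source): these masks are OR'ed
   together to build the per-feature selectors used below; e.g. a feature
   enabled for all Core chips plus generic tuning would use a selector such as
   (m_CORE_ALL | m_GENERIC), which set_ix86_tune_features later tests against
   the single-bit mask 1u << ix86_tune.  */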
2047 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2048 #undef DEF_TUNE
2049 #define DEF_TUNE(tune, name, selector) name,
2050 #include "x86-tune.def"
2051 #undef DEF_TUNE
2054 /* Feature tests against the various tunings. */
2055 unsigned char ix86_tune_features[X86_TUNE_LAST];
2057 /* Feature tests against the various tunings used to create ix86_tune_features
2058 based on the processor mask. */
2059 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2060 #undef DEF_TUNE
2061 #define DEF_TUNE(tune, name, selector) selector,
2062 #include "x86-tune.def"
2063 #undef DEF_TUNE
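
/* A sketch of how the two X-macro expansions above consume x86-tune.def
   (entry shape shown for illustration; see that file for the real list):

     DEF_TUNE (X86_TUNE_SCHEDULE, "schedule", m_PENT | m_PPRO | ...)

   The first expansion collects the quoted names into ix86_tune_feature_names,
   the second collects the selector masks into initial_ix86_tune_features,
   keeping both arrays indexed by the same X86_TUNE_* value.  */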
2066 /* Feature tests against the various architecture variations. */
2067 unsigned char ix86_arch_features[X86_ARCH_LAST];
2069 /* Feature tests against the various architecture variations, used to create
2070 ix86_arch_features based on the processor mask. */
2071 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2072 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2073 ~(m_386 | m_486 | m_PENT | m_K6),
2075 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2076 ~m_386,
2078 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2079 ~(m_386 | m_486),
2081 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2082 ~m_386,
2084 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2085 ~m_386,
2088 /* In case the average insn count for single function invocation is
2089 lower than this constant, emit fast (but longer) prologue and
2090 epilogue code. */
2091 #define FAST_PROLOGUE_INSN_COUNT 20
2093 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2094 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2095 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2096 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2098 /* Array of the smallest class containing reg number REGNO, indexed by
2099 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2101 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2103 /* ax, dx, cx, bx */
2104 AREG, DREG, CREG, BREG,
2105 /* si, di, bp, sp */
2106 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2107 /* FP registers */
2108 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2109 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2110 /* arg pointer */
2111 NON_Q_REGS,
2112 /* flags, fpsr, fpcr, frame */
2113 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2114 /* SSE registers */
2115 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2116 SSE_REGS, SSE_REGS,
2117 /* MMX registers */
2118 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2119 MMX_REGS, MMX_REGS,
2120 /* REX registers */
2121 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2122 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2123 /* SSE REX registers */
2124 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2125 SSE_REGS, SSE_REGS,
2126 /* AVX-512 SSE registers */
2127 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2128 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2129 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2130 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2131 /* Mask registers. */
2132 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2133 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2136 /* The "default" register map used in 32bit mode. */
2138 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2140 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2141 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2142 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2143 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2144 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2145 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2146 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2147 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2148 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2149 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2152 /* The "default" register map used in 64bit mode. */
2154 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2156 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2157 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2158 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2159 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2160 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2161 8,9,10,11,12,13,14,15, /* extended integer registers */
2162 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2163 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2164 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2165 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2168 /* Define the register numbers to be used in Dwarf debugging information.
2169 The SVR4 reference port C compiler uses the following register numbers
2170 in its Dwarf output code:
2171 0 for %eax (gcc regno = 0)
2172 1 for %ecx (gcc regno = 2)
2173 2 for %edx (gcc regno = 1)
2174 3 for %ebx (gcc regno = 3)
2175 4 for %esp (gcc regno = 7)
2176 5 for %ebp (gcc regno = 6)
2177 6 for %esi (gcc regno = 4)
2178 7 for %edi (gcc regno = 5)
2179 The following three DWARF register numbers are never generated by
2180 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2181 believes these numbers have these meanings.
2182 8 for %eip (no gcc equivalent)
2183 9 for %eflags (gcc regno = 17)
2184 10 for %trapno (no gcc equivalent)
2185 It is not at all clear how we should number the FP stack registers
2186 for the x86 architecture. If the version of SDB on x86/svr4 were
2187 a bit less brain dead with respect to floating-point then we would
2188 have a precedent to follow with respect to DWARF register numbers
2189 for x86 FP registers, but the SDB on x86/svr4 is so completely
2190 broken with respect to FP registers that it is hardly worth thinking
2191 of it as something to strive for compatibility with.
2192 The version of x86/svr4 SDB I have at the moment does (partially)
2193 seem to believe that DWARF register number 11 is associated with
2194 the x86 register %st(0), but that's about all. Higher DWARF
2195 register numbers don't seem to be associated with anything in
2196 particular, and even for DWARF regno 11, SDB only seems to under-
2197 stand that it should say that a variable lives in %st(0) (when
2198 asked via an `=' command) if we said it was in DWARF regno 11,
2199 but SDB still prints garbage when asked for the value of the
2200 variable in question (via a `/' command).
2201 (Also note that the labels SDB prints for various FP stack regs
2202 when doing an `x' command are all wrong.)
2203 Note that these problems generally don't affect the native SVR4
2204 C compiler because it doesn't allow the use of -O with -g and
2205 because when it is *not* optimizing, it allocates a memory
2206 location for each floating-point variable, and the memory
2207 location is what gets described in the DWARF AT_location
2208 attribute for the variable in question.
2209 Regardless of the severe mental illness of the x86/svr4 SDB, we
2210 do something sensible here and we use the following DWARF
2211 register numbers. Note that these are all stack-top-relative
2212 numbers.
2213 11 for %st(0) (gcc regno = 8)
2214 12 for %st(1) (gcc regno = 9)
2215 13 for %st(2) (gcc regno = 10)
2216 14 for %st(3) (gcc regno = 11)
2217 15 for %st(4) (gcc regno = 12)
2218 16 for %st(5) (gcc regno = 13)
2219 17 for %st(6) (gcc regno = 14)
2220 18 for %st(7) (gcc regno = 15)
2222 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2224 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2225 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2226 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2227 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2228 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2229 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2230 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2231 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2232 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2233 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2236 /* Define parameter passing and return registers. */
2238 static int const x86_64_int_parameter_registers[6] =
2240 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2243 static int const x86_64_ms_abi_int_parameter_registers[4] =
2245 CX_REG, DX_REG, R8_REG, R9_REG
2248 static int const x86_64_int_return_registers[4] =
2250 AX_REG, DX_REG, DI_REG, SI_REG
2253 /* Additional registers that are clobbered by SYSV calls. */
2255 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2257 SI_REG, DI_REG,
2258 XMM6_REG, XMM7_REG,
2259 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2260 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2263 /* Define the structure for the machine field in struct function. */
2265 struct GTY(()) stack_local_entry {
2266 unsigned short mode;
2267 unsigned short n;
2268 rtx rtl;
2269 struct stack_local_entry *next;
2272 /* Structure describing stack frame layout.
2273 Stack grows downward:
2275 [arguments]
2276 <- ARG_POINTER
2277 saved pc
2279 saved static chain if ix86_static_chain_on_stack
2281 saved frame pointer if frame_pointer_needed
2282 <- HARD_FRAME_POINTER
2283 [saved regs]
2284 <- regs_save_offset
2285 [padding0]
2287 [saved SSE regs]
2288 <- sse_regs_save_offset
2289 [padding1] |
2290 | <- FRAME_POINTER
2291 [va_arg registers] |
2293 [frame] |
2295 [padding2] | = to_allocate
2296 <- STACK_POINTER
2298 struct ix86_frame
2300 int nsseregs;
2301 int nregs;
2302 int va_arg_size;
2303 int red_zone_size;
2304 int outgoing_arguments_size;
2306 /* The offsets relative to ARG_POINTER. */
2307 HOST_WIDE_INT frame_pointer_offset;
2308 HOST_WIDE_INT hard_frame_pointer_offset;
2309 HOST_WIDE_INT stack_pointer_offset;
2310 HOST_WIDE_INT hfp_save_offset;
2311 HOST_WIDE_INT reg_save_offset;
2312 HOST_WIDE_INT sse_reg_save_offset;
2314 /* When save_regs_using_mov is set, emit prologue using
2315 move instead of push instructions. */
2316 bool save_regs_using_mov;
2319 /* Which cpu are we scheduling for. */
2320 enum attr_cpu ix86_schedule;
2322 /* Which cpu are we optimizing for. */
2323 enum processor_type ix86_tune;
2325 /* Which instruction set architecture to use. */
2326 enum processor_type ix86_arch;
2328 /* True if processor has SSE prefetch instruction. */
2329 unsigned char x86_prefetch_sse;
2331 /* -mstackrealign option */
2332 static const char ix86_force_align_arg_pointer_string[]
2333 = "force_align_arg_pointer";
2335 static rtx (*ix86_gen_leave) (void);
2336 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2337 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2338 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2339 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2340 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2341 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2342 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2343 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2344 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2345 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2346 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2348 /* Preferred alignment for stack boundary in bits. */
2349 unsigned int ix86_preferred_stack_boundary;
2351 /* Alignment for incoming stack boundary in bits specified at
2352 command line. */
2353 static unsigned int ix86_user_incoming_stack_boundary;
2355 /* Default alignment for incoming stack boundary in bits. */
2356 static unsigned int ix86_default_incoming_stack_boundary;
2358 /* Alignment for incoming stack boundary in bits. */
2359 unsigned int ix86_incoming_stack_boundary;
2361 /* Calling abi specific va_list type nodes. */
2362 static GTY(()) tree sysv_va_list_type_node;
2363 static GTY(()) tree ms_va_list_type_node;
2365 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2366 char internal_label_prefix[16];
2367 int internal_label_prefix_len;
2369 /* Fence to use after loop using movnt. */
2370 tree x86_mfence;
2372 /* Register class used for passing a given 64bit part of the argument.
2373 These represent classes as documented by the PS ABI, with the exception
2374 of the SSESF and SSEDF classes, which are basically the SSE class; gcc
2375 just uses an SF or DFmode move instead of DImode to avoid reformatting
2377 penalties.  Similarly we play games with INTEGERSI_CLASS to use cheaper
2378 SImode moves whenever possible (the upper half does contain padding). */
2379 enum x86_64_reg_class
2381 X86_64_NO_CLASS,
2382 X86_64_INTEGER_CLASS,
2383 X86_64_INTEGERSI_CLASS,
2384 X86_64_SSE_CLASS,
2385 X86_64_SSESF_CLASS,
2386 X86_64_SSEDF_CLASS,
2387 X86_64_SSEUP_CLASS,
2388 X86_64_X87_CLASS,
2389 X86_64_X87UP_CLASS,
2390 X86_64_COMPLEX_X87_CLASS,
2391 X86_64_MEMORY_CLASS
2394 #define MAX_CLASSES 8
2396 /* Table of constants used by fldpi, fldln2, etc.... */
2397 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2398 static bool ext_80387_constants_init = 0;
2401 static struct machine_function * ix86_init_machine_status (void);
2402 static rtx ix86_function_value (const_tree, const_tree, bool);
2403 static bool ix86_function_value_regno_p (const unsigned int);
2404 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2405 const_tree);
2406 static rtx ix86_static_chain (const_tree, bool);
2407 static int ix86_function_regparm (const_tree, const_tree);
2408 static void ix86_compute_frame_layout (struct ix86_frame *);
2409 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2410 rtx, rtx, int);
2411 static void ix86_add_new_builtins (HOST_WIDE_INT);
2412 static tree ix86_canonical_va_list_type (tree);
2413 static void predict_jump (int);
2414 static unsigned int split_stack_prologue_scratch_regno (void);
2415 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2417 enum ix86_function_specific_strings
2419 IX86_FUNCTION_SPECIFIC_ARCH,
2420 IX86_FUNCTION_SPECIFIC_TUNE,
2421 IX86_FUNCTION_SPECIFIC_MAX
2424 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2425 const char *, enum fpmath_unit, bool);
2426 static void ix86_function_specific_save (struct cl_target_option *,
2427 struct gcc_options *opts);
2428 static void ix86_function_specific_restore (struct gcc_options *opts,
2429 struct cl_target_option *);
2430 static void ix86_function_specific_print (FILE *, int,
2431 struct cl_target_option *);
2432 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2433 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2434 struct gcc_options *,
2435 struct gcc_options *,
2436 struct gcc_options *);
2437 static bool ix86_can_inline_p (tree, tree);
2438 static void ix86_set_current_function (tree);
2439 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2441 static enum calling_abi ix86_function_abi (const_tree);
2444 #ifndef SUBTARGET32_DEFAULT_CPU
2445 #define SUBTARGET32_DEFAULT_CPU "i386"
2446 #endif
2448 /* Whether -mtune= or -march= were specified */
2449 static int ix86_tune_defaulted;
2450 static int ix86_arch_specified;
2452 /* Vectorization library interface and handlers. */
2453 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2455 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2456 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2458 /* Processor target table, indexed by processor number */
2459 struct ptt
2461 const char *const name; /* processor name */
2462 const struct processor_costs *cost; /* Processor costs */
2463 const int align_loop; /* Default alignments. */
2464 const int align_loop_max_skip;
2465 const int align_jump;
2466 const int align_jump_max_skip;
2467 const int align_func;
2470 /* This table must be in sync with enum processor_type in i386.h. */
2471 static const struct ptt processor_target_table[PROCESSOR_max] =
2473 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2474 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2475 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2476 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2477 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2478 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2479 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2480 {"core2", &core_cost, 16, 10, 16, 10, 16},
2481 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2482 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2483 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2484 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2485 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2486 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2487 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2488 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2489 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2490 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2491 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2492 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2493 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2494 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2495 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2496 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2497 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2500 static unsigned int
2501 rest_of_handle_insert_vzeroupper (void)
2503 int i;
2505 /* vzeroupper instructions are inserted immediately after reload to
2506 account for possible spills from 256bit registers. The pass
2507 reuses the mode switching infrastructure by re-running the mode
2508 insertion pass, so disable entities that have already been processed. */
2509 for (i = 0; i < MAX_386_ENTITIES; i++)
2510 ix86_optimize_mode_switching[i] = 0;
2512 ix86_optimize_mode_switching[AVX_U128] = 1;
2514 /* Call optimize_mode_switching. */
2515 g->get_passes ()->execute_pass_mode_switching ();
2516 return 0;
2519 namespace {
2521 const pass_data pass_data_insert_vzeroupper =
2523 RTL_PASS, /* type */
2524 "vzeroupper", /* name */
2525 OPTGROUP_NONE, /* optinfo_flags */
2526 true, /* has_execute */
2527 TV_NONE, /* tv_id */
2528 0, /* properties_required */
2529 0, /* properties_provided */
2530 0, /* properties_destroyed */
2531 0, /* todo_flags_start */
2532 TODO_df_finish, /* todo_flags_finish */
2535 class pass_insert_vzeroupper : public rtl_opt_pass
2537 public:
2538 pass_insert_vzeroupper(gcc::context *ctxt)
2539 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2542 /* opt_pass methods: */
2543 virtual bool gate (function *)
2545 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2548 virtual unsigned int execute (function *)
2550 return rest_of_handle_insert_vzeroupper ();
2553 }; // class pass_insert_vzeroupper
2555 } // anon namespace
2557 rtl_opt_pass *
2558 make_pass_insert_vzeroupper (gcc::context *ctxt)
2560 return new pass_insert_vzeroupper (ctxt);
2563 /* Return true if a red-zone is in use. */
2565 static inline bool
2566 ix86_using_red_zone (void)
2568 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2571 /* Return a string that documents the current -m options. The caller is
2572 responsible for freeing the string. */
2574 static char *
2575 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2576 const char *tune, enum fpmath_unit fpmath,
2577 bool add_nl_p)
2579 struct ix86_target_opts
2581 const char *option; /* option string */
2582 HOST_WIDE_INT mask; /* isa mask options */
2585 /* This table is ordered so that options like -msse4.2 that imply
2586 preceding options are matched first. */
2587 static struct ix86_target_opts isa_opts[] =
2589 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2590 { "-mfma", OPTION_MASK_ISA_FMA },
2591 { "-mxop", OPTION_MASK_ISA_XOP },
2592 { "-mlwp", OPTION_MASK_ISA_LWP },
2593 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2594 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2595 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2596 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2597 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2598 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2599 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2600 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2601 { "-msse3", OPTION_MASK_ISA_SSE3 },
2602 { "-msse2", OPTION_MASK_ISA_SSE2 },
2603 { "-msse", OPTION_MASK_ISA_SSE },
2604 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2605 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2606 { "-mmmx", OPTION_MASK_ISA_MMX },
2607 { "-mabm", OPTION_MASK_ISA_ABM },
2608 { "-mbmi", OPTION_MASK_ISA_BMI },
2609 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2610 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2611 { "-mhle", OPTION_MASK_ISA_HLE },
2612 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2613 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2614 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2615 { "-madx", OPTION_MASK_ISA_ADX },
2616 { "-mtbm", OPTION_MASK_ISA_TBM },
2617 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2618 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2619 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2620 { "-maes", OPTION_MASK_ISA_AES },
2621 { "-msha", OPTION_MASK_ISA_SHA },
2622 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2623 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2624 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2625 { "-mf16c", OPTION_MASK_ISA_F16C },
2626 { "-mrtm", OPTION_MASK_ISA_RTM },
2627 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2628 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2629 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2630 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2631 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2632 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2635 /* Flag options. */
2636 static struct ix86_target_opts flag_opts[] =
2638 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2639 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2640 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2641 { "-m80387", MASK_80387 },
2642 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2643 { "-malign-double", MASK_ALIGN_DOUBLE },
2644 { "-mcld", MASK_CLD },
2645 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2646 { "-mieee-fp", MASK_IEEE_FP },
2647 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2648 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2649 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2650 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2651 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2652 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2653 { "-mno-red-zone", MASK_NO_RED_ZONE },
2654 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2655 { "-mrecip", MASK_RECIP },
2656 { "-mrtd", MASK_RTD },
2657 { "-msseregparm", MASK_SSEREGPARM },
2658 { "-mstack-arg-probe", MASK_STACK_PROBE },
2659 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2660 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2661 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2662 { "-mvzeroupper", MASK_VZEROUPPER },
2663 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2664 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2665 { "-mprefer-avx128", MASK_PREFER_AVX128},
2668 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2670 char isa_other[40];
2671 char target_other[40];
2672 unsigned num = 0;
2673 unsigned i, j;
2674 char *ret;
2675 char *ptr;
2676 size_t len;
2677 size_t line_len;
2678 size_t sep_len;
2679 const char *abi;
2681 memset (opts, '\0', sizeof (opts));
2683 /* Add -march= option. */
2684 if (arch)
2686 opts[num][0] = "-march=";
2687 opts[num++][1] = arch;
2690 /* Add -mtune= option. */
2691 if (tune)
2693 opts[num][0] = "-mtune=";
2694 opts[num++][1] = tune;
2697 /* Add -m32/-m64/-mx32. */
2698 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2700 if ((isa & OPTION_MASK_ABI_64) != 0)
2701 abi = "-m64";
2702 else
2703 abi = "-mx32";
2704 isa &= ~ (OPTION_MASK_ISA_64BIT
2705 | OPTION_MASK_ABI_64
2706 | OPTION_MASK_ABI_X32);
2708 else
2709 abi = "-m32";
2710 opts[num++][0] = abi;
2712 /* Pick out the options in isa options. */
2713 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2715 if ((isa & isa_opts[i].mask) != 0)
2717 opts[num++][0] = isa_opts[i].option;
2718 isa &= ~ isa_opts[i].mask;
2722 if (isa && add_nl_p)
2724 opts[num++][0] = isa_other;
2725 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2726 isa);
2729 /* Add flag options. */
2730 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2732 if ((flags & flag_opts[i].mask) != 0)
2734 opts[num++][0] = flag_opts[i].option;
2735 flags &= ~ flag_opts[i].mask;
2739 if (flags && add_nl_p)
2741 opts[num++][0] = target_other;
2742 sprintf (target_other, "(other flags: %#x)", flags);
2745 /* Add -fpmath= option. */
2746 if (fpmath)
2748 opts[num][0] = "-mfpmath=";
2749 switch ((int) fpmath)
2751 case FPMATH_387:
2752 opts[num++][1] = "387";
2753 break;
2755 case FPMATH_SSE:
2756 opts[num++][1] = "sse";
2757 break;
2759 case FPMATH_387 | FPMATH_SSE:
2760 opts[num++][1] = "sse+387";
2761 break;
2763 default:
2764 gcc_unreachable ();
2768 /* Any options? */
2769 if (num == 0)
2770 return NULL;
2772 gcc_assert (num < ARRAY_SIZE (opts));
2774 /* Size the string. */
2775 len = 0;
2776 sep_len = (add_nl_p) ? 3 : 1;
2777 for (i = 0; i < num; i++)
2779 len += sep_len;
2780 for (j = 0; j < 2; j++)
2781 if (opts[i][j])
2782 len += strlen (opts[i][j]);
2785 /* Build the string. */
2786 ret = ptr = (char *) xmalloc (len);
2787 line_len = 0;
2789 for (i = 0; i < num; i++)
2791 size_t len2[2];
2793 for (j = 0; j < 2; j++)
2794 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2796 if (i != 0)
2798 *ptr++ = ' ';
2799 line_len++;
2801 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2803 *ptr++ = '\\';
2804 *ptr++ = '\n';
2805 line_len = 0;
2809 for (j = 0; j < 2; j++)
2810 if (opts[i][j])
2812 memcpy (ptr, opts[i][j], len2[j]);
2813 ptr += len2[j];
2814 line_len += len2[j];
2818 *ptr = '\0';
2819 gcc_assert (ret + len >= ptr);
2821 return ret;
2824 /* Return true if profiling code should be emitted before the
2825 prologue, i.e. when -mfentry is in use; otherwise return false.
2826 Note: for x86 "hotfix" prologues this combination is rejected with sorry (). */
2827 static bool
2828 ix86_profile_before_prologue (void)
2830 return flag_fentry != 0;
2833 /* Function that is callable from the debugger to print the current
2834 options. */
2835 void ATTRIBUTE_UNUSED
2836 ix86_debug_options (void)
2838 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2839 ix86_arch_string, ix86_tune_string,
2840 ix86_fpmath, true);
2842 if (opts)
2844 fprintf (stderr, "%s\n\n", opts);
2845 free (opts);
2847 else
2848 fputs ("<no options>\n\n", stderr);
2850 return;
2853 static const char *stringop_alg_names[] = {
2854 #define DEF_ENUM
2855 #define DEF_ALG(alg, name) #name,
2856 #include "stringop.def"
2857 #undef DEF_ENUM
2858 #undef DEF_ALG
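
/* For illustration (assumed entry shape, see stringop.def for the actual
   list): an entry like DEF_ALG (rep_prefix_8_byte, rep_8byte) pairs the
   internal stringop_alg value with its user-visible name, so the strings
   accepted by -mmemcpy-strategy=/-mmemset-strategy= below come from the
   second argument of each DEF_ALG entry.  */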
2861 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2862 The string is of the following form (or a comma-separated list of such entries):
2864 strategy_alg:max_size:[align|noalign]
2866 where the full size range for the strategy is either [0, max_size] or
2867 [min_size, max_size], in which min_size is one more than the max_size of
2868 the preceding range. The last size range must have max_size == -1.
2870 Examples:
2873 -mmemcpy-strategy=libcall:-1:noalign
2875 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2879 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2881 This tells the compiler to use the following strategy for memset:
2882 1) when the expected size is between [1, 16], use rep_8byte strategy;
2883 2) when the size is between [17, 2048], use vector_loop;
2884 3) when the size is > 2048, use libcall. */
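
/* Rough sketch (illustration only, assuming the usual stringop.def name
   mapping such as "rep_8byte" -> rep_prefix_8_byte): with the memset example
   above, ix86_parse_stringop_strategy_string below would overwrite the first
   three entries of the default table with the equivalent of

     {{16, rep_prefix_8_byte, true},
      {2048, vector_loop, false},
      {-1, libcall, true}}

   where the third field is the noalign flag ("noalign" => true).  */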
2886 struct stringop_size_range
2888 int max;
2889 stringop_alg alg;
2890 bool noalign;
2893 static void
2894 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2896 const struct stringop_algs *default_algs;
2897 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2898 char *curr_range_str, *next_range_str;
2899 int i = 0, n = 0;
2901 if (is_memset)
2902 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2903 else
2904 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2906 curr_range_str = strategy_str;
2910 int maxs;
2911 char alg_name[128];
2912 char align[16];
2913 next_range_str = strchr (curr_range_str, ',');
2914 if (next_range_str)
2915 *next_range_str++ = '\0';
2917 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2918 alg_name, &maxs, align))
2920 error ("wrong arg %s to option %s", curr_range_str,
2921 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2922 return;
2925 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2927 error ("size ranges of option %s should be increasing",
2928 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2929 return;
2932 for (i = 0; i < last_alg; i++)
2933 if (!strcmp (alg_name, stringop_alg_names[i]))
2934 break;
2936 if (i == last_alg)
2938 error ("wrong stringop strategy name %s specified for option %s",
2939 alg_name,
2940 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2941 return;
2944 input_ranges[n].max = maxs;
2945 input_ranges[n].alg = (stringop_alg) i;
2946 if (!strcmp (align, "align"))
2947 input_ranges[n].noalign = false;
2948 else if (!strcmp (align, "noalign"))
2949 input_ranges[n].noalign = true;
2950 else
2952 error ("unknown alignment %s specified for option %s",
2953 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2954 return;
2956 n++;
2957 curr_range_str = next_range_str;
2959 while (curr_range_str);
2961 if (input_ranges[n - 1].max != -1)
2963 error ("the max value for the last size range should be -1"
2964 " for option %s",
2965 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2966 return;
2969 if (n > MAX_STRINGOP_ALGS)
2971 error ("too many size ranges specified in option %s",
2972 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2973 return;
2976 /* Now override the default algs array. */
2977 for (i = 0; i < n; i++)
2979 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2980 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2981 = input_ranges[i].alg;
2982 *const_cast<int *>(&default_algs->size[i].noalign)
2983 = input_ranges[i].noalign;
2988 /* Parse the -mtune-ctrl= option. When DUMP is true,
2989 print the features that are explicitly set. */
2991 static void
2992 parse_mtune_ctrl_str (bool dump)
2994 if (!ix86_tune_ctrl_string)
2995 return;
2997 char *next_feature_string = NULL;
2998 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
2999 char *orig = curr_feature_string;
3000 int i;
3003 bool clear = false;
3005 next_feature_string = strchr (curr_feature_string, ',');
3006 if (next_feature_string)
3007 *next_feature_string++ = '\0';
3008 if (*curr_feature_string == '^')
3010 curr_feature_string++;
3011 clear = true;
3013 for (i = 0; i < X86_TUNE_LAST; i++)
3015 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3017 ix86_tune_features[i] = !clear;
3018 if (dump)
3019 fprintf (stderr, "Explicitly %s feature %s\n",
3020 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3021 break;
3024 if (i == X86_TUNE_LAST)
3025 error ("Unknown parameter to option -mtune-ctrl: %s",
3026 clear ? curr_feature_string - 1 : curr_feature_string);
3027 curr_feature_string = next_feature_string;
3029 while (curr_feature_string);
3030 free (orig);
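
/* Example usage (feature names shown for illustration; they must match
   entries in ix86_tune_feature_names): -mtune-ctrl=use_leave,^avx128_optimal
   would set the first feature and clear the second, since a leading '^'
   requests clearing, as handled in the loop above.  */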
3033 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3034 processor type. */
3036 static void
3037 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3039 unsigned int ix86_tune_mask = 1u << ix86_tune;
3040 int i;
3042 for (i = 0; i < X86_TUNE_LAST; ++i)
3044 if (ix86_tune_no_default)
3045 ix86_tune_features[i] = 0;
3046 else
3047 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
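/* Illustrative note: each initial_ix86_tune_features[] entry is a bitmask
   indexed by processor, so with ix86_tune_mask == 1u << ix86_tune a feature
   defaults to on exactly when the selected processor's bit is set in that
   feature's mask; ix86_tune_no_default, handled above, forces every feature
   off instead.  */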
3050 if (dump)
3052 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3053 for (i = 0; i < X86_TUNE_LAST; i++)
3054 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3055 ix86_tune_features[i] ? "on" : "off");
3058 parse_mtune_ctrl_str (dump);
3062 /* Override various settings based on options. If MAIN_ARGS_P, the
3063 options are from the command line, otherwise they are from
3064 attributes. */
3066 static void
3067 ix86_option_override_internal (bool main_args_p,
3068 struct gcc_options *opts,
3069 struct gcc_options *opts_set)
3071 int i;
3072 unsigned int ix86_arch_mask;
3073 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3074 const char *prefix;
3075 const char *suffix;
3076 const char *sw;
3078 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3079 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3080 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3081 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3082 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3083 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3084 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3085 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3086 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3087 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3088 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3089 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3090 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3091 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3092 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3093 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3094 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3095 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3096 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3097 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3098 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3099 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3100 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3101 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3102 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3103 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3104 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3105 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3106 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3107 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3108 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3109 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3110 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3111 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3112 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3113 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3114 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3115 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3116 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3117 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3118 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3119 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3120 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3121 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3122 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3123 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3124 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3125 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3126 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3128 #define PTA_CORE2 \
3129 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3130 | PTA_CX16 | PTA_FXSR)
3131 #define PTA_NEHALEM \
3132 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3133 #define PTA_WESTMERE \
3134 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3135 #define PTA_SANDYBRIDGE \
3136 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3137 #define PTA_IVYBRIDGE \
3138 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3139 #define PTA_HASWELL \
3140 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3141 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3142 #define PTA_BROADWELL \
3143 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3144 #define PTA_BONNELL \
3145 (PTA_CORE2 | PTA_MOVBE)
3146 #define PTA_SILVERMONT \
3147 (PTA_WESTMERE | PTA_MOVBE)
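/* Worked example of how the aliases above compose: PTA_HASWELL expands
   through PTA_IVYBRIDGE, PTA_SANDYBRIDGE, PTA_WESTMERE and PTA_NEHALEM down
   to PTA_CORE2, so a Haswell entry implicitly carries PTA_64BIT, the MMX/SSE
   family, CX16 and FXSR as well as the AVX2/BMI2/HLE additions, without
   restating the individual bits.  */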
3149 /* If this reaches 64, the flags field of struct pta below needs to be widened. */
3151 static struct pta
3153 const char *const name; /* processor name or nickname. */
3154 const enum processor_type processor;
3155 const enum attr_cpu schedule;
3156 const unsigned HOST_WIDE_INT flags;
3158 const processor_alias_table[] =
3160 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3161 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3162 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3163 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3164 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3165 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3166 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3167 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3168 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3169 PTA_MMX | PTA_SSE | PTA_FXSR},
3170 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3171 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3172 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3173 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3174 PTA_MMX | PTA_SSE | PTA_FXSR},
3175 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3176 PTA_MMX | PTA_SSE | PTA_FXSR},
3177 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3178 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3179 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3180 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3181 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3182 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3183 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3184 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3185 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3186 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3187 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3188 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3189 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3190 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3191 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3192 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3193 PTA_SANDYBRIDGE},
3194 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3195 PTA_SANDYBRIDGE},
3196 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3197 PTA_IVYBRIDGE},
3198 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3199 PTA_IVYBRIDGE},
3200 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3201 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3202 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3203 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3204 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3205 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3206 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3207 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3208 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3209 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3210 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3211 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3212 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3213 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3214 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3215 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3216 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3217 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3218 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3219 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3220 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3221 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3222 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3223 {"x86-64", PROCESSOR_K8, CPU_K8,
3224 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3225 {"k8", PROCESSOR_K8, CPU_K8,
3226 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3227 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3228 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3229 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3230 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3231 {"opteron", PROCESSOR_K8, CPU_K8,
3232 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3233 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3234 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3235 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3236 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3237 {"athlon64", PROCESSOR_K8, CPU_K8,
3238 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3239 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3240 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3241 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3242 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3243 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3244 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3245 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3246 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3247 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3248 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3249 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3250 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3251 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3252 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3253 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3254 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3255 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3256 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3257 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3258 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3259 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3260 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3261 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3262 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3263 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3264 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3265 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3266 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3267 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3268 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3269 | PTA_XSAVEOPT | PTA_FSGSBASE},
3270 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3271 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3272 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3273 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3274 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3275 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3276 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE},
3277 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3278 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3279 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3280 | PTA_FXSR | PTA_XSAVE},
3281 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3282 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3283 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3284 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3285 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3286 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3288 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3289 PTA_64BIT
3290 | PTA_HLE /* flags are only used for -march switch. */ },
3293 /* -mrecip options. */
3294 static struct
3296 const char *string; /* option name */
3297 unsigned int mask; /* mask bits to set */
3299 const recip_options[] =
3301 { "all", RECIP_MASK_ALL },
3302 { "none", RECIP_MASK_NONE },
3303 { "div", RECIP_MASK_DIV },
3304 { "sqrt", RECIP_MASK_SQRT },
3305 { "vec-div", RECIP_MASK_VEC_DIV },
3306 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3309 int const pta_size = ARRAY_SIZE (processor_alias_table);
3311 /* Set up prefix/suffix so the error messages refer to either the command
3312 line argument, or the attribute(target). */
3313 if (main_args_p)
3315 prefix = "-m";
3316 suffix = "";
3317 sw = "switch";
3319 else
3321 prefix = "option(\"";
3322 suffix = "\")";
3323 sw = "attribute";
3326 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3327 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3328 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3329 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3330 #ifdef TARGET_BI_ARCH
3331 else
3333 #if TARGET_BI_ARCH == 1
3334 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3335 is on and OPTION_MASK_ABI_X32 is off. We turn off
3336 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3337 -mx32. */
3338 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3339 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3340 #else
3341 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3342 on and OPTION_MASK_ABI_64 is off. We turn off
3343 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3344 -m64. */
3345 if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3346 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3347 #endif
3349 #endif
3351 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3353 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3354 OPTION_MASK_ABI_64 for TARGET_X32. */
3355 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3356 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3358 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3359 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3360 | OPTION_MASK_ABI_X32
3361 | OPTION_MASK_ABI_64);
3362 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3364 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3365 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3366 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3367 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3370 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3371 SUBTARGET_OVERRIDE_OPTIONS;
3372 #endif
3374 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3375 SUBSUBTARGET_OVERRIDE_OPTIONS;
3376 #endif
3378 /* -fPIC is the default for x86_64. */
3379 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3380 opts->x_flag_pic = 2;
3382 /* Need to check -mtune=generic first. */
3383 if (opts->x_ix86_tune_string)
3385 /* As special support for cross compilers we read -mtune=native
3386 as -mtune=generic. With native compilers we won't see the
3387 -mtune=native, as it was changed by the driver. */
3388 if (!strcmp (opts->x_ix86_tune_string, "native"))
3390 opts->x_ix86_tune_string = "generic";
3392 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3393 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3394 "%stune=k8%s or %stune=generic%s instead as appropriate",
3395 prefix, suffix, prefix, suffix, prefix, suffix);
3397 else
3399 if (opts->x_ix86_arch_string)
3400 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3401 if (!opts->x_ix86_tune_string)
3403 opts->x_ix86_tune_string
3404 = processor_target_table[TARGET_CPU_DEFAULT].name;
3405 ix86_tune_defaulted = 1;
3408 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3409 or defaulted. We need to use a sensible tune option. */
3410 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3412 opts->x_ix86_tune_string = "generic";
3416 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3417 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3419 /* rep; movq isn't available in 32-bit code. */
3420 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3421 opts->x_ix86_stringop_alg = no_stringop;
3424 if (!opts->x_ix86_arch_string)
3425 opts->x_ix86_arch_string
3426 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3427 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3428 else
3429 ix86_arch_specified = 1;
3431 if (opts_set->x_ix86_pmode)
3433 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3434 && opts->x_ix86_pmode == PMODE_SI)
3435 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3436 && opts->x_ix86_pmode == PMODE_DI))
3437 error ("address mode %qs not supported in the %s bit mode",
3438 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3439 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3441 else
3442 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3443 ? PMODE_DI : PMODE_SI;
3445 if (!opts_set->x_ix86_abi)
3446 opts->x_ix86_abi = DEFAULT_ABI;
3448 /* For targets using ms ABI enable ms-extensions, if not
3449 explicitly turned off. For non-ms ABI we turn off this
3450 option. */
3451 if (!opts_set->x_flag_ms_extensions)
3452 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3454 if (opts_set->x_ix86_cmodel)
3456 switch (opts->x_ix86_cmodel)
3458 case CM_SMALL:
3459 case CM_SMALL_PIC:
3460 if (opts->x_flag_pic)
3461 opts->x_ix86_cmodel = CM_SMALL_PIC;
3462 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3463 error ("code model %qs not supported in the %s bit mode",
3464 "small", "32");
3465 break;
3467 case CM_MEDIUM:
3468 case CM_MEDIUM_PIC:
3469 if (opts->x_flag_pic)
3470 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3471 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3472 error ("code model %qs not supported in the %s bit mode",
3473 "medium", "32");
3474 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3475 error ("code model %qs not supported in x32 mode",
3476 "medium");
3477 break;
3479 case CM_LARGE:
3480 case CM_LARGE_PIC:
3481 if (opts->x_flag_pic)
3482 opts->x_ix86_cmodel = CM_LARGE_PIC;
3483 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3484 error ("code model %qs not supported in the %s bit mode",
3485 "large", "32");
3486 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3487 error ("code model %qs not supported in x32 mode",
3488 "large");
3489 break;
3491 case CM_32:
3492 if (opts->x_flag_pic)
3493 error ("code model %s does not support PIC mode", "32");
3494 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3495 error ("code model %qs not supported in the %s bit mode",
3496 "32", "64");
3497 break;
3499 case CM_KERNEL:
3500 if (opts->x_flag_pic)
3502 error ("code model %s does not support PIC mode", "kernel");
3503 opts->x_ix86_cmodel = CM_32;
3505 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3506 error ("code model %qs not supported in the %s bit mode",
3507 "kernel", "32");
3508 break;
3510 default:
3511 gcc_unreachable ();
3514 else
3516 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3517 use of rip-relative addressing. This eliminates fixups that
3518 would otherwise be needed if this object is to be placed in a
3519 DLL, and is essentially just as efficient as direct addressing. */
3520 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3521 && (TARGET_RDOS || TARGET_PECOFF))
3522 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3523 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3524 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3525 else
3526 opts->x_ix86_cmodel = CM_32;
3528 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3530 error ("-masm=intel not supported in this configuration");
3531 opts->x_ix86_asm_dialect = ASM_ATT;
3533 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3534 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3535 sorry ("%i-bit mode not compiled in",
3536 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3538 for (i = 0; i < pta_size; i++)
3539 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3541 ix86_schedule = processor_alias_table[i].schedule;
3542 ix86_arch = processor_alias_table[i].processor;
3543 /* Default cpu tuning to the architecture. */
3544 ix86_tune = ix86_arch;
3546 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3547 && !(processor_alias_table[i].flags & PTA_64BIT))
3548 error ("CPU you selected does not support x86-64 "
3549 "instruction set");
3551 if (processor_alias_table[i].flags & PTA_MMX
3552 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3553 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3554 if (processor_alias_table[i].flags & PTA_3DNOW
3555 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3556 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3557 if (processor_alias_table[i].flags & PTA_3DNOW_A
3558 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3559 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3560 if (processor_alias_table[i].flags & PTA_SSE
3561 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3562 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3563 if (processor_alias_table[i].flags & PTA_SSE2
3564 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3565 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3566 if (processor_alias_table[i].flags & PTA_SSE3
3567 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3568 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3569 if (processor_alias_table[i].flags & PTA_SSSE3
3570 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3571 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3572 if (processor_alias_table[i].flags & PTA_SSE4_1
3573 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3574 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3575 if (processor_alias_table[i].flags & PTA_SSE4_2
3576 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3577 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3578 if (processor_alias_table[i].flags & PTA_AVX
3579 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3580 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3581 if (processor_alias_table[i].flags & PTA_AVX2
3582 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3583 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3584 if (processor_alias_table[i].flags & PTA_FMA
3585 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3586 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3587 if (processor_alias_table[i].flags & PTA_SSE4A
3588 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3589 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3590 if (processor_alias_table[i].flags & PTA_FMA4
3591 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3592 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3593 if (processor_alias_table[i].flags & PTA_XOP
3594 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3595 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3596 if (processor_alias_table[i].flags & PTA_LWP
3597 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3598 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3599 if (processor_alias_table[i].flags & PTA_ABM
3600 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3601 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3602 if (processor_alias_table[i].flags & PTA_BMI
3603 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3604 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3605 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3606 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3607 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3608 if (processor_alias_table[i].flags & PTA_TBM
3609 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3610 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3611 if (processor_alias_table[i].flags & PTA_BMI2
3612 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3613 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3614 if (processor_alias_table[i].flags & PTA_CX16
3615 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3616 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3617 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3618 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3619 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3620 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3621 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3622 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3623 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
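/* Note on the PTA_NO_SAHF test just above (explanatory, not from the
   original source): SAHF/LAHF are always available in 32-bit mode, so the
   ISA flag is only withheld for 64-bit targets whose table entry carries
   PTA_NO_SAHF -- the nocona/k8-era CPUs above that lacked the instructions
   in long mode; everywhere else OPTION_MASK_ISA_SAHF defaults to on.  */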
3624 if (processor_alias_table[i].flags & PTA_MOVBE
3625 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3626 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3627 if (processor_alias_table[i].flags & PTA_AES
3628 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3629 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3630 if (processor_alias_table[i].flags & PTA_SHA
3631 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3632 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3633 if (processor_alias_table[i].flags & PTA_PCLMUL
3634 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3635 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3636 if (processor_alias_table[i].flags & PTA_FSGSBASE
3637 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3638 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3639 if (processor_alias_table[i].flags & PTA_RDRND
3640 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3641 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3642 if (processor_alias_table[i].flags & PTA_F16C
3643 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3644 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3645 if (processor_alias_table[i].flags & PTA_RTM
3646 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3647 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3648 if (processor_alias_table[i].flags & PTA_HLE
3649 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3650 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3651 if (processor_alias_table[i].flags & PTA_PRFCHW
3652 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3653 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3654 if (processor_alias_table[i].flags & PTA_RDSEED
3655 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3656 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3657 if (processor_alias_table[i].flags & PTA_ADX
3658 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3659 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3660 if (processor_alias_table[i].flags & PTA_FXSR
3661 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3662 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3663 if (processor_alias_table[i].flags & PTA_XSAVE
3664 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3665 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3666 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3667 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3668 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3669 if (processor_alias_table[i].flags & PTA_AVX512F
3670 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3671 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3672 if (processor_alias_table[i].flags & PTA_AVX512ER
3673 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3674 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3675 if (processor_alias_table[i].flags & PTA_AVX512PF
3676 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3677 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3678 if (processor_alias_table[i].flags & PTA_AVX512CD
3679 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3680 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3681 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3682 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3683 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3684 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3685 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3686 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3687 if (processor_alias_table[i].flags & PTA_XSAVEC
3688 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3689 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3690 if (processor_alias_table[i].flags & PTA_XSAVES
3691 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3692 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3693 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3694 x86_prefetch_sse = true;
3696 break;
3699 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3700 error ("generic CPU can be used only for %stune=%s %s",
3701 prefix, suffix, sw);
3702 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3703 error ("intel CPU can be used only for %stune=%s %s",
3704 prefix, suffix, sw);
3705 else if (i == pta_size)
3706 error ("bad value (%s) for %sarch=%s %s",
3707 opts->x_ix86_arch_string, prefix, suffix, sw);
3709 ix86_arch_mask = 1u << ix86_arch;
3710 for (i = 0; i < X86_ARCH_LAST; ++i)
3711 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3713 for (i = 0; i < pta_size; i++)
3714 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3716 ix86_schedule = processor_alias_table[i].schedule;
3717 ix86_tune = processor_alias_table[i].processor;
3718 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3720 if (!(processor_alias_table[i].flags & PTA_64BIT))
3722 if (ix86_tune_defaulted)
3724 opts->x_ix86_tune_string = "x86-64";
3725 for (i = 0; i < pta_size; i++)
3726 if (! strcmp (opts->x_ix86_tune_string,
3727 processor_alias_table[i].name))
3728 break;
3729 ix86_schedule = processor_alias_table[i].schedule;
3730 ix86_tune = processor_alias_table[i].processor;
3732 else
3733 error ("CPU you selected does not support x86-64 "
3734 "instruction set");
3737 /* Intel CPUs have always interpreted SSE prefetch instructions as
3738 NOPs; so, we can enable SSE prefetch instructions even when
3739 -mtune (rather than -march) points us to a processor that has them.
3740 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3741 higher processors. */
3742 if (TARGET_CMOV
3743 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3744 x86_prefetch_sse = true;
3745 break;
3748 if (ix86_tune_specified && i == pta_size)
3749 error ("bad value (%s) for %stune=%s %s",
3750 opts->x_ix86_tune_string, prefix, suffix, sw);
3752 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3754 #ifndef USE_IX86_FRAME_POINTER
3755 #define USE_IX86_FRAME_POINTER 0
3756 #endif
3758 #ifndef USE_X86_64_FRAME_POINTER
3759 #define USE_X86_64_FRAME_POINTER 0
3760 #endif
3762 /* Set the default values for switches whose default depends on TARGET_64BIT
3763 in case they weren't overwritten by command line options. */
3764 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3766 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3767 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3768 if (opts->x_flag_asynchronous_unwind_tables
3769 && !opts_set->x_flag_unwind_tables
3770 && TARGET_64BIT_MS_ABI)
3771 opts->x_flag_unwind_tables = 1;
3772 if (opts->x_flag_asynchronous_unwind_tables == 2)
3773 opts->x_flag_unwind_tables
3774 = opts->x_flag_asynchronous_unwind_tables = 1;
3775 if (opts->x_flag_pcc_struct_return == 2)
3776 opts->x_flag_pcc_struct_return = 0;
3778 else
3780 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3781 opts->x_flag_omit_frame_pointer
3782 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3783 if (opts->x_flag_asynchronous_unwind_tables == 2)
3784 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3785 if (opts->x_flag_pcc_struct_return == 2)
3786 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3789 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3790 if (opts->x_optimize_size)
3791 ix86_cost = &ix86_size_cost;
3792 else
3793 ix86_cost = ix86_tune_cost;
3795 /* Arrange to set up i386_stack_locals for all functions. */
3796 init_machine_status = ix86_init_machine_status;
3798 /* Validate -mregparm= value. */
3799 if (opts_set->x_ix86_regparm)
3801 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3802 warning (0, "-mregparm is ignored in 64-bit mode");
3803 if (opts->x_ix86_regparm > REGPARM_MAX)
3805 error ("-mregparm=%d is not between 0 and %d",
3806 opts->x_ix86_regparm, REGPARM_MAX);
3807 opts->x_ix86_regparm = 0;
3810 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3811 opts->x_ix86_regparm = REGPARM_MAX;
3813 /* Default align_* from the processor table. */
3814 if (opts->x_align_loops == 0)
3816 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3817 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3819 if (opts->x_align_jumps == 0)
3821 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3822 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3824 if (opts->x_align_functions == 0)
3826 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3829 /* Provide default for -mbranch-cost= value. */
3830 if (!opts_set->x_ix86_branch_cost)
3831 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3833 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3835 opts->x_target_flags
3836 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3838 /* Enable by default the SSE and MMX builtins. Do allow the user to
3839 explicitly disable any of these. In particular, disabling SSE and
3840 MMX for kernel code is extremely useful. */
3841 if (!ix86_arch_specified)
3842 opts->x_ix86_isa_flags
3843 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3844 | TARGET_SUBTARGET64_ISA_DEFAULT)
3845 & ~opts->x_ix86_isa_flags_explicit);
3847 if (TARGET_RTD_P (opts->x_target_flags))
3848 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3850 else
3852 opts->x_target_flags
3853 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3855 if (!ix86_arch_specified)
3856 opts->x_ix86_isa_flags
3857 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3859 /* i386 ABI does not specify red zone. It still makes sense to use it
3860 when the programmer takes care to keep the stack from being destroyed. */
3861 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3862 opts->x_target_flags |= MASK_NO_RED_ZONE;
3865 /* Keep nonleaf frame pointers. */
3866 if (opts->x_flag_omit_frame_pointer)
3867 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3868 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3869 opts->x_flag_omit_frame_pointer = 1;
3871 /* If we're doing fast math, we don't care about comparison order
3872 wrt NaNs. This lets us use a shorter comparison sequence. */
3873 if (opts->x_flag_finite_math_only)
3874 opts->x_target_flags &= ~MASK_IEEE_FP;
3876 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3877 since the insns won't need emulation. */
3878 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3879 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3881 /* Likewise, if the target doesn't have a 387, or we've specified
3882 software floating point, don't use 387 inline intrinsics. */
3883 if (!TARGET_80387_P (opts->x_target_flags))
3884 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3886 /* Turn on MMX builtins for -msse. */
3887 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3888 opts->x_ix86_isa_flags
3889 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3891 /* Enable SSE prefetch. */
3892 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3893 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3894 x86_prefetch_sse = true;
3896 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3897 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3898 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3899 opts->x_ix86_isa_flags
3900 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3902 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3903 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3904 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3905 opts->x_ix86_isa_flags
3906 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3908 /* Enable lzcnt instruction for -mabm. */
3909 if (TARGET_ABM_P (opts->x_ix86_isa_flags))
3910 opts->x_ix86_isa_flags
3911 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3913 /* Validate -mpreferred-stack-boundary= value or default it to
3914 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3915 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3916 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3918 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3919 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3920 int max = (TARGET_SEH ? 4 : 12);
3922 if (opts->x_ix86_preferred_stack_boundary_arg < min
3923 || opts->x_ix86_preferred_stack_boundary_arg > max)
3925 if (min == max)
3926 error ("-mpreferred-stack-boundary is not supported "
3927 "for this target");
3928 else
3929 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3930 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3932 else
3933 ix86_preferred_stack_boundary
3934 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
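/* Worked example (illustrative): the option argument is the base-2 log of
   the boundary in bytes, so -mpreferred-stack-boundary=4 gives
   (1 << 4) * BITS_PER_UNIT = 16 bytes = 128 bits, the usual SSE-friendly
   alignment; the min/max values computed above keep the argument within the
   range the target supports.  */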
3937 /* Set the default value for -mstackrealign. */
3938 if (opts->x_ix86_force_align_arg_pointer == -1)
3939 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3941 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3943 /* Validate -mincoming-stack-boundary= value or default it to
3944 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3945 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3946 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3948 if (opts->x_ix86_incoming_stack_boundary_arg
3949 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3950 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3951 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3952 opts->x_ix86_incoming_stack_boundary_arg,
3953 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3954 else
3956 ix86_user_incoming_stack_boundary
3957 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3958 ix86_incoming_stack_boundary
3959 = ix86_user_incoming_stack_boundary;
3963 /* Accept -msseregparm only if at least SSE support is enabled. */
3964 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3965 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
3966 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3968 if (opts_set->x_ix86_fpmath)
3970 if (opts->x_ix86_fpmath & FPMATH_SSE)
3972 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
3974 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3975 opts->x_ix86_fpmath = FPMATH_387;
3977 else if ((opts->x_ix86_fpmath & FPMATH_387)
3978 && !TARGET_80387_P (opts->x_target_flags))
3980 warning (0, "387 instruction set disabled, using SSE arithmetics");
3981 opts->x_ix86_fpmath = FPMATH_SSE;
3985 /* For all chips supporting SSE2, -mfpmath=sse performs better than
3986 fpmath=387. The latter is, however, the default on many targets, since the
3987 extra 80-bit precision of temporaries is considered to be part of the ABI.
3988 Overwrite the default at least for -ffast-math.
3989 TODO: -mfpmath=both seems to produce comparably performing code with
3990 slightly smaller binaries. It is however not clear if register allocation
3991 is ready for this setting.
3992 Also -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
3993 codegen. We may switch to 387 with -ffast-math for size optimized
3994 functions. */
3995 else if (fast_math_flags_set_p (&global_options)
3996 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
3997 opts->x_ix86_fpmath = FPMATH_SSE;
3998 else
3999 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4001 /* If the i387 is disabled, then do not return values in it. */
4002 if (!TARGET_80387_P (opts->x_target_flags))
4003 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4005 /* Use external vectorized library in vectorizing intrinsics. */
4006 if (opts_set->x_ix86_veclibabi_type)
4007 switch (opts->x_ix86_veclibabi_type)
4009 case ix86_veclibabi_type_svml:
4010 ix86_veclib_handler = ix86_veclibabi_svml;
4011 break;
4013 case ix86_veclibabi_type_acml:
4014 ix86_veclib_handler = ix86_veclibabi_acml;
4015 break;
4017 default:
4018 gcc_unreachable ();
4021 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4022 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4023 && !opts->x_optimize_size)
4024 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4026 /* If stack probes are required, the space used for large function
4027 arguments on the stack must also be probed, so enable
4028 -maccumulate-outgoing-args so this happens in the prologue. */
4029 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4030 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4032 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4033 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4034 "for correctness", prefix, suffix);
4035 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4038 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4040 char *p;
4041 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4042 p = strchr (internal_label_prefix, 'X');
4043 internal_label_prefix_len = p - internal_label_prefix;
4044 *p = '\0';
4047 /* When scheduling description is not available, disable scheduler pass
4048 so it won't slow down the compilation and make x87 code slower. */
4049 if (!TARGET_SCHEDULE)
4050 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4052 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4053 ix86_tune_cost->simultaneous_prefetches,
4054 opts->x_param_values,
4055 opts_set->x_param_values);
4056 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4057 ix86_tune_cost->prefetch_block,
4058 opts->x_param_values,
4059 opts_set->x_param_values);
4060 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4061 ix86_tune_cost->l1_cache_size,
4062 opts->x_param_values,
4063 opts_set->x_param_values);
4064 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4065 ix86_tune_cost->l2_cache_size,
4066 opts->x_param_values,
4067 opts_set->x_param_values);
4069 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4070 if (opts->x_flag_prefetch_loop_arrays < 0
4071 && HAVE_prefetch
4072 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4073 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4074 opts->x_flag_prefetch_loop_arrays = 1;
4076 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4077 can be optimized to ap = __builtin_next_arg (0). */
4078 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4079 targetm.expand_builtin_va_start = NULL;
4081 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4083 ix86_gen_leave = gen_leave_rex64;
4084 if (Pmode == DImode)
4086 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4087 ix86_gen_tls_local_dynamic_base_64
4088 = gen_tls_local_dynamic_base_64_di;
4090 else
4092 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4093 ix86_gen_tls_local_dynamic_base_64
4094 = gen_tls_local_dynamic_base_64_si;
4097 else
4098 ix86_gen_leave = gen_leave;
4100 if (Pmode == DImode)
4102 ix86_gen_add3 = gen_adddi3;
4103 ix86_gen_sub3 = gen_subdi3;
4104 ix86_gen_sub3_carry = gen_subdi3_carry;
4105 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4106 ix86_gen_andsp = gen_anddi3;
4107 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4108 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4109 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4110 ix86_gen_monitor = gen_sse3_monitor_di;
4112 else
4114 ix86_gen_add3 = gen_addsi3;
4115 ix86_gen_sub3 = gen_subsi3;
4116 ix86_gen_sub3_carry = gen_subsi3_carry;
4117 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4118 ix86_gen_andsp = gen_andsi3;
4119 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4120 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4121 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4122 ix86_gen_monitor = gen_sse3_monitor_si;
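/* Illustrative note: the Pmode checks above distinguish x32 from LP64.  With
   -mx32, TARGET_64BIT is true but ix86_pmode was defaulted to PMODE_SI
   earlier, so Pmode is SImode and the *_si generator variants are selected;
   plain LP64 code uses the *_di variants.  */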
4125 #ifdef USE_IX86_CLD
4126 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4127 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4128 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4129 #endif
4131 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4133 if (opts->x_flag_fentry > 0)
4134 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4135 "with -fpic");
4136 opts->x_flag_fentry = 0;
4138 else if (TARGET_SEH)
4140 if (opts->x_flag_fentry == 0)
4141 sorry ("-mno-fentry isn%'t compatible with SEH");
4142 opts->x_flag_fentry = 1;
4144 else if (opts->x_flag_fentry < 0)
4146 #if defined(PROFILE_BEFORE_PROLOGUE)
4147 opts->x_flag_fentry = 1;
4148 #else
4149 opts->x_flag_fentry = 0;
4150 #endif
4153 /* When not optimizing for size, enable vzeroupper optimization for
4154 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4155 AVX unaligned load/store. */
4156 if (!opts->x_optimize_size)
4158 if (flag_expensive_optimizations
4159 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4160 opts->x_target_flags |= MASK_VZEROUPPER;
4161 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4162 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4163 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4164 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4165 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4166 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4167 /* Enable 128-bit AVX instruction generation
4168 for the auto-vectorizer. */
4169 if (TARGET_AVX128_OPTIMAL
4170 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4171 opts->x_target_flags |= MASK_PREFER_AVX128;
4174 if (opts->x_ix86_recip_name)
4176 char *p = ASTRDUP (opts->x_ix86_recip_name);
4177 char *q;
4178 unsigned int mask, i;
4179 bool invert;
4181 while ((q = strtok (p, ",")) != NULL)
4183 p = NULL;
4184 if (*q == '!')
4186 invert = true;
4187 q++;
4189 else
4190 invert = false;
4192 if (!strcmp (q, "default"))
4193 mask = RECIP_MASK_ALL;
4194 else
4196 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4197 if (!strcmp (q, recip_options[i].string))
4199 mask = recip_options[i].mask;
4200 break;
4203 if (i == ARRAY_SIZE (recip_options))
4205 error ("unknown option for -mrecip=%s", q);
4206 invert = false;
4207 mask = RECIP_MASK_NONE;
4211 opts->x_recip_mask_explicit |= mask;
4212 if (invert)
4213 opts->x_recip_mask &= ~mask;
4214 else
4215 opts->x_recip_mask |= mask;
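/* Illustrative example of the parsing above: -mrecip=all,!sqrt first sets
   every RECIP_MASK_* bit via the "all" entry and then clears RECIP_MASK_SQRT
   because of the '!' prefix, leaving reciprocal approximations enabled for
   everything except scalar square roots.  */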
4219 if (TARGET_RECIP_P (opts->x_target_flags))
4220 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4221 else if (opts_set->x_target_flags & MASK_RECIP)
4222 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4224 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4225 for 64-bit Bionic. */
4226 if (TARGET_HAS_BIONIC
4227 && !(opts_set->x_target_flags
4228 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4229 opts->x_target_flags |= (TARGET_64BIT
4230 ? MASK_LONG_DOUBLE_128
4231 : MASK_LONG_DOUBLE_64);
4233 /* Only one of them can be active. */
4234 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4235 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4237 /* Save the initial options in case the user does function specific
4238 options. */
4239 if (main_args_p)
4240 target_option_default_node = target_option_current_node
4241 = build_target_option_node (opts);
4243 /* Handle stack protector */
4244 if (!opts_set->x_ix86_stack_protector_guard)
4245 opts->x_ix86_stack_protector_guard
4246 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4248 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4249 if (opts->x_ix86_tune_memcpy_strategy)
4251 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4252 ix86_parse_stringop_strategy_string (str, false);
4253 free (str);
4256 if (opts->x_ix86_tune_memset_strategy)
4258 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4259 ix86_parse_stringop_strategy_string (str, true);
4260 free (str);
4264 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4266 static void
4267 ix86_option_override (void)
4269 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4270 static struct register_pass_info insert_vzeroupper_info
4271 = { pass_insert_vzeroupper, "reload",
4272 1, PASS_POS_INSERT_AFTER
4275 ix86_option_override_internal (true, &global_options, &global_options_set);
4278 /* This needs to be done at start up. It's convenient to do it here. */
4279 register_pass (&insert_vzeroupper_info);
4282 /* Update register usage after having seen the compiler flags. */
4284 static void
4285 ix86_conditional_register_usage (void)
4287 int i, c_mask;
4288 unsigned int j;
4290 /* The PIC register, if it exists, is fixed. */
4291 j = PIC_OFFSET_TABLE_REGNUM;
4292 if (j != INVALID_REGNUM)
4293 fixed_regs[j] = call_used_regs[j] = 1;
4295 /* For 32-bit targets, squash the REX registers. */
4296 if (! TARGET_64BIT)
4298 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4299 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4300 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4301 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4302 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4303 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4306 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4307 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4308 : TARGET_64BIT ? (1 << 2)
4309 : (1 << 1));
4311 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4313 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4315 /* Set/reset conditionally defined registers from
4316 CALL_USED_REGISTERS initializer. */
4317 if (call_used_regs[i] > 1)
4318 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4320 /* Calculate registers of CLOBBERED_REGS register set
4321 as call used registers from GENERAL_REGS register set. */
4322 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4323 && call_used_regs[i])
4324 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
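/* Illustrative example (values hypothetical): with the 32-bit c_mask of
   (1 << 1), a CALL_USED_REGISTERS initializer entry of 2 (binary 10)
   resolves to call-used, while an entry of 4 (binary 100) resolves to
   call-saved; only entries greater than 1 are treated as such per-ABI
   bit masks.  */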
4327 /* If MMX is disabled, squash the registers. */
4328 if (! TARGET_MMX)
4329 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4330 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4331 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4333 /* If SSE is disabled, squash the registers. */
4334 if (! TARGET_SSE)
4335 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4336 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4337 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4339 /* If the FPU is disabled, squash the registers. */
4340 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4341 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4342 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4343 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4345 /* If AVX512F is disabled, squash the registers. */
4346 if (! TARGET_AVX512F)
4348 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4349 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4351 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4352 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4357 /* Save the current options */
4359 static void
4360 ix86_function_specific_save (struct cl_target_option *ptr,
4361 struct gcc_options *opts)
4363 ptr->arch = ix86_arch;
4364 ptr->schedule = ix86_schedule;
4365 ptr->tune = ix86_tune;
4366 ptr->branch_cost = ix86_branch_cost;
4367 ptr->tune_defaulted = ix86_tune_defaulted;
4368 ptr->arch_specified = ix86_arch_specified;
4369 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4370 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4371 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4372 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4373 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4374 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4375 ptr->x_ix86_abi = opts->x_ix86_abi;
4376 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4377 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4378 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4379 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4380 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4381 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4382 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4383 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4384 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4385 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4386 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4387 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4388 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4389 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4390 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4391 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4392 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4393 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4394 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4395 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4397 /* The fields are char but the variables are not; make sure the
4398 values fit in the fields. */
4399 gcc_assert (ptr->arch == ix86_arch);
4400 gcc_assert (ptr->schedule == ix86_schedule);
4401 gcc_assert (ptr->tune == ix86_tune);
4402 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4405 /* Restore the current options */
4407 static void
4408 ix86_function_specific_restore (struct gcc_options *opts,
4409 struct cl_target_option *ptr)
4411 enum processor_type old_tune = ix86_tune;
4412 enum processor_type old_arch = ix86_arch;
4413 unsigned int ix86_arch_mask;
4414 int i;
4416 /* We don't change -fPIC. */
4417 opts->x_flag_pic = flag_pic;
4419 ix86_arch = (enum processor_type) ptr->arch;
4420 ix86_schedule = (enum attr_cpu) ptr->schedule;
4421 ix86_tune = (enum processor_type) ptr->tune;
4422 opts->x_ix86_branch_cost = ptr->branch_cost;
4423 ix86_tune_defaulted = ptr->tune_defaulted;
4424 ix86_arch_specified = ptr->arch_specified;
4425 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4426 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4427 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4428 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4429 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4430 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4431 opts->x_ix86_abi = ptr->x_ix86_abi;
4432 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4433 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4434 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4435 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4436 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4437 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4438 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4439 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4440 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4441 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4442 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4443 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4444 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4445 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4446 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4447 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4448 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4449 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4450 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4451 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4453 /* Recreate the arch feature tests if the arch changed */
4454 if (old_arch != ix86_arch)
4456 ix86_arch_mask = 1u << ix86_arch;
4457 for (i = 0; i < X86_ARCH_LAST; ++i)
4458 ix86_arch_features[i]
4459 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4462 /* Recreate the tune optimization tests */
4463 if (old_tune != ix86_tune)
4464 set_ix86_tune_features (ix86_tune, false);
4467 /* Print the current options */
4469 static void
4470 ix86_function_specific_print (FILE *file, int indent,
4471 struct cl_target_option *ptr)
4473 char *target_string
4474 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4475 NULL, NULL, ptr->x_ix86_fpmath, false);
4477 gcc_assert (ptr->arch < PROCESSOR_max);
4478 fprintf (file, "%*sarch = %d (%s)\n",
4479 indent, "",
4480 ptr->arch, processor_target_table[ptr->arch].name);
4482 gcc_assert (ptr->tune < PROCESSOR_max);
4483 fprintf (file, "%*stune = %d (%s)\n",
4484 indent, "",
4485 ptr->tune, processor_target_table[ptr->tune].name);
4487 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4489 if (target_string)
4491 fprintf (file, "%*s%s\n", indent, "", target_string);
4492 free (target_string);
4497 /* Inner function to process the attribute((target(...))), take an argument and
4498 set the current options from the argument. If we have a list, recursively go
4499 over the list. */
4501 static bool
4502 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4503 struct gcc_options *opts,
4504 struct gcc_options *opts_set,
4505 struct gcc_options *enum_opts_set)
4507 char *next_optstr;
4508 bool ret = true;
4510 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4511 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4512 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4513 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4514 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4516 enum ix86_opt_type
4518 ix86_opt_unknown,
4519 ix86_opt_yes,
4520 ix86_opt_no,
4521 ix86_opt_str,
4522 ix86_opt_enum,
4523 ix86_opt_isa
4526 static const struct
4528 const char *string;
4529 size_t len;
4530 enum ix86_opt_type type;
4531 int opt;
4532 int mask;
4533 } attrs[] = {
4534 /* isa options */
4535 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4536 IX86_ATTR_ISA ("abm", OPT_mabm),
4537 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4538 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4539 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4540 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4541 IX86_ATTR_ISA ("aes", OPT_maes),
4542 IX86_ATTR_ISA ("sha", OPT_msha),
4543 IX86_ATTR_ISA ("avx", OPT_mavx),
4544 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4545 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4546 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4547 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4548 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4549 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4550 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4551 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4552 IX86_ATTR_ISA ("sse", OPT_msse),
4553 IX86_ATTR_ISA ("sse2", OPT_msse2),
4554 IX86_ATTR_ISA ("sse3", OPT_msse3),
4555 IX86_ATTR_ISA ("sse4", OPT_msse4),
4556 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4557 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4558 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4559 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4560 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4561 IX86_ATTR_ISA ("fma", OPT_mfma),
4562 IX86_ATTR_ISA ("xop", OPT_mxop),
4563 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4564 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4565 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4566 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4567 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4568 IX86_ATTR_ISA ("hle", OPT_mhle),
4569 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4570 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4571 IX86_ATTR_ISA ("adx", OPT_madx),
4572 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4573 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4574 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4575 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4576 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4577 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4578 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4580 /* enum options */
4581 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4583 /* string options */
4584 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4585 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4587 /* flag options */
4588 IX86_ATTR_YES ("cld",
4589 OPT_mcld,
4590 MASK_CLD),
4592 IX86_ATTR_NO ("fancy-math-387",
4593 OPT_mfancy_math_387,
4594 MASK_NO_FANCY_MATH_387),
4596 IX86_ATTR_YES ("ieee-fp",
4597 OPT_mieee_fp,
4598 MASK_IEEE_FP),
4600 IX86_ATTR_YES ("inline-all-stringops",
4601 OPT_minline_all_stringops,
4602 MASK_INLINE_ALL_STRINGOPS),
4604 IX86_ATTR_YES ("inline-stringops-dynamically",
4605 OPT_minline_stringops_dynamically,
4606 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4608 IX86_ATTR_NO ("align-stringops",
4609 OPT_mno_align_stringops,
4610 MASK_NO_ALIGN_STRINGOPS),
4612 IX86_ATTR_YES ("recip",
4613 OPT_mrecip,
4614 MASK_RECIP),
4618 /* If this is a list, recurse to get the options. */
4619 if (TREE_CODE (args) == TREE_LIST)
4621 bool ret = true;
4623 for (; args; args = TREE_CHAIN (args))
4624 if (TREE_VALUE (args)
4625 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4626 p_strings, opts, opts_set,
4627 enum_opts_set))
4628 ret = false;
4630 return ret;
4633 else if (TREE_CODE (args) != STRING_CST)
4635 error ("attribute %<target%> argument not a string");
4636 return false;
4639 /* Handle multiple arguments separated by commas. */
4640 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4642 while (next_optstr && *next_optstr != '\0')
4644 char *p = next_optstr;
4645 char *orig_p = p;
4646 char *comma = strchr (next_optstr, ',');
4647 const char *opt_string;
4648 size_t len, opt_len;
4649 int opt;
4650 bool opt_set_p;
4651 char ch;
4652 unsigned i;
4653 enum ix86_opt_type type = ix86_opt_unknown;
4654 int mask = 0;
4656 if (comma)
4658 *comma = '\0';
4659 len = comma - next_optstr;
4660 next_optstr = comma + 1;
4662 else
4664 len = strlen (p);
4665 next_optstr = NULL;
4668 /* Recognize no-xxx. */
4669 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4671 opt_set_p = false;
4672 p += 3;
4673 len -= 3;
4675 else
4676 opt_set_p = true;
4678 /* Find the option. */
4679 ch = *p;
4680 opt = N_OPTS;
4681 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4683 type = attrs[i].type;
4684 opt_len = attrs[i].len;
4685 if (ch == attrs[i].string[0]
4686 && ((type != ix86_opt_str && type != ix86_opt_enum)
4687 ? len == opt_len
4688 : len > opt_len)
4689 && memcmp (p, attrs[i].string, opt_len) == 0)
4691 opt = attrs[i].opt;
4692 mask = attrs[i].mask;
4693 opt_string = attrs[i].string;
4694 break;
4698 /* Process the option. */
4699 if (opt == N_OPTS)
4701 error ("attribute(target(\"%s\")) is unknown", orig_p);
4702 ret = false;
4705 else if (type == ix86_opt_isa)
4707 struct cl_decoded_option decoded;
4709 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4710 ix86_handle_option (opts, opts_set,
4711 &decoded, input_location);
4714 else if (type == ix86_opt_yes || type == ix86_opt_no)
4716 if (type == ix86_opt_no)
4717 opt_set_p = !opt_set_p;
4719 if (opt_set_p)
4720 opts->x_target_flags |= mask;
4721 else
4722 opts->x_target_flags &= ~mask;
4725 else if (type == ix86_opt_str)
4727 if (p_strings[opt])
4729 error ("option(\"%s\") was already specified", opt_string);
4730 ret = false;
4732 else
4733 p_strings[opt] = xstrdup (p + opt_len);
4736 else if (type == ix86_opt_enum)
4738 bool arg_ok;
4739 int value;
4741 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4742 if (arg_ok)
4743 set_option (opts, enum_opts_set, opt, value,
4744 p + opt_len, DK_UNSPECIFIED, input_location,
4745 global_dc);
4746 else
4748 error ("attribute(target(\"%s\")) is unknown", orig_p);
4749 ret = false;
4753 else
4754 gcc_unreachable ();
4757 return ret;
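/* Editor's note -- illustrative example, not part of the original source.
   A user-level declaration whose attribute string the parser above would
   walk might look like (function and option names here are hypothetical
   choices, not mandated by GCC):

       void kernel (float *dst, const float *src, int n)
         __attribute__ ((target ("avx2,no-sse4a,arch=core-avx2")));

   Comma-separated entries are processed one at a time: "avx2" sets an ISA
   flag, the "no-" prefix clears one, "arch=" and "tune=" go through the
   string-option path, and "fpmath=" through the enum path.  */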
4760 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4762 tree
4763 ix86_valid_target_attribute_tree (tree args,
4764 struct gcc_options *opts,
4765 struct gcc_options *opts_set)
4767 const char *orig_arch_string = opts->x_ix86_arch_string;
4768 const char *orig_tune_string = opts->x_ix86_tune_string;
4769 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4770 int orig_tune_defaulted = ix86_tune_defaulted;
4771 int orig_arch_specified = ix86_arch_specified;
4772 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4773 tree t = NULL_TREE;
4774 int i;
4775 struct cl_target_option *def
4776 = TREE_TARGET_OPTION (target_option_default_node);
4777 struct gcc_options enum_opts_set;
4779 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4781 /* Process each of the options on the chain. */
4782 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4783 opts_set, &enum_opts_set))
4784 return error_mark_node;
4786 /* If the changed options are different from the default, rerun
4787 ix86_option_override_internal, and then save the options away.
4788 The string options are attribute options, and will be undone
4789 when we copy the save structure. */
4790 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4791 || opts->x_target_flags != def->x_target_flags
4792 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4793 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4794 || enum_opts_set.x_ix86_fpmath)
4796 /* If we are using the default tune= or arch=, undo the string assigned,
4797 and use the default. */
4798 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4799 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4800 else if (!orig_arch_specified)
4801 opts->x_ix86_arch_string = NULL;
4803 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4804 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4805 else if (orig_tune_defaulted)
4806 opts->x_ix86_tune_string = NULL;
4808 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4809 if (enum_opts_set.x_ix86_fpmath)
4810 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4811 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4812 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4814 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4815 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4818 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4819 ix86_option_override_internal (false, opts, opts_set);
4821 /* Add any builtin functions with the new isa if any. */
4822 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4824 /* Save the current options unless we are validating options for
4825 #pragma. */
4826 t = build_target_option_node (opts);
4828 opts->x_ix86_arch_string = orig_arch_string;
4829 opts->x_ix86_tune_string = orig_tune_string;
4830 opts_set->x_ix86_fpmath = orig_fpmath_set;
4832 /* Free up memory allocated to hold the strings */
4833 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4834 free (option_strings[i]);
4837 return t;
4840 /* Hook to validate attribute((target("string"))). */
4842 static bool
4843 ix86_valid_target_attribute_p (tree fndecl,
4844 tree ARG_UNUSED (name),
4845 tree args,
4846 int ARG_UNUSED (flags))
4848 struct gcc_options func_options;
4849 tree new_target, new_optimize;
4850 bool ret = true;
4852 /* attribute((target("default"))) does nothing, beyond
4853 affecting multi-versioning. */
4854 if (TREE_VALUE (args)
4855 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4856 && TREE_CHAIN (args) == NULL_TREE
4857 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4858 return true;
4860 tree old_optimize = build_optimization_node (&global_options);
4862 /* Get the optimization options of the current function. */
4863 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4865 if (!func_optimize)
4866 func_optimize = old_optimize;
4868 /* Init func_options. */
4869 memset (&func_options, 0, sizeof (func_options));
4870 init_options_struct (&func_options, NULL);
4871 lang_hooks.init_options_struct (&func_options);
4873 cl_optimization_restore (&func_options,
4874 TREE_OPTIMIZATION (func_optimize));
4876 /* Initialize func_options to the default before its target options can
4877 be set. */
4878 cl_target_option_restore (&func_options,
4879 TREE_TARGET_OPTION (target_option_default_node));
4881 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4882 &global_options_set);
4884 new_optimize = build_optimization_node (&func_options);
4886 if (new_target == error_mark_node)
4887 ret = false;
4889 else if (fndecl && new_target)
4891 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4893 if (old_optimize != new_optimize)
4894 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4897 return ret;
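/* Editor's note -- illustrative example, not part of the original source.
   The "default" special case above matters for function multi-versioning
   (C++ only at this point); a hypothetical pair of versions:

       __attribute__ ((target ("default"))) int dispatch () { return 0; }
       __attribute__ ((target ("avx2")))    int dispatch () { return 1; }

   Only the non-"default" version has its option string validated and a
   target option node recorded here.  */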
4901 /* Hook to determine if one function can safely inline another. */
4903 static bool
4904 ix86_can_inline_p (tree caller, tree callee)
4906 bool ret = false;
4907 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4908 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4910 /* If callee has no option attributes, then it is ok to inline. */
4911 if (!callee_tree)
4912 ret = true;
4914 /* If caller has no option attributes, but callee does then it is not ok to
4915 inline. */
4916 else if (!caller_tree)
4917 ret = false;
4919 else
4921 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4922 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4924 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
4925 function can inline an SSE2 function, but an SSE2 function can't inline
4926 an SSE4 function. */
4927 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4928 != callee_opts->x_ix86_isa_flags)
4929 ret = false;
4931 /* See if we have the same non-isa options. */
4932 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4933 ret = false;
4935 /* See if arch, tune, etc. are the same. */
4936 else if (caller_opts->arch != callee_opts->arch)
4937 ret = false;
4939 else if (caller_opts->tune != callee_opts->tune)
4940 ret = false;
4942 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4943 ret = false;
4945 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4946 ret = false;
4948 else
4949 ret = true;
4952 return ret;
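/* Editor's note -- illustrative sketch, not part of the original source.
   The subset rule above means, for example (hypothetical functions):

       __attribute__ ((target ("sse2"))) static int f (int x) { return x; }
       __attribute__ ((target ("avx2"))) int g (int x) { return f (x); }

   g may inline f, since the SSE2 ISA flags are contained in the AVX2
   caller's flags, but an AVX2 callee could not be inlined into an
   SSE2-only caller.  */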
4956 /* Remember the last target of ix86_set_current_function. */
4957 static GTY(()) tree ix86_previous_fndecl;
4959 /* Invalidate ix86_previous_fndecl cache. */
4960 void
4961 ix86_reset_previous_fndecl (void)
4963 ix86_previous_fndecl = NULL_TREE;
4966 /* Establish appropriate back-end context for processing the function
4967 FNDECL. The argument might be NULL to indicate processing at top
4968 level, outside of any function scope. */
4969 static void
4970 ix86_set_current_function (tree fndecl)
4972 /* Only change the context if the function changes. This hook is called
4973 several times in the course of compiling a function, and we don't want to
4974 slow things down too much or call target_reinit when it isn't safe. */
4975 if (fndecl && fndecl != ix86_previous_fndecl)
4977 tree old_tree = (ix86_previous_fndecl
4978 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4979 : NULL_TREE);
4981 tree new_tree = (fndecl
4982 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4983 : NULL_TREE);
4985 ix86_previous_fndecl = fndecl;
4986 if (old_tree == new_tree)
4989 else if (new_tree)
4991 cl_target_option_restore (&global_options,
4992 TREE_TARGET_OPTION (new_tree));
4993 if (TREE_TARGET_GLOBALS (new_tree))
4994 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
4995 else
4996 TREE_TARGET_GLOBALS (new_tree)
4997 = save_target_globals_default_opts ();
5000 else if (old_tree)
5002 new_tree = target_option_current_node;
5003 cl_target_option_restore (&global_options,
5004 TREE_TARGET_OPTION (new_tree));
5005 if (TREE_TARGET_GLOBALS (new_tree))
5006 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5007 else if (new_tree == target_option_default_node)
5008 restore_target_globals (&default_target_globals);
5009 else
5010 TREE_TARGET_GLOBALS (new_tree)
5011 = save_target_globals_default_opts ();
5017 /* Return true if this goes in large data/bss. */
5019 static bool
5020 ix86_in_large_data_p (tree exp)
5022 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5023 return false;
5025 /* Functions are never large data. */
5026 if (TREE_CODE (exp) == FUNCTION_DECL)
5027 return false;
5029 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5031 const char *section = DECL_SECTION_NAME (exp);
5032 if (strcmp (section, ".ldata") == 0
5033 || strcmp (section, ".lbss") == 0)
5034 return true;
5035 return false;
5037 else
5039 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5041 /* If this is an incomplete type with size 0, then we can't put it
5042 in data because it might be too big when completed. */
5043 if (!size || size > ix86_section_threshold)
5044 return true;
5047 return false;
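/* Editor's note -- illustrative example, not part of the original source.
   Under -mcmodel=medium, an object bigger than the -mlarge-data-threshold
   value (ix86_section_threshold) counts as large data; a hypothetical

       static char big_buffer[1 << 20];

   would therefore be placed in .lbss rather than the ordinary .bss.  */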
5050 /* Switch to the appropriate section for output of DECL.
5051 DECL is either a `VAR_DECL' node or a constant of some sort.
5052 RELOC indicates whether forming the initial value of DECL requires
5053 link-time relocations. */
5055 ATTRIBUTE_UNUSED static section *
5056 x86_64_elf_select_section (tree decl, int reloc,
5057 unsigned HOST_WIDE_INT align)
5059 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5060 && ix86_in_large_data_p (decl))
5062 const char *sname = NULL;
5063 unsigned int flags = SECTION_WRITE;
5064 switch (categorize_decl_for_section (decl, reloc))
5066 case SECCAT_DATA:
5067 sname = ".ldata";
5068 break;
5069 case SECCAT_DATA_REL:
5070 sname = ".ldata.rel";
5071 break;
5072 case SECCAT_DATA_REL_LOCAL:
5073 sname = ".ldata.rel.local";
5074 break;
5075 case SECCAT_DATA_REL_RO:
5076 sname = ".ldata.rel.ro";
5077 break;
5078 case SECCAT_DATA_REL_RO_LOCAL:
5079 sname = ".ldata.rel.ro.local";
5080 break;
5081 case SECCAT_BSS:
5082 sname = ".lbss";
5083 flags |= SECTION_BSS;
5084 break;
5085 case SECCAT_RODATA:
5086 case SECCAT_RODATA_MERGE_STR:
5087 case SECCAT_RODATA_MERGE_STR_INIT:
5088 case SECCAT_RODATA_MERGE_CONST:
5089 sname = ".lrodata";
5090 flags = 0;
5091 break;
5092 case SECCAT_SRODATA:
5093 case SECCAT_SDATA:
5094 case SECCAT_SBSS:
5095 gcc_unreachable ();
5096 case SECCAT_TEXT:
5097 case SECCAT_TDATA:
5098 case SECCAT_TBSS:
5099 /* We don't split these for the medium model. Place them into
5100 default sections and hope for the best. */
5101 break;
5103 if (sname)
5105 /* We might get called with string constants, but get_named_section
5106 doesn't like them as they are not DECLs. Also, we need to set
5107 flags in that case. */
5108 if (!DECL_P (decl))
5109 return get_section (sname, flags, NULL);
5110 return get_named_section (decl, sname, reloc);
5113 return default_elf_select_section (decl, reloc, align);
5116 /* Select a set of attributes for section NAME based on the properties
5117 of DECL and whether or not RELOC indicates that DECL's initializer
5118 might contain runtime relocations. */
5120 static unsigned int ATTRIBUTE_UNUSED
5121 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5123 unsigned int flags = default_section_type_flags (decl, name, reloc);
5125 if (decl == NULL_TREE
5126 && (strcmp (name, ".ldata.rel.ro") == 0
5127 || strcmp (name, ".ldata.rel.ro.local") == 0))
5128 flags |= SECTION_RELRO;
5130 if (strcmp (name, ".lbss") == 0
5131 || strncmp (name, ".lbss.", 6) == 0
5132 || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
5133 flags |= SECTION_BSS;
5135 return flags;
5138 /* Build up a unique section name, expressed as a
5139 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5140 RELOC indicates whether the initial value of EXP requires
5141 link-time relocations. */
5143 static void ATTRIBUTE_UNUSED
5144 x86_64_elf_unique_section (tree decl, int reloc)
5146 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5147 && ix86_in_large_data_p (decl))
5149 const char *prefix = NULL;
5150 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5151 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5153 switch (categorize_decl_for_section (decl, reloc))
5155 case SECCAT_DATA:
5156 case SECCAT_DATA_REL:
5157 case SECCAT_DATA_REL_LOCAL:
5158 case SECCAT_DATA_REL_RO:
5159 case SECCAT_DATA_REL_RO_LOCAL:
5160 prefix = one_only ? ".ld" : ".ldata";
5161 break;
5162 case SECCAT_BSS:
5163 prefix = one_only ? ".lb" : ".lbss";
5164 break;
5165 case SECCAT_RODATA:
5166 case SECCAT_RODATA_MERGE_STR:
5167 case SECCAT_RODATA_MERGE_STR_INIT:
5168 case SECCAT_RODATA_MERGE_CONST:
5169 prefix = one_only ? ".lr" : ".lrodata";
5170 break;
5171 case SECCAT_SRODATA:
5172 case SECCAT_SDATA:
5173 case SECCAT_SBSS:
5174 gcc_unreachable ();
5175 case SECCAT_TEXT:
5176 case SECCAT_TDATA:
5177 case SECCAT_TBSS:
5178 /* We don't split these for the medium model. Place them into
5179 default sections and hope for the best. */
5180 break;
5182 if (prefix)
5184 const char *name, *linkonce;
5185 char *string;
5187 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5188 name = targetm.strip_name_encoding (name);
5190 /* If we're using one_only, then there needs to be a .gnu.linkonce
5191 prefix to the section name. */
5192 linkonce = one_only ? ".gnu.linkonce" : "";
5194 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5196 set_decl_section_name (decl, string);
5197 return;
5200 default_unique_section (decl, reloc);
5203 #ifdef COMMON_ASM_OP
5204 /* This says how to output assembler code to declare an
5205 uninitialized external linkage data object.
5207 For medium-model x86-64 we need to use the .largecomm directive for
5208 large objects. */
5209 void
5210 x86_elf_aligned_common (FILE *file,
5211 const char *name, unsigned HOST_WIDE_INT size,
5212 int align)
5214 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5215 && size > (unsigned int)ix86_section_threshold)
5216 fputs (".largecomm\t", file);
5217 else
5218 fputs (COMMON_ASM_OP, file);
5219 assemble_name (file, name);
5220 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5221 size, align / BITS_PER_UNIT);
5223 #endif
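/* Editor's note -- illustrative example, not part of the original source.
   For a hypothetical common symbol "buf" of 1 MiB with 32-byte alignment,
   the routine above emits, under -mcmodel=medium:

       .largecomm  buf,1048576,32

   whereas small objects still get the ordinary COMMON_ASM_OP (".comm").  */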
5225 /* Utility function for targets to use in implementing
5226 ASM_OUTPUT_ALIGNED_BSS. */
5228 void
5229 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
5230 const char *name, unsigned HOST_WIDE_INT size,
5231 int align)
5233 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5234 && size > (unsigned int)ix86_section_threshold)
5235 switch_to_section (get_named_section (decl, ".lbss", 0));
5236 else
5237 switch_to_section (bss_section);
5238 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5239 #ifdef ASM_DECLARE_OBJECT_NAME
5240 last_assemble_variable_decl = decl;
5241 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5242 #else
5243 /* The standard thing is just to output a label for the object. */
5244 ASM_OUTPUT_LABEL (file, name);
5245 #endif /* ASM_DECLARE_OBJECT_NAME */
5246 ASM_OUTPUT_SKIP (file, size ? size : 1);
5249 /* Decide whether we must probe the stack before any space allocation
5250 on this target. It's essentially TARGET_STACK_PROBE except when
5251 -fstack-check causes the stack to be already probed differently. */
5253 bool
5254 ix86_target_stack_probe (void)
5256 /* Do not probe the stack twice if static stack checking is enabled. */
5257 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5258 return false;
5260 return TARGET_STACK_PROBE;
5263 /* Decide whether we can make a sibling call to a function. DECL is the
5264 declaration of the function being targeted by the call and EXP is the
5265 CALL_EXPR representing the call. */
5267 static bool
5268 ix86_function_ok_for_sibcall (tree decl, tree exp)
5270 tree type, decl_or_type;
5271 rtx a, b;
5273 /* If we are generating position-independent code, we cannot sibcall
5274 optimize any indirect call, or a direct call to a global function,
5275 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5276 if (!TARGET_MACHO
5277 && !TARGET_64BIT
5278 && flag_pic
5279 && (!decl || !targetm.binds_local_p (decl)))
5280 return false;
5282 /* If we need to align the outgoing stack, then sibcalling would
5283 unalign the stack, which may break the called function. */
5284 if (ix86_minimum_incoming_stack_boundary (true)
5285 < PREFERRED_STACK_BOUNDARY)
5286 return false;
5288 if (decl)
5290 decl_or_type = decl;
5291 type = TREE_TYPE (decl);
5293 else
5295 /* We're looking at the CALL_EXPR, we need the type of the function. */
5296 type = CALL_EXPR_FN (exp); /* pointer expression */
5297 type = TREE_TYPE (type); /* pointer type */
5298 type = TREE_TYPE (type); /* function type */
5299 decl_or_type = type;
5302 /* Check that the return value locations are the same. For example,
5303 if we are returning floats on the 80387 register stack, we cannot
5304 make a sibcall from a function that doesn't return a float to a
5305 function that does or, conversely, from a function that does return
5306 a float to a function that doesn't; the necessary stack adjustment
5307 would not be executed. This is also the place we notice
5308 differences in the return value ABI. Note that it is ok for one
5309 of the functions to have void return type as long as the return
5310 value of the other is passed in a register. */
5311 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5312 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5313 cfun->decl, false);
5314 if (STACK_REG_P (a) || STACK_REG_P (b))
5316 if (!rtx_equal_p (a, b))
5317 return false;
5319 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5321 else if (!rtx_equal_p (a, b))
5322 return false;
5324 if (TARGET_64BIT)
5326 /* The SYSV ABI has more call-clobbered registers;
5327 disallow sibcalls from MS to SYSV. */
5328 if (cfun->machine->call_abi == MS_ABI
5329 && ix86_function_type_abi (type) == SYSV_ABI)
5330 return false;
5332 else
5334 /* If this call is indirect, we'll need to be able to use a
5335 call-clobbered register for the address of the target function.
5336 Make sure that all such registers are not used for passing
5337 parameters. Note that DLLIMPORT functions are indirect. */
5338 if (!decl
5339 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5341 if (ix86_function_regparm (type, NULL) >= 3)
5343 /* ??? Need to count the actual number of registers to be used,
5344 not the possible number of registers. Fix later. */
5345 return false;
5350 /* Otherwise okay. That also includes certain types of indirect calls. */
5351 return true;
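/* Editor's note -- illustrative example, not part of the original source.
   The PIC restriction above means that on 32-bit x86 with -fpic, a
   hypothetical tail call such as

       extern int bar (int);
       int foo (int x) { return bar (x); }

   is emitted as an ordinary call through the PLT instead of a sibcall,
   because %ebx must stay live as the GOT pointer.  */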
5354 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5355 and "sseregparm" calling convention attributes;
5356 arguments as in struct attribute_spec.handler. */
5358 static tree
5359 ix86_handle_cconv_attribute (tree *node, tree name,
5360 tree args,
5361 int flags ATTRIBUTE_UNUSED,
5362 bool *no_add_attrs)
5364 if (TREE_CODE (*node) != FUNCTION_TYPE
5365 && TREE_CODE (*node) != METHOD_TYPE
5366 && TREE_CODE (*node) != FIELD_DECL
5367 && TREE_CODE (*node) != TYPE_DECL)
5369 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5370 name);
5371 *no_add_attrs = true;
5372 return NULL_TREE;
5375 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5376 if (is_attribute_p ("regparm", name))
5378 tree cst;
5380 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5382 error ("fastcall and regparm attributes are not compatible");
5385 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5387 error ("regparam and thiscall attributes are not compatible");
5390 cst = TREE_VALUE (args);
5391 if (TREE_CODE (cst) != INTEGER_CST)
5393 warning (OPT_Wattributes,
5394 "%qE attribute requires an integer constant argument",
5395 name);
5396 *no_add_attrs = true;
5398 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5400 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5401 name, REGPARM_MAX);
5402 *no_add_attrs = true;
5405 return NULL_TREE;
5408 if (TARGET_64BIT)
5410 /* Do not warn when emulating the MS ABI. */
5411 if ((TREE_CODE (*node) != FUNCTION_TYPE
5412 && TREE_CODE (*node) != METHOD_TYPE)
5413 || ix86_function_type_abi (*node) != MS_ABI)
5414 warning (OPT_Wattributes, "%qE attribute ignored",
5415 name);
5416 *no_add_attrs = true;
5417 return NULL_TREE;
5420 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5421 if (is_attribute_p ("fastcall", name))
5423 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5425 error ("fastcall and cdecl attributes are not compatible");
5427 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5429 error ("fastcall and stdcall attributes are not compatible");
5431 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5433 error ("fastcall and regparm attributes are not compatible");
5435 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5437 error ("fastcall and thiscall attributes are not compatible");
5441 /* Can combine stdcall with fastcall (redundant), regparm and
5442 sseregparm. */
5443 else if (is_attribute_p ("stdcall", name))
5445 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5447 error ("stdcall and cdecl attributes are not compatible");
5449 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5451 error ("stdcall and fastcall attributes are not compatible");
5453 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5455 error ("stdcall and thiscall attributes are not compatible");
5459 /* Can combine cdecl with regparm and sseregparm. */
5460 else if (is_attribute_p ("cdecl", name))
5462 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5464 error ("stdcall and cdecl attributes are not compatible");
5466 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5468 error ("fastcall and cdecl attributes are not compatible");
5470 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5472 error ("cdecl and thiscall attributes are not compatible");
5475 else if (is_attribute_p ("thiscall", name))
5477 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5478 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5479 name);
5480 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5482 error ("stdcall and thiscall attributes are not compatible");
5484 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5486 error ("fastcall and thiscall attributes are not compatible");
5488 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5490 error ("cdecl and thiscall attributes are not compatible");
5494 /* Can combine sseregparm with all attributes. */
5496 return NULL_TREE;
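/* Editor's note -- illustrative examples, not part of the original source.
   Declarations the handler above would accept or reject (names are
   hypothetical):

       int __attribute__ ((regparm (3))) f (int, int, int);     accepted
       int __attribute__ ((fastcall)) g (int, int);              accepted
       int __attribute__ ((fastcall, regparm (2))) h (int);      rejected with
           "fastcall and regparm attributes are not compatible"  */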
5499 /* The transactional memory builtins are implicitly regparm or fastcall
5500 depending on the ABI. Override the generic do-nothing attribute that
5501 these builtins were declared with, and replace it with one of the two
5502 attributes that we expect elsewhere. */
5504 static tree
5505 ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
5506 tree args ATTRIBUTE_UNUSED,
5507 int flags, bool *no_add_attrs)
5509 tree alt;
5511 /* In no case do we want to add the placeholder attribute. */
5512 *no_add_attrs = true;
5514 /* The 64-bit ABI is unchanged for transactional memory. */
5515 if (TARGET_64BIT)
5516 return NULL_TREE;
5518 /* ??? Is there a better way to validate 32-bit windows? We have
5519 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5520 if (CHECK_STACK_LIMIT > 0)
5521 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5522 else
5524 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5525 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5527 decl_attributes (node, alt, flags);
5529 return NULL_TREE;
5532 /* This function determines from TYPE the calling-convention. */
5534 unsigned int
5535 ix86_get_callcvt (const_tree type)
5537 unsigned int ret = 0;
5538 bool is_stdarg;
5539 tree attrs;
5541 if (TARGET_64BIT)
5542 return IX86_CALLCVT_CDECL;
5544 attrs = TYPE_ATTRIBUTES (type);
5545 if (attrs != NULL_TREE)
5547 if (lookup_attribute ("cdecl", attrs))
5548 ret |= IX86_CALLCVT_CDECL;
5549 else if (lookup_attribute ("stdcall", attrs))
5550 ret |= IX86_CALLCVT_STDCALL;
5551 else if (lookup_attribute ("fastcall", attrs))
5552 ret |= IX86_CALLCVT_FASTCALL;
5553 else if (lookup_attribute ("thiscall", attrs))
5554 ret |= IX86_CALLCVT_THISCALL;
5556 /* Regparm isn't allowed for thiscall and fastcall. */
5557 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5559 if (lookup_attribute ("regparm", attrs))
5560 ret |= IX86_CALLCVT_REGPARM;
5561 if (lookup_attribute ("sseregparm", attrs))
5562 ret |= IX86_CALLCVT_SSEREGPARM;
5565 if (IX86_BASE_CALLCVT(ret) != 0)
5566 return ret;
5569 is_stdarg = stdarg_p (type);
5570 if (TARGET_RTD && !is_stdarg)
5571 return IX86_CALLCVT_STDCALL | ret;
5573 if (ret != 0
5574 || is_stdarg
5575 || TREE_CODE (type) != METHOD_TYPE
5576 || ix86_function_type_abi (type) != MS_ABI)
5577 return IX86_CALLCVT_CDECL | ret;
5579 return IX86_CALLCVT_THISCALL;
5582 /* Return 0 if the attributes for two types are incompatible, 1 if they
5583 are compatible, and 2 if they are nearly compatible (which causes a
5584 warning to be generated). */
5586 static int
5587 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5589 unsigned int ccvt1, ccvt2;
5591 if (TREE_CODE (type1) != FUNCTION_TYPE
5592 && TREE_CODE (type1) != METHOD_TYPE)
5593 return 1;
5595 ccvt1 = ix86_get_callcvt (type1);
5596 ccvt2 = ix86_get_callcvt (type2);
5597 if (ccvt1 != ccvt2)
5598 return 0;
5599 if (ix86_function_regparm (type1, NULL)
5600 != ix86_function_regparm (type2, NULL))
5601 return 0;
5603 return 1;
5606 /* Return the regparm value for a function with the indicated TYPE and DECL.
5607 DECL may be NULL when calling function indirectly
5608 or considering a libcall. */
5610 static int
5611 ix86_function_regparm (const_tree type, const_tree decl)
5613 tree attr;
5614 int regparm;
5615 unsigned int ccvt;
5617 if (TARGET_64BIT)
5618 return (ix86_function_type_abi (type) == SYSV_ABI
5619 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5620 ccvt = ix86_get_callcvt (type);
5621 regparm = ix86_regparm;
5623 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5625 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5626 if (attr)
5628 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5629 return regparm;
5632 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5633 return 2;
5634 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5635 return 1;
5637 /* Use register calling convention for local functions when possible. */
5638 if (decl
5639 && TREE_CODE (decl) == FUNCTION_DECL
5640 /* Caller and callee must agree on the calling convention, so
5641 checking just the global 'optimize' flag here would mean that with
5642 __attribute__((optimize (...))) the caller could use the regparm
5643 convention while the callee does not, or vice versa. Instead look at
5644 whether the callee itself is optimized or not. */
5645 && opt_for_fn (decl, optimize)
5646 && !(profile_flag && !flag_fentry))
5648 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5649 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5650 if (i && i->local && i->can_change_signature)
5652 int local_regparm, globals = 0, regno;
5654 /* Make sure no regparm register is taken by a
5655 fixed register variable. */
5656 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5657 if (fixed_regs[local_regparm])
5658 break;
5660 /* We don't want to use regparm(3) for nested functions as
5661 these use a static chain pointer in the third argument. */
5662 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5663 local_regparm = 2;
5665 /* In 32-bit mode save a register for the split stack. */
5666 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5667 local_regparm = 2;
5669 /* Each fixed register usage increases register pressure,
5670 so fewer registers should be used for argument passing.
5671 This functionality can be overridden by an explicit
5672 regparm value. */
5673 for (regno = AX_REG; regno <= DI_REG; regno++)
5674 if (fixed_regs[regno])
5675 globals++;
5677 local_regparm
5678 = globals < local_regparm ? local_regparm - globals : 0;
5680 if (local_regparm > regparm)
5681 regparm = local_regparm;
5685 return regparm;
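/* Editor's note -- illustrative note, not part of the original source.
   On 32-bit x86, regparm (3) means the first three integer arguments are
   passed in %eax, %edx and %ecx; fastcall passes the first two in %ecx and
   %edx, and thiscall passes only the first one in %ecx, which is why the
   function above returns 3, 2 and 1 respectively for those conventions.  */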
5688 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5689 DFmode (2) arguments in SSE registers for a function with the
5690 indicated TYPE and DECL. DECL may be NULL when calling function
5691 indirectly or considering a libcall. Otherwise return 0. */
5693 static int
5694 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5696 gcc_assert (!TARGET_64BIT);
5698 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5699 by the sseregparm attribute. */
5700 if (TARGET_SSEREGPARM
5701 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5703 if (!TARGET_SSE)
5705 if (warn)
5707 if (decl)
5708 error ("calling %qD with attribute sseregparm without "
5709 "SSE/SSE2 enabled", decl);
5710 else
5711 error ("calling %qT with attribute sseregparm without "
5712 "SSE/SSE2 enabled", type);
5714 return 0;
5717 return 2;
5720 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5721 (and DFmode for SSE2) arguments in SSE registers. */
5722 if (decl && TARGET_SSE_MATH && optimize
5723 && !(profile_flag && !flag_fentry))
5725 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5726 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5727 if (i && i->local && i->can_change_signature)
5728 return TARGET_SSE2 ? 2 : 1;
5731 return 0;
5734 /* Return true if EAX is live at the start of the function. Used by
5735 ix86_expand_prologue to determine if we need special help before
5736 calling allocate_stack_worker. */
5738 static bool
5739 ix86_eax_live_at_start_p (void)
5741 /* Cheat. Don't bother working forward from ix86_function_regparm
5742 to the function type to whether an actual argument is located in
5743 eax. Instead just look at cfg info, which is still close enough
5744 to correct at this point. This gives false positives for broken
5745 functions that might use uninitialized data that happens to be
5746 allocated in eax, but who cares? */
5747 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5750 static bool
5751 ix86_keep_aggregate_return_pointer (tree fntype)
5753 tree attr;
5755 if (!TARGET_64BIT)
5757 attr = lookup_attribute ("callee_pop_aggregate_return",
5758 TYPE_ATTRIBUTES (fntype));
5759 if (attr)
5760 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5762 /* For the 32-bit MS ABI the default is to keep the aggregate
5763 return pointer. */
5764 if (ix86_function_type_abi (fntype) == MS_ABI)
5765 return true;
5767 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5770 /* Value is the number of bytes of arguments automatically
5771 popped when returning from a subroutine call.
5772 FUNDECL is the declaration node of the function (as a tree),
5773 FUNTYPE is the data type of the function (as a tree),
5774 or for a library call it is an identifier node for the subroutine name.
5775 SIZE is the number of bytes of arguments passed on the stack.
5777 On the 80386, the RTD insn may be used to pop them if the number
5778 of args is fixed, but if the number is variable then the caller
5779 must pop them all. RTD can't be used for library calls now
5780 because the library is compiled with the Unix compiler.
5781 Use of RTD is a selectable option, since it is incompatible with
5782 standard Unix calling sequences. If the option is not selected,
5783 the caller must always pop the args.
5785 The attribute stdcall is equivalent to RTD on a per module basis. */
5787 static int
5788 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5790 unsigned int ccvt;
5792 /* None of the 64-bit ABIs pop arguments. */
5793 if (TARGET_64BIT)
5794 return 0;
5796 ccvt = ix86_get_callcvt (funtype);
5798 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5799 | IX86_CALLCVT_THISCALL)) != 0
5800 && ! stdarg_p (funtype))
5801 return size;
5803 /* Lose any fake structure return argument if it is passed on the stack. */
5804 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5805 && !ix86_keep_aggregate_return_pointer (funtype))
5807 int nregs = ix86_function_regparm (funtype, fundecl);
5808 if (nregs == 0)
5809 return GET_MODE_SIZE (Pmode);
5812 return 0;
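/* Editor's note -- illustrative example, not part of the original source.
   For a hypothetical

       void __attribute__ ((stdcall)) cb (int a, int b);

   the routine above returns 8, so the callee pops its own arguments and
   returns with "ret $8"; a variadic or plain cdecl function returns 0 and
   leaves the caller to pop.  */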
5815 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5817 static bool
5818 ix86_legitimate_combined_insn (rtx insn)
5820 /* Check operand constraints in case hard registers were propagated
5821 into the insn pattern. This check prevents the combine pass from
5822 generating insn patterns with invalid hard register operands.
5823 These invalid insns can eventually cause reload to error out
5824 with a spill failure. See also PRs 46829 and 46843. */
5825 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5827 int i;
5829 extract_insn (insn);
5830 preprocess_constraints (insn);
5832 int n_operands = recog_data.n_operands;
5833 int n_alternatives = recog_data.n_alternatives;
5834 for (i = 0; i < n_operands; i++)
5836 rtx op = recog_data.operand[i];
5837 enum machine_mode mode = GET_MODE (op);
5838 const operand_alternative *op_alt;
5839 int offset = 0;
5840 bool win;
5841 int j;
5843 /* For pre-AVX disallow unaligned loads/stores where the
5844 instructions don't support it. */
5845 if (!TARGET_AVX
5846 && VECTOR_MODE_P (GET_MODE (op))
5847 && misaligned_operand (op, GET_MODE (op)))
5849 int min_align = get_attr_ssememalign (insn);
5850 if (min_align == 0)
5851 return false;
5854 /* A unary operator may be accepted by the predicate, but it
5855 is irrelevant for matching constraints. */
5856 if (UNARY_P (op))
5857 op = XEXP (op, 0);
5859 if (GET_CODE (op) == SUBREG)
5861 if (REG_P (SUBREG_REG (op))
5862 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5863 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5864 GET_MODE (SUBREG_REG (op)),
5865 SUBREG_BYTE (op),
5866 GET_MODE (op));
5867 op = SUBREG_REG (op);
5870 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5871 continue;
5873 op_alt = recog_op_alt;
5875 /* Operand has no constraints, anything is OK. */
5876 win = !n_alternatives;
5878 alternative_mask enabled = recog_data.enabled_alternatives;
5879 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
5881 if (!TEST_BIT (enabled, j))
5882 continue;
5883 if (op_alt[i].anything_ok
5884 || (op_alt[i].matches != -1
5885 && operands_match_p
5886 (recog_data.operand[i],
5887 recog_data.operand[op_alt[i].matches]))
5888 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
5890 win = true;
5891 break;
5895 if (!win)
5896 return false;
5900 return true;
5903 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5905 static unsigned HOST_WIDE_INT
5906 ix86_asan_shadow_offset (void)
5908 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5909 : HOST_WIDE_INT_C (0x7fff8000))
5910 : (HOST_WIDE_INT_1 << 29);
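/* Editor's note -- illustrative note, not part of the original source.
   AddressSanitizer maps an address to its shadow byte as
   shadow = (addr >> 3) + offset, so with the LP64 value above a
   hypothetical address 0x7fffffffe000 has its shadow at
   (0x7fffffffe000 >> 3) + 0x7fff8000.  */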
5913 /* Argument support functions. */
5915 /* Return true when register may be used to pass function parameters. */
5916 bool
5917 ix86_function_arg_regno_p (int regno)
5919 int i;
5920 const int *parm_regs;
5922 if (!TARGET_64BIT)
5924 if (TARGET_MACHO)
5925 return (regno < REGPARM_MAX
5926 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5927 else
5928 return (regno < REGPARM_MAX
5929 || (TARGET_MMX && MMX_REGNO_P (regno)
5930 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5931 || (TARGET_SSE && SSE_REGNO_P (regno)
5932 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5935 if (TARGET_SSE && SSE_REGNO_P (regno)
5936 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5937 return true;
5939 /* TODO: The function should depend on current function ABI but
5940 builtins.c would need updating then. Therefore we use the
5941 default ABI. */
5943 /* RAX is used as hidden argument to va_arg functions. */
5944 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5945 return true;
5947 if (ix86_abi == MS_ABI)
5948 parm_regs = x86_64_ms_abi_int_parameter_registers;
5949 else
5950 parm_regs = x86_64_int_parameter_registers;
5951 for (i = 0; i < (ix86_abi == MS_ABI
5952 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5953 if (regno == parm_regs[i])
5954 return true;
5955 return false;
5958 /* Return true if we do not know how to pass TYPE solely in registers. */
5960 static bool
5961 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5963 if (must_pass_in_stack_var_size_or_pad (mode, type))
5964 return true;
5966 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5967 The layout_type routine is crafty and tries to trick us into passing
5968 currently unsupported vector types on the stack by using TImode. */
5969 return (!TARGET_64BIT && mode == TImode
5970 && type && TREE_CODE (type) != VECTOR_TYPE);
5973 /* Return the size, in bytes, of the area reserved for arguments passed
5974 in registers for the function represented by FNDECL, depending on the
5975 ABI used. */
5977 ix86_reg_parm_stack_space (const_tree fndecl)
5979 enum calling_abi call_abi = SYSV_ABI;
5980 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5981 call_abi = ix86_function_abi (fndecl);
5982 else
5983 call_abi = ix86_function_type_abi (fndecl);
5984 if (TARGET_64BIT && call_abi == MS_ABI)
5985 return 32;
5986 return 0;
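/* Editor's note -- illustrative note, not part of the original source.
   The 32 bytes returned for the 64-bit MS ABI are the "home" (shadow)
   area that the caller must reserve for the four register-passed
   arguments (RCX, RDX, R8, R9); the SysV ABI has no such area, hence 0.  */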
5989 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
5990 call ABI used. */
5991 enum calling_abi
5992 ix86_function_type_abi (const_tree fntype)
5994 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5996 enum calling_abi abi = ix86_abi;
5997 if (abi == SYSV_ABI)
5999 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6000 abi = MS_ABI;
6002 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6003 abi = SYSV_ABI;
6004 return abi;
6006 return ix86_abi;
6009 /* We add this as a workaround in order to use libc_has_function
6010 hook in i386.md. */
6011 bool
6012 ix86_libc_has_function (enum function_class fn_class)
6014 return targetm.libc_has_function (fn_class);
6017 static bool
6018 ix86_function_ms_hook_prologue (const_tree fn)
6020 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6022 if (decl_function_context (fn) != NULL_TREE)
6023 error_at (DECL_SOURCE_LOCATION (fn),
6024 "ms_hook_prologue is not compatible with nested function");
6025 else
6026 return true;
6028 return false;
6031 static enum calling_abi
6032 ix86_function_abi (const_tree fndecl)
6034 if (! fndecl)
6035 return ix86_abi;
6036 return ix86_function_type_abi (TREE_TYPE (fndecl));
6039 /* Return SYSV_ABI or MS_ABI, depending on CFUN, specifying the
6040 call ABI used. */
6041 enum calling_abi
6042 ix86_cfun_abi (void)
6044 if (! cfun)
6045 return ix86_abi;
6046 return cfun->machine->call_abi;
6049 /* Write the extra assembler code needed to declare a function properly. */
6051 void
6052 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6053 tree decl)
6055 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6057 if (is_ms_hook)
6059 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6060 unsigned int filler_cc = 0xcccccccc;
6062 for (i = 0; i < filler_count; i += 4)
6063 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6066 #ifdef SUBTARGET_ASM_UNWIND_INIT
6067 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6068 #endif
6070 ASM_OUTPUT_LABEL (asm_out_file, fname);
6072 /* Output magic byte marker, if hot-patch attribute is set. */
6073 if (is_ms_hook)
6075 if (TARGET_64BIT)
6077 /* leaq [%rsp + 0], %rsp */
6078 asm_fprintf (asm_out_file, ASM_BYTE
6079 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6081 else
6083 /* movl.s %edi, %edi
6084 push %ebp
6085 movl.s %esp, %ebp */
6086 asm_fprintf (asm_out_file, ASM_BYTE
6087 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6092 /* regclass.c */
6093 extern void init_regs (void);
6095 /* Implementation of call abi switching target hook. Specific to FNDECL
6096 the specific call register sets are set. See also
6097 ix86_conditional_register_usage for more details. */
6098 void
6099 ix86_call_abi_override (const_tree fndecl)
6101 if (fndecl == NULL_TREE)
6102 cfun->machine->call_abi = ix86_abi;
6103 else
6104 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6107 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
6108 Avoid expensive re-initialization of init_regs each time we switch function
6109 context, since this is needed only during RTL expansion. */
6110 static void
6111 ix86_maybe_switch_abi (void)
6113 if (TARGET_64BIT &&
6114 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6115 reinit_regs ();
6118 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6119 for a call to a function whose data type is FNTYPE.
6120 For a library call, FNTYPE is 0. */
6122 void
6123 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6124 tree fntype, /* tree ptr for function decl */
6125 rtx libname, /* SYMBOL_REF of library name or 0 */
6126 tree fndecl,
6127 int caller)
6129 struct cgraph_local_info *i;
6131 memset (cum, 0, sizeof (*cum));
6133 if (fndecl)
6135 i = cgraph_local_info (fndecl);
6136 cum->call_abi = ix86_function_abi (fndecl);
6138 else
6140 i = NULL;
6141 cum->call_abi = ix86_function_type_abi (fntype);
6144 cum->caller = caller;
6146 /* Set up the number of registers to use for passing arguments. */
6147 cum->nregs = ix86_regparm;
6148 if (TARGET_64BIT)
6150 cum->nregs = (cum->call_abi == SYSV_ABI
6151 ? X86_64_REGPARM_MAX
6152 : X86_64_MS_REGPARM_MAX);
6154 if (TARGET_SSE)
6156 cum->sse_nregs = SSE_REGPARM_MAX;
6157 if (TARGET_64BIT)
6159 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6160 ? X86_64_SSE_REGPARM_MAX
6161 : X86_64_MS_SSE_REGPARM_MAX);
6164 if (TARGET_MMX)
6165 cum->mmx_nregs = MMX_REGPARM_MAX;
6166 cum->warn_avx512f = true;
6167 cum->warn_avx = true;
6168 cum->warn_sse = true;
6169 cum->warn_mmx = true;
6171 /* Because the type might mismatch between caller and callee, we need to
6172 use the actual type of the function for local calls.
6173 FIXME: cgraph_analyze can be told to actually record if function uses
6174 va_start so for local functions maybe_vaarg can be made aggressive
6175 helping K&R code.
6176 FIXME: once the type system is fixed, we won't need this code anymore. */
6177 if (i && i->local && i->can_change_signature)
6178 fntype = TREE_TYPE (fndecl);
6179 cum->maybe_vaarg = (fntype
6180 ? (!prototype_p (fntype) || stdarg_p (fntype))
6181 : !libname);
6183 if (!TARGET_64BIT)
6185 /* If there are variable arguments, then we won't pass anything
6186 in registers in 32-bit mode. */
6187 if (stdarg_p (fntype))
6189 cum->nregs = 0;
6190 cum->sse_nregs = 0;
6191 cum->mmx_nregs = 0;
6192 cum->warn_avx512f = false;
6193 cum->warn_avx = false;
6194 cum->warn_sse = false;
6195 cum->warn_mmx = false;
6196 return;
6199 /* Use ecx and edx registers if function has fastcall attribute,
6200 else look for regparm information. */
6201 if (fntype)
6203 unsigned int ccvt = ix86_get_callcvt (fntype);
6204 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6206 cum->nregs = 1;
6207 cum->fastcall = 1; /* Same first register as in fastcall. */
6209 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6211 cum->nregs = 2;
6212 cum->fastcall = 1;
6214 else
6215 cum->nregs = ix86_function_regparm (fntype, fndecl);
6218 /* Set up the number of SSE registers used for passing SFmode
6219 and DFmode arguments. Warn for mismatching ABI. */
6220 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6224 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6225 But in the case of vector types, it is some vector mode.
6227 When we have only some of our vector isa extensions enabled, then there
6228 are some modes for which vector_mode_supported_p is false. For these
6229 modes, the generic vector support in gcc will choose some non-vector mode
6230 in order to implement the type. By computing the natural mode, we'll
6231 select the proper ABI location for the operand and not depend on whatever
6232 the middle-end decides to do with these vector types.
6234 The middle-end can't deal with vector types > 16 bytes. In this
6235 case, we return the original mode and warn about the ABI change if CUM
6236 isn't NULL.
6238 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6239 available for the function return value. */
6241 static enum machine_mode
6242 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6243 bool in_return)
6245 enum machine_mode mode = TYPE_MODE (type);
6247 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6249 HOST_WIDE_INT size = int_size_in_bytes (type);
6250 if ((size == 8 || size == 16 || size == 32 || size == 64)
6251 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6252 && TYPE_VECTOR_SUBPARTS (type) > 1)
6254 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6256 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6257 mode = MIN_MODE_VECTOR_FLOAT;
6258 else
6259 mode = MIN_MODE_VECTOR_INT;
6261 /* Get the mode which has this inner mode and number of units. */
6262 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6263 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6264 && GET_MODE_INNER (mode) == innermode)
6266 if (size == 64 && !TARGET_AVX512F)
6268 static bool warnedavx512f;
6269 static bool warnedavx512f_ret;
6271 if (cum && cum->warn_avx512f && !warnedavx512f)
6273 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6274 "without AVX512F enabled changes the ABI"))
6275 warnedavx512f = true;
6277 else if (in_return && !warnedavx512f_ret)
6279 if (warning (OPT_Wpsabi, "AVX512F vector return "
6280 "without AVX512F enabled changes the ABI"))
6281 warnedavx512f_ret = true;
6284 return TYPE_MODE (type);
6286 else if (size == 32 && !TARGET_AVX)
6288 static bool warnedavx;
6289 static bool warnedavx_ret;
6291 if (cum && cum->warn_avx && !warnedavx)
6293 if (warning (OPT_Wpsabi, "AVX vector argument "
6294 "without AVX enabled changes the ABI"))
6295 warnedavx = true;
6297 else if (in_return && !warnedavx_ret)
6299 if (warning (OPT_Wpsabi, "AVX vector return "
6300 "without AVX enabled changes the ABI"))
6301 warnedavx_ret = true;
6304 return TYPE_MODE (type);
6306 else if (((size == 8 && TARGET_64BIT) || size == 16)
6307 && !TARGET_SSE)
6309 static bool warnedsse;
6310 static bool warnedsse_ret;
6312 if (cum && cum->warn_sse && !warnedsse)
6314 if (warning (OPT_Wpsabi, "SSE vector argument "
6315 "without SSE enabled changes the ABI"))
6316 warnedsse = true;
6318 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6320 if (warning (OPT_Wpsabi, "SSE vector return "
6321 "without SSE enabled changes the ABI"))
6322 warnedsse_ret = true;
6325 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6327 static bool warnedmmx;
6328 static bool warnedmmx_ret;
6330 if (cum && cum->warn_mmx && !warnedmmx)
6332 if (warning (OPT_Wpsabi, "MMX vector argument "
6333 "without MMX enabled changes the ABI"))
6334 warnedmmx = true;
6336 else if (in_return && !warnedmmx_ret)
6338 if (warning (OPT_Wpsabi, "MMX vector return "
6339 "without MMX enabled changes the ABI"))
6340 warnedmmx_ret = true;
6343 return mode;
6346 gcc_unreachable ();
6350 return mode;
6353 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6354 this may not agree with the mode that the type system has chosen for the
6355 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6356 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6358 static rtx
6359 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6360 unsigned int regno)
6362 rtx tmp;
6364 if (orig_mode != BLKmode)
6365 tmp = gen_rtx_REG (orig_mode, regno);
6366 else
6368 tmp = gen_rtx_REG (mode, regno);
6369 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6370 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6373 return tmp;
6376 /* x86-64 register passing implementation. See the x86-64 ABI for details.
6377 The goal of this code is to classify each eightbyte of an incoming argument
6378 by register class and assign registers accordingly. */
6380 /* Return the union class of CLASS1 and CLASS2.
6381 See the x86-64 PS ABI for details. */
6383 static enum x86_64_reg_class
6384 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6386 /* Rule #1: If both classes are equal, this is the resulting class. */
6387 if (class1 == class2)
6388 return class1;
6390 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6391 the other class. */
6392 if (class1 == X86_64_NO_CLASS)
6393 return class2;
6394 if (class2 == X86_64_NO_CLASS)
6395 return class1;
6397 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6398 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6399 return X86_64_MEMORY_CLASS;
6401 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6402 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6403 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6404 return X86_64_INTEGERSI_CLASS;
6405 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6406 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6407 return X86_64_INTEGER_CLASS;
6409 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6410 MEMORY is used. */
6411 if (class1 == X86_64_X87_CLASS
6412 || class1 == X86_64_X87UP_CLASS
6413 || class1 == X86_64_COMPLEX_X87_CLASS
6414 || class2 == X86_64_X87_CLASS
6415 || class2 == X86_64_X87UP_CLASS
6416 || class2 == X86_64_COMPLEX_X87_CLASS)
6417 return X86_64_MEMORY_CLASS;
6419 /* Rule #6: Otherwise class SSE is used. */
6420 return X86_64_SSE_CLASS;
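/* Editor's note -- illustrative example, not part of the original source.
   For a hypothetical

       struct s { double d; int i; };

   classify_argument below assigns an SSE class (SSEDF) to the first
   eightbyte and INTEGERSI to the second, so the struct is passed in one
   SSE register plus one integer register rather than in memory.  */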
6423 /* Classify the argument of type TYPE and mode MODE.
6424 CLASSES will be filled by the register class used to pass each word
6425 of the operand. The number of words is returned. In case the parameter
6426 should be passed in memory, 0 is returned. As a special case for zero
6427 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6429 BIT_OFFSET is used internally for handling records and specifies the
6430 offset of the field in bits, modulo 512 to avoid overflow cases.
6432 See the x86-64 PS ABI for details.
6435 static int
6436 classify_argument (enum machine_mode mode, const_tree type,
6437 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6439 HOST_WIDE_INT bytes =
6440 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6441 int words
6442 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6444 /* Variable sized entities are always passed/returned in memory. */
6445 if (bytes < 0)
6446 return 0;
6448 if (mode != VOIDmode
6449 && targetm.calls.must_pass_in_stack (mode, type))
6450 return 0;
6452 if (type && AGGREGATE_TYPE_P (type))
6454 int i;
6455 tree field;
6456 enum x86_64_reg_class subclasses[MAX_CLASSES];
6458 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6459 if (bytes > 64)
6460 return 0;
6462 for (i = 0; i < words; i++)
6463 classes[i] = X86_64_NO_CLASS;
6465 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6466 signal the memory class, so handle it as a special case.  */
6467 if (!words)
6469 classes[0] = X86_64_NO_CLASS;
6470 return 1;
6473 /* Classify each field of record and merge classes. */
6474 switch (TREE_CODE (type))
6476 case RECORD_TYPE:
6477 /* And now merge the fields of structure. */
6478 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6480 if (TREE_CODE (field) == FIELD_DECL)
6482 int num;
6484 if (TREE_TYPE (field) == error_mark_node)
6485 continue;
6487 /* Bitfields are always classified as integer. Handle them
6488 early, since later code would consider them to be
6489 misaligned integers. */
6490 if (DECL_BIT_FIELD (field))
6492 for (i = (int_bit_position (field)
6493 + (bit_offset % 64)) / 8 / 8;
6494 i < ((int_bit_position (field) + (bit_offset % 64))
6495 + tree_to_shwi (DECL_SIZE (field))
6496 + 63) / 8 / 8; i++)
6497 classes[i] =
6498 merge_classes (X86_64_INTEGER_CLASS,
6499 classes[i]);
6501 else
6503 int pos;
6505 type = TREE_TYPE (field);
6507 /* Flexible array member is ignored. */
6508 if (TYPE_MODE (type) == BLKmode
6509 && TREE_CODE (type) == ARRAY_TYPE
6510 && TYPE_SIZE (type) == NULL_TREE
6511 && TYPE_DOMAIN (type) != NULL_TREE
6512 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6513 == NULL_TREE))
6515 static bool warned;
6517 if (!warned && warn_psabi)
6519 warned = true;
6520 inform (input_location,
6521 "the ABI of passing struct with"
6522 " a flexible array member has"
6523 " changed in GCC 4.4");
6525 continue;
6527 num = classify_argument (TYPE_MODE (type), type,
6528 subclasses,
6529 (int_bit_position (field)
6530 + bit_offset) % 512);
6531 if (!num)
6532 return 0;
6533 pos = (int_bit_position (field)
6534 + (bit_offset % 64)) / 8 / 8;
6535 for (i = 0; i < num && (i + pos) < words; i++)
6536 classes[i + pos] =
6537 merge_classes (subclasses[i], classes[i + pos]);
6541 break;
6543 case ARRAY_TYPE:
6544 /* Arrays are handled as small records. */
6546 int num;
6547 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6548 TREE_TYPE (type), subclasses, bit_offset);
6549 if (!num)
6550 return 0;
6552 /* The partial classes are now full classes. */
6553 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6554 subclasses[0] = X86_64_SSE_CLASS;
6555 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6556 && !((bit_offset % 64) == 0 && bytes == 4))
6557 subclasses[0] = X86_64_INTEGER_CLASS;
6559 for (i = 0; i < words; i++)
6560 classes[i] = subclasses[i % num];
6562 break;
6564 case UNION_TYPE:
6565 case QUAL_UNION_TYPE:
6566 /* Unions are similar to RECORD_TYPE but offset is always 0.
6568 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6570 if (TREE_CODE (field) == FIELD_DECL)
6572 int num;
6574 if (TREE_TYPE (field) == error_mark_node)
6575 continue;
6577 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6578 TREE_TYPE (field), subclasses,
6579 bit_offset);
6580 if (!num)
6581 return 0;
6582 for (i = 0; i < num; i++)
6583 classes[i] = merge_classes (subclasses[i], classes[i]);
6586 break;
6588 default:
6589 gcc_unreachable ();
6592 if (words > 2)
6594 /* When size > 16 bytes, if the first one isn't
6595 X86_64_SSE_CLASS or any other ones aren't
6596 X86_64_SSEUP_CLASS, everything should be passed in
6597 memory. */
6598 if (classes[0] != X86_64_SSE_CLASS)
6599 return 0;
6601 for (i = 1; i < words; i++)
6602 if (classes[i] != X86_64_SSEUP_CLASS)
6603 return 0;
6606 /* Final merger cleanup. */
6607 for (i = 0; i < words; i++)
6609 /* If one class is MEMORY, everything should be passed in
6610 memory. */
6611 if (classes[i] == X86_64_MEMORY_CLASS)
6612 return 0;
6614 /* The X86_64_SSEUP_CLASS should always be preceded by
6615 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6616 if (classes[i] == X86_64_SSEUP_CLASS
6617 && classes[i - 1] != X86_64_SSE_CLASS
6618 && classes[i - 1] != X86_64_SSEUP_CLASS)
6620 /* The first one should never be X86_64_SSEUP_CLASS. */
6621 gcc_assert (i != 0);
6622 classes[i] = X86_64_SSE_CLASS;
6625 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6626 everything should be passed in memory. */
6627 if (classes[i] == X86_64_X87UP_CLASS
6628 && (classes[i - 1] != X86_64_X87_CLASS))
6630 static bool warned;
6632 /* The first one should never be X86_64_X87UP_CLASS. */
6633 gcc_assert (i != 0);
6634 if (!warned && warn_psabi)
6636 warned = true;
6637 inform (input_location,
6638 "the ABI of passing union with long double"
6639 " has changed in GCC 4.4");
6641 return 0;
6644 return words;
6647 /* Compute the alignment needed.  We align all types to natural boundaries,
6648 with the exception of XFmode, which is aligned to 64 bits.  */
6649 if (mode != VOIDmode && mode != BLKmode)
6651 int mode_alignment = GET_MODE_BITSIZE (mode);
6653 if (mode == XFmode)
6654 mode_alignment = 128;
6655 else if (mode == XCmode)
6656 mode_alignment = 256;
6657 if (COMPLEX_MODE_P (mode))
6658 mode_alignment /= 2;
6659 /* Misaligned fields are always returned in memory. */
6660 if (bit_offset % mode_alignment)
6661 return 0;
6664 /* for V1xx modes, just use the base mode */
6665 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6666 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6667 mode = GET_MODE_INNER (mode);
6669 /* Classification of atomic types. */
6670 switch (mode)
6672 case SDmode:
6673 case DDmode:
6674 classes[0] = X86_64_SSE_CLASS;
6675 return 1;
6676 case TDmode:
6677 classes[0] = X86_64_SSE_CLASS;
6678 classes[1] = X86_64_SSEUP_CLASS;
6679 return 2;
6680 case DImode:
6681 case SImode:
6682 case HImode:
6683 case QImode:
6684 case CSImode:
6685 case CHImode:
6686 case CQImode:
6688 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6690 /* Analyze last 128 bits only. */
6691 size = (size - 1) & 0x7f;
6693 if (size < 32)
6695 classes[0] = X86_64_INTEGERSI_CLASS;
6696 return 1;
6698 else if (size < 64)
6700 classes[0] = X86_64_INTEGER_CLASS;
6701 return 1;
6703 else if (size < 64+32)
6705 classes[0] = X86_64_INTEGER_CLASS;
6706 classes[1] = X86_64_INTEGERSI_CLASS;
6707 return 2;
6709 else if (size < 64+64)
6711 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6712 return 2;
6714 else
6715 gcc_unreachable ();
6717 case CDImode:
6718 case TImode:
6719 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6720 return 2;
6721 case COImode:
6722 case OImode:
6723 /* OImode shouldn't be used directly. */
6724 gcc_unreachable ();
6725 case CTImode:
6726 return 0;
6727 case SFmode:
6728 if (!(bit_offset % 64))
6729 classes[0] = X86_64_SSESF_CLASS;
6730 else
6731 classes[0] = X86_64_SSE_CLASS;
6732 return 1;
6733 case DFmode:
6734 classes[0] = X86_64_SSEDF_CLASS;
6735 return 1;
6736 case XFmode:
6737 classes[0] = X86_64_X87_CLASS;
6738 classes[1] = X86_64_X87UP_CLASS;
6739 return 2;
6740 case TFmode:
6741 classes[0] = X86_64_SSE_CLASS;
6742 classes[1] = X86_64_SSEUP_CLASS;
6743 return 2;
6744 case SCmode:
6745 classes[0] = X86_64_SSE_CLASS;
6746 if (!(bit_offset % 64))
6747 return 1;
6748 else
6750 static bool warned;
6752 if (!warned && warn_psabi)
6754 warned = true;
6755 inform (input_location,
6756 "the ABI of passing structure with complex float"
6757 " member has changed in GCC 4.4");
6759 classes[1] = X86_64_SSESF_CLASS;
6760 return 2;
6762 case DCmode:
6763 classes[0] = X86_64_SSEDF_CLASS;
6764 classes[1] = X86_64_SSEDF_CLASS;
6765 return 2;
6766 case XCmode:
6767 classes[0] = X86_64_COMPLEX_X87_CLASS;
6768 return 1;
6769 case TCmode:
6770 /* This mode is larger than 16 bytes.  */
6771 return 0;
6772 case V8SFmode:
6773 case V8SImode:
6774 case V32QImode:
6775 case V16HImode:
6776 case V4DFmode:
6777 case V4DImode:
6778 classes[0] = X86_64_SSE_CLASS;
6779 classes[1] = X86_64_SSEUP_CLASS;
6780 classes[2] = X86_64_SSEUP_CLASS;
6781 classes[3] = X86_64_SSEUP_CLASS;
6782 return 4;
6783 case V8DFmode:
6784 case V16SFmode:
6785 case V8DImode:
6786 case V16SImode:
6787 case V32HImode:
6788 case V64QImode:
6789 classes[0] = X86_64_SSE_CLASS;
6790 classes[1] = X86_64_SSEUP_CLASS;
6791 classes[2] = X86_64_SSEUP_CLASS;
6792 classes[3] = X86_64_SSEUP_CLASS;
6793 classes[4] = X86_64_SSEUP_CLASS;
6794 classes[5] = X86_64_SSEUP_CLASS;
6795 classes[6] = X86_64_SSEUP_CLASS;
6796 classes[7] = X86_64_SSEUP_CLASS;
6797 return 8;
6798 case V4SFmode:
6799 case V4SImode:
6800 case V16QImode:
6801 case V8HImode:
6802 case V2DFmode:
6803 case V2DImode:
6804 classes[0] = X86_64_SSE_CLASS;
6805 classes[1] = X86_64_SSEUP_CLASS;
6806 return 2;
6807 case V1TImode:
6808 case V1DImode:
6809 case V2SFmode:
6810 case V2SImode:
6811 case V4HImode:
6812 case V8QImode:
6813 classes[0] = X86_64_SSE_CLASS;
6814 return 1;
6815 case BLKmode:
6816 case VOIDmode:
6817 return 0;
6818 default:
6819 gcc_assert (VECTOR_MODE_P (mode));
6821 if (bytes > 16)
6822 return 0;
6824 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6826 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6827 classes[0] = X86_64_INTEGERSI_CLASS;
6828 else
6829 classes[0] = X86_64_INTEGER_CLASS;
6830 classes[1] = X86_64_INTEGER_CLASS;
6831 return 1 + (bytes > 8);
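/* Illustrative example, not part of the original sources: for a
   hypothetical

       struct s { double d; long l; };

   the classification above yields X86_64_SSEDF_CLASS for eightbyte 0 and
   X86_64_INTEGER_CLASS for eightbyte 1 and returns 2; passed as the
   first argument of a SysV function, the struct is therefore split
   between %xmm0 and %rdi.  */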
6835 /* Examine the argument and set the number of registers required in each
6836 class.  Return true iff the parameter should be passed in memory.  */
6838 static bool
6839 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6840 int *int_nregs, int *sse_nregs)
6842 enum x86_64_reg_class regclass[MAX_CLASSES];
6843 int n = classify_argument (mode, type, regclass, 0);
6845 *int_nregs = 0;
6846 *sse_nregs = 0;
6848 if (!n)
6849 return true;
6850 for (n--; n >= 0; n--)
6851 switch (regclass[n])
6853 case X86_64_INTEGER_CLASS:
6854 case X86_64_INTEGERSI_CLASS:
6855 (*int_nregs)++;
6856 break;
6857 case X86_64_SSE_CLASS:
6858 case X86_64_SSESF_CLASS:
6859 case X86_64_SSEDF_CLASS:
6860 (*sse_nregs)++;
6861 break;
6862 case X86_64_NO_CLASS:
6863 case X86_64_SSEUP_CLASS:
6864 break;
6865 case X86_64_X87_CLASS:
6866 case X86_64_X87UP_CLASS:
6867 case X86_64_COMPLEX_X87_CLASS:
6868 if (!in_return)
6869 return true;
6870 break;
6871 case X86_64_MEMORY_CLASS:
6872 gcc_unreachable ();
6875 return false;
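/* Illustrative example, not part of the original sources: a __int128
   argument (TImode) classifies as two X86_64_INTEGER_CLASS eightbytes,
   so examine_argument sets *int_nregs = 2 and *sse_nregs = 0 and returns
   false, i.e. the value fits in two general purpose registers.  */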
6878 /* Construct container for the argument used by GCC interface. See
6879 FUNCTION_ARG for the detailed description. */
6881 static rtx
6882 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6883 const_tree type, int in_return, int nintregs, int nsseregs,
6884 const int *intreg, int sse_regno)
6886 /* The following variables hold the static issued_error state. */
6887 static bool issued_sse_arg_error;
6888 static bool issued_sse_ret_error;
6889 static bool issued_x87_ret_error;
6891 enum machine_mode tmpmode;
6892 int bytes =
6893 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6894 enum x86_64_reg_class regclass[MAX_CLASSES];
6895 int n;
6896 int i;
6897 int nexps = 0;
6898 int needed_sseregs, needed_intregs;
6899 rtx exp[MAX_CLASSES];
6900 rtx ret;
6902 n = classify_argument (mode, type, regclass, 0);
6903 if (!n)
6904 return NULL;
6905 if (examine_argument (mode, type, in_return, &needed_intregs,
6906 &needed_sseregs))
6907 return NULL;
6908 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6909 return NULL;
6911 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6912 some less clueful developer tries to use floating-point anyway. */
6913 if (needed_sseregs && !TARGET_SSE)
6915 if (in_return)
6917 if (!issued_sse_ret_error)
6919 error ("SSE register return with SSE disabled");
6920 issued_sse_ret_error = true;
6923 else if (!issued_sse_arg_error)
6925 error ("SSE register argument with SSE disabled");
6926 issued_sse_arg_error = true;
6928 return NULL;
6931 /* Likewise, error if the ABI requires us to return values in the
6932 x87 registers and the user specified -mno-80387. */
6933 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
6934 for (i = 0; i < n; i++)
6935 if (regclass[i] == X86_64_X87_CLASS
6936 || regclass[i] == X86_64_X87UP_CLASS
6937 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6939 if (!issued_x87_ret_error)
6941 error ("x87 register return with x87 disabled");
6942 issued_x87_ret_error = true;
6944 return NULL;
6947 /* First construct simple cases. Avoid SCmode, since we want to use
6948 a single register to pass this type.  */
6949 if (n == 1 && mode != SCmode)
6950 switch (regclass[0])
6952 case X86_64_INTEGER_CLASS:
6953 case X86_64_INTEGERSI_CLASS:
6954 return gen_rtx_REG (mode, intreg[0]);
6955 case X86_64_SSE_CLASS:
6956 case X86_64_SSESF_CLASS:
6957 case X86_64_SSEDF_CLASS:
6958 if (mode != BLKmode)
6959 return gen_reg_or_parallel (mode, orig_mode,
6960 SSE_REGNO (sse_regno));
6961 break;
6962 case X86_64_X87_CLASS:
6963 case X86_64_COMPLEX_X87_CLASS:
6964 return gen_rtx_REG (mode, FIRST_STACK_REG);
6965 case X86_64_NO_CLASS:
6966 /* Zero sized array, struct or class. */
6967 return NULL;
6968 default:
6969 gcc_unreachable ();
6971 if (n == 2
6972 && regclass[0] == X86_64_SSE_CLASS
6973 && regclass[1] == X86_64_SSEUP_CLASS
6974 && mode != BLKmode)
6975 return gen_reg_or_parallel (mode, orig_mode,
6976 SSE_REGNO (sse_regno));
6977 if (n == 4
6978 && regclass[0] == X86_64_SSE_CLASS
6979 && regclass[1] == X86_64_SSEUP_CLASS
6980 && regclass[2] == X86_64_SSEUP_CLASS
6981 && regclass[3] == X86_64_SSEUP_CLASS
6982 && mode != BLKmode)
6983 return gen_reg_or_parallel (mode, orig_mode,
6984 SSE_REGNO (sse_regno));
6985 if (n == 8
6986 && regclass[0] == X86_64_SSE_CLASS
6987 && regclass[1] == X86_64_SSEUP_CLASS
6988 && regclass[2] == X86_64_SSEUP_CLASS
6989 && regclass[3] == X86_64_SSEUP_CLASS
6990 && regclass[4] == X86_64_SSEUP_CLASS
6991 && regclass[5] == X86_64_SSEUP_CLASS
6992 && regclass[6] == X86_64_SSEUP_CLASS
6993 && regclass[7] == X86_64_SSEUP_CLASS
6994 && mode != BLKmode)
6995 return gen_reg_or_parallel (mode, orig_mode,
6996 SSE_REGNO (sse_regno));
6997 if (n == 2
6998 && regclass[0] == X86_64_X87_CLASS
6999 && regclass[1] == X86_64_X87UP_CLASS)
7000 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7002 if (n == 2
7003 && regclass[0] == X86_64_INTEGER_CLASS
7004 && regclass[1] == X86_64_INTEGER_CLASS
7005 && (mode == CDImode || mode == TImode)
7006 && intreg[0] + 1 == intreg[1])
7007 return gen_rtx_REG (mode, intreg[0]);
7009 /* Otherwise figure out the entries of the PARALLEL. */
7010 for (i = 0; i < n; i++)
7012 int pos;
7014 switch (regclass[i])
7016 case X86_64_NO_CLASS:
7017 break;
7018 case X86_64_INTEGER_CLASS:
7019 case X86_64_INTEGERSI_CLASS:
7020 /* Merge TImodes on aligned occasions here too. */
7021 if (i * 8 + 8 > bytes)
7022 tmpmode
7023 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7024 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7025 tmpmode = SImode;
7026 else
7027 tmpmode = DImode;
7028 /* We've requested a size for which we don't
7029 have a suitable integer mode.  Use DImode.  */
7030 if (tmpmode == BLKmode)
7031 tmpmode = DImode;
7032 exp [nexps++]
7033 = gen_rtx_EXPR_LIST (VOIDmode,
7034 gen_rtx_REG (tmpmode, *intreg),
7035 GEN_INT (i*8));
7036 intreg++;
7037 break;
7038 case X86_64_SSESF_CLASS:
7039 exp [nexps++]
7040 = gen_rtx_EXPR_LIST (VOIDmode,
7041 gen_rtx_REG (SFmode,
7042 SSE_REGNO (sse_regno)),
7043 GEN_INT (i*8));
7044 sse_regno++;
7045 break;
7046 case X86_64_SSEDF_CLASS:
7047 exp [nexps++]
7048 = gen_rtx_EXPR_LIST (VOIDmode,
7049 gen_rtx_REG (DFmode,
7050 SSE_REGNO (sse_regno)),
7051 GEN_INT (i*8));
7052 sse_regno++;
7053 break;
7054 case X86_64_SSE_CLASS:
7055 pos = i;
7056 switch (n)
7058 case 1:
7059 tmpmode = DImode;
7060 break;
7061 case 2:
7062 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7064 tmpmode = TImode;
7065 i++;
7067 else
7068 tmpmode = DImode;
7069 break;
7070 case 4:
7071 gcc_assert (i == 0
7072 && regclass[1] == X86_64_SSEUP_CLASS
7073 && regclass[2] == X86_64_SSEUP_CLASS
7074 && regclass[3] == X86_64_SSEUP_CLASS);
7075 tmpmode = OImode;
7076 i += 3;
7077 break;
7078 case 8:
7079 gcc_assert (i == 0
7080 && regclass[1] == X86_64_SSEUP_CLASS
7081 && regclass[2] == X86_64_SSEUP_CLASS
7082 && regclass[3] == X86_64_SSEUP_CLASS
7083 && regclass[4] == X86_64_SSEUP_CLASS
7084 && regclass[5] == X86_64_SSEUP_CLASS
7085 && regclass[6] == X86_64_SSEUP_CLASS
7086 && regclass[7] == X86_64_SSEUP_CLASS);
7087 tmpmode = XImode;
7088 i += 7;
7089 break;
7090 default:
7091 gcc_unreachable ();
7093 exp [nexps++]
7094 = gen_rtx_EXPR_LIST (VOIDmode,
7095 gen_rtx_REG (tmpmode,
7096 SSE_REGNO (sse_regno)),
7097 GEN_INT (pos*8));
7098 sse_regno++;
7099 break;
7100 default:
7101 gcc_unreachable ();
7105 /* Empty aligned struct, union or class. */
7106 if (nexps == 0)
7107 return NULL;
7109 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7110 for (i = 0; i < nexps; i++)
7111 XVECEXP (ret, 0, i) = exp [i];
7112 return ret;
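/* Illustrative example, not part of the original sources: for the
   hypothetical struct { double d; long l; } used above, the PARALLEL
   built here looks roughly like

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI di) (const_int 8))])

   i.e. the SSEDF eightbyte at offset 0 taken from the next free SSE
   register and the INTEGER eightbyte at offset 8 taken from the next
   free integer register.  */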
7115 /* Update the data in CUM to advance over an argument of mode MODE
7116 and data type TYPE. (TYPE is null for libcalls where that information
7117 may not be available.) */
7119 static void
7120 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7121 const_tree type, HOST_WIDE_INT bytes,
7122 HOST_WIDE_INT words)
7124 switch (mode)
7126 default:
7127 break;
7129 case BLKmode:
7130 if (bytes < 0)
7131 break;
7132 /* FALLTHRU */
7134 case DImode:
7135 case SImode:
7136 case HImode:
7137 case QImode:
7138 cum->words += words;
7139 cum->nregs -= words;
7140 cum->regno += words;
7142 if (cum->nregs <= 0)
7144 cum->nregs = 0;
7145 cum->regno = 0;
7147 break;
7149 case OImode:
7150 /* OImode shouldn't be used directly. */
7151 gcc_unreachable ();
7153 case DFmode:
7154 if (cum->float_in_sse < 2)
7155 break;
7156 case SFmode:
7157 if (cum->float_in_sse < 1)
7158 break;
7159 /* FALLTHRU */
7161 case V8SFmode:
7162 case V8SImode:
7163 case V64QImode:
7164 case V32HImode:
7165 case V16SImode:
7166 case V8DImode:
7167 case V16SFmode:
7168 case V8DFmode:
7169 case V32QImode:
7170 case V16HImode:
7171 case V4DFmode:
7172 case V4DImode:
7173 case TImode:
7174 case V16QImode:
7175 case V8HImode:
7176 case V4SImode:
7177 case V2DImode:
7178 case V4SFmode:
7179 case V2DFmode:
7180 if (!type || !AGGREGATE_TYPE_P (type))
7182 cum->sse_words += words;
7183 cum->sse_nregs -= 1;
7184 cum->sse_regno += 1;
7185 if (cum->sse_nregs <= 0)
7187 cum->sse_nregs = 0;
7188 cum->sse_regno = 0;
7191 break;
7193 case V8QImode:
7194 case V4HImode:
7195 case V2SImode:
7196 case V2SFmode:
7197 case V1TImode:
7198 case V1DImode:
7199 if (!type || !AGGREGATE_TYPE_P (type))
7201 cum->mmx_words += words;
7202 cum->mmx_nregs -= 1;
7203 cum->mmx_regno += 1;
7204 if (cum->mmx_nregs <= 0)
7206 cum->mmx_nregs = 0;
7207 cum->mmx_regno = 0;
7210 break;
7214 static void
7215 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7216 const_tree type, HOST_WIDE_INT words, bool named)
7218 int int_nregs, sse_nregs;
7220 /* Unnamed 512 and 256 bit vector mode parameters are passed on the stack. */
7221 if (!named && (VALID_AVX512F_REG_MODE (mode)
7222 || VALID_AVX256_REG_MODE (mode)))
7223 return;
7225 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7226 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7228 cum->nregs -= int_nregs;
7229 cum->sse_nregs -= sse_nregs;
7230 cum->regno += int_nregs;
7231 cum->sse_regno += sse_nregs;
7233 else
7235 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7236 cum->words = (cum->words + align - 1) & ~(align - 1);
7237 cum->words += words;
7241 static void
7242 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7243 HOST_WIDE_INT words)
7245 /* Otherwise, this should be passed indirect. */
7246 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7248 cum->words += words;
7249 if (cum->nregs > 0)
7251 cum->nregs -= 1;
7252 cum->regno += 1;
7256 /* Update the data in CUM to advance over an argument of mode MODE and
7257 data type TYPE. (TYPE is null for libcalls where that information
7258 may not be available.) */
7260 static void
7261 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7262 const_tree type, bool named)
7264 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7265 HOST_WIDE_INT bytes, words;
7267 if (mode == BLKmode)
7268 bytes = int_size_in_bytes (type);
7269 else
7270 bytes = GET_MODE_SIZE (mode);
7271 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7273 if (type)
7274 mode = type_natural_mode (type, NULL, false);
7276 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7277 function_arg_advance_ms_64 (cum, bytes, words);
7278 else if (TARGET_64BIT)
7279 function_arg_advance_64 (cum, mode, type, words, named);
7280 else
7281 function_arg_advance_32 (cum, mode, type, bytes, words);
7284 /* Define where to put the arguments to a function.
7285 Value is zero to push the argument on the stack,
7286 or a hard register in which to store the argument.
7288 MODE is the argument's machine mode.
7289 TYPE is the data type of the argument (as a tree).
7290 This is null for libcalls where that information may
7291 not be available.
7292 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7293 the preceding args and about the function being called.
7294 NAMED is nonzero if this argument is a named parameter
7295 (otherwise it is an extra parameter matching an ellipsis). */
7297 static rtx
7298 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7299 enum machine_mode orig_mode, const_tree type,
7300 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7302 /* Avoid the AL settings for the Unix64 ABI. */
7303 if (mode == VOIDmode)
7304 return constm1_rtx;
7306 switch (mode)
7308 default:
7309 break;
7311 case BLKmode:
7312 if (bytes < 0)
7313 break;
7314 /* FALLTHRU */
7315 case DImode:
7316 case SImode:
7317 case HImode:
7318 case QImode:
7319 if (words <= cum->nregs)
7321 int regno = cum->regno;
7323 /* Fastcall allocates the first two DWORD (SImode) or
7324 smaller arguments to ECX and EDX if it isn't an
7325 aggregate type.  */
7326 if (cum->fastcall)
7328 if (mode == BLKmode
7329 || mode == DImode
7330 || (type && AGGREGATE_TYPE_P (type)))
7331 break;
7333 /* ECX not EAX is the first allocated register. */
7334 if (regno == AX_REG)
7335 regno = CX_REG;
7337 return gen_rtx_REG (mode, regno);
7339 break;
7341 case DFmode:
7342 if (cum->float_in_sse < 2)
7343 break;
7344 case SFmode:
7345 if (cum->float_in_sse < 1)
7346 break;
7347 /* FALLTHRU */
7348 case TImode:
7349 /* In 32bit, we pass TImode in xmm registers. */
7350 case V16QImode:
7351 case V8HImode:
7352 case V4SImode:
7353 case V2DImode:
7354 case V4SFmode:
7355 case V2DFmode:
7356 if (!type || !AGGREGATE_TYPE_P (type))
7358 if (cum->sse_nregs)
7359 return gen_reg_or_parallel (mode, orig_mode,
7360 cum->sse_regno + FIRST_SSE_REG);
7362 break;
7364 case OImode:
7365 case XImode:
7366 /* OImode and XImode shouldn't be used directly. */
7367 gcc_unreachable ();
7369 case V64QImode:
7370 case V32HImode:
7371 case V16SImode:
7372 case V8DImode:
7373 case V16SFmode:
7374 case V8DFmode:
7375 case V8SFmode:
7376 case V8SImode:
7377 case V32QImode:
7378 case V16HImode:
7379 case V4DFmode:
7380 case V4DImode:
7381 if (!type || !AGGREGATE_TYPE_P (type))
7383 if (cum->sse_nregs)
7384 return gen_reg_or_parallel (mode, orig_mode,
7385 cum->sse_regno + FIRST_SSE_REG);
7387 break;
7389 case V8QImode:
7390 case V4HImode:
7391 case V2SImode:
7392 case V2SFmode:
7393 case V1TImode:
7394 case V1DImode:
7395 if (!type || !AGGREGATE_TYPE_P (type))
7397 if (cum->mmx_nregs)
7398 return gen_reg_or_parallel (mode, orig_mode,
7399 cum->mmx_regno + FIRST_MMX_REG);
7401 break;
7404 return NULL_RTX;
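/* Illustrative example, not part of the original sources: with the
   fastcall handling above, a hypothetical

       __attribute__((fastcall)) int f (int a, int b, int c);

   receives A in %ecx (the AX_REG -> CX_REG remapping) and B in %edx,
   while C no longer fits in CUM->nregs and is pushed on the stack.  */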
7407 static rtx
7408 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7409 enum machine_mode orig_mode, const_tree type, bool named)
7411 /* Handle a hidden AL argument containing number of registers
7412 for varargs x86-64 functions. */
7413 if (mode == VOIDmode)
7414 return GEN_INT (cum->maybe_vaarg
7415 ? (cum->sse_nregs < 0
7416 ? X86_64_SSE_REGPARM_MAX
7417 : cum->sse_regno)
7418 : -1);
7420 switch (mode)
7422 default:
7423 break;
7425 case V8SFmode:
7426 case V8SImode:
7427 case V32QImode:
7428 case V16HImode:
7429 case V4DFmode:
7430 case V4DImode:
7431 case V16SFmode:
7432 case V16SImode:
7433 case V64QImode:
7434 case V32HImode:
7435 case V8DFmode:
7436 case V8DImode:
7437 /* Unnamed 256 and 512 bit vector mode parameters are passed on the stack. */
7438 if (!named)
7439 return NULL;
7440 break;
7443 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7444 cum->sse_nregs,
7445 &x86_64_int_parameter_registers [cum->regno],
7446 cum->sse_regno);
7449 static rtx
7450 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7451 enum machine_mode orig_mode, bool named,
7452 HOST_WIDE_INT bytes)
7454 unsigned int regno;
7456 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7457 We use the value -2 to specify that the current function call is MS ABI.  */
7458 if (mode == VOIDmode)
7459 return GEN_INT (-2);
7461 /* If we've run out of registers, it goes on the stack. */
7462 if (cum->nregs == 0)
7463 return NULL_RTX;
7465 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7467 /* Only floating point modes are passed in anything but integer regs. */
7468 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7470 if (named)
7471 regno = cum->regno + FIRST_SSE_REG;
7472 else
7474 rtx t1, t2;
7476 /* Unnamed floating parameters are passed in both the
7477 SSE and integer registers. */
7478 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7479 t2 = gen_rtx_REG (mode, regno);
7480 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7481 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7482 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7485 /* Handle aggregate types passed in registers.  */
7486 if (orig_mode == BLKmode)
7488 if (bytes > 0 && bytes <= 8)
7489 mode = (bytes > 4 ? DImode : SImode);
7490 if (mode == BLKmode)
7491 mode = DImode;
7494 return gen_reg_or_parallel (mode, orig_mode, regno);
7497 /* Return where to put the arguments to a function.
7498 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7500 MODE is the argument's machine mode. TYPE is the data type of the
7501 argument. It is null for libcalls where that information may not be
7502 available. CUM gives information about the preceding args and about
7503 the function being called. NAMED is nonzero if this argument is a
7504 named parameter (otherwise it is an extra parameter matching an
7505 ellipsis). */
7507 static rtx
7508 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7509 const_tree type, bool named)
7511 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7512 enum machine_mode mode = omode;
7513 HOST_WIDE_INT bytes, words;
7514 rtx arg;
7516 if (mode == BLKmode)
7517 bytes = int_size_in_bytes (type);
7518 else
7519 bytes = GET_MODE_SIZE (mode);
7520 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7522 /* To simplify the code below, represent vector types with a vector mode
7523 even if MMX/SSE are not active. */
7524 if (type && TREE_CODE (type) == VECTOR_TYPE)
7525 mode = type_natural_mode (type, cum, false);
7527 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7528 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7529 else if (TARGET_64BIT)
7530 arg = function_arg_64 (cum, mode, omode, type, named);
7531 else
7532 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7534 return arg;
7537 /* A C expression that indicates when an argument must be passed by
7538 reference. If nonzero for an argument, a copy of that argument is
7539 made in memory and a pointer to the argument is passed instead of
7540 the argument itself. The pointer is passed in whatever way is
7541 appropriate for passing a pointer to that type. */
7543 static bool
7544 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7545 const_tree type, bool named ATTRIBUTE_UNUSED)
7547 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7549 /* See Windows x64 Software Convention. */
7550 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7552 int msize = (int) GET_MODE_SIZE (mode);
7553 if (type)
7555 /* Arrays are passed by reference. */
7556 if (TREE_CODE (type) == ARRAY_TYPE)
7557 return true;
7559 if (AGGREGATE_TYPE_P (type))
7561 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7562 are passed by reference. */
7563 msize = int_size_in_bytes (type);
7567 /* __m128 is passed by reference. */
7568 switch (msize) {
7569 case 1: case 2: case 4: case 8:
7570 break;
7571 default:
7572 return true;
7575 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7576 return 1;
7578 return 0;
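/* Illustrative example, not part of the original sources: under the MS
   ABI check above, a hypothetical 12-byte struct { int a, b, c; } has a
   size outside {1, 2, 4, 8} and is passed by reference, as is __m128
   (16 bytes), while an 8-byte struct { int a, b; } is still passed by
   value.  */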
7581 /* Return true when TYPE should be 128bit aligned for 32bit argument
7582 passing ABI. XXX: This function is obsolete and is only used for
7583 checking psABI compatibility with previous versions of GCC. */
7585 static bool
7586 ix86_compat_aligned_value_p (const_tree type)
7588 enum machine_mode mode = TYPE_MODE (type);
7589 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7590 || mode == TDmode
7591 || mode == TFmode
7592 || mode == TCmode)
7593 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7594 return true;
7595 if (TYPE_ALIGN (type) < 128)
7596 return false;
7598 if (AGGREGATE_TYPE_P (type))
7600 /* Walk the aggregates recursively. */
7601 switch (TREE_CODE (type))
7603 case RECORD_TYPE:
7604 case UNION_TYPE:
7605 case QUAL_UNION_TYPE:
7607 tree field;
7609 /* Walk all the structure fields. */
7610 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7612 if (TREE_CODE (field) == FIELD_DECL
7613 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7614 return true;
7616 break;
7619 case ARRAY_TYPE:
7620 /* Just for use if some languages pass arrays by value.  */
7621 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7622 return true;
7623 break;
7625 default:
7626 gcc_unreachable ();
7629 return false;
7632 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7633 XXX: This function is obsolete and is only used for checking psABI
7634 compatibility with previous versions of GCC. */
7636 static unsigned int
7637 ix86_compat_function_arg_boundary (enum machine_mode mode,
7638 const_tree type, unsigned int align)
7640 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7641 natural boundaries. */
7642 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7644 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7645 make an exception for SSE modes since these require 128bit
7646 alignment.
7648 The handling here differs from field_alignment. ICC aligns MMX
7649 arguments to 4 byte boundaries, while structure fields are aligned
7650 to 8 byte boundaries. */
7651 if (!type)
7653 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7654 align = PARM_BOUNDARY;
7656 else
7658 if (!ix86_compat_aligned_value_p (type))
7659 align = PARM_BOUNDARY;
7662 if (align > BIGGEST_ALIGNMENT)
7663 align = BIGGEST_ALIGNMENT;
7664 return align;
7667 /* Return true when TYPE should be 128bit aligned for 32bit argument
7668 passing ABI. */
7670 static bool
7671 ix86_contains_aligned_value_p (const_tree type)
7673 enum machine_mode mode = TYPE_MODE (type);
7675 if (mode == XFmode || mode == XCmode)
7676 return false;
7678 if (TYPE_ALIGN (type) < 128)
7679 return false;
7681 if (AGGREGATE_TYPE_P (type))
7683 /* Walk the aggregates recursively. */
7684 switch (TREE_CODE (type))
7686 case RECORD_TYPE:
7687 case UNION_TYPE:
7688 case QUAL_UNION_TYPE:
7690 tree field;
7692 /* Walk all the structure fields. */
7693 for (field = TYPE_FIELDS (type);
7694 field;
7695 field = DECL_CHAIN (field))
7697 if (TREE_CODE (field) == FIELD_DECL
7698 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7699 return true;
7701 break;
7704 case ARRAY_TYPE:
7705 /* Just for use if some languages pass arrays by value.  */
7706 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7707 return true;
7708 break;
7710 default:
7711 gcc_unreachable ();
7714 else
7715 return TYPE_ALIGN (type) >= 128;
7717 return false;
7720 /* Gives the alignment boundary, in bits, of an argument with the
7721 specified mode and type. */
7723 static unsigned int
7724 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7726 unsigned int align;
7727 if (type)
7729 /* Since the main variant type is used for the call, we convert the
7730 type to its main variant.  */
7731 type = TYPE_MAIN_VARIANT (type);
7732 align = TYPE_ALIGN (type);
7734 else
7735 align = GET_MODE_ALIGNMENT (mode);
7736 if (align < PARM_BOUNDARY)
7737 align = PARM_BOUNDARY;
7738 else
7740 static bool warned;
7741 unsigned int saved_align = align;
7743 if (!TARGET_64BIT)
7745 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7746 if (!type)
7748 if (mode == XFmode || mode == XCmode)
7749 align = PARM_BOUNDARY;
7751 else if (!ix86_contains_aligned_value_p (type))
7752 align = PARM_BOUNDARY;
7754 if (align < 128)
7755 align = PARM_BOUNDARY;
7758 if (warn_psabi
7759 && !warned
7760 && align != ix86_compat_function_arg_boundary (mode, type,
7761 saved_align))
7763 warned = true;
7764 inform (input_location,
7765 "The ABI for passing parameters with %d-byte"
7766 " alignment has changed in GCC 4.6",
7767 align / BITS_PER_UNIT);
7771 return align;
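/* Illustrative example, not part of the original sources: on a 32-bit
   target the code above gives a long double (XFmode) argument only
   PARM_BOUNDARY (4 byte) alignment, while a __m128 argument keeps its
   natural 128 bit boundary because ix86_contains_aligned_value_p
   returns true for it.  */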
7774 /* Return true if N is a possible register number of function value. */
7776 static bool
7777 ix86_function_value_regno_p (const unsigned int regno)
7779 switch (regno)
7781 case AX_REG:
7782 return true;
7783 case DX_REG:
7784 return (!TARGET_64BIT || ix86_abi != MS_ABI);
7785 case DI_REG:
7786 case SI_REG:
7787 return TARGET_64BIT && ix86_abi != MS_ABI;
7789 /* Complex values are returned in %st(0)/%st(1) pair. */
7790 case ST0_REG:
7791 case ST1_REG:
7792 /* TODO: The function should depend on current function ABI but
7793 builtins.c would need updating then. Therefore we use the
7794 default ABI. */
7795 if (TARGET_64BIT && ix86_abi == MS_ABI)
7796 return false;
7797 return TARGET_FLOAT_RETURNS_IN_80387;
7799 /* Complex values are returned in %xmm0/%xmm1 pair. */
7800 case XMM0_REG:
7801 case XMM1_REG:
7802 return TARGET_SSE;
7804 case MM0_REG:
7805 if (TARGET_MACHO || TARGET_64BIT)
7806 return false;
7807 return TARGET_MMX;
7810 return false;
7813 /* Define how to find the value returned by a function.
7814 VALTYPE is the data type of the value (as a tree).
7815 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7816 otherwise, FUNC is 0. */
7818 static rtx
7819 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7820 const_tree fntype, const_tree fn)
7822 unsigned int regno;
7824 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7825 we normally prevent this case when mmx is not available. However
7826 some ABIs may require the result to be returned like DImode. */
7827 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7828 regno = FIRST_MMX_REG;
7830 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7831 we prevent this case when sse is not available. However some ABIs
7832 may require the result to be returned like integer TImode. */
7833 else if (mode == TImode
7834 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7835 regno = FIRST_SSE_REG;
7837 /* 32-byte vector modes in %ymm0. */
7838 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7839 regno = FIRST_SSE_REG;
7841 /* 64-byte vector modes in %zmm0. */
7842 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7843 regno = FIRST_SSE_REG;
7845 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7846 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7847 regno = FIRST_FLOAT_REG;
7848 else
7849 /* Most things go in %eax. */
7850 regno = AX_REG;
7852 /* Override FP return register with %xmm0 for local functions when
7853 SSE math is enabled or for functions with sseregparm attribute. */
7854 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7856 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7857 if ((sse_level >= 1 && mode == SFmode)
7858 || (sse_level == 2 && mode == DFmode))
7859 regno = FIRST_SSE_REG;
7862 /* OImode shouldn't be used directly. */
7863 gcc_assert (mode != OImode);
7865 return gen_rtx_REG (orig_mode, regno);
7868 static rtx
7869 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7870 const_tree valtype)
7872 rtx ret;
7874 /* Handle libcalls, which don't provide a type node. */
7875 if (valtype == NULL)
7877 unsigned int regno;
7879 switch (mode)
7881 case SFmode:
7882 case SCmode:
7883 case DFmode:
7884 case DCmode:
7885 case TFmode:
7886 case SDmode:
7887 case DDmode:
7888 case TDmode:
7889 regno = FIRST_SSE_REG;
7890 break;
7891 case XFmode:
7892 case XCmode:
7893 regno = FIRST_FLOAT_REG;
7894 break;
7895 case TCmode:
7896 return NULL;
7897 default:
7898 regno = AX_REG;
7901 return gen_rtx_REG (mode, regno);
7903 else if (POINTER_TYPE_P (valtype))
7905 /* Pointers are always returned in word_mode. */
7906 mode = word_mode;
7909 ret = construct_container (mode, orig_mode, valtype, 1,
7910 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7911 x86_64_int_return_registers, 0);
7913 /* For zero sized structures, construct_container returns NULL, but we
7914 need to keep the rest of the compiler happy by returning a meaningful value. */
7915 if (!ret)
7916 ret = gen_rtx_REG (orig_mode, AX_REG);
7918 return ret;
7921 static rtx
7922 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
7923 const_tree valtype)
7925 unsigned int regno = AX_REG;
7927 if (TARGET_SSE)
7929 switch (GET_MODE_SIZE (mode))
7931 case 16:
7932 if (valtype != NULL_TREE
7933 && !VECTOR_INTEGER_TYPE_P (valtype)
7935 && !INTEGRAL_TYPE_P (valtype)
7936 && !VECTOR_FLOAT_TYPE_P (valtype))
7937 break;
7938 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7939 && !COMPLEX_MODE_P (mode))
7940 regno = FIRST_SSE_REG;
7941 break;
7942 case 8:
7943 case 4:
7944 if (mode == SFmode || mode == DFmode)
7945 regno = FIRST_SSE_REG;
7946 break;
7947 default:
7948 break;
7951 return gen_rtx_REG (orig_mode, regno);
7954 static rtx
7955 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7956 enum machine_mode orig_mode, enum machine_mode mode)
7958 const_tree fn, fntype;
7960 fn = NULL_TREE;
7961 if (fntype_or_decl && DECL_P (fntype_or_decl))
7962 fn = fntype_or_decl;
7963 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7965 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7966 return function_value_ms_64 (orig_mode, mode, valtype);
7967 else if (TARGET_64BIT)
7968 return function_value_64 (orig_mode, mode, valtype);
7969 else
7970 return function_value_32 (orig_mode, mode, fntype, fn);
7973 static rtx
7974 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7975 bool outgoing ATTRIBUTE_UNUSED)
7977 enum machine_mode mode, orig_mode;
7979 orig_mode = TYPE_MODE (valtype);
7980 mode = type_natural_mode (valtype, NULL, true);
7981 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7984 /* Pointer function arguments and return values are promoted to
7985 word_mode. */
7987 static enum machine_mode
7988 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7989 int *punsignedp, const_tree fntype,
7990 int for_return)
7992 if (type != NULL_TREE && POINTER_TYPE_P (type))
7994 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7995 return word_mode;
7997 return default_promote_function_mode (type, mode, punsignedp, fntype,
7998 for_return);
8001 /* Return true if a structure, union or array with MODE containing FIELD
8002 should be accessed using BLKmode. */
8004 static bool
8005 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
8007 /* Union with XFmode must be in BLKmode. */
8008 return (mode == XFmode
8009 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8010 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8014 ix86_libcall_value (enum machine_mode mode)
8016 return ix86_function_value_1 (NULL, NULL, mode, mode);
8019 /* Return true iff type is returned in memory. */
8021 static bool
8022 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8024 #ifdef SUBTARGET_RETURN_IN_MEMORY
8025 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8026 #else
8027 const enum machine_mode mode = type_natural_mode (type, NULL, true);
8028 HOST_WIDE_INT size;
8030 if (TARGET_64BIT)
8032 if (ix86_function_type_abi (fntype) == MS_ABI)
8034 size = int_size_in_bytes (type);
8036 /* __m128 is returned in xmm0. */
8037 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8038 || INTEGRAL_TYPE_P (type)
8039 || VECTOR_FLOAT_TYPE_P (type))
8040 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8041 && !COMPLEX_MODE_P (mode)
8042 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8043 return false;
8045 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes.  */
8046 return size != 1 && size != 2 && size != 4 && size != 8;
8048 else
8050 int needed_intregs, needed_sseregs;
8052 return examine_argument (mode, type, 1,
8053 &needed_intregs, &needed_sseregs);
8056 else
8058 if (mode == BLKmode)
8059 return true;
8061 size = int_size_in_bytes (type);
8063 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8064 return false;
8066 if (VECTOR_MODE_P (mode) || mode == TImode)
8068 /* User-created vectors small enough to fit in EAX. */
8069 if (size < 8)
8070 return false;
8072 /* Unless the ABI prescribes otherwise,
8073 MMX/3dNow values are returned in MM0 if available. */
8075 if (size == 8)
8076 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8078 /* SSE values are returned in XMM0 if available. */
8079 if (size == 16)
8080 return !TARGET_SSE;
8082 /* AVX values are returned in YMM0 if available. */
8083 if (size == 32)
8084 return !TARGET_AVX;
8086 /* AVX512F values are returned in ZMM0 if available. */
8087 if (size == 64)
8088 return !TARGET_AVX512F;
8091 if (mode == XFmode)
8092 return false;
8094 if (size > 12)
8095 return true;
8097 /* OImode shouldn't be used directly. */
8098 gcc_assert (mode != OImode);
8100 return false;
8102 #endif
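/* Illustrative example, not part of the original sources: on 64-bit
   SysV, a hypothetical struct { long a, b, c; } (24 bytes, more than two
   eightbytes and none of them SSE) makes classify_argument return 0, so
   examine_argument reports memory and the struct is returned via a
   hidden pointer, whereas struct { long a, b; } still comes back in
   %rax and %rdx.  */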
8106 /* Create the va_list data type. */
8108 /* Returns the calling convention specific va_list data type.
8109 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8111 static tree
8112 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8114 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8116 /* For i386 we use a plain pointer to the argument area.  */
8117 if (!TARGET_64BIT || abi == MS_ABI)
8118 return build_pointer_type (char_type_node);
8120 record = lang_hooks.types.make_type (RECORD_TYPE);
8121 type_decl = build_decl (BUILTINS_LOCATION,
8122 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8124 f_gpr = build_decl (BUILTINS_LOCATION,
8125 FIELD_DECL, get_identifier ("gp_offset"),
8126 unsigned_type_node);
8127 f_fpr = build_decl (BUILTINS_LOCATION,
8128 FIELD_DECL, get_identifier ("fp_offset"),
8129 unsigned_type_node);
8130 f_ovf = build_decl (BUILTINS_LOCATION,
8131 FIELD_DECL, get_identifier ("overflow_arg_area"),
8132 ptr_type_node);
8133 f_sav = build_decl (BUILTINS_LOCATION,
8134 FIELD_DECL, get_identifier ("reg_save_area"),
8135 ptr_type_node);
8137 va_list_gpr_counter_field = f_gpr;
8138 va_list_fpr_counter_field = f_fpr;
8140 DECL_FIELD_CONTEXT (f_gpr) = record;
8141 DECL_FIELD_CONTEXT (f_fpr) = record;
8142 DECL_FIELD_CONTEXT (f_ovf) = record;
8143 DECL_FIELD_CONTEXT (f_sav) = record;
8145 TYPE_STUB_DECL (record) = type_decl;
8146 TYPE_NAME (record) = type_decl;
8147 TYPE_FIELDS (record) = f_gpr;
8148 DECL_CHAIN (f_gpr) = f_fpr;
8149 DECL_CHAIN (f_fpr) = f_ovf;
8150 DECL_CHAIN (f_ovf) = f_sav;
8152 layout_type (record);
8154 /* The correct type is an array type of one element. */
8155 return build_array_type (record, build_index_type (size_zero_node));
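/* Illustrative example, not part of the original sources: the record
   built above corresponds to the familiar SysV x86-64 declaration

       typedef struct __va_list_tag {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __builtin_va_list[1];

   i.e. a one-element array of the four-field record laid out here.  */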
8158 /* Setup the builtin va_list data type and for 64-bit the additional
8159 calling convention specific va_list data types. */
8161 static tree
8162 ix86_build_builtin_va_list (void)
8164 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8166 /* Initialize abi specific va_list builtin types. */
8167 if (TARGET_64BIT)
8169 tree t;
8170 if (ix86_abi == MS_ABI)
8172 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8173 if (TREE_CODE (t) != RECORD_TYPE)
8174 t = build_variant_type_copy (t);
8175 sysv_va_list_type_node = t;
8177 else
8179 t = ret;
8180 if (TREE_CODE (t) != RECORD_TYPE)
8181 t = build_variant_type_copy (t);
8182 sysv_va_list_type_node = t;
8184 if (ix86_abi != MS_ABI)
8186 t = ix86_build_builtin_va_list_abi (MS_ABI);
8187 if (TREE_CODE (t) != RECORD_TYPE)
8188 t = build_variant_type_copy (t);
8189 ms_va_list_type_node = t;
8191 else
8193 t = ret;
8194 if (TREE_CODE (t) != RECORD_TYPE)
8195 t = build_variant_type_copy (t);
8196 ms_va_list_type_node = t;
8200 return ret;
8203 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8205 static void
8206 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8208 rtx save_area, mem;
8209 alias_set_type set;
8210 int i, max;
8212 /* GPR size of varargs save area. */
8213 if (cfun->va_list_gpr_size)
8214 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8215 else
8216 ix86_varargs_gpr_size = 0;
8218 /* FPR size of varargs save area. We don't need it if we don't pass
8219 anything in SSE registers. */
8220 if (TARGET_SSE && cfun->va_list_fpr_size)
8221 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8222 else
8223 ix86_varargs_fpr_size = 0;
8225 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8226 return;
8228 save_area = frame_pointer_rtx;
8229 set = get_varargs_alias_set ();
8231 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8232 if (max > X86_64_REGPARM_MAX)
8233 max = X86_64_REGPARM_MAX;
8235 for (i = cum->regno; i < max; i++)
8237 mem = gen_rtx_MEM (word_mode,
8238 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8239 MEM_NOTRAP_P (mem) = 1;
8240 set_mem_alias_set (mem, set);
8241 emit_move_insn (mem,
8242 gen_rtx_REG (word_mode,
8243 x86_64_int_parameter_registers[i]));
8246 if (ix86_varargs_fpr_size)
8248 enum machine_mode smode;
8249 rtx label, test;
8251 /* Now emit code to save SSE registers. The AX parameter contains number
8252 of SSE parameter registers used to call this function, though all we
8253 actually check here is the zero/non-zero status. */
8255 label = gen_label_rtx ();
8256 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8257 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8258 label));
8260 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8261 we used movdqa (i.e. TImode) instead? Perhaps even better would
8262 be if we could determine the real mode of the data, via a hook
8263 into pass_stdarg. Ignore all that for now. */
8264 smode = V4SFmode;
8265 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8266 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8268 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8269 if (max > X86_64_SSE_REGPARM_MAX)
8270 max = X86_64_SSE_REGPARM_MAX;
8272 for (i = cum->sse_regno; i < max; ++i)
8274 mem = plus_constant (Pmode, save_area,
8275 i * 16 + ix86_varargs_gpr_size);
8276 mem = gen_rtx_MEM (smode, mem);
8277 MEM_NOTRAP_P (mem) = 1;
8278 set_mem_alias_set (mem, set);
8279 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8281 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8284 emit_label (label);
8288 static void
8289 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8291 alias_set_type set = get_varargs_alias_set ();
8292 int i;
8294 /* Reset to zero, as a sysv va_arg might have been used
8295 before.  */
8296 ix86_varargs_gpr_size = 0;
8297 ix86_varargs_fpr_size = 0;
8299 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8301 rtx reg, mem;
8303 mem = gen_rtx_MEM (Pmode,
8304 plus_constant (Pmode, virtual_incoming_args_rtx,
8305 i * UNITS_PER_WORD));
8306 MEM_NOTRAP_P (mem) = 1;
8307 set_mem_alias_set (mem, set);
8309 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8310 emit_move_insn (mem, reg);
8314 static void
8315 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8316 tree type, int *pretend_size ATTRIBUTE_UNUSED,
8317 int no_rtl)
8319 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8320 CUMULATIVE_ARGS next_cum;
8321 tree fntype;
8323 /* This argument doesn't appear to be used anymore. Which is good,
8324 because the old code here didn't suppress rtl generation. */
8325 gcc_assert (!no_rtl);
8327 if (!TARGET_64BIT)
8328 return;
8330 fntype = TREE_TYPE (current_function_decl);
8332 /* For varargs, we do not want to skip the dummy va_dcl argument.
8333 For stdargs, we do want to skip the last named argument. */
8334 next_cum = *cum;
8335 if (stdarg_p (fntype))
8336 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8337 true);
8339 if (cum->call_abi == MS_ABI)
8340 setup_incoming_varargs_ms_64 (&next_cum);
8341 else
8342 setup_incoming_varargs_64 (&next_cum);
8345 /* Checks if TYPE is of kind va_list char *. */
8347 static bool
8348 is_va_list_char_pointer (tree type)
8350 tree canonic;
8352 /* For 32-bit it is always true. */
8353 if (!TARGET_64BIT)
8354 return true;
8355 canonic = ix86_canonical_va_list_type (type);
8356 return (canonic == ms_va_list_type_node
8357 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8360 /* Implement va_start. */
8362 static void
8363 ix86_va_start (tree valist, rtx nextarg)
8365 HOST_WIDE_INT words, n_gpr, n_fpr;
8366 tree f_gpr, f_fpr, f_ovf, f_sav;
8367 tree gpr, fpr, ovf, sav, t;
8368 tree type;
8369 rtx ovf_rtx;
8371 if (flag_split_stack
8372 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8374 unsigned int scratch_regno;
8376 /* When we are splitting the stack, we can't refer to the stack
8377 arguments using internal_arg_pointer, because they may be on
8378 the old stack. The split stack prologue will arrange to
8379 leave a pointer to the old stack arguments in a scratch
8380 register, which we here copy to a pseudo-register. The split
8381 stack prologue can't set the pseudo-register directly because
8382 it (the prologue) runs before any registers have been saved. */
8384 scratch_regno = split_stack_prologue_scratch_regno ();
8385 if (scratch_regno != INVALID_REGNUM)
8387 rtx reg, seq;
8389 reg = gen_reg_rtx (Pmode);
8390 cfun->machine->split_stack_varargs_pointer = reg;
8392 start_sequence ();
8393 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8394 seq = get_insns ();
8395 end_sequence ();
8397 push_topmost_sequence ();
8398 emit_insn_after (seq, entry_of_function ());
8399 pop_topmost_sequence ();
8403 /* Only 64bit target needs something special. */
8404 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8406 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8407 std_expand_builtin_va_start (valist, nextarg);
8408 else
8410 rtx va_r, next;
8412 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8413 next = expand_binop (ptr_mode, add_optab,
8414 cfun->machine->split_stack_varargs_pointer,
8415 crtl->args.arg_offset_rtx,
8416 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8417 convert_move (va_r, next, 0);
8419 return;
8422 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8423 f_fpr = DECL_CHAIN (f_gpr);
8424 f_ovf = DECL_CHAIN (f_fpr);
8425 f_sav = DECL_CHAIN (f_ovf);
8427 valist = build_simple_mem_ref (valist);
8428 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8429 /* The following should be folded into the MEM_REF offset. */
8430 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8431 f_gpr, NULL_TREE);
8432 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8433 f_fpr, NULL_TREE);
8434 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8435 f_ovf, NULL_TREE);
8436 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8437 f_sav, NULL_TREE);
8439 /* Count number of gp and fp argument registers used. */
8440 words = crtl->args.info.words;
8441 n_gpr = crtl->args.info.regno;
8442 n_fpr = crtl->args.info.sse_regno;
8444 if (cfun->va_list_gpr_size)
8446 type = TREE_TYPE (gpr);
8447 t = build2 (MODIFY_EXPR, type,
8448 gpr, build_int_cst (type, n_gpr * 8));
8449 TREE_SIDE_EFFECTS (t) = 1;
8450 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8453 if (TARGET_SSE && cfun->va_list_fpr_size)
8455 type = TREE_TYPE (fpr);
8456 t = build2 (MODIFY_EXPR, type, fpr,
8457 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8458 TREE_SIDE_EFFECTS (t) = 1;
8459 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8462 /* Find the overflow area. */
8463 type = TREE_TYPE (ovf);
8464 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8465 ovf_rtx = crtl->args.internal_arg_pointer;
8466 else
8467 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8468 t = make_tree (type, ovf_rtx);
8469 if (words != 0)
8470 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8471 t = build2 (MODIFY_EXPR, type, ovf, t);
8472 TREE_SIDE_EFFECTS (t) = 1;
8473 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8475 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8477 /* Find the register save area.
8478 The function prologue saves it right above the stack frame.  */
8479 type = TREE_TYPE (sav);
8480 t = make_tree (type, frame_pointer_rtx);
8481 if (!ix86_varargs_gpr_size)
8482 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8483 t = build2 (MODIFY_EXPR, type, sav, t);
8484 TREE_SIDE_EFFECTS (t) = 1;
8485 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
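/* Illustrative example, not part of the original sources: for a
   hypothetical variadic function

       int f (int a, double b, ...);

   one integer and one SSE register are consumed by the named arguments,
   so the code above initializes gp_offset to 1 * 8 = 8 and fp_offset to
   1 * 16 + 8 * X86_64_REGPARM_MAX = 64, with overflow_arg_area pointing
   at the incoming stack arguments and reg_save_area at the register
   save block set up by the prologue.  */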
8489 /* Implement va_arg. */
8491 static tree
8492 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8493 gimple_seq *post_p)
8495 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8496 tree f_gpr, f_fpr, f_ovf, f_sav;
8497 tree gpr, fpr, ovf, sav, t;
8498 int size, rsize;
8499 tree lab_false, lab_over = NULL_TREE;
8500 tree addr, t2;
8501 rtx container;
8502 int indirect_p = 0;
8503 tree ptrtype;
8504 enum machine_mode nat_mode;
8505 unsigned int arg_boundary;
8507 /* Only 64bit target needs something special. */
8508 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8509 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8511 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8512 f_fpr = DECL_CHAIN (f_gpr);
8513 f_ovf = DECL_CHAIN (f_fpr);
8514 f_sav = DECL_CHAIN (f_ovf);
8516 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8517 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8518 valist = build_va_arg_indirect_ref (valist);
8519 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8520 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8521 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8523 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8524 if (indirect_p)
8525 type = build_pointer_type (type);
8526 size = int_size_in_bytes (type);
8527 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8529 nat_mode = type_natural_mode (type, NULL, false);
8530 switch (nat_mode)
8532 case V8SFmode:
8533 case V8SImode:
8534 case V32QImode:
8535 case V16HImode:
8536 case V4DFmode:
8537 case V4DImode:
8538 case V16SFmode:
8539 case V16SImode:
8540 case V64QImode:
8541 case V32HImode:
8542 case V8DFmode:
8543 case V8DImode:
8544 /* Unnamed 256 and 512 bit vector mode parameters are passed on the stack. */
8545 if (!TARGET_64BIT_MS_ABI)
8547 container = NULL;
8548 break;
8551 default:
8552 container = construct_container (nat_mode, TYPE_MODE (type),
8553 type, 0, X86_64_REGPARM_MAX,
8554 X86_64_SSE_REGPARM_MAX, intreg,
8556 break;
8559 /* Pull the value out of the saved registers. */
8561 addr = create_tmp_var (ptr_type_node, "addr");
8563 if (container)
8565 int needed_intregs, needed_sseregs;
8566 bool need_temp;
8567 tree int_addr, sse_addr;
8569 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8570 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8572 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8574 need_temp = (!REG_P (container)
8575 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8576 || TYPE_ALIGN (type) > 128));
8578 /* In case we are passing a structure, verify that it is a consecutive block
8579 in the register save area.  If not, we need to do moves. */
8580 if (!need_temp && !REG_P (container))
8582 /* Verify that all registers are strictly consecutive */
8583 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8585 int i;
8587 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8589 rtx slot = XVECEXP (container, 0, i);
8590 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8591 || INTVAL (XEXP (slot, 1)) != i * 16)
8592 need_temp = 1;
8595 else
8597 int i;
8599 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8601 rtx slot = XVECEXP (container, 0, i);
8602 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8603 || INTVAL (XEXP (slot, 1)) != i * 8)
8604 need_temp = 1;
8608 if (!need_temp)
8610 int_addr = addr;
8611 sse_addr = addr;
8613 else
8615 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8616 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8619 /* First ensure that we fit completely in registers. */
8620 if (needed_intregs)
8622 t = build_int_cst (TREE_TYPE (gpr),
8623 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8624 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8625 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8626 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8627 gimplify_and_add (t, pre_p);
8629 if (needed_sseregs)
8631 t = build_int_cst (TREE_TYPE (fpr),
8632 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8633 + X86_64_REGPARM_MAX * 8);
8634 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8635 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8636 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8637 gimplify_and_add (t, pre_p);
8640 /* Compute index to start of area used for integer regs. */
8641 if (needed_intregs)
8643 /* int_addr = gpr + sav; */
8644 t = fold_build_pointer_plus (sav, gpr);
8645 gimplify_assign (int_addr, t, pre_p);
8647 if (needed_sseregs)
8649 /* sse_addr = fpr + sav; */
8650 t = fold_build_pointer_plus (sav, fpr);
8651 gimplify_assign (sse_addr, t, pre_p);
8653 if (need_temp)
8655 int i, prev_size = 0;
8656 tree temp = create_tmp_var (type, "va_arg_tmp");
8658 /* addr = &temp; */
8659 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8660 gimplify_assign (addr, t, pre_p);
8662 for (i = 0; i < XVECLEN (container, 0); i++)
8664 rtx slot = XVECEXP (container, 0, i);
8665 rtx reg = XEXP (slot, 0);
8666 enum machine_mode mode = GET_MODE (reg);
8667 tree piece_type;
8668 tree addr_type;
8669 tree daddr_type;
8670 tree src_addr, src;
8671 int src_offset;
8672 tree dest_addr, dest;
8673 int cur_size = GET_MODE_SIZE (mode);
8675 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8676 prev_size = INTVAL (XEXP (slot, 1));
8677 if (prev_size + cur_size > size)
8679 cur_size = size - prev_size;
8680 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8681 if (mode == BLKmode)
8682 mode = QImode;
8684 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8685 if (mode == GET_MODE (reg))
8686 addr_type = build_pointer_type (piece_type);
8687 else
8688 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8689 true);
8690 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8691 true);
8693 if (SSE_REGNO_P (REGNO (reg)))
8695 src_addr = sse_addr;
8696 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8698 else
8700 src_addr = int_addr;
8701 src_offset = REGNO (reg) * 8;
8703 src_addr = fold_convert (addr_type, src_addr);
8704 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8706 dest_addr = fold_convert (daddr_type, addr);
8707 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8708 if (cur_size == GET_MODE_SIZE (mode))
8710 src = build_va_arg_indirect_ref (src_addr);
8711 dest = build_va_arg_indirect_ref (dest_addr);
8713 gimplify_assign (dest, src, pre_p);
8715 else
8717 tree copy
8718 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8719 3, dest_addr, src_addr,
8720 size_int (cur_size));
8721 gimplify_and_add (copy, pre_p);
8723 prev_size += cur_size;
8727 if (needed_intregs)
8729 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8730 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8731 gimplify_assign (gpr, t, pre_p);
8734 if (needed_sseregs)
8736 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8737 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8738 gimplify_assign (fpr, t, pre_p);
8741 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8743 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8746 /* ... otherwise out of the overflow area. */
8748 /* When we align a parameter on the stack for the caller, if the parameter
8749 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8750 aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We match the callee
8751 here with the caller. */
8752 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8753 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8754 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8756 /* Care for on-stack alignment if needed. */
8757 if (arg_boundary <= 64 || size == 0)
8758 t = ovf;
8759 else
8761 HOST_WIDE_INT align = arg_boundary / 8;
8762 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8763 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8764 build_int_cst (TREE_TYPE (t), -align));
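/* Illustration (added comment, not in the original source): the statements
   above build the usual round-up-to-a-power-of-two idiom

       ovf = (ovf + align - 1) & -align;

   e.g. with align == 16 an overflow pointer ending in 0x38 is rounded up
   to end in 0x40, while one already ending in 0x40 is left unchanged.  */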
8767 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8768 gimplify_assign (addr, t, pre_p);
8770 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8771 gimplify_assign (unshare_expr (ovf), t, pre_p);
8773 if (container)
8774 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8776 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8777 addr = fold_convert (ptrtype, addr);
8779 if (indirect_p)
8780 addr = build_va_arg_indirect_ref (addr);
8781 return build_va_arg_indirect_ref (addr);
8784 /* Return true if OPNUM's MEM should be matched
8785 in movabs* patterns. */
8787 bool
8788 ix86_check_movabs (rtx insn, int opnum)
8790 rtx set, mem;
8792 set = PATTERN (insn);
8793 if (GET_CODE (set) == PARALLEL)
8794 set = XVECEXP (set, 0, 0);
8795 gcc_assert (GET_CODE (set) == SET);
8796 mem = XEXP (set, opnum);
8797 while (GET_CODE (mem) == SUBREG)
8798 mem = SUBREG_REG (mem);
8799 gcc_assert (MEM_P (mem));
8800 return volatile_ok || !MEM_VOLATILE_P (mem);
8803 /* Initialize the table of extra 80387 mathematical constants. */
8805 static void
8806 init_ext_80387_constants (void)
8808 static const char * cst[5] =
8810 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8811 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8812 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8813 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8814 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8816 int i;
8818 for (i = 0; i < 5; i++)
8820 real_from_string (&ext_80387_constants_table[i], cst[i]);
8821 /* Ensure each constant is rounded to XFmode precision. */
8822 real_convert (&ext_80387_constants_table[i],
8823 XFmode, &ext_80387_constants_table[i]);
8826 ext_80387_constants_init = 1;
8829 /* Return non-zero if the constant is something that
8830 can be loaded with a special instruction. */
8833 standard_80387_constant_p (rtx x)
8835 enum machine_mode mode = GET_MODE (x);
8837 REAL_VALUE_TYPE r;
8839 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8840 return -1;
8842 if (x == CONST0_RTX (mode))
8843 return 1;
8844 if (x == CONST1_RTX (mode))
8845 return 2;
8847 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8849 /* For XFmode constants, try to find a special 80387 instruction when
8850 optimizing for size or on those CPUs that benefit from them. */
8851 if (mode == XFmode
8852 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8854 int i;
8856 if (! ext_80387_constants_init)
8857 init_ext_80387_constants ();
8859 for (i = 0; i < 5; i++)
8860 if (real_identical (&r, &ext_80387_constants_table[i]))
8861 return i + 3;
8864 /* Load of the constant -0.0 or -1.0 will be split as
8865 fldz;fchs or fld1;fchs sequence. */
8866 if (real_isnegzero (&r))
8867 return 8;
8868 if (real_identical (&r, &dconstm1))
8869 return 9;
8871 return 0;
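/* Summary (added for clarity; derived from the table initialized in
   init_ext_80387_constants and the switch in standard_80387_constant_opcode
   below): the return values of standard_80387_constant_p encode

     -1  not an 80387 constant (wrong mode or not a CONST_DOUBLE)
      0  no special load instruction available
      1  0.0       -> fldz
      2  1.0       -> fld1
      3  log10(2)  -> fldlg2
      4  ln(2)     -> fldln2
      5  log2(e)   -> fldl2e
      6  log2(10)  -> fldl2t
      7  pi        -> fldpi
      8  -0.0      -> fldz ; fchs  (split later)
      9  -1.0      -> fld1 ; fchs  (split later)  */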
8874 /* Return the opcode of the special instruction to be used to load
8875 the constant X. */
8877 const char *
8878 standard_80387_constant_opcode (rtx x)
8880 switch (standard_80387_constant_p (x))
8882 case 1:
8883 return "fldz";
8884 case 2:
8885 return "fld1";
8886 case 3:
8887 return "fldlg2";
8888 case 4:
8889 return "fldln2";
8890 case 5:
8891 return "fldl2e";
8892 case 6:
8893 return "fldl2t";
8894 case 7:
8895 return "fldpi";
8896 case 8:
8897 case 9:
8898 return "#";
8899 default:
8900 gcc_unreachable ();
8904 /* Return the CONST_DOUBLE representing the 80387 constant that is
8905 loaded by the specified special instruction. The argument IDX
8906 matches the return value from standard_80387_constant_p. */
8909 standard_80387_constant_rtx (int idx)
8911 int i;
8913 if (! ext_80387_constants_init)
8914 init_ext_80387_constants ();
8916 switch (idx)
8918 case 3:
8919 case 4:
8920 case 5:
8921 case 6:
8922 case 7:
8923 i = idx - 3;
8924 break;
8926 default:
8927 gcc_unreachable ();
8930 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8931 XFmode);
8934 /* Return 1 if X is all 0s and 2 if X is all 1s
8935 in a supported SSE/AVX vector mode. */
8938 standard_sse_constant_p (rtx x)
8940 enum machine_mode mode = GET_MODE (x);
8942 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8943 return 1;
8944 if (vector_all_ones_operand (x, mode))
8945 switch (mode)
8947 case V16QImode:
8948 case V8HImode:
8949 case V4SImode:
8950 case V2DImode:
8951 if (TARGET_SSE2)
8952 return 2;
8953 case V32QImode:
8954 case V16HImode:
8955 case V8SImode:
8956 case V4DImode:
8957 if (TARGET_AVX2)
8958 return 2;
8959 case V64QImode:
8960 case V32HImode:
8961 case V16SImode:
8962 case V8DImode:
8963 if (TARGET_AVX512F)
8964 return 2;
8965 default:
8966 break;
8969 return 0;
8972 /* Return the opcode of the special instruction to be used to load
8973 the constant X. */
8975 const char *
8976 standard_sse_constant_opcode (rtx insn, rtx x)
8978 switch (standard_sse_constant_p (x))
8980 case 1:
8981 switch (get_attr_mode (insn))
8983 case MODE_XI:
8984 case MODE_V16SF:
8985 return "vpxord\t%g0, %g0, %g0";
8986 case MODE_V8DF:
8987 return "vpxorq\t%g0, %g0, %g0";
8988 case MODE_TI:
8989 return "%vpxor\t%0, %d0";
8990 case MODE_V2DF:
8991 return "%vxorpd\t%0, %d0";
8992 case MODE_V4SF:
8993 return "%vxorps\t%0, %d0";
8995 case MODE_OI:
8996 return "vpxor\t%x0, %x0, %x0";
8997 case MODE_V4DF:
8998 return "vxorpd\t%x0, %x0, %x0";
8999 case MODE_V8SF:
9000 return "vxorps\t%x0, %x0, %x0";
9002 default:
9003 break;
9006 case 2:
9007 if (get_attr_mode (insn) == MODE_XI
9008 || get_attr_mode (insn) == MODE_V8DF
9009 || get_attr_mode (insn) == MODE_V16SF)
9010 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9011 if (TARGET_AVX)
9012 return "vpcmpeqd\t%0, %0, %0";
9013 else
9014 return "pcmpeqd\t%0, %0";
9016 default:
9017 break;
9019 gcc_unreachable ();
9022 /* Returns true if OP contains a symbol reference */
9024 bool
9025 symbolic_reference_mentioned_p (rtx op)
9027 const char *fmt;
9028 int i;
9030 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9031 return true;
9033 fmt = GET_RTX_FORMAT (GET_CODE (op));
9034 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9036 if (fmt[i] == 'E')
9038 int j;
9040 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9041 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9042 return true;
9045 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9046 return true;
9049 return false;
9052 /* Return true if it is appropriate to emit `ret' instructions in the
9053 body of a function. Do this only if the epilogue is simple, needing a
9054 couple of insns. Prior to reloading, we can't tell how many registers
9055 must be saved, so return false then. Return false if there is no frame
9056 marker to de-allocate. */
9058 bool
9059 ix86_can_use_return_insn_p (void)
9061 struct ix86_frame frame;
9063 if (! reload_completed || frame_pointer_needed)
9064 return 0;
9066 /* Don't allow more than 32k pop, since that's all we can do
9067 with one instruction. */
9068 if (crtl->args.pops_args && crtl->args.size >= 32768)
9069 return 0;
9071 ix86_compute_frame_layout (&frame);
9072 return (frame.stack_pointer_offset == UNITS_PER_WORD
9073 && (frame.nregs + frame.nsseregs) == 0);
9076 /* Value should be nonzero if functions must have frame pointers.
9077 Zero means the frame pointer need not be set up (and parms may
9078 be accessed via the stack pointer) in functions that seem suitable. */
9080 static bool
9081 ix86_frame_pointer_required (void)
9083 /* If we accessed previous frames, then the generated code expects
9084 to be able to access the saved ebp value in our frame. */
9085 if (cfun->machine->accesses_prev_frame)
9086 return true;
9088 /* Several x86 OSes need a frame pointer for other reasons,
9089 usually pertaining to setjmp. */
9090 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9091 return true;
9093 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
9094 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9095 return true;
9097 /* With Win64 SEH, very large frames need a frame pointer, as the maximum
9098 stack allocation is 4GB. */
9099 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9100 return true;
9102 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9103 turns off the frame pointer by default. Turn it back on now if
9104 we don't have a leaf function. */
9105 if (TARGET_OMIT_LEAF_FRAME_POINTER
9106 && (!crtl->is_leaf
9107 || ix86_current_function_calls_tls_descriptor))
9108 return true;
9110 if (crtl->profile && !flag_fentry)
9111 return true;
9113 return false;
9116 /* Record that the current function accesses previous call frames. */
9118 void
9119 ix86_setup_frame_addresses (void)
9121 cfun->machine->accesses_prev_frame = 1;
9124 #ifndef USE_HIDDEN_LINKONCE
9125 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9126 # define USE_HIDDEN_LINKONCE 1
9127 # else
9128 # define USE_HIDDEN_LINKONCE 0
9129 # endif
9130 #endif
9132 static int pic_labels_used;
9134 /* Fills in the label name that should be used for a pc thunk for
9135 the given register. */
9137 static void
9138 get_pc_thunk_name (char name[32], unsigned int regno)
9140 gcc_assert (!TARGET_64BIT);
9142 if (USE_HIDDEN_LINKONCE)
9143 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9144 else
9145 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9149 /* This function generates code for -fpic that loads %ebx with
9150 the return address of the caller and then returns. */
9152 static void
9153 ix86_code_end (void)
9155 rtx xops[2];
9156 int regno;
9158 for (regno = AX_REG; regno <= SP_REG; regno++)
9160 char name[32];
9161 tree decl;
9163 if (!(pic_labels_used & (1 << regno)))
9164 continue;
9166 get_pc_thunk_name (name, regno);
9168 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9169 get_identifier (name),
9170 build_function_type_list (void_type_node, NULL_TREE));
9171 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9172 NULL_TREE, void_type_node);
9173 TREE_PUBLIC (decl) = 1;
9174 TREE_STATIC (decl) = 1;
9175 DECL_IGNORED_P (decl) = 1;
9177 #if TARGET_MACHO
9178 if (TARGET_MACHO)
9180 switch_to_section (darwin_sections[text_coal_section]);
9181 fputs ("\t.weak_definition\t", asm_out_file);
9182 assemble_name (asm_out_file, name);
9183 fputs ("\n\t.private_extern\t", asm_out_file);
9184 assemble_name (asm_out_file, name);
9185 putc ('\n', asm_out_file);
9186 ASM_OUTPUT_LABEL (asm_out_file, name);
9187 DECL_WEAK (decl) = 1;
9189 else
9190 #endif
9191 if (USE_HIDDEN_LINKONCE)
9193 cgraph_create_node (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9195 targetm.asm_out.unique_section (decl, 0);
9196 switch_to_section (get_named_section (decl, NULL, 0));
9198 targetm.asm_out.globalize_label (asm_out_file, name);
9199 fputs ("\t.hidden\t", asm_out_file);
9200 assemble_name (asm_out_file, name);
9201 putc ('\n', asm_out_file);
9202 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9204 else
9206 switch_to_section (text_section);
9207 ASM_OUTPUT_LABEL (asm_out_file, name);
9210 DECL_INITIAL (decl) = make_node (BLOCK);
9211 current_function_decl = decl;
9212 init_function_start (decl);
9213 first_function_block_is_cold = false;
9214 /* Make sure unwind info is emitted for the thunk if needed. */
9215 final_start_function (emit_barrier (), asm_out_file, 1);
9217 /* Pad stack IP move with 4 instructions (two NOPs count
9218 as one instruction). */
9219 if (TARGET_PAD_SHORT_FUNCTION)
9221 int i = 8;
9223 while (i--)
9224 fputs ("\tnop\n", asm_out_file);
9227 xops[0] = gen_rtx_REG (Pmode, regno);
9228 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9229 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9230 fputs ("\tret\n", asm_out_file);
9231 final_end_function ();
9232 init_insn_lengths ();
9233 free_after_compilation (cfun);
9234 set_cfun (NULL);
9235 current_function_decl = NULL;
9238 if (flag_split_stack)
9239 file_end_indicate_split_stack ();
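/* For reference (an illustrative sketch, not emitted verbatim from this
   function): a thunk generated above for %ebx ends up looking roughly like

       __x86.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   i.e. it copies the return address -- the address of the instruction
   following the call -- into the requested register, optionally padded
   with nops when TARGET_PAD_SHORT_FUNCTION is set.  */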
9242 /* Emit code for the SET_GOT patterns. */
9244 const char *
9245 output_set_got (rtx dest, rtx label)
9247 rtx xops[3];
9249 xops[0] = dest;
9251 if (TARGET_VXWORKS_RTP && flag_pic)
9253 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9254 xops[2] = gen_rtx_MEM (Pmode,
9255 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9256 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9258 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9259 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9260 an unadorned address. */
9261 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9262 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9263 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9264 return "";
9267 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9269 if (!flag_pic)
9271 if (TARGET_MACHO)
9272 /* We don't need a pic base, we're not producing pic. */
9273 gcc_unreachable ();
9275 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9276 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9277 targetm.asm_out.internal_label (asm_out_file, "L",
9278 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9280 else
9282 char name[32];
9283 get_pc_thunk_name (name, REGNO (dest));
9284 pic_labels_used |= 1 << REGNO (dest);
9286 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9287 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9288 output_asm_insn ("call\t%X2", xops);
9290 #if TARGET_MACHO
9291 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9292 This is what will be referenced by the Mach-O PIC subsystem. */
9293 if (machopic_should_output_picbase_label () || !label)
9294 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9296 /* When we are restoring the pic base at the site of a nonlocal label,
9297 and we decided to emit the pic base above, we will still output a
9298 local label used for calculating the correction offset (even though
9299 the offset will be 0 in that case). */
9300 if (label)
9301 targetm.asm_out.internal_label (asm_out_file, "L",
9302 CODE_LABEL_NUMBER (label));
9303 #endif
9306 if (!TARGET_MACHO)
9307 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9309 return "";
9312 /* Generate a "push" pattern for input ARG. */
9314 static rtx
9315 gen_push (rtx arg)
9317 struct machine_function *m = cfun->machine;
9319 if (m->fs.cfa_reg == stack_pointer_rtx)
9320 m->fs.cfa_offset += UNITS_PER_WORD;
9321 m->fs.sp_offset += UNITS_PER_WORD;
9323 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9324 arg = gen_rtx_REG (word_mode, REGNO (arg));
9326 return gen_rtx_SET (VOIDmode,
9327 gen_rtx_MEM (word_mode,
9328 gen_rtx_PRE_DEC (Pmode,
9329 stack_pointer_rtx)),
9330 arg);
9333 /* Generate a "pop" pattern for input ARG. */
9335 static rtx
9336 gen_pop (rtx arg)
9338 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9339 arg = gen_rtx_REG (word_mode, REGNO (arg));
9341 return gen_rtx_SET (VOIDmode,
9342 arg,
9343 gen_rtx_MEM (word_mode,
9344 gen_rtx_POST_INC (Pmode,
9345 stack_pointer_rtx)));
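/* For illustration (added comment, not from the original source): on a
   64-bit target the two helpers above produce RTL of roughly this shape:

     gen_push (reg)  ->  (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI ...))
     gen_pop (reg)   ->  (set (reg:DI ...) (mem:DI (post_inc:DI (reg:DI sp))))

   with gen_push also bumping the tracked sp (and possibly cfa) offset by
   UNITS_PER_WORD.  */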
9348 /* Return >= 0 if there is an unused call-clobbered register available
9349 for the entire function. */
9351 static unsigned int
9352 ix86_select_alt_pic_regnum (void)
9354 if (crtl->is_leaf
9355 && !crtl->profile
9356 && !ix86_current_function_calls_tls_descriptor)
9358 int i, drap;
9359 /* Can't use the same register for both PIC and DRAP. */
9360 if (crtl->drap_reg)
9361 drap = REGNO (crtl->drap_reg);
9362 else
9363 drap = -1;
9364 for (i = 2; i >= 0; --i)
9365 if (i != drap && !df_regs_ever_live_p (i))
9366 return i;
9369 return INVALID_REGNUM;
9372 /* Return TRUE if we need to save REGNO. */
9374 static bool
9375 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9377 if (pic_offset_table_rtx
9378 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9379 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9380 || crtl->profile
9381 || crtl->calls_eh_return
9382 || crtl->uses_const_pool
9383 || cfun->has_nonlocal_label))
9384 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9386 if (crtl->calls_eh_return && maybe_eh_return)
9388 unsigned i;
9389 for (i = 0; ; i++)
9391 unsigned test = EH_RETURN_DATA_REGNO (i);
9392 if (test == INVALID_REGNUM)
9393 break;
9394 if (test == regno)
9395 return true;
9399 if (crtl->drap_reg
9400 && regno == REGNO (crtl->drap_reg)
9401 && !cfun->machine->no_drap_save_restore)
9402 return true;
9404 return (df_regs_ever_live_p (regno)
9405 && !call_used_regs[regno]
9406 && !fixed_regs[regno]
9407 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9410 /* Return the number of saved general purpose registers. */
9412 static int
9413 ix86_nsaved_regs (void)
9415 int nregs = 0;
9416 int regno;
9418 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9419 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9420 nregs ++;
9421 return nregs;
9424 /* Return the number of saved SSE registers. */
9426 static int
9427 ix86_nsaved_sseregs (void)
9429 int nregs = 0;
9430 int regno;
9432 if (!TARGET_64BIT_MS_ABI)
9433 return 0;
9434 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9435 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9436 nregs ++;
9437 return nregs;
9440 /* Given FROM and TO register numbers, say whether this elimination is
9441 allowed. If stack alignment is needed, we can only replace argument
9442 pointer with hard frame pointer, or replace frame pointer with stack
9443 pointer. Otherwise, frame pointer elimination is automatically
9444 handled and all other eliminations are valid. */
9446 static bool
9447 ix86_can_eliminate (const int from, const int to)
9449 if (stack_realign_fp)
9450 return ((from == ARG_POINTER_REGNUM
9451 && to == HARD_FRAME_POINTER_REGNUM)
9452 || (from == FRAME_POINTER_REGNUM
9453 && to == STACK_POINTER_REGNUM));
9454 else
9455 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9458 /* Return the offset between two registers, one to be eliminated, and the other
9459 its replacement, at the start of a routine. */
9461 HOST_WIDE_INT
9462 ix86_initial_elimination_offset (int from, int to)
9464 struct ix86_frame frame;
9465 ix86_compute_frame_layout (&frame);
9467 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9468 return frame.hard_frame_pointer_offset;
9469 else if (from == FRAME_POINTER_REGNUM
9470 && to == HARD_FRAME_POINTER_REGNUM)
9471 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9472 else
9474 gcc_assert (to == STACK_POINTER_REGNUM);
9476 if (from == ARG_POINTER_REGNUM)
9477 return frame.stack_pointer_offset;
9479 gcc_assert (from == FRAME_POINTER_REGNUM);
9480 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9484 /* In a dynamically-aligned function, we can't know the offset from
9485 stack pointer to frame pointer, so we must ensure that setjmp
9486 eliminates fp against the hard fp (%ebp) rather than trying to
9487 index from %esp up to the top of the frame across a gap that is
9488 of unknown (at compile-time) size. */
9489 static rtx
9490 ix86_builtin_setjmp_frame_value (void)
9492 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9495 /* When using -fsplit-stack, the allocation routines set a field in
9496 the TCB to the bottom of the stack plus this much space, measured
9497 in bytes. */
9499 #define SPLIT_STACK_AVAILABLE 256
9501 /* Fill the ix86_frame structure with information about the frame of the function currently being compiled. */
9503 static void
9504 ix86_compute_frame_layout (struct ix86_frame *frame)
9506 unsigned HOST_WIDE_INT stack_alignment_needed;
9507 HOST_WIDE_INT offset;
9508 unsigned HOST_WIDE_INT preferred_alignment;
9509 HOST_WIDE_INT size = get_frame_size ();
9510 HOST_WIDE_INT to_allocate;
9512 frame->nregs = ix86_nsaved_regs ();
9513 frame->nsseregs = ix86_nsaved_sseregs ();
9515 /* The 64-bit MS ABI seems to require stack alignment to always be 16, except
9516 for function prologues and leaf functions. */
9517 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9518 && (!crtl->is_leaf || cfun->calls_alloca != 0
9519 || ix86_current_function_calls_tls_descriptor))
9521 crtl->preferred_stack_boundary = 128;
9522 crtl->stack_alignment_needed = 128;
9524 /* preferred_stack_boundary is never updated for calls
9525 expanded from a TLS descriptor.  Update it here.  We don't update it in
9526 the expand stage because, according to the comments before
9527 ix86_current_function_calls_tls_descriptor, TLS calls may be optimized
9528 away. */
9529 else if (ix86_current_function_calls_tls_descriptor
9530 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
9532 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
9533 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
9534 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
9537 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9538 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9540 gcc_assert (!size || stack_alignment_needed);
9541 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9542 gcc_assert (preferred_alignment <= stack_alignment_needed);
9544 /* For SEH we have to limit the amount of code movement into the prologue.
9545 At present we do this via a BLOCKAGE, at which point there's very little
9546 scheduling that can be done, which means that there's very little point
9547 in doing anything except PUSHs. */
9548 if (TARGET_SEH)
9549 cfun->machine->use_fast_prologue_epilogue = false;
9551 /* During reload iteration the number of registers saved can change.
9552 Recompute the value as needed.  Do not recompute when the number of registers
9553 didn't change, as reload makes multiple calls to the function and does not
9554 expect the decision to change within a single iteration. */
9555 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9556 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9558 int count = frame->nregs;
9559 struct cgraph_node *node = cgraph_get_node (current_function_decl);
9561 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9563 /* The fast prologue uses move instead of push to save registers. This
9564 is significantly longer, but also executes faster as modern hardware
9565 can execute the moves in parallel, but can't do that for push/pop.
9567 Be careful about choosing which prologue to emit: when the function takes
9568 many instructions to execute we may use the slow version, as well as when
9569 the function is known to be outside a hot spot (this is known with
9570 feedback only).  Weight the size of the function by the number of registers
9571 to save, as it is cheap to use one or two push instructions but very
9572 slow to use many of them. */
9573 if (count)
9574 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9575 if (node->frequency < NODE_FREQUENCY_NORMAL
9576 || (flag_branch_probabilities
9577 && node->frequency < NODE_FREQUENCY_HOT))
9578 cfun->machine->use_fast_prologue_epilogue = false;
9579 else
9580 cfun->machine->use_fast_prologue_epilogue
9581 = !expensive_function_p (count);
9584 frame->save_regs_using_mov
9585 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9586 /* If static stack checking is enabled and done with probes,
9587 the registers need to be saved before allocating the frame. */
9588 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9590 /* Skip return address. */
9591 offset = UNITS_PER_WORD;
9593 /* Skip pushed static chain. */
9594 if (ix86_static_chain_on_stack)
9595 offset += UNITS_PER_WORD;
9597 /* Skip saved base pointer. */
9598 if (frame_pointer_needed)
9599 offset += UNITS_PER_WORD;
9600 frame->hfp_save_offset = offset;
9602 /* The traditional frame pointer location is at the top of the frame. */
9603 frame->hard_frame_pointer_offset = offset;
9605 /* Register save area */
9606 offset += frame->nregs * UNITS_PER_WORD;
9607 frame->reg_save_offset = offset;
9609 /* On SEH target, registers are pushed just before the frame pointer
9610 location. */
9611 if (TARGET_SEH)
9612 frame->hard_frame_pointer_offset = offset;
9614 /* Align and set SSE register save area. */
9615 if (frame->nsseregs)
9617 /* The only ABI that has saved SSE registers (Win64) also has a
9618 16-byte aligned default stack, and thus we don't need to be
9619 within the re-aligned local stack frame to save them. */
9620 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9621 offset = (offset + 16 - 1) & -16;
9622 offset += frame->nsseregs * 16;
9624 frame->sse_reg_save_offset = offset;
9626 /* The re-aligned stack starts here. Values before this point are not
9627 directly comparable with values below this point. In order to make
9628 sure that no value happens to be the same before and after, force
9629 the alignment computation below to add a non-zero value. */
9630 if (stack_realign_fp)
9631 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9633 /* Va-arg area */
9634 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9635 offset += frame->va_arg_size;
9637 /* Align start of frame for local function. */
9638 if (stack_realign_fp
9639 || offset != frame->sse_reg_save_offset
9640 || size != 0
9641 || !crtl->is_leaf
9642 || cfun->calls_alloca
9643 || ix86_current_function_calls_tls_descriptor)
9644 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9646 /* Frame pointer points here. */
9647 frame->frame_pointer_offset = offset;
9649 offset += size;
9651 /* Add the outgoing arguments area.  It can be skipped if we eliminated
9652 all the function calls as dead code.
9653 Skipping is however impossible when the function calls alloca.  The alloca
9654 expander assumes that the last crtl->outgoing_args_size bytes
9655 of the stack frame are unused. */
9656 if (ACCUMULATE_OUTGOING_ARGS
9657 && (!crtl->is_leaf || cfun->calls_alloca
9658 || ix86_current_function_calls_tls_descriptor))
9660 offset += crtl->outgoing_args_size;
9661 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9663 else
9664 frame->outgoing_arguments_size = 0;
9666 /* Align stack boundary. Only needed if we're calling another function
9667 or using alloca. */
9668 if (!crtl->is_leaf || cfun->calls_alloca
9669 || ix86_current_function_calls_tls_descriptor)
9670 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9672 /* We've reached end of stack frame. */
9673 frame->stack_pointer_offset = offset;
9675 /* Size prologue needs to allocate. */
9676 to_allocate = offset - frame->sse_reg_save_offset;
9678 if ((!to_allocate && frame->nregs <= 1)
9679 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9680 frame->save_regs_using_mov = false;
9682 if (ix86_using_red_zone ()
9683 && crtl->sp_is_unchanging
9684 && crtl->is_leaf
9685 && !ix86_current_function_calls_tls_descriptor)
9687 frame->red_zone_size = to_allocate;
9688 if (frame->save_regs_using_mov)
9689 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9690 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9691 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9693 else
9694 frame->red_zone_size = 0;
9695 frame->stack_pointer_offset -= frame->red_zone_size;
9697 /* The SEH frame pointer location is near the bottom of the frame.
9698 This is enforced by the fact that the difference between the
9699 stack pointer and the frame pointer is limited to 240 bytes in
9700 the unwind data structure. */
9701 if (TARGET_SEH)
9703 HOST_WIDE_INT diff;
9705 /* If we can leave the frame pointer where it is, do so. Also, returns
9706 the establisher frame for __builtin_frame_address (0). */
9707 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9708 if (diff <= SEH_MAX_FRAME_SIZE
9709 && (diff > 240 || (diff & 15) != 0)
9710 && !crtl->accesses_prior_frames)
9712 /* Ideally we'd determine what portion of the local stack frame
9713 (within the constraint of the lowest 240) is most heavily used.
9714 But without that complication, simply bias the frame pointer
9715 by 128 bytes so as to maximize the amount of the local stack
9716 frame that is addressable with 8-bit offsets. */
9717 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
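/* Rough sketch of the layout computed above, from higher to lower
   addresses (an orientation aid added here, not authoritative; each
   arrow marks the offset value reached after the region above it):

     return address                         offset = UNITS_PER_WORD
     [pushed static chain]
     [saved frame pointer]                  -> hfp_save_offset
                                               (normally also hard_frame_pointer_offset)
     GP register save area                  -> reg_save_offset
     SSE register save area (Win64 only)    -> sse_reg_save_offset
     va_arg register save area
                                            -> frame_pointer_offset
     local variables (get_frame_size ())
     outgoing arguments area
                                            -> stack_pointer_offset
                                               (reduced by the red zone, if any)  */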
9722 /* This is semi-inlined memory_address_length, but simplified
9723 since we know that we're always dealing with reg+offset, and
9724 to avoid having to create and discard all that rtl. */
9726 static inline int
9727 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9729 int len = 4;
9731 if (offset == 0)
9733 /* EBP and R13 cannot be encoded without an offset. */
9734 len = (regno == BP_REG || regno == R13_REG);
9736 else if (IN_RANGE (offset, -128, 127))
9737 len = 1;
9739 /* ESP and R12 must be encoded with a SIB byte. */
9740 if (regno == SP_REG || regno == R12_REG)
9741 len++;
9743 return len;
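/* Illustration (added comment, not part of the original code): with the
   rules above, the length returned for a few base/offset pairs is

     (%eax)            -> 0   (no displacement byte needed)
     (%ebp), (%r13)    -> 1   (these bases always need a disp8)
     16(%eax)          -> 1   (offset fits in a signed byte)
     1024(%eax)        -> 4   (needs a disp32)
     16(%esp), (%r12)  -> one extra byte for the mandatory SIB byte.  */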
9746 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9747 The valid base registers are taken from CFUN->MACHINE->FS. */
9749 static rtx
9750 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9752 const struct machine_function *m = cfun->machine;
9753 rtx base_reg = NULL;
9754 HOST_WIDE_INT base_offset = 0;
9756 if (m->use_fast_prologue_epilogue)
9758 /* Choose the base register most likely to allow the most scheduling
9759 opportunities. Generally FP is valid throughout the function,
9760 while DRAP must be reloaded within the epilogue. But choose either
9761 over the SP due to increased encoding size. */
9763 if (m->fs.fp_valid)
9765 base_reg = hard_frame_pointer_rtx;
9766 base_offset = m->fs.fp_offset - cfa_offset;
9768 else if (m->fs.drap_valid)
9770 base_reg = crtl->drap_reg;
9771 base_offset = 0 - cfa_offset;
9773 else if (m->fs.sp_valid)
9775 base_reg = stack_pointer_rtx;
9776 base_offset = m->fs.sp_offset - cfa_offset;
9779 else
9781 HOST_WIDE_INT toffset;
9782 int len = 16, tlen;
9784 /* Choose the base register with the smallest address encoding.
9785 With a tie, choose FP > DRAP > SP. */
9786 if (m->fs.sp_valid)
9788 base_reg = stack_pointer_rtx;
9789 base_offset = m->fs.sp_offset - cfa_offset;
9790 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9792 if (m->fs.drap_valid)
9794 toffset = 0 - cfa_offset;
9795 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9796 if (tlen <= len)
9798 base_reg = crtl->drap_reg;
9799 base_offset = toffset;
9800 len = tlen;
9803 if (m->fs.fp_valid)
9805 toffset = m->fs.fp_offset - cfa_offset;
9806 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9807 if (tlen <= len)
9809 base_reg = hard_frame_pointer_rtx;
9810 base_offset = toffset;
9811 len = tlen;
9815 gcc_assert (base_reg != NULL);
9817 return plus_constant (Pmode, base_reg, base_offset);
9820 /* Emit code to save registers in the prologue. */
9822 static void
9823 ix86_emit_save_regs (void)
9825 unsigned int regno;
9826 rtx insn;
9828 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9829 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9831 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9832 RTX_FRAME_RELATED_P (insn) = 1;
9836 /* Emit a single register save at CFA - CFA_OFFSET. */
9838 static void
9839 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9840 HOST_WIDE_INT cfa_offset)
9842 struct machine_function *m = cfun->machine;
9843 rtx reg = gen_rtx_REG (mode, regno);
9844 rtx mem, addr, base, insn;
9846 addr = choose_baseaddr (cfa_offset);
9847 mem = gen_frame_mem (mode, addr);
9849 /* For SSE saves, we need to indicate the 128-bit alignment. */
9850 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9852 insn = emit_move_insn (mem, reg);
9853 RTX_FRAME_RELATED_P (insn) = 1;
9855 base = addr;
9856 if (GET_CODE (base) == PLUS)
9857 base = XEXP (base, 0);
9858 gcc_checking_assert (REG_P (base));
9860 /* When saving registers into a re-aligned local stack frame, avoid
9861 any tricky guessing by dwarf2out. */
9862 if (m->fs.realigned)
9864 gcc_checking_assert (stack_realign_drap);
9866 if (regno == REGNO (crtl->drap_reg))
9868 /* A bit of a hack. We force the DRAP register to be saved in
9869 the re-aligned stack frame, which provides us with a copy
9870 of the CFA that will last past the prologue. Install it. */
9871 gcc_checking_assert (cfun->machine->fs.fp_valid);
9872 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9873 cfun->machine->fs.fp_offset - cfa_offset);
9874 mem = gen_rtx_MEM (mode, addr);
9875 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9877 else
9879 /* The frame pointer is a stable reference within the
9880 aligned frame. Use it. */
9881 gcc_checking_assert (cfun->machine->fs.fp_valid);
9882 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9883 cfun->machine->fs.fp_offset - cfa_offset);
9884 mem = gen_rtx_MEM (mode, addr);
9885 add_reg_note (insn, REG_CFA_EXPRESSION,
9886 gen_rtx_SET (VOIDmode, mem, reg));
9890 /* The memory may not be relative to the current CFA register,
9891 which means that we may need to generate a new pattern for
9892 use by the unwind info. */
9893 else if (base != m->fs.cfa_reg)
9895 addr = plus_constant (Pmode, m->fs.cfa_reg,
9896 m->fs.cfa_offset - cfa_offset);
9897 mem = gen_rtx_MEM (mode, addr);
9898 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9902 /* Emit code to save registers using MOV insns.
9903 First register is stored at CFA - CFA_OFFSET. */
9904 static void
9905 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9907 unsigned int regno;
9909 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9910 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9912 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
9913 cfa_offset -= UNITS_PER_WORD;
9917 /* Emit code to save SSE registers using MOV insns.
9918 First register is stored at CFA - CFA_OFFSET. */
9919 static void
9920 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9922 unsigned int regno;
9924 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9925 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9927 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9928 cfa_offset -= 16;
9932 static GTY(()) rtx queued_cfa_restores;
9934 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9935 manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
9936 Don't add the note if the previously saved value will be left untouched
9937 within the stack red zone until return, as unwinders can find the same value
9938 in the register and on the stack. */
9940 static void
9941 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9943 if (!crtl->shrink_wrapped
9944 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9945 return;
9947 if (insn)
9949 add_reg_note (insn, REG_CFA_RESTORE, reg);
9950 RTX_FRAME_RELATED_P (insn) = 1;
9952 else
9953 queued_cfa_restores
9954 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9957 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9959 static void
9960 ix86_add_queued_cfa_restore_notes (rtx insn)
9962 rtx last;
9963 if (!queued_cfa_restores)
9964 return;
9965 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9967 XEXP (last, 1) = REG_NOTES (insn);
9968 REG_NOTES (insn) = queued_cfa_restores;
9969 queued_cfa_restores = NULL_RTX;
9970 RTX_FRAME_RELATED_P (insn) = 1;
9973 /* Expand prologue or epilogue stack adjustment.
9974 The pattern exists to put a dependency on all ebp-based memory accesses.
9975 STYLE should be negative if instructions should be marked as frame related,
9976 zero if the %r11 register is live and cannot be freely used, and positive
9977 otherwise. */
9979 static void
9980 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9981 int style, bool set_cfa)
9983 struct machine_function *m = cfun->machine;
9984 rtx insn;
9985 bool add_frame_related_expr = false;
9987 if (Pmode == SImode)
9988 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9989 else if (x86_64_immediate_operand (offset, DImode))
9990 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9991 else
9993 rtx tmp;
9994 /* r11 is used by indirect sibcall return as well, set before the
9995 epilogue and used after the epilogue. */
9996 if (style)
9997 tmp = gen_rtx_REG (DImode, R11_REG);
9998 else
10000 gcc_assert (src != hard_frame_pointer_rtx
10001 && dest != hard_frame_pointer_rtx);
10002 tmp = hard_frame_pointer_rtx;
10004 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10005 if (style < 0)
10006 add_frame_related_expr = true;
10008 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10011 insn = emit_insn (insn);
10012 if (style >= 0)
10013 ix86_add_queued_cfa_restore_notes (insn);
10015 if (set_cfa)
10017 rtx r;
10019 gcc_assert (m->fs.cfa_reg == src);
10020 m->fs.cfa_offset += INTVAL (offset);
10021 m->fs.cfa_reg = dest;
10023 r = gen_rtx_PLUS (Pmode, src, offset);
10024 r = gen_rtx_SET (VOIDmode, dest, r);
10025 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10026 RTX_FRAME_RELATED_P (insn) = 1;
10028 else if (style < 0)
10030 RTX_FRAME_RELATED_P (insn) = 1;
10031 if (add_frame_related_expr)
10033 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10034 r = gen_rtx_SET (VOIDmode, dest, r);
10035 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10039 if (dest == stack_pointer_rtx)
10041 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10042 bool valid = m->fs.sp_valid;
10044 if (src == hard_frame_pointer_rtx)
10046 valid = m->fs.fp_valid;
10047 ooffset = m->fs.fp_offset;
10049 else if (src == crtl->drap_reg)
10051 valid = m->fs.drap_valid;
10052 ooffset = 0;
10054 else
10056 /* Else there are two possibilities: SP itself, which we set
10057 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10058 taken care of by hand along the eh_return path. */
10059 gcc_checking_assert (src == stack_pointer_rtx
10060 || offset == const0_rtx);
10063 m->fs.sp_offset = ooffset - INTVAL (offset);
10064 m->fs.sp_valid = valid;
10068 /* Find an available register to be used as the dynamic realign argument
10069 pointer register.  Such a register will be written in the prologue and
10070 used at the beginning of the body, so it must not be
10071 1. a parameter passing register.
10072 2. the GOT pointer.
10073 We reuse the static-chain register if it is available.  Otherwise, we
10074 use DI for i386 and R13 for x86-64.  We chose R13 since it has a
10075 shorter encoding.
10077 Return: the regno of the chosen register. */
10079 static unsigned int
10080 find_drap_reg (void)
10082 tree decl = cfun->decl;
10084 if (TARGET_64BIT)
10086 /* Use R13 for nested functions or functions that need a static chain.
10087 Since a function with a tail call may use any caller-saved
10088 register in the epilogue, DRAP must not use a caller-saved
10089 register in that case. */
10090 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10091 return R13_REG;
10093 return R10_REG;
10095 else
10097 /* Use DI for nested functions or functions that need a static chain.
10098 Since a function with a tail call may use any caller-saved
10099 register in the epilogue, DRAP must not use a caller-saved
10100 register in that case. */
10101 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10102 return DI_REG;
10104 /* Reuse static chain register if it isn't used for parameter
10105 passing. */
10106 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10108 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10109 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10110 return CX_REG;
10112 return DI_REG;
10116 /* Return minimum incoming stack alignment. */
10118 static unsigned int
10119 ix86_minimum_incoming_stack_boundary (bool sibcall)
10121 unsigned int incoming_stack_boundary;
10123 /* Prefer the one specified at command line. */
10124 if (ix86_user_incoming_stack_boundary)
10125 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10126 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10127 if -mstackrealign is used, this isn't a sibcall check, and the
10128 estimated stack alignment is 128 bits. */
10129 else if (!sibcall
10130 && !TARGET_64BIT
10131 && ix86_force_align_arg_pointer
10132 && crtl->stack_alignment_estimated == 128)
10133 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10134 else
10135 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10137 /* Incoming stack alignment can be changed on individual functions
10138 via force_align_arg_pointer attribute. We use the smallest
10139 incoming stack boundary. */
10140 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10141 && lookup_attribute (ix86_force_align_arg_pointer_string,
10142 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10143 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10145 /* The incoming stack frame has to be aligned at least at
10146 parm_stack_boundary. */
10147 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10148 incoming_stack_boundary = crtl->parm_stack_boundary;
10150 /* The stack at the entry of main is aligned by the runtime.  We use the
10151 smallest incoming stack boundary. */
10152 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10153 && DECL_NAME (current_function_decl)
10154 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10155 && DECL_FILE_SCOPE_P (current_function_decl))
10156 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10158 return incoming_stack_boundary;
10161 /* Update incoming stack boundary and estimated stack alignment. */
10163 static void
10164 ix86_update_stack_boundary (void)
10166 ix86_incoming_stack_boundary
10167 = ix86_minimum_incoming_stack_boundary (false);
10169 /* x86_64 varargs need 16-byte stack alignment for the register save
10170 area. */
10171 if (TARGET_64BIT
10172 && cfun->stdarg
10173 && crtl->stack_alignment_estimated < 128)
10174 crtl->stack_alignment_estimated = 128;
10177 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10178 needed or an rtx for DRAP otherwise. */
10180 static rtx
10181 ix86_get_drap_rtx (void)
10183 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10184 crtl->need_drap = true;
10186 if (stack_realign_drap)
10188 /* Assign DRAP to vDRAP and return vDRAP. */
10189 unsigned int regno = find_drap_reg ();
10190 rtx drap_vreg;
10191 rtx arg_ptr;
10192 rtx seq, insn;
10194 arg_ptr = gen_rtx_REG (Pmode, regno);
10195 crtl->drap_reg = arg_ptr;
10197 start_sequence ();
10198 drap_vreg = copy_to_reg (arg_ptr);
10199 seq = get_insns ();
10200 end_sequence ();
10202 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10203 if (!optimize)
10205 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10206 RTX_FRAME_RELATED_P (insn) = 1;
10208 return drap_vreg;
10210 else
10211 return NULL;
10214 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10216 static rtx
10217 ix86_internal_arg_pointer (void)
10219 return virtual_incoming_args_rtx;
10222 struct scratch_reg {
10223 rtx reg;
10224 bool saved;
10227 /* Return a short-lived scratch register for use on function entry.
10228 In 32-bit mode, it is valid only after the registers are saved
10229 in the prologue. This register must be released by means of
10230 release_scratch_register_on_entry once it is dead. */
10232 static void
10233 get_scratch_register_on_entry (struct scratch_reg *sr)
10235 int regno;
10237 sr->saved = false;
10239 if (TARGET_64BIT)
10241 /* We always use R11 in 64-bit mode. */
10242 regno = R11_REG;
10244 else
10246 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10247 bool fastcall_p
10248 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10249 bool thiscall_p
10250 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10251 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10252 int regparm = ix86_function_regparm (fntype, decl);
10253 int drap_regno
10254 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10256 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10257 for the static chain register. */
10258 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10259 && drap_regno != AX_REG)
10260 regno = AX_REG;
10261 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10262 for the static chain register. */
10263 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10264 regno = AX_REG;
10265 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10266 regno = DX_REG;
10267 /* ecx is the static chain register. */
10268 else if (regparm < 3 && !fastcall_p && !thiscall_p
10269 && !static_chain_p
10270 && drap_regno != CX_REG)
10271 regno = CX_REG;
10272 else if (ix86_save_reg (BX_REG, true))
10273 regno = BX_REG;
10274 /* esi is the static chain register. */
10275 else if (!(regparm == 3 && static_chain_p)
10276 && ix86_save_reg (SI_REG, true))
10277 regno = SI_REG;
10278 else if (ix86_save_reg (DI_REG, true))
10279 regno = DI_REG;
10280 else
10282 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10283 sr->saved = true;
10287 sr->reg = gen_rtx_REG (Pmode, regno);
10288 if (sr->saved)
10290 rtx insn = emit_insn (gen_push (sr->reg));
10291 RTX_FRAME_RELATED_P (insn) = 1;
10295 /* Release a scratch register obtained from the preceding function. */
10297 static void
10298 release_scratch_register_on_entry (struct scratch_reg *sr)
10300 if (sr->saved)
10302 struct machine_function *m = cfun->machine;
10303 rtx x, insn = emit_insn (gen_pop (sr->reg));
10305 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10306 RTX_FRAME_RELATED_P (insn) = 1;
10307 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10308 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10309 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10310 m->fs.sp_offset -= UNITS_PER_WORD;
10314 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10316 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10318 static void
10319 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10321 /* We skip the probe for the first interval + a small dope of 4 words and
10322 probe that many bytes past the specified size to maintain a protection
10323 area at the bottom of the stack. */
10324 const int dope = 4 * UNITS_PER_WORD;
10325 rtx size_rtx = GEN_INT (size), last;
10327 /* See if we have a constant small number of probes to generate. If so,
10328 that's the easy case. The run-time loop is made up of 11 insns in the
10329 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10330 for n # of intervals. */
10331 if (size <= 5 * PROBE_INTERVAL)
10333 HOST_WIDE_INT i, adjust;
10334 bool first_probe = true;
10336 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10337 values of N from 1 until it exceeds SIZE. If only one probe is
10338 needed, this will not generate any code. Then adjust and probe
10339 to PROBE_INTERVAL + SIZE. */
10340 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10342 if (first_probe)
10344 adjust = 2 * PROBE_INTERVAL + dope;
10345 first_probe = false;
10347 else
10348 adjust = PROBE_INTERVAL;
10350 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10351 plus_constant (Pmode, stack_pointer_rtx,
10352 -adjust)));
10353 emit_stack_probe (stack_pointer_rtx);
10356 if (first_probe)
10357 adjust = size + PROBE_INTERVAL + dope;
10358 else
10359 adjust = size + PROBE_INTERVAL - i;
10361 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10362 plus_constant (Pmode, stack_pointer_rtx,
10363 -adjust)));
10364 emit_stack_probe (stack_pointer_rtx);
10366 /* Adjust back to account for the additional first interval. */
10367 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10368 plus_constant (Pmode, stack_pointer_rtx,
10369 PROBE_INTERVAL + dope)));
10372 /* Otherwise, do the same as above, but in a loop. Note that we must be
10373 extra careful with variables wrapping around because we might be at
10374 the very top (or the very bottom) of the address space and we have
10375 to be able to handle this case properly; in particular, we use an
10376 equality test for the loop condition. */
10377 else
10379 HOST_WIDE_INT rounded_size;
10380 struct scratch_reg sr;
10382 get_scratch_register_on_entry (&sr);
10385 /* Step 1: round SIZE to the previous multiple of the interval. */
10387 rounded_size = size & -PROBE_INTERVAL;
10390 /* Step 2: compute initial and final value of the loop counter. */
10392 /* SP = SP_0 + PROBE_INTERVAL. */
10393 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10394 plus_constant (Pmode, stack_pointer_rtx,
10395 - (PROBE_INTERVAL + dope))));
10397 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10398 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10399 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10400 gen_rtx_PLUS (Pmode, sr.reg,
10401 stack_pointer_rtx)));
10404 /* Step 3: the loop
10406 while (SP != LAST_ADDR)
10408 SP = SP + PROBE_INTERVAL
10409 probe at SP
10412 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10413 values of N from 1 until it is equal to ROUNDED_SIZE. */
10415 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10418 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10419 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10421 if (size != rounded_size)
10423 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10424 plus_constant (Pmode, stack_pointer_rtx,
10425 rounded_size - size)));
10426 emit_stack_probe (stack_pointer_rtx);
10429 /* Adjust back to account for the additional first interval. */
10430 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10431 plus_constant (Pmode, stack_pointer_rtx,
10432 PROBE_INTERVAL + dope)));
10434 release_scratch_register_on_entry (&sr);
10437 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10439 /* Even if the stack pointer isn't the CFA register, we need to correctly
10440 describe the adjustments made to it, in particular differentiate the
10441 frame-related ones from the frame-unrelated ones. */
10442 if (size > 0)
10444 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10445 XVECEXP (expr, 0, 0)
10446 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10447 plus_constant (Pmode, stack_pointer_rtx, -size));
10448 XVECEXP (expr, 0, 1)
10449 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10450 plus_constant (Pmode, stack_pointer_rtx,
10451 PROBE_INTERVAL + dope + size));
10452 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10453 RTX_FRAME_RELATED_P (last) = 1;
10455 cfun->machine->fs.sp_offset += size;
10458 /* Make sure nothing is scheduled before we are done. */
10459 emit_insn (gen_blockage ());
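/* Worked example (illustration only; assumes the default 4096-byte
   PROBE_INTERVAL, i.e. STACK_CHECK_PROBE_INTERVAL_EXP == 12): for
   size == 10000 the unrolled path is taken since 10000 <= 5 * 4096.
   It emits three adjust+probe pairs totalling size + PROBE_INTERVAL + dope
   bytes of downward adjustment, then re-adjusts upward by
   PROBE_INTERVAL + dope, so the net effect is exactly a 10000-byte
   allocation.  Sizes above 5 * PROBE_INTERVAL use the runtime loop with
   rounded_size = size & -PROBE_INTERVAL instead.  */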
10462 /* Adjust the stack pointer up to REG while probing it. */
10464 const char *
10465 output_adjust_stack_and_probe (rtx reg)
10467 static int labelno = 0;
10468 char loop_lab[32], end_lab[32];
10469 rtx xops[2];
10471 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10472 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10474 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10476 /* Jump to END_LAB if SP == LAST_ADDR. */
10477 xops[0] = stack_pointer_rtx;
10478 xops[1] = reg;
10479 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10480 fputs ("\tje\t", asm_out_file);
10481 assemble_name_raw (asm_out_file, end_lab);
10482 fputc ('\n', asm_out_file);
10484 /* SP = SP + PROBE_INTERVAL. */
10485 xops[1] = GEN_INT (PROBE_INTERVAL);
10486 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10488 /* Probe at SP. */
10489 xops[1] = const0_rtx;
10490 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10492 fprintf (asm_out_file, "\tjmp\t");
10493 assemble_name_raw (asm_out_file, loop_lab);
10494 fputc ('\n', asm_out_file);
10496 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10498 return "";
10501 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10502 inclusive. These are offsets from the current stack pointer. */
10504 static void
10505 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10507 /* See if we have a constant small number of probes to generate. If so,
10508 that's the easy case. The run-time loop is made up of 7 insns in the
10509 generic case, while the compile-time loop is made up of n insns for n
10510 intervals. */
10511 if (size <= 7 * PROBE_INTERVAL)
10513 HOST_WIDE_INT i;
10515 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10516 it exceeds SIZE. If only one probe is needed, this will not
10517 generate any code. Then probe at FIRST + SIZE. */
10518 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10519 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10520 -(first + i)));
10522 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10523 -(first + size)));
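/* For example (assuming PROBE_INTERVAL is 4096): FIRST == 0 and SIZE == 10000
   emit probes at sp - 4096, sp - 8192 and sp - 10000, while FIRST == 0 and
   SIZE == 4096 emit only the single probe at sp - 4096.  */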
10526 /* Otherwise, do the same as above, but in a loop. Note that we must be
10527 extra careful with variables wrapping around because we might be at
10528 the very top (or the very bottom) of the address space and we have
10529 to be able to handle this case properly; in particular, we use an
10530 equality test for the loop condition. */
10531 else
10533 HOST_WIDE_INT rounded_size, last;
10534 struct scratch_reg sr;
10536 get_scratch_register_on_entry (&sr);
10539 /* Step 1: round SIZE to the previous multiple of the interval. */
10541 rounded_size = size & -PROBE_INTERVAL;
10544 /* Step 2: compute initial and final value of the loop counter. */
10546 /* TEST_OFFSET = FIRST. */
10547 emit_move_insn (sr.reg, GEN_INT (-first));
10549 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10550 last = first + rounded_size;
10553 /* Step 3: the loop
10555 while (TEST_ADDR != LAST_ADDR)
10557 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10558 probe at TEST_ADDR
10561 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10562 until it is equal to ROUNDED_SIZE. */
10564 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10567 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10568 that SIZE is equal to ROUNDED_SIZE. */
10570 if (size != rounded_size)
10571 emit_stack_probe (plus_constant (Pmode,
10572 gen_rtx_PLUS (Pmode,
10573 stack_pointer_rtx,
10574 sr.reg),
10575 rounded_size - size));
10577 release_scratch_register_on_entry (&sr);
10580 /* Make sure nothing is scheduled before we are done. */
10581 emit_insn (gen_blockage ());
10584 /* Probe a range of stack addresses from REG to END, inclusive. These are
10585 offsets from the current stack pointer. */
10587 const char *
10588 output_probe_stack_range (rtx reg, rtx end)
10590 static int labelno = 0;
10591 char loop_lab[32], end_lab[32];
10592 rtx xops[3];
10594 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10595 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10597 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10599 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10600 xops[0] = reg;
10601 xops[1] = end;
10602 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10603 fputs ("\tje\t", asm_out_file);
10604 assemble_name_raw (asm_out_file, end_lab);
10605 fputc ('\n', asm_out_file);
10607 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10608 xops[1] = GEN_INT (PROBE_INTERVAL);
10609 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10611 /* Probe at TEST_ADDR. */
10612 xops[0] = stack_pointer_rtx;
10613 xops[1] = reg;
10614 xops[2] = const0_rtx;
10615 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10617 fprintf (asm_out_file, "\tjmp\t");
10618 assemble_name_raw (asm_out_file, loop_lab);
10619 fputc ('\n', asm_out_file);
10621 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10623 return "";
10626 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10627 to be generated in correct form. */
10628 static void
10629 ix86_finalize_stack_realign_flags (void)
10631 /* Check if stack realignment is really needed after reload, and
10632 store the result in cfun. */
10633 unsigned int incoming_stack_boundary
10634 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10635 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10636 unsigned int stack_realign = (incoming_stack_boundary
10637 < (crtl->is_leaf
10638 ? crtl->max_used_stack_slot_alignment
10639 : crtl->stack_alignment_needed));
10641 if (crtl->stack_realign_finalized)
10643 /* After stack_realign_needed is finalized, we can no longer
10644 change it. */
10645 gcc_assert (crtl->stack_realign_needed == stack_realign);
10646 return;
10649 /* If the only reason for frame_pointer_needed is that we conservatively
10650 assumed stack realignment might be needed, but in the end nothing that
10651 needed the stack alignment had been spilled, clear frame_pointer_needed
10652 and say we don't need stack realignment. */
10653 if (stack_realign
10654 && frame_pointer_needed
10655 && crtl->is_leaf
10656 && flag_omit_frame_pointer
10657 && crtl->sp_is_unchanging
10658 && !ix86_current_function_calls_tls_descriptor
10659 && !crtl->accesses_prior_frames
10660 && !cfun->calls_alloca
10661 && !crtl->calls_eh_return
10662 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10663 && !ix86_frame_pointer_required ()
10664 && get_frame_size () == 0
10665 && ix86_nsaved_sseregs () == 0
10666 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10668 HARD_REG_SET set_up_by_prologue, prologue_used;
10669 basic_block bb;
10671 CLEAR_HARD_REG_SET (prologue_used);
10672 CLEAR_HARD_REG_SET (set_up_by_prologue);
10673 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10674 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10675 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10676 HARD_FRAME_POINTER_REGNUM);
10677 FOR_EACH_BB_FN (bb, cfun)
10679 rtx insn;
10680 FOR_BB_INSNS (bb, insn)
10681 if (NONDEBUG_INSN_P (insn)
10682 && requires_stack_frame_p (insn, prologue_used,
10683 set_up_by_prologue))
10685 crtl->stack_realign_needed = stack_realign;
10686 crtl->stack_realign_finalized = true;
10687 return;
10691 /* If drap has been set, but it actually isn't live at the start
10692 of the function, there is no reason to set it up. */
10693 if (crtl->drap_reg)
10695 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10696 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10698 crtl->drap_reg = NULL_RTX;
10699 crtl->need_drap = false;
10702 else
10703 cfun->machine->no_drap_save_restore = true;
10705 frame_pointer_needed = false;
10706 stack_realign = false;
10707 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10708 crtl->stack_alignment_needed = incoming_stack_boundary;
10709 crtl->stack_alignment_estimated = incoming_stack_boundary;
10710 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10711 crtl->preferred_stack_boundary = incoming_stack_boundary;
10712 df_finish_pass (true);
10713 df_scan_alloc (NULL);
10714 df_scan_blocks ();
10715 df_compute_regs_ever_live (true);
10716 df_analyze ();
10719 crtl->stack_realign_needed = stack_realign;
10720 crtl->stack_realign_finalized = true;
10723 /* Expand the prologue into a bunch of separate insns. */
10725 void
10726 ix86_expand_prologue (void)
10728 struct machine_function *m = cfun->machine;
10729 rtx insn, t;
10730 bool pic_reg_used;
10731 struct ix86_frame frame;
10732 HOST_WIDE_INT allocate;
10733 bool int_registers_saved;
10734 bool sse_registers_saved;
10736 ix86_finalize_stack_realign_flags ();
10738 /* DRAP should not coexist with stack_realign_fp */
10739 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10741 memset (&m->fs, 0, sizeof (m->fs));
10743 /* Initialize CFA state for before the prologue. */
10744 m->fs.cfa_reg = stack_pointer_rtx;
10745 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10747 /* Track SP offset to the CFA. We continue tracking this after we've
10748 swapped the CFA register away from SP. In the case of re-alignment
10749 this is fudged; we're interested in offsets within the local frame. */
10750 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10751 m->fs.sp_valid = true;
10753 ix86_compute_frame_layout (&frame);
10755 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10757 /* We should have already generated an error for any use of
10758 ms_hook on a nested function. */
10759 gcc_checking_assert (!ix86_static_chain_on_stack);
10761 /* Check if profiling is active and we shall use the profiling-before-prologue
10762 variant. If so, sorry. */
10763 if (crtl->profile && flag_fentry != 0)
10764 sorry ("ms_hook_prologue attribute isn%'t compatible "
10765 "with -mfentry for 32-bit");
10767 /* In ix86_asm_output_function_label we emitted:
10768 8b ff movl.s %edi,%edi
10769 55 push %ebp
10770 8b ec movl.s %esp,%ebp
10772 This matches the hookable function prologue in Win32 API
10773 functions in Microsoft Windows XP Service Pack 2 and newer.
10774 Wine uses this to enable Windows apps to hook the Win32 API
10775 functions provided by Wine.
10777 What that means is that we've already set up the frame pointer. */
10779 if (frame_pointer_needed
10780 && !(crtl->drap_reg && crtl->stack_realign_needed))
10782 rtx push, mov;
10784 /* We've decided to use the frame pointer already set up.
10785 Describe this to the unwinder by pretending that both
10786 push and mov insns happen right here.
10788 Putting the unwind info here at the end of the ms_hook
10789 is done so that we can make absolutely certain we get
10790 the required byte sequence at the start of the function,
10791 rather than relying on an assembler that can produce
10792 the exact encoding required.
10794 However it does mean (in the unpatched case) that we have
10795 a 1 insn window where the asynchronous unwind info is
10796 incorrect. However, if we placed the unwind info at
10797 its correct location we would have incorrect unwind info
10798 in the patched case. Which is probably all moot since
10799 I don't expect Wine generates dwarf2 unwind info for the
10800 system libraries that use this feature. */
10802 insn = emit_insn (gen_blockage ());
10804 push = gen_push (hard_frame_pointer_rtx);
10805 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10806 stack_pointer_rtx);
10807 RTX_FRAME_RELATED_P (push) = 1;
10808 RTX_FRAME_RELATED_P (mov) = 1;
10810 RTX_FRAME_RELATED_P (insn) = 1;
10811 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10812 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10814 /* Note that gen_push incremented m->fs.cfa_offset, even
10815 though we didn't emit the push insn here. */
10816 m->fs.cfa_reg = hard_frame_pointer_rtx;
10817 m->fs.fp_offset = m->fs.cfa_offset;
10818 m->fs.fp_valid = true;
10820 else
10822 /* The frame pointer is not needed so pop %ebp again.
10823 This leaves us with a pristine state. */
10824 emit_insn (gen_pop (hard_frame_pointer_rtx));
10828 /* The first insn of a function that accepts its static chain on the
10829 stack is to push the register that would be filled in by a direct
10830 call. This insn will be skipped by the trampoline. */
10831 else if (ix86_static_chain_on_stack)
10833 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10834 emit_insn (gen_blockage ());
10836 /* We don't want to interpret this push insn as a register save,
10837 only as a stack adjustment. The real copy of the register as
10838 a save will be done later, if needed. */
10839 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10840 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10841 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10842 RTX_FRAME_RELATED_P (insn) = 1;
10845 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10846 DRAP is needed and stack realignment is really needed after reload. */
10847 if (stack_realign_drap)
10849 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10851 /* Only need to push parameter pointer reg if it is caller saved. */
10852 if (!call_used_regs[REGNO (crtl->drap_reg)])
10854 /* Push arg pointer reg */
10855 insn = emit_insn (gen_push (crtl->drap_reg));
10856 RTX_FRAME_RELATED_P (insn) = 1;
10859 /* Grab the argument pointer. */
10860 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10861 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10862 RTX_FRAME_RELATED_P (insn) = 1;
10863 m->fs.cfa_reg = crtl->drap_reg;
10864 m->fs.cfa_offset = 0;
10866 /* Align the stack. */
10867 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10868 stack_pointer_rtx,
10869 GEN_INT (-align_bytes)));
10870 RTX_FRAME_RELATED_P (insn) = 1;
10872 /* Replicate the return address on the stack so that return
10873 address can be reached via (argp - 1) slot. This is needed
10874 to implement macro RETURN_ADDR_RTX and intrinsic function
10875 expand_builtin_return_addr etc. */
10876 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10877 t = gen_frame_mem (word_mode, t);
10878 insn = emit_insn (gen_push (t));
10879 RTX_FRAME_RELATED_P (insn) = 1;
10881 /* For the purposes of frame and register save area addressing,
10882 we've started over with a new frame. */
10883 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10884 m->fs.realigned = true;
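/* As an illustration only (assuming a 32-bit target, 16-byte
   stack_alignment_needed, %ecx as the DRAP register and no static chain,
   none of which is guaranteed), the block above typically expands to

	leal	4(%esp), %ecx
	andl	$-16, %esp
	pushl	-4(%ecx)

   i.e. grab the argument pointer, align SP, then replicate the return
   address below the realigned frame.  */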
10887 int_registers_saved = (frame.nregs == 0);
10888 sse_registers_saved = (frame.nsseregs == 0);
10890 if (frame_pointer_needed && !m->fs.fp_valid)
10892 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10893 slower on all targets. Also sdb doesn't like it. */
10894 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10895 RTX_FRAME_RELATED_P (insn) = 1;
10897 /* Push registers now, before setting the frame pointer
10898 on SEH target. */
10899 if (!int_registers_saved
10900 && TARGET_SEH
10901 && !frame.save_regs_using_mov)
10903 ix86_emit_save_regs ();
10904 int_registers_saved = true;
10905 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10908 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10910 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10911 RTX_FRAME_RELATED_P (insn) = 1;
10913 if (m->fs.cfa_reg == stack_pointer_rtx)
10914 m->fs.cfa_reg = hard_frame_pointer_rtx;
10915 m->fs.fp_offset = m->fs.sp_offset;
10916 m->fs.fp_valid = true;
10920 if (!int_registers_saved)
10922 /* If saving registers via PUSH, do so now. */
10923 if (!frame.save_regs_using_mov)
10925 ix86_emit_save_regs ();
10926 int_registers_saved = true;
10927 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10930 /* When using the red zone we may start register saving before allocating
10931 the stack frame, saving one cycle of the prologue. However, avoid
10932 doing this if we have to probe the stack; at least on x86_64 the
10933 stack probe can turn into a call that clobbers a red zone location. */
10934 else if (ix86_using_red_zone ()
10935 && (! TARGET_STACK_PROBE
10936 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10938 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10939 int_registers_saved = true;
10943 if (stack_realign_fp)
10945 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10946 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10948 /* The computation of the size of the re-aligned stack frame means
10949 that we must allocate the size of the register save area before
10950 performing the actual alignment. Otherwise we cannot guarantee
10951 that there's enough storage above the realignment point. */
10952 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10953 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10954 GEN_INT (m->fs.sp_offset
10955 - frame.sse_reg_save_offset),
10956 -1, false);
10958 /* Align the stack. */
10959 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10960 stack_pointer_rtx,
10961 GEN_INT (-align_bytes)));
10963 /* For the purposes of register save area addressing, the stack
10964 pointer is no longer valid. As for the value of sp_offset,
10965 see ix86_compute_frame_layout, which we need to match in order
10966 to pass verification of stack_pointer_offset at the end. */
10967 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10968 m->fs.sp_valid = false;
10971 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10973 if (flag_stack_usage_info)
10975 /* We start to count from ARG_POINTER. */
10976 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10978 /* If it was realigned, take into account the fake frame. */
10979 if (stack_realign_drap)
10981 if (ix86_static_chain_on_stack)
10982 stack_size += UNITS_PER_WORD;
10984 if (!call_used_regs[REGNO (crtl->drap_reg)])
10985 stack_size += UNITS_PER_WORD;
10987 /* This over-estimates by 1 minimal-stack-alignment-unit but
10988 mitigates that by counting in the new return address slot. */
10989 current_function_dynamic_stack_size
10990 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10993 current_function_static_stack_size = stack_size;
10996 /* On SEH target with very large frame size, allocate an area to save
10997 SSE registers (as the very large allocation won't be described). */
10998 if (TARGET_SEH
10999 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11000 && !sse_registers_saved)
11002 HOST_WIDE_INT sse_size =
11003 frame.sse_reg_save_offset - frame.reg_save_offset;
11005 gcc_assert (int_registers_saved);
11007 /* No need to do stack checking as the area will be immediately
11008 written. */
11009 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11010 GEN_INT (-sse_size), -1,
11011 m->fs.cfa_reg == stack_pointer_rtx);
11012 allocate -= sse_size;
11013 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11014 sse_registers_saved = true;
11017 /* The stack has already been decremented by the instruction calling us
11018 so probe if the size is non-negative to preserve the protection area. */
11019 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11021 /* We expect the registers to be saved when probes are used. */
11022 gcc_assert (int_registers_saved);
11024 if (STACK_CHECK_MOVING_SP)
11026 if (!(crtl->is_leaf && !cfun->calls_alloca
11027 && allocate <= PROBE_INTERVAL))
11029 ix86_adjust_stack_and_probe (allocate);
11030 allocate = 0;
11033 else
11035 HOST_WIDE_INT size = allocate;
11037 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11038 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11040 if (TARGET_STACK_PROBE)
11042 if (crtl->is_leaf && !cfun->calls_alloca)
11044 if (size > PROBE_INTERVAL)
11045 ix86_emit_probe_stack_range (0, size);
11047 else
11048 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11050 else
11052 if (crtl->is_leaf && !cfun->calls_alloca)
11054 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11055 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11056 size - STACK_CHECK_PROTECT);
11058 else
11059 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
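/* To summarize the -fstack-check cases above: with a moving SP the
   allocation itself doubles as the probe sequence; otherwise, with
   TARGET_STACK_PROBE the probes start at the current SP, while on other
   targets they start STACK_CHECK_PROTECT below SP on the assumption that
   the protection area itself is already guaranteed.  Leaf functions whose
   frame fits within one interval (or within the protection area) skip the
   probes entirely.  */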
11064 if (allocate == 0)
11066 else if (!ix86_target_stack_probe ()
11067 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11069 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11070 GEN_INT (-allocate), -1,
11071 m->fs.cfa_reg == stack_pointer_rtx);
11073 else
11075 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11076 rtx r10 = NULL;
11077 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11078 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11079 bool eax_live = ix86_eax_live_at_start_p ();
11080 bool r10_live = false;
11082 if (TARGET_64BIT)
11083 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11085 if (eax_live)
11087 insn = emit_insn (gen_push (eax));
11088 allocate -= UNITS_PER_WORD;
11089 /* Note that SEH directives need to continue tracking the stack
11090 pointer even after the frame pointer has been set up. */
11091 if (sp_is_cfa_reg || TARGET_SEH)
11093 if (sp_is_cfa_reg)
11094 m->fs.cfa_offset += UNITS_PER_WORD;
11095 RTX_FRAME_RELATED_P (insn) = 1;
11099 if (r10_live)
11101 r10 = gen_rtx_REG (Pmode, R10_REG);
11102 insn = emit_insn (gen_push (r10));
11103 allocate -= UNITS_PER_WORD;
11104 if (sp_is_cfa_reg || TARGET_SEH)
11106 if (sp_is_cfa_reg)
11107 m->fs.cfa_offset += UNITS_PER_WORD;
11108 RTX_FRAME_RELATED_P (insn) = 1;
11112 emit_move_insn (eax, GEN_INT (allocate));
11113 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11115 /* Use the fact that AX still contains ALLOCATE. */
11116 adjust_stack_insn = (Pmode == DImode
11117 ? gen_pro_epilogue_adjust_stack_di_sub
11118 : gen_pro_epilogue_adjust_stack_si_sub);
11120 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11121 stack_pointer_rtx, eax));
11123 if (sp_is_cfa_reg || TARGET_SEH)
11125 if (sp_is_cfa_reg)
11126 m->fs.cfa_offset += allocate;
11127 RTX_FRAME_RELATED_P (insn) = 1;
11128 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11129 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11130 plus_constant (Pmode, stack_pointer_rtx,
11131 -allocate)));
11133 m->fs.sp_offset += allocate;
11135 /* Use stack_pointer_rtx for relative addressing so that code
11136 works for realigned stack, too. */
11137 if (r10_live && eax_live)
11139 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11140 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11141 gen_frame_mem (word_mode, t));
11142 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11143 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11144 gen_frame_mem (word_mode, t));
11146 else if (eax_live || r10_live)
11148 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11149 emit_move_insn (gen_rtx_REG (word_mode,
11150 (eax_live ? AX_REG : R10_REG)),
11151 gen_frame_mem (word_mode, t));
11154 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11156 /* If we haven't already set up the frame pointer, do so now. */
11157 if (frame_pointer_needed && !m->fs.fp_valid)
11159 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11160 GEN_INT (frame.stack_pointer_offset
11161 - frame.hard_frame_pointer_offset));
11162 insn = emit_insn (insn);
11163 RTX_FRAME_RELATED_P (insn) = 1;
11164 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11166 if (m->fs.cfa_reg == stack_pointer_rtx)
11167 m->fs.cfa_reg = hard_frame_pointer_rtx;
11168 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11169 m->fs.fp_valid = true;
11172 if (!int_registers_saved)
11173 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11174 if (!sse_registers_saved)
11175 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11177 pic_reg_used = false;
11178 /* We don't use pic-register for pe-coff target. */
11179 if (pic_offset_table_rtx
11180 && !TARGET_PECOFF
11181 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11182 || crtl->profile))
11184 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
11186 if (alt_pic_reg_used != INVALID_REGNUM)
11187 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
11189 pic_reg_used = true;
11192 if (pic_reg_used)
11194 if (TARGET_64BIT)
11196 if (ix86_cmodel == CM_LARGE_PIC)
11198 rtx label, tmp_reg;
11200 gcc_assert (Pmode == DImode);
11201 label = gen_label_rtx ();
11202 emit_label (label);
11203 LABEL_PRESERVE_P (label) = 1;
11204 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
11205 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
11206 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
11207 label));
11208 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
11209 insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
11210 pic_offset_table_rtx, tmp_reg));
11212 else
11213 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
11215 else
11217 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
11218 RTX_FRAME_RELATED_P (insn) = 1;
11219 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11223 /* In the pic_reg_used case, make sure that the got load isn't deleted
11224 when mcount needs it. Blockage to avoid call movement across mcount
11225 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
11226 note. */
11227 if (crtl->profile && !flag_fentry && pic_reg_used)
11228 emit_insn (gen_prologue_use (pic_offset_table_rtx));
11230 if (crtl->drap_reg && !crtl->stack_realign_needed)
11232 /* vDRAP is set up, but after reload it turns out stack realignment
11233 isn't necessary; here we emit the prologue to set up DRAP
11234 without the stack realignment adjustment. */
11235 t = choose_baseaddr (0);
11236 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11239 /* Prevent instructions from being scheduled into register save push
11240 sequence when access to the redzone area is done through frame pointer.
11241 The offset between the frame pointer and the stack pointer is calculated
11242 relative to the value of the stack pointer at the end of the function
11243 prologue, and moving instructions that access redzone area via frame
11244 pointer inside push sequence violates this assumption. */
11245 if (frame_pointer_needed && frame.red_zone_size)
11246 emit_insn (gen_memory_blockage ());
11248 /* Emit cld instruction if stringops are used in the function. */
11249 if (TARGET_CLD && ix86_current_function_needs_cld)
11250 emit_insn (gen_cld ());
11252 /* SEH requires that the prologue end within 256 bytes of the start of
11253 the function. Prevent instruction schedules that would extend that.
11254 Further, prevent alloca modifications to the stack pointer from being
11255 combined with prologue modifications. */
11256 if (TARGET_SEH)
11257 emit_insn (gen_prologue_use (stack_pointer_rtx));
11260 /* Emit code to restore REG using a POP insn. */
11262 static void
11263 ix86_emit_restore_reg_using_pop (rtx reg)
11265 struct machine_function *m = cfun->machine;
11266 rtx insn = emit_insn (gen_pop (reg));
11268 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11269 m->fs.sp_offset -= UNITS_PER_WORD;
11271 if (m->fs.cfa_reg == crtl->drap_reg
11272 && REGNO (reg) == REGNO (crtl->drap_reg))
11274 /* Previously we'd represented the CFA as an expression
11275 like *(%ebp - 8). We've just popped that value from
11276 the stack, which means we need to reset the CFA to
11277 the drap register. This will remain until we restore
11278 the stack pointer. */
11279 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11280 RTX_FRAME_RELATED_P (insn) = 1;
11282 /* This means that the DRAP register is valid for addressing too. */
11283 m->fs.drap_valid = true;
11284 return;
11287 if (m->fs.cfa_reg == stack_pointer_rtx)
11289 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11290 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11291 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11292 RTX_FRAME_RELATED_P (insn) = 1;
11294 m->fs.cfa_offset -= UNITS_PER_WORD;
11297 /* When the frame pointer is the CFA, and we pop it, we are
11298 swapping back to the stack pointer as the CFA. This happens
11299 for stack frames that don't allocate other data, so we assume
11300 the stack pointer is now pointing at the return address, i.e.
11301 the function entry state, which makes the offset be 1 word. */
11302 if (reg == hard_frame_pointer_rtx)
11304 m->fs.fp_valid = false;
11305 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11307 m->fs.cfa_reg = stack_pointer_rtx;
11308 m->fs.cfa_offset -= UNITS_PER_WORD;
11310 add_reg_note (insn, REG_CFA_DEF_CFA,
11311 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11312 GEN_INT (m->fs.cfa_offset)));
11313 RTX_FRAME_RELATED_P (insn) = 1;
11318 /* Emit code to restore saved registers using POP insns. */
11320 static void
11321 ix86_emit_restore_regs_using_pop (void)
11323 unsigned int regno;
11325 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11326 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11327 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11330 /* Emit code and notes for the LEAVE instruction. */
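/* Recall that leave is equivalent to "mov %ebp, %esp; pop %ebp" (movq/popq
   in 64-bit mode), which is why the state tracking below marks SP as valid
   again at fp_offset - UNITS_PER_WORD and invalidates the frame pointer.  */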
11332 static void
11333 ix86_emit_leave (void)
11335 struct machine_function *m = cfun->machine;
11336 rtx insn = emit_insn (ix86_gen_leave ());
11338 ix86_add_queued_cfa_restore_notes (insn);
11340 gcc_assert (m->fs.fp_valid);
11341 m->fs.sp_valid = true;
11342 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11343 m->fs.fp_valid = false;
11345 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11347 m->fs.cfa_reg = stack_pointer_rtx;
11348 m->fs.cfa_offset = m->fs.sp_offset;
11350 add_reg_note (insn, REG_CFA_DEF_CFA,
11351 plus_constant (Pmode, stack_pointer_rtx,
11352 m->fs.sp_offset));
11353 RTX_FRAME_RELATED_P (insn) = 1;
11355 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11356 m->fs.fp_offset);
11359 /* Emit code to restore saved registers using MOV insns.
11360 First register is restored from CFA - CFA_OFFSET. */
11361 static void
11362 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11363 bool maybe_eh_return)
11365 struct machine_function *m = cfun->machine;
11366 unsigned int regno;
11368 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11369 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11371 rtx reg = gen_rtx_REG (word_mode, regno);
11372 rtx insn, mem;
11374 mem = choose_baseaddr (cfa_offset);
11375 mem = gen_frame_mem (word_mode, mem);
11376 insn = emit_move_insn (reg, mem);
11378 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11380 /* Previously we'd represented the CFA as an expression
11381 like *(%ebp - 8). We've just popped that value from
11382 the stack, which means we need to reset the CFA to
11383 the drap register. This will remain until we restore
11384 the stack pointer. */
11385 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11386 RTX_FRAME_RELATED_P (insn) = 1;
11388 /* This means that the DRAP register is valid for addressing. */
11389 m->fs.drap_valid = true;
11391 else
11392 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11394 cfa_offset -= UNITS_PER_WORD;
11398 /* Emit code to restore saved SSE registers using MOV insns.
11399 First register is restored from CFA - CFA_OFFSET. */
11400 static void
11401 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11402 bool maybe_eh_return)
11404 unsigned int regno;
11406 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11407 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11409 rtx reg = gen_rtx_REG (V4SFmode, regno);
11410 rtx mem;
11412 mem = choose_baseaddr (cfa_offset);
11413 mem = gen_rtx_MEM (V4SFmode, mem);
11414 set_mem_align (mem, 128);
11415 emit_move_insn (reg, mem);
11417 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11419 cfa_offset -= 16;
11423 /* Restore function stack, frame, and registers. */
11425 void
11426 ix86_expand_epilogue (int style)
11428 struct machine_function *m = cfun->machine;
11429 struct machine_frame_state frame_state_save = m->fs;
11430 struct ix86_frame frame;
11431 bool restore_regs_via_mov;
11432 bool using_drap;
11434 ix86_finalize_stack_realign_flags ();
11435 ix86_compute_frame_layout (&frame);
11437 m->fs.sp_valid = (!frame_pointer_needed
11438 || (crtl->sp_is_unchanging
11439 && !stack_realign_fp));
11440 gcc_assert (!m->fs.sp_valid
11441 || m->fs.sp_offset == frame.stack_pointer_offset);
11443 /* The FP must be valid if the frame pointer is present. */
11444 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11445 gcc_assert (!m->fs.fp_valid
11446 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11448 /* We must have *some* valid pointer to the stack frame. */
11449 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11451 /* The DRAP is never valid at this point. */
11452 gcc_assert (!m->fs.drap_valid);
11454 /* See the comment about red zone and frame
11455 pointer usage in ix86_expand_prologue. */
11456 if (frame_pointer_needed && frame.red_zone_size)
11457 emit_insn (gen_memory_blockage ());
11459 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11460 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11462 /* Determine the CFA offset of the end of the red-zone. */
11463 m->fs.red_zone_offset = 0;
11464 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11466 /* The red-zone begins below the return address. */
11467 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11469 /* When the register save area is in the aligned portion of
11470 the stack, determine the maximum runtime displacement that
11471 matches up with the aligned frame. */
11472 if (stack_realign_drap)
11473 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11474 + UNITS_PER_WORD);
11477 /* Special care must be taken for the normal return case of a function
11478 using eh_return: the eax and edx registers are marked as saved, but
11479 not restored along this path. Adjust the save location to match. */
11480 if (crtl->calls_eh_return && style != 2)
11481 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11483 /* EH_RETURN requires the use of moves to function properly. */
11484 if (crtl->calls_eh_return)
11485 restore_regs_via_mov = true;
11486 /* SEH requires the use of pops to identify the epilogue. */
11487 else if (TARGET_SEH)
11488 restore_regs_via_mov = false;
11489 /* If we're only restoring one register and sp is not valid, then
11490 use a move instruction to restore the register, since it's
11491 less work than reloading sp and popping the register. */
11492 else if (!m->fs.sp_valid && frame.nregs <= 1)
11493 restore_regs_via_mov = true;
11494 else if (TARGET_EPILOGUE_USING_MOVE
11495 && cfun->machine->use_fast_prologue_epilogue
11496 && (frame.nregs > 1
11497 || m->fs.sp_offset != frame.reg_save_offset))
11498 restore_regs_via_mov = true;
11499 else if (frame_pointer_needed
11500 && !frame.nregs
11501 && m->fs.sp_offset != frame.reg_save_offset)
11502 restore_regs_via_mov = true;
11503 else if (frame_pointer_needed
11504 && TARGET_USE_LEAVE
11505 && cfun->machine->use_fast_prologue_epilogue
11506 && frame.nregs == 1)
11507 restore_regs_via_mov = true;
11508 else
11509 restore_regs_via_mov = false;
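/* In short: pops are the default, being compact and required by SEH, while
   moves are chosen whenever the save area cannot simply be popped off --
   eh_return epilogues, an invalid SP, or tunings where moves let us skip
   the extra SP adjustment.  */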
11511 if (restore_regs_via_mov || frame.nsseregs)
11513 /* Ensure that the entire register save area is addressable via
11514 the stack pointer, if we will restore via sp. */
11515 if (TARGET_64BIT
11516 && m->fs.sp_offset > 0x7fffffff
11517 && !(m->fs.fp_valid || m->fs.drap_valid)
11518 && (frame.nsseregs + frame.nregs) != 0)
11520 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11521 GEN_INT (m->fs.sp_offset
11522 - frame.sse_reg_save_offset),
11523 style,
11524 m->fs.cfa_reg == stack_pointer_rtx);
11528 /* If there are any SSE registers to restore, then we have to do it
11529 via moves, since there's obviously no pop for SSE regs. */
11530 if (frame.nsseregs)
11531 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11532 style == 2);
11534 if (restore_regs_via_mov)
11536 rtx t;
11538 if (frame.nregs)
11539 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11541 /* eh_return epilogues need %ecx added to the stack pointer. */
11542 if (style == 2)
11544 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11546 /* Stack align doesn't work with eh_return. */
11547 gcc_assert (!stack_realign_drap);
11548 /* Neither do regparm nested functions. */
11549 gcc_assert (!ix86_static_chain_on_stack);
11551 if (frame_pointer_needed)
11553 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11554 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11555 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11557 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11558 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11560 /* Note that we use SA as a temporary CFA, as the return
11561 address is at the proper place relative to it. We
11562 pretend this happens at the FP restore insn because
11563 prior to this insn the FP would be stored at the wrong
11564 offset relative to SA, and after this insn we have no
11565 other reasonable register to use for the CFA. We don't
11566 bother resetting the CFA to the SP for the duration of
11567 the return insn. */
11568 add_reg_note (insn, REG_CFA_DEF_CFA,
11569 plus_constant (Pmode, sa, UNITS_PER_WORD));
11570 ix86_add_queued_cfa_restore_notes (insn);
11571 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11572 RTX_FRAME_RELATED_P (insn) = 1;
11574 m->fs.cfa_reg = sa;
11575 m->fs.cfa_offset = UNITS_PER_WORD;
11576 m->fs.fp_valid = false;
11578 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11579 const0_rtx, style, false);
11581 else
11583 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11584 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11585 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11586 ix86_add_queued_cfa_restore_notes (insn);
11588 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11589 if (m->fs.cfa_offset != UNITS_PER_WORD)
11591 m->fs.cfa_offset = UNITS_PER_WORD;
11592 add_reg_note (insn, REG_CFA_DEF_CFA,
11593 plus_constant (Pmode, stack_pointer_rtx,
11594 UNITS_PER_WORD));
11595 RTX_FRAME_RELATED_P (insn) = 1;
11598 m->fs.sp_offset = UNITS_PER_WORD;
11599 m->fs.sp_valid = true;
11602 else
11604 /* SEH requires that the function end with (1) a stack adjustment
11605 if necessary, (2) a sequence of pops, and (3) a return or
11606 jump instruction. Prevent insns from the function body from
11607 being scheduled into this sequence. */
11608 if (TARGET_SEH)
11610 /* Prevent a catch region from being adjacent to the standard
11611 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
11612 several other flags that would be interesting to test are
11613 yet set up. */
11614 if (flag_non_call_exceptions)
11615 emit_insn (gen_nops (const1_rtx));
11616 else
11617 emit_insn (gen_blockage ());
11620 /* First step is to deallocate the stack frame so that we can
11621 pop the registers. Also do it on SEH target for very large
11622 frame as the emitted instructions aren't allowed by the ABI in
11623 epilogues. */
11624 if (!m->fs.sp_valid
11625 || (TARGET_SEH
11626 && (m->fs.sp_offset - frame.reg_save_offset
11627 >= SEH_MAX_FRAME_SIZE)))
11629 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11630 GEN_INT (m->fs.fp_offset
11631 - frame.reg_save_offset),
11632 style, false);
11634 else if (m->fs.sp_offset != frame.reg_save_offset)
11636 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11637 GEN_INT (m->fs.sp_offset
11638 - frame.reg_save_offset),
11639 style,
11640 m->fs.cfa_reg == stack_pointer_rtx);
11643 ix86_emit_restore_regs_using_pop ();
11646 /* If we used a frame pointer and haven't already got rid of it,
11647 then do so now. */
11648 if (m->fs.fp_valid)
11650 /* If the stack pointer is valid and pointing at the frame
11651 pointer store address, then we only need a pop. */
11652 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11653 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11654 /* Leave results in shorter dependency chains on CPUs that are
11655 able to grok it fast. */
11656 else if (TARGET_USE_LEAVE
11657 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11658 || !cfun->machine->use_fast_prologue_epilogue)
11659 ix86_emit_leave ();
11660 else
11662 pro_epilogue_adjust_stack (stack_pointer_rtx,
11663 hard_frame_pointer_rtx,
11664 const0_rtx, style, !using_drap);
11665 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11669 if (using_drap)
11671 int param_ptr_offset = UNITS_PER_WORD;
11672 rtx insn;
11674 gcc_assert (stack_realign_drap);
11676 if (ix86_static_chain_on_stack)
11677 param_ptr_offset += UNITS_PER_WORD;
11678 if (!call_used_regs[REGNO (crtl->drap_reg)])
11679 param_ptr_offset += UNITS_PER_WORD;
11681 insn = emit_insn (gen_rtx_SET
11682 (VOIDmode, stack_pointer_rtx,
11683 gen_rtx_PLUS (Pmode,
11684 crtl->drap_reg,
11685 GEN_INT (-param_ptr_offset))));
11686 m->fs.cfa_reg = stack_pointer_rtx;
11687 m->fs.cfa_offset = param_ptr_offset;
11688 m->fs.sp_offset = param_ptr_offset;
11689 m->fs.realigned = false;
11691 add_reg_note (insn, REG_CFA_DEF_CFA,
11692 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11693 GEN_INT (param_ptr_offset)));
11694 RTX_FRAME_RELATED_P (insn) = 1;
11696 if (!call_used_regs[REGNO (crtl->drap_reg)])
11697 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11700 /* At this point the stack pointer must be valid, and we must have
11701 restored all of the registers. We may not have deallocated the
11702 entire stack frame. We've delayed this until now because it may
11703 be possible to merge the local stack deallocation with the
11704 deallocation forced by ix86_static_chain_on_stack. */
11705 gcc_assert (m->fs.sp_valid);
11706 gcc_assert (!m->fs.fp_valid);
11707 gcc_assert (!m->fs.realigned);
11708 if (m->fs.sp_offset != UNITS_PER_WORD)
11710 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11711 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11712 style, true);
11714 else
11715 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11717 /* Sibcall epilogues don't want a return instruction. */
11718 if (style == 0)
11720 m->fs = frame_state_save;
11721 return;
11724 if (crtl->args.pops_args && crtl->args.size)
11726 rtx popc = GEN_INT (crtl->args.pops_args);
11728 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11729 address, do explicit add, and jump indirectly to the caller. */
11731 if (crtl->args.pops_args >= 65536)
11733 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11734 rtx insn;
11736 /* There is no "pascal" calling convention in any 64bit ABI. */
11737 gcc_assert (!TARGET_64BIT);
11739 insn = emit_insn (gen_pop (ecx));
11740 m->fs.cfa_offset -= UNITS_PER_WORD;
11741 m->fs.sp_offset -= UNITS_PER_WORD;
11743 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11744 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11745 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11746 add_reg_note (insn, REG_CFA_REGISTER,
11747 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11748 RTX_FRAME_RELATED_P (insn) = 1;
11750 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11751 popc, -1, true);
11752 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11754 else
11755 emit_jump_insn (gen_simple_return_pop_internal (popc));
11757 else
11758 emit_jump_insn (gen_simple_return_internal ());
11760 /* Restore the state back to the state from the prologue,
11761 so that it's correct for the next epilogue. */
11762 m->fs = frame_state_save;
11765 /* Reset from the function's potential modifications. */
11767 static void
11768 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11769 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11771 if (pic_offset_table_rtx)
11772 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11773 #if TARGET_MACHO
11774 /* Mach-O doesn't support labels at the end of objects, so if
11775 it looks like we might want one, insert a NOP. */
11777 rtx insn = get_last_insn ();
11778 rtx deleted_debug_label = NULL_RTX;
11779 while (insn
11780 && NOTE_P (insn)
11781 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11783 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11784 notes; instead set their CODE_LABEL_NUMBER to -1,
11785 otherwise there would be code generation differences
11786 between -g and -g0. */
11787 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11788 deleted_debug_label = insn;
11789 insn = PREV_INSN (insn);
11791 if (insn
11792 && (LABEL_P (insn)
11793 || (NOTE_P (insn)
11794 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11795 fputs ("\tnop\n", file);
11796 else if (deleted_debug_label)
11797 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11798 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11799 CODE_LABEL_NUMBER (insn) = -1;
11801 #endif
11805 /* Return a scratch register to use in the split stack prologue. The
11806 split stack prologue is used for -fsplit-stack. It is the first
11807 instructions in the function, even before the regular prologue.
11808 The scratch register can be any caller-saved register which is not
11809 used for parameters or for the static chain. */
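/* Summary of the choice made below: 64-bit targets always use %r11.  For
   32-bit, fastcall uses %eax (nested functions are rejected), thiscall uses
   %edx, or %eax when a static chain is present, and the regparm < 3 case
   uses %ecx, or %edx when a static chain is present (two register
   parameters plus a static chain are rejected); three register parameters
   are rejected outright.  */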
11811 static unsigned int
11812 split_stack_prologue_scratch_regno (void)
11814 if (TARGET_64BIT)
11815 return R11_REG;
11816 else
11818 bool is_fastcall, is_thiscall;
11819 int regparm;
11821 is_fastcall = (lookup_attribute ("fastcall",
11822 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11823 != NULL);
11824 is_thiscall = (lookup_attribute ("thiscall",
11825 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11826 != NULL);
11827 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11829 if (is_fastcall)
11831 if (DECL_STATIC_CHAIN (cfun->decl))
11833 sorry ("-fsplit-stack does not support fastcall with "
11834 "nested function");
11835 return INVALID_REGNUM;
11837 return AX_REG;
11839 else if (is_thiscall)
11841 if (!DECL_STATIC_CHAIN (cfun->decl))
11842 return DX_REG;
11843 return AX_REG;
11845 else if (regparm < 3)
11847 if (!DECL_STATIC_CHAIN (cfun->decl))
11848 return CX_REG;
11849 else
11851 if (regparm >= 2)
11853 sorry ("-fsplit-stack does not support 2 register "
11854 "parameters for a nested function");
11855 return INVALID_REGNUM;
11857 return DX_REG;
11860 else
11862 /* FIXME: We could make this work by pushing a register
11863 around the addition and comparison. */
11864 sorry ("-fsplit-stack does not support 3 register parameters");
11865 return INVALID_REGNUM;
11870 /* A SYMBOL_REF for the function which allocates new stackspace for
11871 -fsplit-stack. */
11873 static GTY(()) rtx split_stack_fn;
11875 /* A SYMBOL_REF for the more stack function when using the large
11876 model. */
11878 static GTY(()) rtx split_stack_fn_large;
11880 /* Handle -fsplit-stack. These are the first instructions in the
11881 function, even before the regular prologue. */
11883 void
11884 ix86_expand_split_stack_prologue (void)
11886 struct ix86_frame frame;
11887 HOST_WIDE_INT allocate;
11888 unsigned HOST_WIDE_INT args_size;
11889 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11890 rtx scratch_reg = NULL_RTX;
11891 rtx varargs_label = NULL_RTX;
11892 rtx fn;
11894 gcc_assert (flag_split_stack && reload_completed);
11896 ix86_finalize_stack_realign_flags ();
11897 ix86_compute_frame_layout (&frame);
11898 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11900 /* This is the label we will branch to if we have enough stack
11901 space. We expect the basic block reordering pass to reverse this
11902 branch if optimizing, so that we branch in the unlikely case. */
11903 label = gen_label_rtx ();
11905 /* We need to compare the stack pointer minus the frame size with
11906 the stack boundary in the TCB. The stack boundary always gives
11907 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11908 can compare directly. Otherwise we need to do an addition. */
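/* The UNSPEC_STACK_CHECK reference below becomes a thread-local memory
   operand: on GNU/Linux the comparison ends up against a TCB field
   addressed through the %fs (64-bit) or %gs (32-bit) segment register,
   i.e. roughly "cmpq %fs:<offset>, %rsp".  The exact offset is fixed by
   the runtime's split-stack support, not here; treat this as a sketch.  */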
11910 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11911 UNSPEC_STACK_CHECK);
11912 limit = gen_rtx_CONST (Pmode, limit);
11913 limit = gen_rtx_MEM (Pmode, limit);
11914 if (allocate < SPLIT_STACK_AVAILABLE)
11915 current = stack_pointer_rtx;
11916 else
11918 unsigned int scratch_regno;
11919 rtx offset;
11921 /* We need a scratch register to hold the stack pointer minus
11922 the required frame size. Since this is the very start of the
11923 function, the scratch register can be any caller-saved
11924 register which is not used for parameters. */
11925 offset = GEN_INT (- allocate);
11926 scratch_regno = split_stack_prologue_scratch_regno ();
11927 if (scratch_regno == INVALID_REGNUM)
11928 return;
11929 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11930 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11932 /* We don't use ix86_gen_add3 in this case because it will
11933 want to split to lea, but when not optimizing the insn
11934 will not be split after this point. */
11935 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11936 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11937 offset)));
11939 else
11941 emit_move_insn (scratch_reg, offset);
11942 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11943 stack_pointer_rtx));
11945 current = scratch_reg;
11948 ix86_expand_branch (GEU, current, limit, label);
11949 jump_insn = get_last_insn ();
11950 JUMP_LABEL (jump_insn) = label;
11952 /* Mark the jump as very likely to be taken. */
11953 add_int_reg_note (jump_insn, REG_BR_PROB,
11954 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
11956 if (split_stack_fn == NULL_RTX)
11957 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11958 fn = split_stack_fn;
11960 /* Get more stack space. We pass in the desired stack space and the
11961 size of the arguments to copy to the new stack. In 32-bit mode
11962 we push the parameters; __morestack will return on a new stack
11963 anyhow. In 64-bit mode we pass the parameters in r10 and
11964 r11. */
11965 allocate_rtx = GEN_INT (allocate);
11966 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11967 call_fusage = NULL_RTX;
11968 if (TARGET_64BIT)
11970 rtx reg10, reg11;
11972 reg10 = gen_rtx_REG (Pmode, R10_REG);
11973 reg11 = gen_rtx_REG (Pmode, R11_REG);
11975 /* If this function uses a static chain, it will be in %r10.
11976 Preserve it across the call to __morestack. */
11977 if (DECL_STATIC_CHAIN (cfun->decl))
11979 rtx rax;
11981 rax = gen_rtx_REG (word_mode, AX_REG);
11982 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
11983 use_reg (&call_fusage, rax);
11986 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11987 && !TARGET_PECOFF)
11989 HOST_WIDE_INT argval;
11991 gcc_assert (Pmode == DImode);
11992 /* When using the large model we need to load the address
11993 into a register, and we've run out of registers. So we
11994 switch to a different calling convention, and we call a
11995 different function: __morestack_large. We pass the
11996 argument size in the upper 32 bits of r10 and pass the
11997 frame size in the lower 32 bits. */
11998 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11999 gcc_assert ((args_size & 0xffffffff) == args_size);
12001 if (split_stack_fn_large == NULL_RTX)
12002 split_stack_fn_large =
12003 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12005 if (ix86_cmodel == CM_LARGE_PIC)
12007 rtx label, x;
12009 label = gen_label_rtx ();
12010 emit_label (label);
12011 LABEL_PRESERVE_P (label) = 1;
12012 emit_insn (gen_set_rip_rex64 (reg10, label));
12013 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12014 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12015 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12016 UNSPEC_GOT);
12017 x = gen_rtx_CONST (Pmode, x);
12018 emit_move_insn (reg11, x);
12019 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12020 x = gen_const_mem (Pmode, x);
12021 emit_move_insn (reg11, x);
12023 else
12024 emit_move_insn (reg11, split_stack_fn_large);
12026 fn = reg11;
12028 argval = ((args_size << 16) << 16) + allocate;
12029 emit_move_insn (reg10, GEN_INT (argval));
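/* For example (values purely illustrative): an argument block of 0x40 bytes
   and a frame of 0x1000 bytes yield argval == 0x0000004000001000, so the
   argument size lands in the upper 32 bits of %r10 and the frame size in
   the lower 32 bits, as __morestack_large_model expects.  */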
12031 else
12033 emit_move_insn (reg10, allocate_rtx);
12034 emit_move_insn (reg11, GEN_INT (args_size));
12035 use_reg (&call_fusage, reg11);
12038 use_reg (&call_fusage, reg10);
12040 else
12042 emit_insn (gen_push (GEN_INT (args_size)));
12043 emit_insn (gen_push (allocate_rtx));
12045 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12046 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12047 NULL_RTX, false);
12048 add_function_usage_to (call_insn, call_fusage);
12050 /* In order to make call/return prediction work right, we now need
12051 to execute a return instruction. See
12052 libgcc/config/i386/morestack.S for the details on how this works.
12054 For flow purposes gcc must not see this as a return
12055 instruction--we need control flow to continue at the subsequent
12056 label. Therefore, we use an unspec. */
12057 gcc_assert (crtl->args.pops_args < 65536);
12058 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12060 /* If we are in 64-bit mode and this function uses a static chain,
12061 we saved %r10 in %rax before calling __morestack. */
12062 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12063 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12064 gen_rtx_REG (word_mode, AX_REG));
12066 /* If this function calls va_start, we need to store a pointer to
12067 the arguments on the old stack, because they may not have been
12068 all copied to the new stack. At this point the old stack can be
12069 found at the frame pointer value used by __morestack, because
12070 __morestack has set that up before calling back to us. Here we
12071 store that pointer in a scratch register, and in
12072 ix86_expand_prologue we store the scratch register in a stack
12073 slot. */
12074 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12076 unsigned int scratch_regno;
12077 rtx frame_reg;
12078 int words;
12080 scratch_regno = split_stack_prologue_scratch_regno ();
12081 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12082 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12084 /* 64-bit:
12085 fp -> old fp value
12086 return address within this function
12087 return address of caller of this function
12088 stack arguments
12089 So we add three words to get to the stack arguments.
12091 32-bit:
12092 fp -> old fp value
12093 return address within this function
12094 first argument to __morestack
12095 second argument to __morestack
12096 return address of caller of this function
12097 stack arguments
12098 So we add five words to get to the stack arguments.
12100 words = TARGET_64BIT ? 3 : 5;
12101 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12102 gen_rtx_PLUS (Pmode, frame_reg,
12103 GEN_INT (words * UNITS_PER_WORD))));
12105 varargs_label = gen_label_rtx ();
12106 emit_jump_insn (gen_jump (varargs_label));
12107 JUMP_LABEL (get_last_insn ()) = varargs_label;
12109 emit_barrier ();
12112 emit_label (label);
12113 LABEL_NUSES (label) = 1;
12115 /* If this function calls va_start, we now have to set the scratch
12116 register for the case where we do not call __morestack. In this
12117 case we need to set it based on the stack pointer. */
12118 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12120 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12121 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12122 GEN_INT (UNITS_PER_WORD))));
12124 emit_label (varargs_label);
12125 LABEL_NUSES (varargs_label) = 1;
12129 /* We may have to tell the dataflow pass that the split stack prologue
12130 is initializing a scratch register. */
12132 static void
12133 ix86_live_on_entry (bitmap regs)
12135 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12137 gcc_assert (flag_split_stack);
12138 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12142 /* Extract the parts of an RTL expression that is a valid memory address
12143 for an instruction. Return 0 if the structure of the address is
12144 grossly off. Return -1 if the address contains ASHIFT, so it is not
12145 strictly valid, but still used for computing the length of the lea instruction. */
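/* For example, an address of the form
     (plus (plus (mult (reg B) (const_int 4)) (reg A)) (const_int 16))
   decomposes into base = A, index = B, scale = 4, disp = 16, i.e. the
   operand printed as 16(A,B,4) in AT&T syntax; a lone (reg A) is simply a
   base with scale 1 and no displacement.  */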
12148 ix86_decompose_address (rtx addr, struct ix86_address *out)
12150 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12151 rtx base_reg, index_reg;
12152 HOST_WIDE_INT scale = 1;
12153 rtx scale_rtx = NULL_RTX;
12154 rtx tmp;
12155 int retval = 1;
12156 enum ix86_address_seg seg = SEG_DEFAULT;
12158 /* Allow zero-extended SImode addresses,
12159 they will be emitted with addr32 prefix. */
12160 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12162 if (GET_CODE (addr) == ZERO_EXTEND
12163 && GET_MODE (XEXP (addr, 0)) == SImode)
12165 addr = XEXP (addr, 0);
12166 if (CONST_INT_P (addr))
12167 return 0;
12169 else if (GET_CODE (addr) == AND
12170 && const_32bit_mask (XEXP (addr, 1), DImode))
12172 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12173 if (addr == NULL_RTX)
12174 return 0;
12176 if (CONST_INT_P (addr))
12177 return 0;
12181 /* Allow SImode subregs of DImode addresses,
12182 they will be emitted with addr32 prefix. */
12183 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12185 if (GET_CODE (addr) == SUBREG
12186 && GET_MODE (SUBREG_REG (addr)) == DImode)
12188 addr = SUBREG_REG (addr);
12189 if (CONST_INT_P (addr))
12190 return 0;
12194 if (REG_P (addr))
12195 base = addr;
12196 else if (GET_CODE (addr) == SUBREG)
12198 if (REG_P (SUBREG_REG (addr)))
12199 base = addr;
12200 else
12201 return 0;
12203 else if (GET_CODE (addr) == PLUS)
12205 rtx addends[4], op;
12206 int n = 0, i;
12208 op = addr;
12211 if (n >= 4)
12212 return 0;
12213 addends[n++] = XEXP (op, 1);
12214 op = XEXP (op, 0);
12216 while (GET_CODE (op) == PLUS);
12217 if (n >= 4)
12218 return 0;
12219 addends[n] = op;
12221 for (i = n; i >= 0; --i)
12223 op = addends[i];
12224 switch (GET_CODE (op))
12226 case MULT:
12227 if (index)
12228 return 0;
12229 index = XEXP (op, 0);
12230 scale_rtx = XEXP (op, 1);
12231 break;
12233 case ASHIFT:
12234 if (index)
12235 return 0;
12236 index = XEXP (op, 0);
12237 tmp = XEXP (op, 1);
12238 if (!CONST_INT_P (tmp))
12239 return 0;
12240 scale = INTVAL (tmp);
12241 if ((unsigned HOST_WIDE_INT) scale > 3)
12242 return 0;
12243 scale = 1 << scale;
12244 break;
12246 case ZERO_EXTEND:
12247 op = XEXP (op, 0);
12248 if (GET_CODE (op) != UNSPEC)
12249 return 0;
12250 /* FALLTHRU */
12252 case UNSPEC:
12253 if (XINT (op, 1) == UNSPEC_TP
12254 && TARGET_TLS_DIRECT_SEG_REFS
12255 && seg == SEG_DEFAULT)
12256 seg = DEFAULT_TLS_SEG_REG;
12257 else
12258 return 0;
12259 break;
12261 case SUBREG:
12262 if (!REG_P (SUBREG_REG (op)))
12263 return 0;
12264 /* FALLTHRU */
12266 case REG:
12267 if (!base)
12268 base = op;
12269 else if (!index)
12270 index = op;
12271 else
12272 return 0;
12273 break;
12275 case CONST:
12276 case CONST_INT:
12277 case SYMBOL_REF:
12278 case LABEL_REF:
12279 if (disp)
12280 return 0;
12281 disp = op;
12282 break;
12284 default:
12285 return 0;
12289 else if (GET_CODE (addr) == MULT)
12291 index = XEXP (addr, 0); /* index*scale */
12292 scale_rtx = XEXP (addr, 1);
12294 else if (GET_CODE (addr) == ASHIFT)
12296 /* We're called for lea too, which implements ashift on occasion. */
12297 index = XEXP (addr, 0);
12298 tmp = XEXP (addr, 1);
12299 if (!CONST_INT_P (tmp))
12300 return 0;
12301 scale = INTVAL (tmp);
12302 if ((unsigned HOST_WIDE_INT) scale > 3)
12303 return 0;
12304 scale = 1 << scale;
12305 retval = -1;
12307 else
12308 disp = addr; /* displacement */
12310 if (index)
12312 if (REG_P (index))
12314 else if (GET_CODE (index) == SUBREG
12315 && REG_P (SUBREG_REG (index)))
12317 else
12318 return 0;
12321 /* Extract the integral value of scale. */
12322 if (scale_rtx)
12324 if (!CONST_INT_P (scale_rtx))
12325 return 0;
12326 scale = INTVAL (scale_rtx);
12329 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12330 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12332 /* Avoid useless 0 displacement. */
12333 if (disp == const0_rtx && (base || index))
12334 disp = NULL_RTX;
12336 /* Allow arg pointer and stack pointer as index if there is no scaling. */
12337 if (base_reg && index_reg && scale == 1
12338 && (index_reg == arg_pointer_rtx
12339 || index_reg == frame_pointer_rtx
12340 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12342 rtx tmp;
12343 tmp = base, base = index, index = tmp;
12344 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12347 /* Special case: %ebp cannot be encoded as a base without a displacement.
12348 Similarly %r13. */
12349 if (!disp
12350 && base_reg
12351 && (base_reg == hard_frame_pointer_rtx
12352 || base_reg == frame_pointer_rtx
12353 || base_reg == arg_pointer_rtx
12354 || (REG_P (base_reg)
12355 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12356 || REGNO (base_reg) == R13_REG))))
12357 disp = const0_rtx;
12359 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
12360 Avoid this by transforming to [%esi+0].
12361 Reload calls address legitimization without cfun defined, so we need
12362 to test cfun for being non-NULL. */
12363 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12364 && base_reg && !index_reg && !disp
12365 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12366 disp = const0_rtx;
12368 /* Special case: encode reg+reg instead of reg*2. */
12369 if (!base && index && scale == 2)
12370 base = index, base_reg = index_reg, scale = 1;
12372 /* Special case: scaling cannot be encoded without base or displacement. */
12373 if (!base && !disp && index && scale != 1)
12374 disp = const0_rtx;
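/* Both special cases above follow from the SIB encoding: a scaled index
with no base register forces a 32-bit displacement field, so [reg+reg]
is shorter than [reg*2], and a lone scaled index needs at least an
explicit zero displacement. */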
12376 out->base = base;
12377 out->index = index;
12378 out->disp = disp;
12379 out->scale = scale;
12380 out->seg = seg;
12382 return retval;
12385 /* Return cost of the memory address x.
12386 For i386, it is better to use a complex address than to let gcc copy
12387 the address into a reg and make a new pseudo. But not if the address
12388 requires two regs - that would mean more pseudos with longer
12389 lifetimes. */
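/* E.g. an address formed from two distinct pseudo registers, such as
(plus (reg pseudo1) (reg pseudo2)), costs 1 + 1 + 1 = 3 below, while an
address using a single hard base register costs only 1. */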
12390 static int
12391 ix86_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
12392 addr_space_t as ATTRIBUTE_UNUSED,
12393 bool speed ATTRIBUTE_UNUSED)
12395 struct ix86_address parts;
12396 int cost = 1;
12397 int ok = ix86_decompose_address (x, &parts);
12399 gcc_assert (ok);
12401 if (parts.base && GET_CODE (parts.base) == SUBREG)
12402 parts.base = SUBREG_REG (parts.base);
12403 if (parts.index && GET_CODE (parts.index) == SUBREG)
12404 parts.index = SUBREG_REG (parts.index);
12406 /* Attempt to minimize the number of registers in the address. */
12407 if ((parts.base
12408 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12409 || (parts.index
12410 && (!REG_P (parts.index)
12411 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12412 cost++;
12414 if (parts.base
12415 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12416 && parts.index
12417 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12418 && parts.base != parts.index)
12419 cost++;
12421 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12422 since its predecode logic can't detect the length of instructions
12423 and it degenerates to vector decoding. Increase the cost of such
12424 addresses here. The penalty is at minimum 2 cycles. It may be worthwhile
12425 to split such addresses or even refuse such addresses at all.
12427 The following addressing modes are affected:
12428 [base+scale*index]
12429 [scale*index+disp]
12430 [base+index]
12432 The first and last case may be avoidable by explicitly coding the zero in
12433 the memory address, but I don't have an AMD-K6 machine handy to check this
12434 theory. */
12436 if (TARGET_K6
12437 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12438 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12439 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12440 cost += 10;
12442 return cost;
12445 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12446 this is used to form addresses to local data when -fPIC is in
12447 use. */
12449 static bool
12450 darwin_local_data_pic (rtx disp)
12452 return (GET_CODE (disp) == UNSPEC
12453 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12456 /* Determine if a given RTX is a valid constant. We already know this
12457 satisfies CONSTANT_P. */
12459 static bool
12460 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
12462 switch (GET_CODE (x))
12464 case CONST:
12465 x = XEXP (x, 0);
12467 if (GET_CODE (x) == PLUS)
12469 if (!CONST_INT_P (XEXP (x, 1)))
12470 return false;
12471 x = XEXP (x, 0);
12474 if (TARGET_MACHO && darwin_local_data_pic (x))
12475 return true;
12477 /* Only some unspecs are valid as "constants". */
12478 if (GET_CODE (x) == UNSPEC)
12479 switch (XINT (x, 1))
12481 case UNSPEC_GOT:
12482 case UNSPEC_GOTOFF:
12483 case UNSPEC_PLTOFF:
12484 return TARGET_64BIT;
12485 case UNSPEC_TPOFF:
12486 case UNSPEC_NTPOFF:
12487 x = XVECEXP (x, 0, 0);
12488 return (GET_CODE (x) == SYMBOL_REF
12489 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12490 case UNSPEC_DTPOFF:
12491 x = XVECEXP (x, 0, 0);
12492 return (GET_CODE (x) == SYMBOL_REF
12493 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12494 default:
12495 return false;
12498 /* We must have drilled down to a symbol. */
12499 if (GET_CODE (x) == LABEL_REF)
12500 return true;
12501 if (GET_CODE (x) != SYMBOL_REF)
12502 return false;
12503 /* FALLTHRU */
12505 case SYMBOL_REF:
12506 /* TLS symbols are never valid. */
12507 if (SYMBOL_REF_TLS_MODEL (x))
12508 return false;
12510 /* DLLIMPORT symbols are never valid. */
12511 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12512 && SYMBOL_REF_DLLIMPORT_P (x))
12513 return false;
12515 #if TARGET_MACHO
12516 /* mdynamic-no-pic */
12517 if (MACHO_DYNAMIC_NO_PIC_P)
12518 return machopic_symbol_defined_p (x);
12519 #endif
12520 break;
12522 case CONST_DOUBLE:
12523 if (GET_MODE (x) == TImode
12524 && x != CONST0_RTX (TImode)
12525 && !TARGET_64BIT)
12526 return false;
12527 break;
12529 case CONST_VECTOR:
12530 if (!standard_sse_constant_p (x))
12531 return false;
12533 default:
12534 break;
12537 /* Otherwise we handle everything else in the move patterns. */
12538 return true;
12541 /* Determine if it's legal to put X into the constant pool. This
12542 is not possible for the address of thread-local symbols, which
12543 is checked above. */
12545 static bool
12546 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12548 /* We can always put integral constants and vectors in memory. */
12549 switch (GET_CODE (x))
12551 case CONST_INT:
12552 case CONST_DOUBLE:
12553 case CONST_VECTOR:
12554 return false;
12556 default:
12557 break;
12559 return !ix86_legitimate_constant_p (mode, x);
12562 /* Return true if the symbol is marked as dllimport, or as a stub variable,
12563 otherwise return false. */
12565 static bool
12566 is_imported_p (rtx x)
12568 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12569 || GET_CODE (x) != SYMBOL_REF)
12570 return false;
12572 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12576 /* Nonzero if the constant value X is a legitimate general operand
12577 when generating PIC code. It is given that flag_pic is on and
12578 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12580 bool
12581 legitimate_pic_operand_p (rtx x)
12583 rtx inner;
12585 switch (GET_CODE (x))
12587 case CONST:
12588 inner = XEXP (x, 0);
12589 if (GET_CODE (inner) == PLUS
12590 && CONST_INT_P (XEXP (inner, 1)))
12591 inner = XEXP (inner, 0);
12593 /* Only some unspecs are valid as "constants". */
12594 if (GET_CODE (inner) == UNSPEC)
12595 switch (XINT (inner, 1))
12597 case UNSPEC_GOT:
12598 case UNSPEC_GOTOFF:
12599 case UNSPEC_PLTOFF:
12600 return TARGET_64BIT;
12601 case UNSPEC_TPOFF:
12602 x = XVECEXP (inner, 0, 0);
12603 return (GET_CODE (x) == SYMBOL_REF
12604 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12605 case UNSPEC_MACHOPIC_OFFSET:
12606 return legitimate_pic_address_disp_p (x);
12607 default:
12608 return false;
12610 /* FALLTHRU */
12612 case SYMBOL_REF:
12613 case LABEL_REF:
12614 return legitimate_pic_address_disp_p (x);
12616 default:
12617 return true;
12621 /* Determine if a given CONST RTX is a valid memory displacement
12622 in PIC mode. */
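/* A typical 32-bit example is (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)),
i.e. x@GOTOFF addressed relative to the GOT base register; it is handled
by the UNSPEC_GOTOFF case below. */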
12624 bool
12625 legitimate_pic_address_disp_p (rtx disp)
12627 bool saw_plus;
12629 /* In 64bit mode we can allow direct addresses of symbols and labels
12630 when they are not dynamic symbols. */
12631 if (TARGET_64BIT)
12633 rtx op0 = disp, op1;
12635 switch (GET_CODE (disp))
12637 case LABEL_REF:
12638 return true;
12640 case CONST:
12641 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12642 break;
12643 op0 = XEXP (XEXP (disp, 0), 0);
12644 op1 = XEXP (XEXP (disp, 0), 1);
12645 if (!CONST_INT_P (op1)
12646 || INTVAL (op1) >= 16*1024*1024
12647 || INTVAL (op1) < -16*1024*1024)
12648 break;
12649 if (GET_CODE (op0) == LABEL_REF)
12650 return true;
12651 if (GET_CODE (op0) == CONST
12652 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12653 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12654 return true;
12655 if (GET_CODE (op0) == UNSPEC
12656 && XINT (op0, 1) == UNSPEC_PCREL)
12657 return true;
12658 if (GET_CODE (op0) != SYMBOL_REF)
12659 break;
12660 /* FALLTHRU */
12662 case SYMBOL_REF:
12663 /* TLS references should always be enclosed in UNSPEC.
12664 A dllimport symbol always needs to be resolved. */
12665 if (SYMBOL_REF_TLS_MODEL (op0)
12666 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12667 return false;
12669 if (TARGET_PECOFF)
12671 if (is_imported_p (op0))
12672 return true;
12674 if (SYMBOL_REF_FAR_ADDR_P (op0)
12675 || !SYMBOL_REF_LOCAL_P (op0))
12676 break;
12678 /* Function symbols need to be resolved only for
12679 the large model.
12680 For the small model we don't need to resolve anything
12681 here. */
12682 if ((ix86_cmodel != CM_LARGE_PIC
12683 && SYMBOL_REF_FUNCTION_P (op0))
12684 || ix86_cmodel == CM_SMALL_PIC)
12685 return true;
12686 /* Non-external symbols don't need to be resolved for
12687 the large and medium models. */
12688 if ((ix86_cmodel == CM_LARGE_PIC
12689 || ix86_cmodel == CM_MEDIUM_PIC)
12690 && !SYMBOL_REF_EXTERNAL_P (op0))
12691 return true;
12693 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12694 && SYMBOL_REF_LOCAL_P (op0)
12695 && ix86_cmodel != CM_LARGE_PIC)
12696 return true;
12697 break;
12699 default:
12700 break;
12703 if (GET_CODE (disp) != CONST)
12704 return false;
12705 disp = XEXP (disp, 0);
12707 if (TARGET_64BIT)
12709 /* It is unsafe to allow PLUS expressions. This limits the allowed distance
12710 of GOT tables. We should not need these anyway. */
12711 if (GET_CODE (disp) != UNSPEC
12712 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12713 && XINT (disp, 1) != UNSPEC_GOTOFF
12714 && XINT (disp, 1) != UNSPEC_PCREL
12715 && XINT (disp, 1) != UNSPEC_PLTOFF))
12716 return false;
12718 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12719 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12720 return false;
12721 return true;
12724 saw_plus = false;
12725 if (GET_CODE (disp) == PLUS)
12727 if (!CONST_INT_P (XEXP (disp, 1)))
12728 return false;
12729 disp = XEXP (disp, 0);
12730 saw_plus = true;
12733 if (TARGET_MACHO && darwin_local_data_pic (disp))
12734 return true;
12736 if (GET_CODE (disp) != UNSPEC)
12737 return false;
12739 switch (XINT (disp, 1))
12741 case UNSPEC_GOT:
12742 if (saw_plus)
12743 return false;
12744 /* We need to check for both symbols and labels because VxWorks loads
12745 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12746 details. */
12747 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12748 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12749 case UNSPEC_GOTOFF:
12750 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12751 While the ABI also specifies a 32bit relocation, we don't produce it in
12752 the small PIC model at all. */
12753 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12754 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12755 && !TARGET_64BIT)
12756 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12757 return false;
12758 case UNSPEC_GOTTPOFF:
12759 case UNSPEC_GOTNTPOFF:
12760 case UNSPEC_INDNTPOFF:
12761 if (saw_plus)
12762 return false;
12763 disp = XVECEXP (disp, 0, 0);
12764 return (GET_CODE (disp) == SYMBOL_REF
12765 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12766 case UNSPEC_NTPOFF:
12767 disp = XVECEXP (disp, 0, 0);
12768 return (GET_CODE (disp) == SYMBOL_REF
12769 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12770 case UNSPEC_DTPOFF:
12771 disp = XVECEXP (disp, 0, 0);
12772 return (GET_CODE (disp) == SYMBOL_REF
12773 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12776 return false;
12779 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Reload the invalid
12780 parts of the address X in place where possible. The return value is
12781 true if the calling macro should goto WIN, false if it should not. */
12784 bool
12785 ix86_legitimize_reload_address (rtx x,
12786 enum machine_mode mode ATTRIBUTE_UNUSED,
12787 int opnum, int type,
12788 int ind_levels ATTRIBUTE_UNUSED)
12790 /* Reload can generate:
12792 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12793 (reg:DI 97))
12794 (reg:DI 2 cx))
12796 This RTX is rejected by ix86_legitimate_address_p due to
12797 non-strictness of base register 97. Following this rejection,
12798 reload pushes all three components into separate registers,
12799 creating an invalid memory address RTX.
12801 The following code reloads only the invalid part of the
12802 memory address RTX. */
12804 if (GET_CODE (x) == PLUS
12805 && REG_P (XEXP (x, 1))
12806 && GET_CODE (XEXP (x, 0)) == PLUS
12807 && REG_P (XEXP (XEXP (x, 0), 1)))
12809 rtx base, index;
12810 bool something_reloaded = false;
12812 base = XEXP (XEXP (x, 0), 1);
12813 if (!REG_OK_FOR_BASE_STRICT_P (base))
12815 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12816 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12817 opnum, (enum reload_type) type);
12818 something_reloaded = true;
12821 index = XEXP (x, 1);
12822 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12824 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12825 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12826 opnum, (enum reload_type) type);
12827 something_reloaded = true;
12830 gcc_assert (something_reloaded);
12831 return true;
12834 return false;
12837 /* Determine if OP is a suitable RTX for an address register.
12838 Return the naked register if a register or a register subreg is
12839 found, otherwise return NULL_RTX. */
12841 static rtx
12842 ix86_validate_address_register (rtx op)
12844 enum machine_mode mode = GET_MODE (op);
12846 /* Only SImode or DImode registers can form the address. */
12847 if (mode != SImode && mode != DImode)
12848 return NULL_RTX;
12850 if (REG_P (op))
12851 return op;
12852 else if (GET_CODE (op) == SUBREG)
12854 rtx reg = SUBREG_REG (op);
12856 if (!REG_P (reg))
12857 return NULL_RTX;
12859 mode = GET_MODE (reg);
12861 /* Don't allow SUBREGs that span more than a word. It can
12862 lead to spill failures when the register is one word out
12863 of a two word structure. */
12864 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12865 return NULL_RTX;
12867 /* Allow only SUBREGs of non-eliminable hard registers. */
12868 if (register_no_elim_operand (reg, mode))
12869 return reg;
12872 /* Op is not a register. */
12873 return NULL_RTX;
12876 /* Recognizes RTL expressions that are valid memory addresses for an
12877 instruction. The MODE argument is the machine mode for the MEM
12878 expression that wants to use this address.
12880 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12881 convert common non-canonical forms to canonical form so that they will
12882 be recognized. */
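/* For instance, a bare (ashift (reg) (const_int 2)) is rejected here, since
ix86_decompose_address returns -1 for a top-level ASHIFT, but
ix86_legitimize_address below canonicalizes it into the equivalent
(mult (reg) (const_int 4)), which is accepted. */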
12884 static bool
12885 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12886 rtx addr, bool strict)
12888 struct ix86_address parts;
12889 rtx base, index, disp;
12890 HOST_WIDE_INT scale;
12891 enum ix86_address_seg seg;
12893 if (ix86_decompose_address (addr, &parts) <= 0)
12894 /* Decomposition failed. */
12895 return false;
12897 base = parts.base;
12898 index = parts.index;
12899 disp = parts.disp;
12900 scale = parts.scale;
12901 seg = parts.seg;
12903 /* Validate base register. */
12904 if (base)
12906 rtx reg = ix86_validate_address_register (base);
12908 if (reg == NULL_RTX)
12909 return false;
12911 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12912 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12913 /* Base is not valid. */
12914 return false;
12917 /* Validate index register. */
12918 if (index)
12920 rtx reg = ix86_validate_address_register (index);
12922 if (reg == NULL_RTX)
12923 return false;
12925 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12926 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12927 /* Index is not valid. */
12928 return false;
12931 /* Index and base should have the same mode. */
12932 if (base && index
12933 && GET_MODE (base) != GET_MODE (index))
12934 return false;
12936 /* Address override works only on the (%reg) part of %fs:(%reg). */
12937 if (seg != SEG_DEFAULT
12938 && ((base && GET_MODE (base) != word_mode)
12939 || (index && GET_MODE (index) != word_mode)))
12940 return false;
12942 /* Validate scale factor. */
12943 if (scale != 1)
12945 if (!index)
12946 /* Scale without index. */
12947 return false;
12949 if (scale != 2 && scale != 4 && scale != 8)
12950 /* Scale is not a valid multiplier. */
12951 return false;
12954 /* Validate displacement. */
12955 if (disp)
12957 if (GET_CODE (disp) == CONST
12958 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12959 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12960 switch (XINT (XEXP (disp, 0), 1))
12962 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12963 used. While the ABI also specifies 32bit relocations, we don't produce
12964 them at all and use IP-relative addressing instead. */
12965 case UNSPEC_GOT:
12966 case UNSPEC_GOTOFF:
12967 gcc_assert (flag_pic);
12968 if (!TARGET_64BIT)
12969 goto is_legitimate_pic;
12971 /* 64bit address unspec. */
12972 return false;
12974 case UNSPEC_GOTPCREL:
12975 case UNSPEC_PCREL:
12976 gcc_assert (flag_pic);
12977 goto is_legitimate_pic;
12979 case UNSPEC_GOTTPOFF:
12980 case UNSPEC_GOTNTPOFF:
12981 case UNSPEC_INDNTPOFF:
12982 case UNSPEC_NTPOFF:
12983 case UNSPEC_DTPOFF:
12984 break;
12986 case UNSPEC_STACK_CHECK:
12987 gcc_assert (flag_split_stack);
12988 break;
12990 default:
12991 /* Invalid address unspec. */
12992 return false;
12995 else if (SYMBOLIC_CONST (disp)
12996 && (flag_pic
12997 || (TARGET_MACHO
12998 #if TARGET_MACHO
12999 && MACHOPIC_INDIRECT
13000 && !machopic_operand_p (disp)
13001 #endif
13005 is_legitimate_pic:
13006 if (TARGET_64BIT && (index || base))
13008 /* foo@dtpoff(%rX) is ok. */
13009 if (GET_CODE (disp) != CONST
13010 || GET_CODE (XEXP (disp, 0)) != PLUS
13011 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13012 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13013 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13014 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13015 /* Non-constant pic memory reference. */
13016 return false;
13018 else if ((!TARGET_MACHO || flag_pic)
13019 && ! legitimate_pic_address_disp_p (disp))
13020 /* Displacement is an invalid pic construct. */
13021 return false;
13022 #if TARGET_MACHO
13023 else if (MACHO_DYNAMIC_NO_PIC_P
13024 && !ix86_legitimate_constant_p (Pmode, disp))
13025 /* displacement must be referenced via non_lazy_pointer */
13026 return false;
13027 #endif
13029 /* This code used to verify that a symbolic pic displacement
13030 includes the pic_offset_table_rtx register.
13032 While this is a good idea, unfortunately these constructs may
13033 be created by "adds using lea" optimization for incorrect
13034 code like:
13036 int a;
13037 int foo(int i)
13039 return *(&a+i);
13042 This code is nonsensical, but results in addressing the
13043 GOT table with pic_offset_table_rtx as the base. We can't
13044 just refuse it easily, since it gets matched by the
13045 "addsi3" pattern, which later gets split to lea in case
13046 the output register differs from the input. While this
13047 could be handled by a separate addsi pattern for this case
13048 that never results in lea, disabling this test seems to be
13049 the easier and correct fix for the crash. */
13051 else if (GET_CODE (disp) != LABEL_REF
13052 && !CONST_INT_P (disp)
13053 && (GET_CODE (disp) != CONST
13054 || !ix86_legitimate_constant_p (Pmode, disp))
13055 && (GET_CODE (disp) != SYMBOL_REF
13056 || !ix86_legitimate_constant_p (Pmode, disp)))
13057 /* Displacement is not constant. */
13058 return false;
13059 else if (TARGET_64BIT
13060 && !x86_64_immediate_operand (disp, VOIDmode))
13061 /* Displacement is out of range. */
13062 return false;
13063 /* In x32 mode, constant addresses are sign extended to 64bit, so
13064 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13065 else if (TARGET_X32 && !(index || base)
13066 && CONST_INT_P (disp)
13067 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13068 return false;
13071 /* Everything looks valid. */
13072 return true;
13075 /* Determine if a given RTX is a valid constant address. */
13077 bool
13078 constant_address_p (rtx x)
13080 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13083 /* Return a unique alias set for the GOT. */
13085 static alias_set_type
13086 ix86_GOT_alias_set (void)
13088 static alias_set_type set = -1;
13089 if (set == -1)
13090 set = new_alias_set ();
13091 return set;
13094 /* Return a legitimate reference for ORIG (an address) using the
13095 register REG. If REG is 0, a new pseudo is generated.
13097 There are two types of references that must be handled:
13099 1. Global data references must load the address from the GOT, via
13100 the PIC reg. An insn is emitted to do this load, and the reg is
13101 returned.
13103 2. Static data references, constant pool addresses, and code labels
13104 compute the address as an offset from the GOT, whose base is in
13105 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13106 differentiate them from global data objects. The returned
13107 address is the PIC reg + an unspec constant.
13109 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13110 reg also appears in the address. */
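/* E.g. on 32-bit targets a reference to global data "x" becomes a load from
(mem (plus pic_offset_table_rtx (const (unspec [(symbol_ref "x")] UNSPEC_GOT)))),
i.e. x@GOT(%ebx), while local data is addressed directly as
pic_offset_table_rtx plus an UNSPEC_GOTOFF constant. */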
13112 static rtx
13113 legitimize_pic_address (rtx orig, rtx reg)
13115 rtx addr = orig;
13116 rtx new_rtx = orig;
13118 #if TARGET_MACHO
13119 if (TARGET_MACHO && !TARGET_64BIT)
13121 if (reg == 0)
13122 reg = gen_reg_rtx (Pmode);
13123 /* Use the generic Mach-O PIC machinery. */
13124 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13126 #endif
13128 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13130 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13131 if (tmp)
13132 return tmp;
13135 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13136 new_rtx = addr;
13137 else if (TARGET_64BIT && !TARGET_PECOFF
13138 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13140 rtx tmpreg;
13141 /* This symbol may be referenced via a displacement from the PIC
13142 base address (@GOTOFF). */
13144 if (reload_in_progress)
13145 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13146 if (GET_CODE (addr) == CONST)
13147 addr = XEXP (addr, 0);
13148 if (GET_CODE (addr) == PLUS)
13150 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13151 UNSPEC_GOTOFF);
13152 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13154 else
13155 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13156 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13157 if (!reg)
13158 tmpreg = gen_reg_rtx (Pmode);
13159 else
13160 tmpreg = reg;
13161 emit_move_insn (tmpreg, new_rtx);
13163 if (reg != 0)
13165 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13166 tmpreg, 1, OPTAB_DIRECT);
13167 new_rtx = reg;
13169 else
13170 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13172 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13174 /* This symbol may be referenced via a displacement from the PIC
13175 base address (@GOTOFF). */
13177 if (reload_in_progress)
13178 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13179 if (GET_CODE (addr) == CONST)
13180 addr = XEXP (addr, 0);
13181 if (GET_CODE (addr) == PLUS)
13183 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13184 UNSPEC_GOTOFF);
13185 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13187 else
13188 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13189 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13190 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13192 if (reg != 0)
13194 emit_move_insn (reg, new_rtx);
13195 new_rtx = reg;
13198 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13199 /* We can't use @GOTOFF for text labels on VxWorks;
13200 see gotoff_operand. */
13201 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13203 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13204 if (tmp)
13205 return tmp;
13207 /* For x64 PE-COFF there is no GOT table, so we use the address
13208 directly. */
13209 if (TARGET_64BIT && TARGET_PECOFF)
13211 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13212 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13214 if (reg == 0)
13215 reg = gen_reg_rtx (Pmode);
13216 emit_move_insn (reg, new_rtx);
13217 new_rtx = reg;
13219 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13221 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13222 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13223 new_rtx = gen_const_mem (Pmode, new_rtx);
13224 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13226 if (reg == 0)
13227 reg = gen_reg_rtx (Pmode);
13228 /* Use gen_movsi directly; otherwise the address is loaded
13229 into a register for CSE. We don't want to CSE these addresses;
13230 instead we CSE addresses from the GOT table, so skip this. */
13231 emit_insn (gen_movsi (reg, new_rtx));
13232 new_rtx = reg;
13234 else
13236 /* This symbol must be referenced via a load from the
13237 Global Offset Table (@GOT). */
13239 if (reload_in_progress)
13240 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13241 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13242 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13243 if (TARGET_64BIT)
13244 new_rtx = force_reg (Pmode, new_rtx);
13245 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13246 new_rtx = gen_const_mem (Pmode, new_rtx);
13247 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13249 if (reg == 0)
13250 reg = gen_reg_rtx (Pmode);
13251 emit_move_insn (reg, new_rtx);
13252 new_rtx = reg;
13255 else
13257 if (CONST_INT_P (addr)
13258 && !x86_64_immediate_operand (addr, VOIDmode))
13260 if (reg)
13262 emit_move_insn (reg, addr);
13263 new_rtx = reg;
13265 else
13266 new_rtx = force_reg (Pmode, addr);
13268 else if (GET_CODE (addr) == CONST)
13270 addr = XEXP (addr, 0);
13272 /* We must match stuff we generate before. Assume the only
13273 unspecs that can get here are ours. Not that we could do
13274 anything with them anyway.... */
13275 if (GET_CODE (addr) == UNSPEC
13276 || (GET_CODE (addr) == PLUS
13277 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13278 return orig;
13279 gcc_assert (GET_CODE (addr) == PLUS);
13281 if (GET_CODE (addr) == PLUS)
13283 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13285 /* Check first to see if this is a constant offset from a @GOTOFF
13286 symbol reference. */
13287 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13288 && CONST_INT_P (op1))
13290 if (!TARGET_64BIT)
13292 if (reload_in_progress)
13293 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13294 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13295 UNSPEC_GOTOFF);
13296 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13297 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13298 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13300 if (reg != 0)
13302 emit_move_insn (reg, new_rtx);
13303 new_rtx = reg;
13306 else
13308 if (INTVAL (op1) < -16*1024*1024
13309 || INTVAL (op1) >= 16*1024*1024)
13311 if (!x86_64_immediate_operand (op1, Pmode))
13312 op1 = force_reg (Pmode, op1);
13313 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13317 else
13319 rtx base = legitimize_pic_address (op0, reg);
13320 enum machine_mode mode = GET_MODE (base);
13321 new_rtx
13322 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13324 if (CONST_INT_P (new_rtx))
13326 if (INTVAL (new_rtx) < -16*1024*1024
13327 || INTVAL (new_rtx) >= 16*1024*1024)
13329 if (!x86_64_immediate_operand (new_rtx, mode))
13330 new_rtx = force_reg (mode, new_rtx);
13331 new_rtx
13332 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13334 else
13335 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13337 else
13339 if (GET_CODE (new_rtx) == PLUS
13340 && CONSTANT_P (XEXP (new_rtx, 1)))
13342 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13343 new_rtx = XEXP (new_rtx, 1);
13345 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13350 return new_rtx;
13353 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13355 static rtx
13356 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13358 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13360 if (GET_MODE (tp) != tp_mode)
13362 gcc_assert (GET_MODE (tp) == SImode);
13363 gcc_assert (tp_mode == DImode);
13365 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13368 if (to_reg)
13369 tp = copy_to_mode_reg (tp_mode, tp);
13371 return tp;
13374 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13376 static GTY(()) rtx ix86_tls_symbol;
13378 static rtx
13379 ix86_tls_get_addr (void)
13381 if (!ix86_tls_symbol)
13383 const char *sym
13384 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13385 ? "___tls_get_addr" : "__tls_get_addr");
13387 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13390 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13392 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13393 UNSPEC_PLTOFF);
13394 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13395 gen_rtx_CONST (Pmode, unspec));
13398 return ix86_tls_symbol;
13401 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13403 static GTY(()) rtx ix86_tls_module_base_symbol;
13406 ix86_tls_module_base (void)
13408 if (!ix86_tls_module_base_symbol)
13410 ix86_tls_module_base_symbol
13411 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13413 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13414 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13417 return ix86_tls_module_base_symbol;
13420 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13421 false if we expect this to be used for a memory address and true if
13422 we expect to load the address into a register. */
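/* As a rough example, for the local-exec model on 64-bit GNU targets this
returns (plus (unspec [(const_int 0)] UNSPEC_TP)
(const (unspec [(symbol_ref "x")] UNSPEC_NTPOFF))), which is later printed
as an %fs-relative x@tpoff access. */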
13424 static rtx
13425 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13427 rtx dest, base, off;
13428 rtx pic = NULL_RTX, tp = NULL_RTX;
13429 enum machine_mode tp_mode = Pmode;
13430 int type;
13432 /* Fall back to the global dynamic model if the toolchain cannot support local
13433 dynamic. */
13434 if (TARGET_SUN_TLS && !TARGET_64BIT
13435 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13436 && model == TLS_MODEL_LOCAL_DYNAMIC)
13437 model = TLS_MODEL_GLOBAL_DYNAMIC;
13439 switch (model)
13441 case TLS_MODEL_GLOBAL_DYNAMIC:
13442 dest = gen_reg_rtx (Pmode);
13444 if (!TARGET_64BIT)
13446 if (flag_pic && !TARGET_PECOFF)
13447 pic = pic_offset_table_rtx;
13448 else
13450 pic = gen_reg_rtx (Pmode);
13451 emit_insn (gen_set_got (pic));
13455 if (TARGET_GNU2_TLS)
13457 if (TARGET_64BIT)
13458 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13459 else
13460 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13462 tp = get_thread_pointer (Pmode, true);
13463 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13465 if (GET_MODE (x) != Pmode)
13466 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13468 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13470 else
13472 rtx caddr = ix86_tls_get_addr ();
13474 if (TARGET_64BIT)
13476 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13477 rtx insns;
13479 start_sequence ();
13480 emit_call_insn
13481 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13482 insns = get_insns ();
13483 end_sequence ();
13485 if (GET_MODE (x) != Pmode)
13486 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13488 RTL_CONST_CALL_P (insns) = 1;
13489 emit_libcall_block (insns, dest, rax, x);
13491 else
13492 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13494 break;
13496 case TLS_MODEL_LOCAL_DYNAMIC:
13497 base = gen_reg_rtx (Pmode);
13499 if (!TARGET_64BIT)
13501 if (flag_pic)
13502 pic = pic_offset_table_rtx;
13503 else
13505 pic = gen_reg_rtx (Pmode);
13506 emit_insn (gen_set_got (pic));
13510 if (TARGET_GNU2_TLS)
13512 rtx tmp = ix86_tls_module_base ();
13514 if (TARGET_64BIT)
13515 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13516 else
13517 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13519 tp = get_thread_pointer (Pmode, true);
13520 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13521 gen_rtx_MINUS (Pmode, tmp, tp));
13523 else
13525 rtx caddr = ix86_tls_get_addr ();
13527 if (TARGET_64BIT)
13529 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13530 rtx insns, eqv;
13532 start_sequence ();
13533 emit_call_insn
13534 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13535 insns = get_insns ();
13536 end_sequence ();
13538 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13539 share the LD_BASE result with other LD model accesses. */
13540 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13541 UNSPEC_TLS_LD_BASE);
13543 RTL_CONST_CALL_P (insns) = 1;
13544 emit_libcall_block (insns, base, rax, eqv);
13546 else
13547 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13550 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13551 off = gen_rtx_CONST (Pmode, off);
13553 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13555 if (TARGET_GNU2_TLS)
13557 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13559 if (GET_MODE (x) != Pmode)
13560 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13562 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13564 break;
13566 case TLS_MODEL_INITIAL_EXEC:
13567 if (TARGET_64BIT)
13569 if (TARGET_SUN_TLS && !TARGET_X32)
13571 /* The Sun linker took the AMD64 TLS spec literally
13572 and can only handle %rax as destination of the
13573 initial executable code sequence. */
13575 dest = gen_reg_rtx (DImode);
13576 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13577 return dest;
13580 /* Generate DImode references to avoid %fs:(%reg32)
13581 problems and linker IE->LE relaxation bug. */
13582 tp_mode = DImode;
13583 pic = NULL;
13584 type = UNSPEC_GOTNTPOFF;
13586 else if (flag_pic)
13588 if (reload_in_progress)
13589 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13590 pic = pic_offset_table_rtx;
13591 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13593 else if (!TARGET_ANY_GNU_TLS)
13595 pic = gen_reg_rtx (Pmode);
13596 emit_insn (gen_set_got (pic));
13597 type = UNSPEC_GOTTPOFF;
13599 else
13601 pic = NULL;
13602 type = UNSPEC_INDNTPOFF;
13605 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13606 off = gen_rtx_CONST (tp_mode, off);
13607 if (pic)
13608 off = gen_rtx_PLUS (tp_mode, pic, off);
13609 off = gen_const_mem (tp_mode, off);
13610 set_mem_alias_set (off, ix86_GOT_alias_set ());
13612 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13614 base = get_thread_pointer (tp_mode,
13615 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13616 off = force_reg (tp_mode, off);
13617 return gen_rtx_PLUS (tp_mode, base, off);
13619 else
13621 base = get_thread_pointer (Pmode, true);
13622 dest = gen_reg_rtx (Pmode);
13623 emit_insn (ix86_gen_sub3 (dest, base, off));
13625 break;
13627 case TLS_MODEL_LOCAL_EXEC:
13628 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13629 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13630 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13631 off = gen_rtx_CONST (Pmode, off);
13633 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13635 base = get_thread_pointer (Pmode,
13636 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13637 return gen_rtx_PLUS (Pmode, base, off);
13639 else
13641 base = get_thread_pointer (Pmode, true);
13642 dest = gen_reg_rtx (Pmode);
13643 emit_insn (ix86_gen_sub3 (dest, base, off));
13645 break;
13647 default:
13648 gcc_unreachable ();
13651 return dest;
13654 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13655 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13656 unique refptr-DECL symbol corresponding to symbol DECL. */
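/* For a dllimport symbol foo this builds an artificial read-only VAR_DECL
whose DECL_RTL is (mem (symbol_ref "*__imp_foo")) (or "*__imp__foo" when a
user label prefix is in use), so references to foo go through the import
pointer. */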
13658 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13659 htab_t dllimport_map;
13661 static tree
13662 get_dllimport_decl (tree decl, bool beimport)
13664 struct tree_map *h, in;
13665 void **loc;
13666 const char *name;
13667 const char *prefix;
13668 size_t namelen, prefixlen;
13669 char *imp_name;
13670 tree to;
13671 rtx rtl;
13673 if (!dllimport_map)
13674 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13676 in.hash = htab_hash_pointer (decl);
13677 in.base.from = decl;
13678 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13679 h = (struct tree_map *) *loc;
13680 if (h)
13681 return h->to;
13683 *loc = h = ggc_alloc<tree_map> ();
13684 h->hash = in.hash;
13685 h->base.from = decl;
13686 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13687 VAR_DECL, NULL, ptr_type_node);
13688 DECL_ARTIFICIAL (to) = 1;
13689 DECL_IGNORED_P (to) = 1;
13690 DECL_EXTERNAL (to) = 1;
13691 TREE_READONLY (to) = 1;
13693 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13694 name = targetm.strip_name_encoding (name);
13695 if (beimport)
13696 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13697 ? "*__imp_" : "*__imp__";
13698 else
13699 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13700 namelen = strlen (name);
13701 prefixlen = strlen (prefix);
13702 imp_name = (char *) alloca (namelen + prefixlen + 1);
13703 memcpy (imp_name, prefix, prefixlen);
13704 memcpy (imp_name + prefixlen, name, namelen + 1);
13706 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13707 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13708 SET_SYMBOL_REF_DECL (rtl, to);
13709 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13710 if (!beimport)
13712 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13713 #ifdef SUB_TARGET_RECORD_STUB
13714 SUB_TARGET_RECORD_STUB (name);
13715 #endif
13718 rtl = gen_const_mem (Pmode, rtl);
13719 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13721 SET_DECL_RTL (to, rtl);
13722 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13724 return to;
13727 /* Expand SYMBOL into its corresponding far-address symbol.
13728 WANT_REG is true if we require the result be a register. */
13730 static rtx
13731 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13733 tree imp_decl;
13734 rtx x;
13736 gcc_assert (SYMBOL_REF_DECL (symbol));
13737 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13739 x = DECL_RTL (imp_decl);
13740 if (want_reg)
13741 x = force_reg (Pmode, x);
13742 return x;
13745 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13746 true if we require the result be a register. */
13748 static rtx
13749 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13751 tree imp_decl;
13752 rtx x;
13754 gcc_assert (SYMBOL_REF_DECL (symbol));
13755 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13757 x = DECL_RTL (imp_decl);
13758 if (want_reg)
13759 x = force_reg (Pmode, x);
13760 return x;
13763 /* Expand ADDR into its corresponding dllimport or refptr symbol. INREG
13764 is true if we require the result be a register. */
13766 static rtx
13767 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13769 if (!TARGET_PECOFF)
13770 return NULL_RTX;
13772 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13774 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13775 return legitimize_dllimport_symbol (addr, inreg);
13776 if (GET_CODE (addr) == CONST
13777 && GET_CODE (XEXP (addr, 0)) == PLUS
13778 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13779 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13781 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13782 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13786 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13787 return NULL_RTX;
13788 if (GET_CODE (addr) == SYMBOL_REF
13789 && !is_imported_p (addr)
13790 && SYMBOL_REF_EXTERNAL_P (addr)
13791 && SYMBOL_REF_DECL (addr))
13792 return legitimize_pe_coff_extern_decl (addr, inreg);
13794 if (GET_CODE (addr) == CONST
13795 && GET_CODE (XEXP (addr, 0)) == PLUS
13796 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13797 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13798 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13799 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13801 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13802 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13804 return NULL_RTX;
13807 /* Try machine-dependent ways of modifying an illegitimate address
13808 to be legitimate. If we find one, return the new, valid address.
13809 This macro is used in only one place: `memory_address' in explow.c.
13811 OLDX is the address as it was before break_out_memory_refs was called.
13812 In some cases it is useful to look at this to decide what needs to be done.
13814 It is always safe for this macro to do nothing. It exists to recognize
13815 opportunities to optimize the output.
13817 For the 80386, we handle X+REG by loading X into a register R and
13818 using R+REG. R will go in a general reg and indexing will be used.
13819 However, if REG is a broken-out memory address or multiplication,
13820 nothing needs to be done because REG can certainly go in a general reg.
13822 When -fpic is used, special handling is needed for symbolic references.
13823 See comments by legitimize_pic_address in i386.c for details. */
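/* For example, given (plus (mem ...) (reg)), the force_operand calls near
the end of this function load the non-register operand into a fresh pseudo,
yielding the directly addressable form (plus (reg) (reg)). */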
13825 static rtx
13826 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
13827 enum machine_mode mode)
13829 int changed = 0;
13830 unsigned log;
13832 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13833 if (log)
13834 return legitimize_tls_address (x, (enum tls_model) log, false);
13835 if (GET_CODE (x) == CONST
13836 && GET_CODE (XEXP (x, 0)) == PLUS
13837 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13838 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13840 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13841 (enum tls_model) log, false);
13842 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13845 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13847 rtx tmp = legitimize_pe_coff_symbol (x, true);
13848 if (tmp)
13849 return tmp;
13852 if (flag_pic && SYMBOLIC_CONST (x))
13853 return legitimize_pic_address (x, 0);
13855 #if TARGET_MACHO
13856 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13857 return machopic_indirect_data_reference (x, 0);
13858 #endif
13860 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13861 if (GET_CODE (x) == ASHIFT
13862 && CONST_INT_P (XEXP (x, 1))
13863 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13865 changed = 1;
13866 log = INTVAL (XEXP (x, 1));
13867 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13868 GEN_INT (1 << log));
13871 if (GET_CODE (x) == PLUS)
13873 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13875 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13876 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13877 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13879 changed = 1;
13880 log = INTVAL (XEXP (XEXP (x, 0), 1));
13881 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13882 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13883 GEN_INT (1 << log));
13886 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13887 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13888 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13890 changed = 1;
13891 log = INTVAL (XEXP (XEXP (x, 1), 1));
13892 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13893 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13894 GEN_INT (1 << log));
13897 /* Put multiply first if it isn't already. */
13898 if (GET_CODE (XEXP (x, 1)) == MULT)
13900 rtx tmp = XEXP (x, 0);
13901 XEXP (x, 0) = XEXP (x, 1);
13902 XEXP (x, 1) = tmp;
13903 changed = 1;
13906 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13907 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13908 created by virtual register instantiation, register elimination, and
13909 similar optimizations. */
13910 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13912 changed = 1;
13913 x = gen_rtx_PLUS (Pmode,
13914 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13915 XEXP (XEXP (x, 1), 0)),
13916 XEXP (XEXP (x, 1), 1));
13919 /* Canonicalize
13920 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13921 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13922 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13923 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13924 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13925 && CONSTANT_P (XEXP (x, 1)))
13927 rtx constant;
13928 rtx other = NULL_RTX;
13930 if (CONST_INT_P (XEXP (x, 1)))
13932 constant = XEXP (x, 1);
13933 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13935 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13937 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13938 other = XEXP (x, 1);
13940 else
13941 constant = 0;
13943 if (constant)
13945 changed = 1;
13946 x = gen_rtx_PLUS (Pmode,
13947 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13948 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13949 plus_constant (Pmode, other,
13950 INTVAL (constant)));
13954 if (changed && ix86_legitimate_address_p (mode, x, false))
13955 return x;
13957 if (GET_CODE (XEXP (x, 0)) == MULT)
13959 changed = 1;
13960 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
13963 if (GET_CODE (XEXP (x, 1)) == MULT)
13965 changed = 1;
13966 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
13969 if (changed
13970 && REG_P (XEXP (x, 1))
13971 && REG_P (XEXP (x, 0)))
13972 return x;
13974 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13976 changed = 1;
13977 x = legitimize_pic_address (x, 0);
13980 if (changed && ix86_legitimate_address_p (mode, x, false))
13981 return x;
13983 if (REG_P (XEXP (x, 0)))
13985 rtx temp = gen_reg_rtx (Pmode);
13986 rtx val = force_operand (XEXP (x, 1), temp);
13987 if (val != temp)
13989 val = convert_to_mode (Pmode, val, 1);
13990 emit_move_insn (temp, val);
13993 XEXP (x, 1) = temp;
13994 return x;
13997 else if (REG_P (XEXP (x, 1)))
13999 rtx temp = gen_reg_rtx (Pmode);
14000 rtx val = force_operand (XEXP (x, 0), temp);
14001 if (val != temp)
14003 val = convert_to_mode (Pmode, val, 1);
14004 emit_move_insn (temp, val);
14007 XEXP (x, 0) = temp;
14008 return x;
14012 return x;
14015 /* Print an integer constant expression in assembler syntax. Addition
14016 and subtraction are the only arithmetic that may appear in these
14017 expressions. FILE is the stdio stream to write to, X is the rtx, and
14018 CODE is the operand print code from the output string. */
14020 static void
14021 output_pic_addr_const (FILE *file, rtx x, int code)
14023 char buf[256];
14025 switch (GET_CODE (x))
14027 case PC:
14028 gcc_assert (flag_pic);
14029 putc ('.', file);
14030 break;
14032 case SYMBOL_REF:
14033 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14034 output_addr_const (file, x);
14035 else
14037 const char *name = XSTR (x, 0);
14039 /* Mark the decl as referenced so that cgraph will
14040 output the function. */
14041 if (SYMBOL_REF_DECL (x))
14042 mark_decl_referenced (SYMBOL_REF_DECL (x));
14044 #if TARGET_MACHO
14045 if (MACHOPIC_INDIRECT
14046 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14047 name = machopic_indirection_name (x, /*stub_p=*/true);
14048 #endif
14049 assemble_name (file, name);
14051 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14052 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14053 fputs ("@PLT", file);
14054 break;
14056 case LABEL_REF:
14057 x = XEXP (x, 0);
14058 /* FALLTHRU */
14059 case CODE_LABEL:
14060 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14061 assemble_name (asm_out_file, buf);
14062 break;
14064 case CONST_INT:
14065 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14066 break;
14068 case CONST:
14069 /* This used to output parentheses around the expression,
14070 but that does not work on the 386 (either ATT or BSD assembler). */
14071 output_pic_addr_const (file, XEXP (x, 0), code);
14072 break;
14074 case CONST_DOUBLE:
14075 if (GET_MODE (x) == VOIDmode)
14077 /* We can use %d if the number is <32 bits and positive. */
14078 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14079 fprintf (file, "0x%lx%08lx",
14080 (unsigned long) CONST_DOUBLE_HIGH (x),
14081 (unsigned long) CONST_DOUBLE_LOW (x));
14082 else
14083 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14085 else
14086 /* We can't handle floating point constants;
14087 TARGET_PRINT_OPERAND must handle them. */
14088 output_operand_lossage ("floating constant misused");
14089 break;
14091 case PLUS:
14092 /* Some assemblers need integer constants to appear first. */
14093 if (CONST_INT_P (XEXP (x, 0)))
14095 output_pic_addr_const (file, XEXP (x, 0), code);
14096 putc ('+', file);
14097 output_pic_addr_const (file, XEXP (x, 1), code);
14099 else
14101 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14102 output_pic_addr_const (file, XEXP (x, 1), code);
14103 putc ('+', file);
14104 output_pic_addr_const (file, XEXP (x, 0), code);
14106 break;
14108 case MINUS:
14109 if (!TARGET_MACHO)
14110 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14111 output_pic_addr_const (file, XEXP (x, 0), code);
14112 putc ('-', file);
14113 output_pic_addr_const (file, XEXP (x, 1), code);
14114 if (!TARGET_MACHO)
14115 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14116 break;
14118 case UNSPEC:
14119 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14121 bool f = i386_asm_output_addr_const_extra (file, x);
14122 gcc_assert (f);
14123 break;
14126 gcc_assert (XVECLEN (x, 0) == 1);
14127 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14128 switch (XINT (x, 1))
14130 case UNSPEC_GOT:
14131 fputs ("@GOT", file);
14132 break;
14133 case UNSPEC_GOTOFF:
14134 fputs ("@GOTOFF", file);
14135 break;
14136 case UNSPEC_PLTOFF:
14137 fputs ("@PLTOFF", file);
14138 break;
14139 case UNSPEC_PCREL:
14140 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14141 "(%rip)" : "[rip]", file);
14142 break;
14143 case UNSPEC_GOTPCREL:
14144 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14145 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14146 break;
14147 case UNSPEC_GOTTPOFF:
14148 /* FIXME: This might be @TPOFF in Sun ld too. */
14149 fputs ("@gottpoff", file);
14150 break;
14151 case UNSPEC_TPOFF:
14152 fputs ("@tpoff", file);
14153 break;
14154 case UNSPEC_NTPOFF:
14155 if (TARGET_64BIT)
14156 fputs ("@tpoff", file);
14157 else
14158 fputs ("@ntpoff", file);
14159 break;
14160 case UNSPEC_DTPOFF:
14161 fputs ("@dtpoff", file);
14162 break;
14163 case UNSPEC_GOTNTPOFF:
14164 if (TARGET_64BIT)
14165 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14166 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14167 else
14168 fputs ("@gotntpoff", file);
14169 break;
14170 case UNSPEC_INDNTPOFF:
14171 fputs ("@indntpoff", file);
14172 break;
14173 #if TARGET_MACHO
14174 case UNSPEC_MACHOPIC_OFFSET:
14175 putc ('-', file);
14176 machopic_output_function_base_name (file);
14177 break;
14178 #endif
14179 default:
14180 output_operand_lossage ("invalid UNSPEC as operand");
14181 break;
14183 break;
14185 default:
14186 output_operand_lossage ("invalid expression as operand");
14190 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14191 We need to emit DTP-relative relocations. */
14193 static void ATTRIBUTE_UNUSED
14194 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14196 fputs (ASM_LONG, file);
14197 output_addr_const (file, x);
14198 fputs ("@dtpoff", file);
14199 switch (size)
14201 case 4:
14202 break;
14203 case 8:
14204 fputs (", 0", file);
14205 break;
14206 default:
14207 gcc_unreachable ();
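/* For example, for a 4-byte entry against symbol x this emits ".long x@dtpoff";
for an 8-byte entry it emits ".long x@dtpoff, 0", zero-filling the upper half
(assuming ASM_LONG expands to the usual .long directive). */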
14211 /* Return true if X is a representation of the PIC register. This copes
14212 with calls from ix86_find_base_term, where the register might have
14213 been replaced by a cselib value. */
14215 static bool
14216 ix86_pic_register_p (rtx x)
14218 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14219 return (pic_offset_table_rtx
14220 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14221 else
14222 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14225 /* Helper function for ix86_delegitimize_address.
14226 Attempt to delegitimize TLS local-exec accesses. */
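/* That is, given RTL for an access such as %fs:x@tpoff(%reg), this recovers
the underlying (symbol_ref "x"), re-adding any base and index terms, so
that debug output and dumps show the symbol instead of the TLS unspec. */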
14228 static rtx
14229 ix86_delegitimize_tls_address (rtx orig_x)
14231 rtx x = orig_x, unspec;
14232 struct ix86_address addr;
14234 if (!TARGET_TLS_DIRECT_SEG_REFS)
14235 return orig_x;
14236 if (MEM_P (x))
14237 x = XEXP (x, 0);
14238 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14239 return orig_x;
14240 if (ix86_decompose_address (x, &addr) == 0
14241 || addr.seg != DEFAULT_TLS_SEG_REG
14242 || addr.disp == NULL_RTX
14243 || GET_CODE (addr.disp) != CONST)
14244 return orig_x;
14245 unspec = XEXP (addr.disp, 0);
14246 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14247 unspec = XEXP (unspec, 0);
14248 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14249 return orig_x;
14250 x = XVECEXP (unspec, 0, 0);
14251 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14252 if (unspec != XEXP (addr.disp, 0))
14253 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14254 if (addr.index)
14256 rtx idx = addr.index;
14257 if (addr.scale != 1)
14258 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14259 x = gen_rtx_PLUS (Pmode, idx, x);
14261 if (addr.base)
14262 x = gen_rtx_PLUS (Pmode, addr.base, x);
14263 if (MEM_P (orig_x))
14264 x = replace_equiv_address_nv (orig_x, x);
14265 return x;
14268 /* In the name of slightly smaller debug output, and to cater to
14269 general assembler lossage, recognize PIC+GOTOFF and turn it back
14270 into a direct symbol reference.
14272 On Darwin, this is necessary to avoid a crash, because Darwin
14273 has a different PIC label for each routine but the DWARF debugging
14274 information is not associated with any particular routine, so it's
14275 necessary to remove references to the PIC label from RTL stored by
14276 the DWARF output code. */
14278 static rtx
14279 ix86_delegitimize_address (rtx x)
14281 rtx orig_x = delegitimize_mem_from_attrs (x);
14282 /* addend is NULL or some rtx if x is something+GOTOFF where
14283 something doesn't include the PIC register. */
14284 rtx addend = NULL_RTX;
14285 /* reg_addend is NULL or a multiple of some register. */
14286 rtx reg_addend = NULL_RTX;
14287 /* const_addend is NULL or a const_int. */
14288 rtx const_addend = NULL_RTX;
14289 /* This is the result, or NULL. */
14290 rtx result = NULL_RTX;
14292 x = orig_x;
14294 if (MEM_P (x))
14295 x = XEXP (x, 0);
14297 if (TARGET_64BIT)
14299 if (GET_CODE (x) == CONST
14300 && GET_CODE (XEXP (x, 0)) == PLUS
14301 && GET_MODE (XEXP (x, 0)) == Pmode
14302 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14303 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14304 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14306 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14307 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14308 if (MEM_P (orig_x))
14309 x = replace_equiv_address_nv (orig_x, x);
14310 return x;
14313 if (GET_CODE (x) == CONST
14314 && GET_CODE (XEXP (x, 0)) == UNSPEC
14315 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14316 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14317 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14319 x = XVECEXP (XEXP (x, 0), 0, 0);
14320 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14322 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14323 GET_MODE (x), 0);
14324 if (x == NULL_RTX)
14325 return orig_x;
14327 return x;
14330 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14331 return ix86_delegitimize_tls_address (orig_x);
14333 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14334 and -mcmodel=medium -fpic. */
14337 if (GET_CODE (x) != PLUS
14338 || GET_CODE (XEXP (x, 1)) != CONST)
14339 return ix86_delegitimize_tls_address (orig_x);
14341 if (ix86_pic_register_p (XEXP (x, 0)))
14342 /* %ebx + GOT/GOTOFF */
14344 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14346 /* %ebx + %reg * scale + GOT/GOTOFF */
14347 reg_addend = XEXP (x, 0);
14348 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14349 reg_addend = XEXP (reg_addend, 1);
14350 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14351 reg_addend = XEXP (reg_addend, 0);
14352 else
14354 reg_addend = NULL_RTX;
14355 addend = XEXP (x, 0);
14358 else
14359 addend = XEXP (x, 0);
14361 x = XEXP (XEXP (x, 1), 0);
14362 if (GET_CODE (x) == PLUS
14363 && CONST_INT_P (XEXP (x, 1)))
14365 const_addend = XEXP (x, 1);
14366 x = XEXP (x, 0);
14369 if (GET_CODE (x) == UNSPEC
14370 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14371 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14372 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14373 && !MEM_P (orig_x) && !addend)))
14374 result = XVECEXP (x, 0, 0);
14376 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14377 && !MEM_P (orig_x))
14378 result = XVECEXP (x, 0, 0);
14380 if (! result)
14381 return ix86_delegitimize_tls_address (orig_x);
14383 if (const_addend)
14384 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14385 if (reg_addend)
14386 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14387 if (addend)
14389 /* If the rest of original X doesn't involve the PIC register, add
14390 addend and subtract pic_offset_table_rtx. This can happen e.g.
14391 for code like:
14392 leal (%ebx, %ecx, 4), %ecx
14394 movl foo@GOTOFF(%ecx), %edx
14395 in which case we return (%ecx - %ebx) + foo. */
14396 if (pic_offset_table_rtx)
14397 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14398 pic_offset_table_rtx),
14399 result);
14400 else
14401 return orig_x;
14403 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14405 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14406 if (result == NULL_RTX)
14407 return orig_x;
14409 return result;
14412 /* If X is a machine specific address (i.e. a symbol or label being
14413 referenced as a displacement from the GOT implemented using an
14414 UNSPEC), then return the base term. Otherwise return X. */
14417 ix86_find_base_term (rtx x)
14419 rtx term;
14421 if (TARGET_64BIT)
14423 if (GET_CODE (x) != CONST)
14424 return x;
14425 term = XEXP (x, 0);
14426 if (GET_CODE (term) == PLUS
14427 && (CONST_INT_P (XEXP (term, 1))
14428 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14429 term = XEXP (term, 0);
14430 if (GET_CODE (term) != UNSPEC
14431 || (XINT (term, 1) != UNSPEC_GOTPCREL
14432 && XINT (term, 1) != UNSPEC_PCREL))
14433 return x;
14435 return XVECEXP (term, 0, 0);
14438 return ix86_delegitimize_address (x);
14441 static void
14442 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14443 bool fp, FILE *file)
14445 const char *suffix;
14447 if (mode == CCFPmode || mode == CCFPUmode)
14449 code = ix86_fp_compare_code_to_integer (code);
14450 mode = CCmode;
14452 if (reverse)
14453 code = reverse_condition (code);
14455 switch (code)
14457 case EQ:
14458 switch (mode)
14460 case CCAmode:
14461 suffix = "a";
14462 break;
14464 case CCCmode:
14465 suffix = "c";
14466 break;
14468 case CCOmode:
14469 suffix = "o";
14470 break;
14472 case CCSmode:
14473 suffix = "s";
14474 break;
14476 default:
14477 suffix = "e";
14479 break;
14480 case NE:
14481 switch (mode)
14483 case CCAmode:
14484 suffix = "na";
14485 break;
14487 case CCCmode:
14488 suffix = "nc";
14489 break;
14491 case CCOmode:
14492 suffix = "no";
14493 break;
14495 case CCSmode:
14496 suffix = "ns";
14497 break;
14499 default:
14500 suffix = "ne";
14502 break;
14503 case GT:
14504 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14505 suffix = "g";
14506 break;
14507 case GTU:
14508 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14509 Those same assemblers have the same, but opposite, lossage on cmov.  */
14510 if (mode == CCmode)
14511 suffix = fp ? "nbe" : "a";
14512 else
14513 gcc_unreachable ();
14514 break;
14515 case LT:
14516 switch (mode)
14518 case CCNOmode:
14519 case CCGOCmode:
14520 suffix = "s";
14521 break;
14523 case CCmode:
14524 case CCGCmode:
14525 suffix = "l";
14526 break;
14528 default:
14529 gcc_unreachable ();
14531 break;
14532 case LTU:
14533 if (mode == CCmode)
14534 suffix = "b";
14535 else if (mode == CCCmode)
14536 suffix = "c";
14537 else
14538 gcc_unreachable ();
14539 break;
14540 case GE:
14541 switch (mode)
14543 case CCNOmode:
14544 case CCGOCmode:
14545 suffix = "ns";
14546 break;
14548 case CCmode:
14549 case CCGCmode:
14550 suffix = "ge";
14551 break;
14553 default:
14554 gcc_unreachable ();
14556 break;
14557 case GEU:
14558 if (mode == CCmode)
14559 suffix = fp ? "nb" : "ae";
14560 else if (mode == CCCmode)
14561 suffix = "nc";
14562 else
14563 gcc_unreachable ();
14564 break;
14565 case LE:
14566 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14567 suffix = "le";
14568 break;
14569 case LEU:
14570 if (mode == CCmode)
14571 suffix = "be";
14572 else
14573 gcc_unreachable ();
14574 break;
14575 case UNORDERED:
14576 suffix = fp ? "u" : "p";
14577 break;
14578 case ORDERED:
14579 suffix = fp ? "nu" : "np";
14580 break;
14581 default:
14582 gcc_unreachable ();
14584 fputs (suffix, file);
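/* Hedged samples of the suffix selection above: (GT, CCGCmode) prints "g",
   (GEU, CCmode) prints "ae" ("nb" when FP is set, for fcmov), and with
   REVERSE the code first goes through reverse_condition, so a reversed GT
   in CCGCmode prints "le".  The suffix is what gets appended to "set",
   "cmov" and "j" in the insn templates.  */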
14587 /* Print the name of register X to FILE based on its machine mode and number.
14588 If CODE is 'w', pretend the mode is HImode.
14589 If CODE is 'b', pretend the mode is QImode.
14590 If CODE is 'k', pretend the mode is SImode.
14591 If CODE is 'q', pretend the mode is DImode.
14592 If CODE is 'x', pretend the mode is V4SFmode.
14593 If CODE is 't', pretend the mode is V8SFmode.
14594 If CODE is 'g', pretend the mode is V16SFmode.
14595 If CODE is 'h', pretend the reg is the 'high' byte register.
14596 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
14597 If CODE is 'd', duplicate the operand for AVX instruction.
14600 void
14601 print_reg (rtx x, int code, FILE *file)
14603 const char *reg;
14604 unsigned int regno;
14605 bool duplicated = code == 'd' && TARGET_AVX;
14607 if (ASSEMBLER_DIALECT == ASM_ATT)
14608 putc ('%', file);
14610 if (x == pc_rtx)
14612 gcc_assert (TARGET_64BIT);
14613 fputs ("rip", file);
14614 return;
14617 regno = true_regnum (x);
14618 gcc_assert (regno != ARG_POINTER_REGNUM
14619 && regno != FRAME_POINTER_REGNUM
14620 && regno != FLAGS_REG
14621 && regno != FPSR_REG
14622 && regno != FPCR_REG);
14624 if (code == 'w' || MMX_REG_P (x))
14625 code = 2;
14626 else if (code == 'b')
14627 code = 1;
14628 else if (code == 'k')
14629 code = 4;
14630 else if (code == 'q')
14631 code = 8;
14632 else if (code == 'y')
14633 code = 3;
14634 else if (code == 'h')
14635 code = 0;
14636 else if (code == 'x')
14637 code = 16;
14638 else if (code == 't')
14639 code = 32;
14640 else if (code == 'g')
14641 code = 64;
14642 else
14643 code = GET_MODE_SIZE (GET_MODE (x));
14645 /* Irritatingly, AMD extended registers use a different naming convention
14646 from the normal registers: "r%d[bwd]".  */
14647 if (REX_INT_REGNO_P (regno))
14649 gcc_assert (TARGET_64BIT);
14650 putc ('r', file);
14651 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14652 switch (code)
14654 case 0:
14655 error ("extended registers have no high halves");
14656 break;
14657 case 1:
14658 putc ('b', file);
14659 break;
14660 case 2:
14661 putc ('w', file);
14662 break;
14663 case 4:
14664 putc ('d', file);
14665 break;
14666 case 8:
14667 /* no suffix */
14668 break;
14669 default:
14670 error ("unsupported operand size for extended register");
14671 break;
14673 return;
14676 reg = NULL;
14677 switch (code)
14679 case 3:
14680 if (STACK_TOP_P (x))
14682 reg = "st(0)";
14683 break;
14685 /* FALLTHRU */
14686 case 8:
14687 case 4:
14688 case 12:
14689 if (! ANY_FP_REG_P (x))
14690 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14691 /* FALLTHRU */
14692 case 16:
14693 case 2:
14694 normal:
14695 reg = hi_reg_name[regno];
14696 break;
14697 case 1:
14698 if (regno >= ARRAY_SIZE (qi_reg_name))
14699 goto normal;
14700 reg = qi_reg_name[regno];
14701 break;
14702 case 0:
14703 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14704 goto normal;
14705 reg = qi_high_reg_name[regno];
14706 break;
14707 case 32:
14708 if (SSE_REG_P (x))
14710 gcc_assert (!duplicated);
14711 putc ('y', file);
14712 fputs (hi_reg_name[regno] + 1, file);
14713 return;
14715 case 64:
14716 if (SSE_REG_P (x))
14718 gcc_assert (!duplicated);
14719 putc ('z', file);
14720 fputs (hi_reg_name[REGNO (x)] + 1, file);
14721 return;
14723 break;
14724 default:
14725 gcc_unreachable ();
14728 fputs (reg, file);
14729 if (duplicated)
14731 if (ASSEMBLER_DIALECT == ASM_ATT)
14732 fprintf (file, ", %%%s", reg);
14733 else
14734 fprintf (file, ", %s", reg);
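/* Illustrative outputs of print_reg, assuming the AT&T dialect (which
   prepends '%'): AX with code 'b' -> "%al", 'w' -> "%ax", 'k' -> "%eax",
   'q' -> "%rax" (64-bit only); extended register R10 with code 'k' ->
   "%r10d" and 'b' -> "%r10b"; an SSE register with code 't' -> "%ymmN"
   and with code 'g' -> "%zmmN".  */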
14738 /* Locate some local-dynamic symbol still in use by this function
14739 so that we can print its name in some tls_local_dynamic_base
14740 pattern. */
14742 static int
14743 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
14745 rtx x = *px;
14747 if (GET_CODE (x) == SYMBOL_REF
14748 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
14750 cfun->machine->some_ld_name = XSTR (x, 0);
14751 return 1;
14754 return 0;
14757 static const char *
14758 get_some_local_dynamic_name (void)
14760 rtx insn;
14762 if (cfun->machine->some_ld_name)
14763 return cfun->machine->some_ld_name;
14765 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
14766 if (NONDEBUG_INSN_P (insn)
14767 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
14768 return cfun->machine->some_ld_name;
14770 return NULL;
14773 /* Meaning of CODE:
14774 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14775 C -- print opcode suffix for set/cmov insn.
14776 c -- like C, but print reversed condition
14777 F,f -- likewise, but for floating-point.
14778 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14779 otherwise nothing
14780 R -- print embedded rounding and sae.
14781 r -- print only sae.
14782 z -- print the opcode suffix for the size of the current operand.
14783 Z -- likewise, with special suffixes for x87 instructions.
14784 * -- print a star (in certain assembler syntax)
14785 A -- print an absolute memory reference.
14786 E -- print address with DImode register names if TARGET_64BIT.
14787 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14788 s -- print a shift double count, followed by the assembler's argument
14789 delimiter.
14790 b -- print the QImode name of the register for the indicated operand.
14791 %b0 would print %al if operands[0] is reg 0.
14792 w -- likewise, print the HImode name of the register.
14793 k -- likewise, print the SImode name of the register.
14794 q -- likewise, print the DImode name of the register.
14795 x -- likewise, print the V4SFmode name of the register.
14796 t -- likewise, print the V8SFmode name of the register.
14797 g -- likewise, print the V16SFmode name of the register.
14798 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14799 y -- print "st(0)" instead of "st" as a register.
14800 d -- print duplicated register operand for AVX instruction.
14801 D -- print condition for SSE cmp instruction.
14802 P -- if PIC, print an @PLT suffix.
14803 p -- print raw symbol name.
14804 X -- don't print any sort of PIC '@' suffix for a symbol.
14805 & -- print some in-use local-dynamic symbol name.
14806 H -- print a memory address offset by 8; used for sse high-parts
14807 Y -- print condition for XOP pcom* instruction.
14808 + -- print a branch hint as 'cs' or 'ds' prefix
14809 ; -- print a semicolon (after prefixes due to bug in older gas).
14810 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14811 @ -- print a segment register of thread base pointer load
14812 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14815 void
14816 ix86_print_operand (FILE *file, rtx x, int code)
14818 if (code)
14820 switch (code)
14822 case 'A':
14823 switch (ASSEMBLER_DIALECT)
14825 case ASM_ATT:
14826 putc ('*', file);
14827 break;
14829 case ASM_INTEL:
14830 /* Intel syntax.  For absolute addresses, registers should not
14831 be surrounded by brackets.  */
14832 if (!REG_P (x))
14834 putc ('[', file);
14835 ix86_print_operand (file, x, 0);
14836 putc (']', file);
14837 return;
14839 break;
14841 default:
14842 gcc_unreachable ();
14845 ix86_print_operand (file, x, 0);
14846 return;
14848 case 'E':
14849 /* Wrap address in an UNSPEC to declare special handling. */
14850 if (TARGET_64BIT)
14851 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14853 output_address (x);
14854 return;
14856 case 'L':
14857 if (ASSEMBLER_DIALECT == ASM_ATT)
14858 putc ('l', file);
14859 return;
14861 case 'W':
14862 if (ASSEMBLER_DIALECT == ASM_ATT)
14863 putc ('w', file);
14864 return;
14866 case 'B':
14867 if (ASSEMBLER_DIALECT == ASM_ATT)
14868 putc ('b', file);
14869 return;
14871 case 'Q':
14872 if (ASSEMBLER_DIALECT == ASM_ATT)
14873 putc ('l', file);
14874 return;
14876 case 'S':
14877 if (ASSEMBLER_DIALECT == ASM_ATT)
14878 putc ('s', file);
14879 return;
14881 case 'T':
14882 if (ASSEMBLER_DIALECT == ASM_ATT)
14883 putc ('t', file);
14884 return;
14886 case 'O':
14887 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14888 if (ASSEMBLER_DIALECT != ASM_ATT)
14889 return;
14891 switch (GET_MODE_SIZE (GET_MODE (x)))
14893 case 2:
14894 putc ('w', file);
14895 break;
14897 case 4:
14898 putc ('l', file);
14899 break;
14901 case 8:
14902 putc ('q', file);
14903 break;
14905 default:
14906 output_operand_lossage
14907 ("invalid operand size for operand code 'O'");
14908 return;
14911 putc ('.', file);
14912 #endif
14913 return;
14915 case 'z':
14916 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14918 /* Opcodes don't get size suffixes in the Intel dialect.  */
14919 if (ASSEMBLER_DIALECT == ASM_INTEL)
14920 return;
14922 switch (GET_MODE_SIZE (GET_MODE (x)))
14924 case 1:
14925 putc ('b', file);
14926 return;
14928 case 2:
14929 putc ('w', file);
14930 return;
14932 case 4:
14933 putc ('l', file);
14934 return;
14936 case 8:
14937 putc ('q', file);
14938 return;
14940 default:
14941 output_operand_lossage
14942 ("invalid operand size for operand code 'z'");
14943 return;
14947 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14948 warning
14949 (0, "non-integer operand used with operand code 'z'");
14950 /* FALLTHRU */
14952 case 'Z':
14953 /* 387 opcodes don't get size suffixes in the Intel dialect.  */
14954 if (ASSEMBLER_DIALECT == ASM_INTEL)
14955 return;
14957 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14959 switch (GET_MODE_SIZE (GET_MODE (x)))
14961 case 2:
14962 #ifdef HAVE_AS_IX86_FILDS
14963 putc ('s', file);
14964 #endif
14965 return;
14967 case 4:
14968 putc ('l', file);
14969 return;
14971 case 8:
14972 #ifdef HAVE_AS_IX86_FILDQ
14973 putc ('q', file);
14974 #else
14975 fputs ("ll", file);
14976 #endif
14977 return;
14979 default:
14980 break;
14983 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14985 /* 387 opcodes don't get size suffixes
14986 if the operands are registers. */
14987 if (STACK_REG_P (x))
14988 return;
14990 switch (GET_MODE_SIZE (GET_MODE (x)))
14992 case 4:
14993 putc ('s', file);
14994 return;
14996 case 8:
14997 putc ('l', file);
14998 return;
15000 case 12:
15001 case 16:
15002 putc ('t', file);
15003 return;
15005 default:
15006 break;
15009 else
15011 output_operand_lossage
15012 ("invalid operand type used with operand code 'Z'");
15013 return;
15016 output_operand_lossage
15017 ("invalid operand size for operand code 'Z'");
15018 return;
15020 case 'd':
15021 case 'b':
15022 case 'w':
15023 case 'k':
15024 case 'q':
15025 case 'h':
15026 case 't':
15027 case 'g':
15028 case 'y':
15029 case 'x':
15030 case 'X':
15031 case 'P':
15032 case 'p':
15033 break;
15035 case 's':
15036 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15038 ix86_print_operand (file, x, 0);
15039 fputs (", ", file);
15041 return;
15043 case 'Y':
15044 switch (GET_CODE (x))
15046 case NE:
15047 fputs ("neq", file);
15048 break;
15049 case EQ:
15050 fputs ("eq", file);
15051 break;
15052 case GE:
15053 case GEU:
15054 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15055 break;
15056 case GT:
15057 case GTU:
15058 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15059 break;
15060 case LE:
15061 case LEU:
15062 fputs ("le", file);
15063 break;
15064 case LT:
15065 case LTU:
15066 fputs ("lt", file);
15067 break;
15068 case UNORDERED:
15069 fputs ("unord", file);
15070 break;
15071 case ORDERED:
15072 fputs ("ord", file);
15073 break;
15074 case UNEQ:
15075 fputs ("ueq", file);
15076 break;
15077 case UNGE:
15078 fputs ("nlt", file);
15079 break;
15080 case UNGT:
15081 fputs ("nle", file);
15082 break;
15083 case UNLE:
15084 fputs ("ule", file);
15085 break;
15086 case UNLT:
15087 fputs ("ult", file);
15088 break;
15089 case LTGT:
15090 fputs ("une", file);
15091 break;
15092 default:
15093 output_operand_lossage ("operand is not a condition code, "
15094 "invalid operand code 'Y'");
15095 return;
15097 return;
15099 case 'D':
15100 /* A little bit of brain damage here.  The SSE compare instructions
15101 use completely different names for the comparisons than the
15102 fp conditional moves do.  */
15103 switch (GET_CODE (x))
15105 case UNEQ:
15106 if (TARGET_AVX)
15108 fputs ("eq_us", file);
15109 break;
15111 case EQ:
15112 fputs ("eq", file);
15113 break;
15114 case UNLT:
15115 if (TARGET_AVX)
15117 fputs ("nge", file);
15118 break;
15120 case LT:
15121 fputs ("lt", file);
15122 break;
15123 case UNLE:
15124 if (TARGET_AVX)
15126 fputs ("ngt", file);
15127 break;
15129 case LE:
15130 fputs ("le", file);
15131 break;
15132 case UNORDERED:
15133 fputs ("unord", file);
15134 break;
15135 case LTGT:
15136 if (TARGET_AVX)
15138 fputs ("neq_oq", file);
15139 break;
15141 case NE:
15142 fputs ("neq", file);
15143 break;
15144 case GE:
15145 if (TARGET_AVX)
15147 fputs ("ge", file);
15148 break;
15150 case UNGE:
15151 fputs ("nlt", file);
15152 break;
15153 case GT:
15154 if (TARGET_AVX)
15156 fputs ("gt", file);
15157 break;
15159 case UNGT:
15160 fputs ("nle", file);
15161 break;
15162 case ORDERED:
15163 fputs ("ord", file);
15164 break;
15165 default:
15166 output_operand_lossage ("operand is not a condition code, "
15167 "invalid operand code 'D'");
15168 return;
15170 return;
15172 case 'F':
15173 case 'f':
15174 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15175 if (ASSEMBLER_DIALECT == ASM_ATT)
15176 putc ('.', file);
15177 #endif
15179 case 'C':
15180 case 'c':
15181 if (!COMPARISON_P (x))
15183 output_operand_lossage ("operand is not a condition code, "
15184 "invalid operand code '%c'", code);
15185 return;
15187 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15188 code == 'c' || code == 'f',
15189 code == 'F' || code == 'f',
15190 file);
15191 return;
15193 case 'H':
15194 if (!offsettable_memref_p (x))
15196 output_operand_lossage ("operand is not an offsettable memory "
15197 "reference, invalid operand code 'H'");
15198 return;
15200 /* It doesn't actually matter what mode we use here, as we're
15201 only going to use this for printing. */
15202 x = adjust_address_nv (x, DImode, 8);
15203 /* Output 'qword ptr' for intel assembler dialect. */
15204 if (ASSEMBLER_DIALECT == ASM_INTEL)
15205 code = 'q';
15206 break;
15208 case 'K':
15209 gcc_assert (CONST_INT_P (x));
15211 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15212 #ifdef HAVE_AS_IX86_HLE
15213 fputs ("xacquire ", file);
15214 #else
15215 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15216 #endif
15217 else if (INTVAL (x) & IX86_HLE_RELEASE)
15218 #ifdef HAVE_AS_IX86_HLE
15219 fputs ("xrelease ", file);
15220 #else
15221 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15222 #endif
15223 /* We do not want to print the value of the operand.  */
15224 return;
15226 case 'N':
15227 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15228 fputs ("{z}", file);
15229 return;
15231 case 'r':
15232 gcc_assert (CONST_INT_P (x));
15233 gcc_assert (INTVAL (x) == ROUND_SAE);
15235 if (ASSEMBLER_DIALECT == ASM_INTEL)
15236 fputs (", ", file);
15238 fputs ("{sae}", file);
15240 if (ASSEMBLER_DIALECT == ASM_ATT)
15241 fputs (", ", file);
15243 return;
15245 case 'R':
15246 gcc_assert (CONST_INT_P (x));
15248 if (ASSEMBLER_DIALECT == ASM_INTEL)
15249 fputs (", ", file);
15251 switch (INTVAL (x))
15253 case ROUND_NEAREST_INT | ROUND_SAE:
15254 fputs ("{rn-sae}", file);
15255 break;
15256 case ROUND_NEG_INF | ROUND_SAE:
15257 fputs ("{rd-sae}", file);
15258 break;
15259 case ROUND_POS_INF | ROUND_SAE:
15260 fputs ("{ru-sae}", file);
15261 break;
15262 case ROUND_ZERO | ROUND_SAE:
15263 fputs ("{rz-sae}", file);
15264 break;
15265 default:
15266 gcc_unreachable ();
15269 if (ASSEMBLER_DIALECT == ASM_ATT)
15270 fputs (", ", file);
15272 return;
15274 case '*':
15275 if (ASSEMBLER_DIALECT == ASM_ATT)
15276 putc ('*', file);
15277 return;
15279 case '&':
15281 const char *name = get_some_local_dynamic_name ();
15282 if (name == NULL)
15283 output_operand_lossage ("'%%&' used without any "
15284 "local dynamic TLS references");
15285 else
15286 assemble_name (file, name);
15287 return;
15290 case '+':
15292 rtx x;
15294 if (!optimize
15295 || optimize_function_for_size_p (cfun)
15296 || !TARGET_BRANCH_PREDICTION_HINTS)
15297 return;
15299 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15300 if (x)
15302 int pred_val = XINT (x, 0);
15304 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15305 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15307 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15308 bool cputaken
15309 = final_forward_branch_p (current_output_insn) == 0;
15311 /* Emit hints only when the default branch prediction
15312 heuristics would fail.  */
15313 if (taken != cputaken)
15315 /* We use 3e (DS) prefix for taken branches and
15316 2e (CS) prefix for not taken branches. */
15317 if (taken)
15318 fputs ("ds ; ", file);
15319 else
15320 fputs ("cs ; ", file);
15324 return;
15327 case ';':
15328 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15329 putc (';', file);
15330 #endif
15331 return;
15333 case '@':
15334 if (ASSEMBLER_DIALECT == ASM_ATT)
15335 putc ('%', file);
15337 /* The kernel uses a different segment register for performance
15338 reasons: this way a system call does not have to trash the userspace
15339 segment register, which would be expensive.  */
15340 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15341 fputs ("fs", file);
15342 else
15343 fputs ("gs", file);
15344 return;
15346 case '~':
15347 putc (TARGET_AVX2 ? 'i' : 'f', file);
15348 return;
15350 case '^':
15351 if (TARGET_64BIT && Pmode != word_mode)
15352 fputs ("addr32 ", file);
15353 return;
15355 default:
15356 output_operand_lossage ("invalid operand code '%c'", code);
15360 if (REG_P (x))
15361 print_reg (x, code, file);
15363 else if (MEM_P (x))
15365 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15366 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15367 && GET_MODE (x) != BLKmode)
15369 const char * size;
15370 switch (GET_MODE_SIZE (GET_MODE (x)))
15372 case 1: size = "BYTE"; break;
15373 case 2: size = "WORD"; break;
15374 case 4: size = "DWORD"; break;
15375 case 8: size = "QWORD"; break;
15376 case 12: size = "TBYTE"; break;
15377 case 16:
15378 if (GET_MODE (x) == XFmode)
15379 size = "TBYTE";
15380 else
15381 size = "XMMWORD";
15382 break;
15383 case 32: size = "YMMWORD"; break;
15384 case 64: size = "ZMMWORD"; break;
15385 default:
15386 gcc_unreachable ();
15389 /* Check for explicit size override (codes 'b', 'w', 'k',
15390 'q' and 'x') */
15391 if (code == 'b')
15392 size = "BYTE";
15393 else if (code == 'w')
15394 size = "WORD";
15395 else if (code == 'k')
15396 size = "DWORD";
15397 else if (code == 'q')
15398 size = "QWORD";
15399 else if (code == 'x')
15400 size = "XMMWORD";
15402 fputs (size, file);
15403 fputs (" PTR ", file);
15406 x = XEXP (x, 0);
15407 /* Avoid (%rip) for call operands. */
15408 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15409 && !CONST_INT_P (x))
15410 output_addr_const (file, x);
15411 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15412 output_operand_lossage ("invalid constraints for operand");
15413 else
15414 output_address (x);
15417 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15419 REAL_VALUE_TYPE r;
15420 long l;
15422 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15423 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15425 if (ASSEMBLER_DIALECT == ASM_ATT)
15426 putc ('$', file);
15427 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15428 if (code == 'q')
15429 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15430 (unsigned long long) (int) l);
15431 else
15432 fprintf (file, "0x%08x", (unsigned int) l);
15435 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15437 REAL_VALUE_TYPE r;
15438 long l[2];
15440 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15441 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15443 if (ASSEMBLER_DIALECT == ASM_ATT)
15444 putc ('$', file);
15445 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15448 /* These float cases don't actually occur as immediate operands. */
15449 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15451 char dstr[30];
15453 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15454 fputs (dstr, file);
15457 else
15459 /* We have patterns that allow zero sets of memory, for instance.
15460 In 64-bit mode, we should probably support all 8-byte vectors,
15461 since we can in fact encode that into an immediate. */
15462 if (GET_CODE (x) == CONST_VECTOR)
15464 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15465 x = const0_rtx;
15468 if (code != 'P' && code != 'p')
15470 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15472 if (ASSEMBLER_DIALECT == ASM_ATT)
15473 putc ('$', file);
15475 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15476 || GET_CODE (x) == LABEL_REF)
15478 if (ASSEMBLER_DIALECT == ASM_ATT)
15479 putc ('$', file);
15480 else
15481 fputs ("OFFSET FLAT:", file);
15484 if (CONST_INT_P (x))
15485 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15486 else if (flag_pic || MACHOPIC_INDIRECT)
15487 output_pic_addr_const (file, x, code);
15488 else
15489 output_addr_const (file, x);
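/* A minimal user-level sketch of the operand modifiers handled above; the
   inline asm below is hypothetical and not part of this file.  Given
       int dst, src;
       asm ("mov%z1\t%1, %k0" : "=r" (dst) : "m" (src));
   '%z1' appends the size suffix derived from operand 1's mode (here 'l')
   and '%k0' prints the SImode name of the register chosen for operand 0,
   so the AT&T output looks like "movl src(%rip), %eax".  */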
15493 static bool
15494 ix86_print_operand_punct_valid_p (unsigned char code)
15496 return (code == '@' || code == '*' || code == '+' || code == '&'
15497 || code == ';' || code == '~' || code == '^');
15500 /* Print a memory operand whose address is ADDR. */
15502 static void
15503 ix86_print_operand_address (FILE *file, rtx addr)
15505 struct ix86_address parts;
15506 rtx base, index, disp;
15507 int scale;
15508 int ok;
15509 bool vsib = false;
15510 int code = 0;
15512 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15514 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15515 gcc_assert (parts.index == NULL_RTX);
15516 parts.index = XVECEXP (addr, 0, 1);
15517 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15518 addr = XVECEXP (addr, 0, 0);
15519 vsib = true;
15521 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15523 gcc_assert (TARGET_64BIT);
15524 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15525 code = 'q';
15527 else
15528 ok = ix86_decompose_address (addr, &parts);
15530 gcc_assert (ok);
15532 base = parts.base;
15533 index = parts.index;
15534 disp = parts.disp;
15535 scale = parts.scale;
15537 switch (parts.seg)
15539 case SEG_DEFAULT:
15540 break;
15541 case SEG_FS:
15542 case SEG_GS:
15543 if (ASSEMBLER_DIALECT == ASM_ATT)
15544 putc ('%', file);
15545 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15546 break;
15547 default:
15548 gcc_unreachable ();
15551 /* Use the one byte shorter RIP-relative addressing for 64bit mode.  */
15552 if (TARGET_64BIT && !base && !index)
15554 rtx symbol = disp;
15556 if (GET_CODE (disp) == CONST
15557 && GET_CODE (XEXP (disp, 0)) == PLUS
15558 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15559 symbol = XEXP (XEXP (disp, 0), 0);
15561 if (GET_CODE (symbol) == LABEL_REF
15562 || (GET_CODE (symbol) == SYMBOL_REF
15563 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15564 base = pc_rtx;
15566 if (!base && !index)
15568 /* Displacement only requires special attention. */
15570 if (CONST_INT_P (disp))
15572 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15573 fputs ("ds:", file);
15574 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15576 else if (flag_pic)
15577 output_pic_addr_const (file, disp, 0);
15578 else
15579 output_addr_const (file, disp);
15581 else
15583 /* Print SImode register names to force addr32 prefix. */
15584 if (SImode_address_operand (addr, VOIDmode))
15586 #ifdef ENABLE_CHECKING
15587 gcc_assert (TARGET_64BIT);
15588 switch (GET_CODE (addr))
15590 case SUBREG:
15591 gcc_assert (GET_MODE (addr) == SImode);
15592 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15593 break;
15594 case ZERO_EXTEND:
15595 case AND:
15596 gcc_assert (GET_MODE (addr) == DImode);
15597 break;
15598 default:
15599 gcc_unreachable ();
15601 #endif
15602 gcc_assert (!code);
15603 code = 'k';
15605 else if (code == 0
15606 && TARGET_X32
15607 && disp
15608 && CONST_INT_P (disp)
15609 && INTVAL (disp) < -16*1024*1024)
15611 /* X32 runs in 64-bit mode, where displacement, DISP, in
15612 address DISP(%r64), is encoded as 32-bit immediate sign-
15613 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15614 address is %r64 + 0xffffffffbffffd00. When %r64 <
15615 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15616 which is invalid for x32. The correct address is %r64
15617 - 0x40000300 == 0xf7ffdd64. To properly encode
15618 -0x40000300(%r64) for x32, we zero-extend negative
15619 displacement by forcing addr32 prefix which truncates
15620 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15621 zero-extend all negative displacements, including -1(%rsp).
15622 However, for small negative displacements, sign-extension
15623 won't cause overflow. We only zero-extend negative
15624 displacements if they are < -16*1024*1024, which is also used
15625 to check legitimate address displacements for PIC. */
15626 code = 'k';
15629 if (ASSEMBLER_DIALECT == ASM_ATT)
15631 if (disp)
15633 if (flag_pic)
15634 output_pic_addr_const (file, disp, 0);
15635 else if (GET_CODE (disp) == LABEL_REF)
15636 output_asm_label (disp);
15637 else
15638 output_addr_const (file, disp);
15641 putc ('(', file);
15642 if (base)
15643 print_reg (base, code, file);
15644 if (index)
15646 putc (',', file);
15647 print_reg (index, vsib ? 0 : code, file);
15648 if (scale != 1 || vsib)
15649 fprintf (file, ",%d", scale);
15651 putc (')', file);
15653 else
15655 rtx offset = NULL_RTX;
15657 if (disp)
15659 /* Pull out the offset of a symbol; print any symbol itself. */
15660 if (GET_CODE (disp) == CONST
15661 && GET_CODE (XEXP (disp, 0)) == PLUS
15662 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15664 offset = XEXP (XEXP (disp, 0), 1);
15665 disp = gen_rtx_CONST (VOIDmode,
15666 XEXP (XEXP (disp, 0), 0));
15669 if (flag_pic)
15670 output_pic_addr_const (file, disp, 0);
15671 else if (GET_CODE (disp) == LABEL_REF)
15672 output_asm_label (disp);
15673 else if (CONST_INT_P (disp))
15674 offset = disp;
15675 else
15676 output_addr_const (file, disp);
15679 putc ('[', file);
15680 if (base)
15682 print_reg (base, code, file);
15683 if (offset)
15685 if (INTVAL (offset) >= 0)
15686 putc ('+', file);
15687 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15690 else if (offset)
15691 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15692 else
15693 putc ('0', file);
15695 if (index)
15697 putc ('+', file);
15698 print_reg (index, vsib ? 0 : code, file);
15699 if (scale != 1 || vsib)
15700 fprintf (file, "*%d", scale);
15702 putc (']', file);
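/* Hedged examples of the two dialects produced by the routine above,
   assuming 64-bit register names: a base+index+disp address with base RAX,
   index RBX, scale 4 and displacement 8 prints as

       AT&T:   8(%rax,%rbx,4)
       Intel:  [rax+rbx*4+8]

   while a plain symbol reference forced to RIP-relative form prints as
   "sym(%rip)" resp. "sym[rip]".  */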
15707 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15709 static bool
15710 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15712 rtx op;
15714 if (GET_CODE (x) != UNSPEC)
15715 return false;
15717 op = XVECEXP (x, 0, 0);
15718 switch (XINT (x, 1))
15720 case UNSPEC_GOTTPOFF:
15721 output_addr_const (file, op);
15722 /* FIXME: This might be @TPOFF in Sun ld. */
15723 fputs ("@gottpoff", file);
15724 break;
15725 case UNSPEC_TPOFF:
15726 output_addr_const (file, op);
15727 fputs ("@tpoff", file);
15728 break;
15729 case UNSPEC_NTPOFF:
15730 output_addr_const (file, op);
15731 if (TARGET_64BIT)
15732 fputs ("@tpoff", file);
15733 else
15734 fputs ("@ntpoff", file);
15735 break;
15736 case UNSPEC_DTPOFF:
15737 output_addr_const (file, op);
15738 fputs ("@dtpoff", file);
15739 break;
15740 case UNSPEC_GOTNTPOFF:
15741 output_addr_const (file, op);
15742 if (TARGET_64BIT)
15743 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15744 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15745 else
15746 fputs ("@gotntpoff", file);
15747 break;
15748 case UNSPEC_INDNTPOFF:
15749 output_addr_const (file, op);
15750 fputs ("@indntpoff", file);
15751 break;
15752 #if TARGET_MACHO
15753 case UNSPEC_MACHOPIC_OFFSET:
15754 output_addr_const (file, op);
15755 putc ('-', file);
15756 machopic_output_function_base_name (file);
15757 break;
15758 #endif
15760 case UNSPEC_STACK_CHECK:
15762 int offset;
15764 gcc_assert (flag_split_stack);
15766 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15767 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15768 #else
15769 gcc_unreachable ();
15770 #endif
15772 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15774 break;
15776 default:
15777 return false;
15780 return true;
15783 /* Split one or more double-mode RTL references into pairs of half-mode
15784 references. The RTL can be REG, offsettable MEM, integer constant, or
15785 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15786 split and "num" is its length. lo_half and hi_half are output arrays
15787 that parallel "operands". */
15789 void
15790 split_double_mode (enum machine_mode mode, rtx operands[],
15791 int num, rtx lo_half[], rtx hi_half[])
15793 enum machine_mode half_mode;
15794 unsigned int byte;
15796 switch (mode)
15798 case TImode:
15799 half_mode = DImode;
15800 break;
15801 case DImode:
15802 half_mode = SImode;
15803 break;
15804 default:
15805 gcc_unreachable ();
15808 byte = GET_MODE_SIZE (half_mode);
15810 while (num--)
15812 rtx op = operands[num];
15814 /* simplify_subreg refuses to split volatile memory addresses,
15815 but we still have to handle them.  */
15816 if (MEM_P (op))
15818 lo_half[num] = adjust_address (op, half_mode, 0);
15819 hi_half[num] = adjust_address (op, half_mode, byte);
15821 else
15823 lo_half[num] = simplify_gen_subreg (half_mode, op,
15824 GET_MODE (op) == VOIDmode
15825 ? mode : GET_MODE (op), 0);
15826 hi_half[num] = simplify_gen_subreg (half_mode, op,
15827 GET_MODE (op) == VOIDmode
15828 ? mode : GET_MODE (op), byte);
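/* Illustrative sketch (assumed little-endian x86 layout): splitting the
   DImode constant 0x1122334455667788 yields
       lo_half = (const_int 0x55667788)   -- subreg at byte offset 0
       hi_half = (const_int 0x11223344)   -- subreg at byte offset 4
   and a DImode MEM is split into two SImode MEMs at offsets 0 and 4.  */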
15833 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15834 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15835 is the expression of the binary operation. The output may either be
15836 emitted here, or returned to the caller, like all output_* functions.
15838 There is no guarantee that the operands are the same mode, as they
15839 might be within FLOAT or FLOAT_EXTEND expressions. */
15841 #ifndef SYSV386_COMPAT
15842 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15843 wants to fix the assemblers because that causes incompatibility
15844 with gcc. No-one wants to fix gcc because that causes
15845 incompatibility with assemblers... You can use the option of
15846 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15847 #define SYSV386_COMPAT 1
15848 #endif
15850 const char *
15851 output_387_binary_op (rtx insn, rtx *operands)
15853 static char buf[40];
15854 const char *p;
15855 const char *ssep;
15856 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15858 #ifdef ENABLE_CHECKING
15859 /* Even if we do not want to check the inputs, this documents the input
15860 constraints, which helps in understanding the following code.  */
15861 if (STACK_REG_P (operands[0])
15862 && ((REG_P (operands[1])
15863 && REGNO (operands[0]) == REGNO (operands[1])
15864 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15865 || (REG_P (operands[2])
15866 && REGNO (operands[0]) == REGNO (operands[2])
15867 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15868 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15869 ; /* ok */
15870 else
15871 gcc_assert (is_sse);
15872 #endif
15874 switch (GET_CODE (operands[3]))
15876 case PLUS:
15877 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15878 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15879 p = "fiadd";
15880 else
15881 p = "fadd";
15882 ssep = "vadd";
15883 break;
15885 case MINUS:
15886 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15887 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15888 p = "fisub";
15889 else
15890 p = "fsub";
15891 ssep = "vsub";
15892 break;
15894 case MULT:
15895 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15896 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15897 p = "fimul";
15898 else
15899 p = "fmul";
15900 ssep = "vmul";
15901 break;
15903 case DIV:
15904 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15905 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15906 p = "fidiv";
15907 else
15908 p = "fdiv";
15909 ssep = "vdiv";
15910 break;
15912 default:
15913 gcc_unreachable ();
15916 if (is_sse)
15918 if (TARGET_AVX)
15920 strcpy (buf, ssep);
15921 if (GET_MODE (operands[0]) == SFmode)
15922 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15923 else
15924 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15926 else
15928 strcpy (buf, ssep + 1);
15929 if (GET_MODE (operands[0]) == SFmode)
15930 strcat (buf, "ss\t{%2, %0|%0, %2}");
15931 else
15932 strcat (buf, "sd\t{%2, %0|%0, %2}");
15934 return buf;
15936 strcpy (buf, p);
15938 switch (GET_CODE (operands[3]))
15940 case MULT:
15941 case PLUS:
15942 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15944 rtx temp = operands[2];
15945 operands[2] = operands[1];
15946 operands[1] = temp;
15949 /* We know operands[0] == operands[1].  */
15951 if (MEM_P (operands[2]))
15953 p = "%Z2\t%2";
15954 break;
15957 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15959 if (STACK_TOP_P (operands[0]))
15960 /* How is it that we are storing to a dead operand[2]?
15961 Well, presumably operands[1] is dead too. We can't
15962 store the result to st(0) as st(0) gets popped on this
15963 instruction. Instead store to operands[2] (which I
15964 think has to be st(1)). st(1) will be popped later.
15965 gcc <= 2.8.1 didn't have this check and generated
15966 assembly code that the Unixware assembler rejected. */
15967 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15968 else
15969 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15970 break;
15973 if (STACK_TOP_P (operands[0]))
15974 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15975 else
15976 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15977 break;
15979 case MINUS:
15980 case DIV:
15981 if (MEM_P (operands[1]))
15983 p = "r%Z1\t%1";
15984 break;
15987 if (MEM_P (operands[2]))
15989 p = "%Z2\t%2";
15990 break;
15993 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15995 #if SYSV386_COMPAT
15996 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15997 derived assemblers, confusingly reverse the direction of
15998 the operation for fsub{r} and fdiv{r} when the
15999 destination register is not st(0). The Intel assembler
16000 doesn't have this brain damage. Read !SYSV386_COMPAT to
16001 figure out what the hardware really does. */
16002 if (STACK_TOP_P (operands[0]))
16003 p = "{p\t%0, %2|rp\t%2, %0}";
16004 else
16005 p = "{rp\t%2, %0|p\t%0, %2}";
16006 #else
16007 if (STACK_TOP_P (operands[0]))
16008 /* As above for fmul/fadd, we can't store to st(0). */
16009 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16010 else
16011 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16012 #endif
16013 break;
16016 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16018 #if SYSV386_COMPAT
16019 if (STACK_TOP_P (operands[0]))
16020 p = "{rp\t%0, %1|p\t%1, %0}";
16021 else
16022 p = "{p\t%1, %0|rp\t%0, %1}";
16023 #else
16024 if (STACK_TOP_P (operands[0]))
16025 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16026 else
16027 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16028 #endif
16029 break;
16032 if (STACK_TOP_P (operands[0]))
16034 if (STACK_TOP_P (operands[1]))
16035 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16036 else
16037 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16038 break;
16040 else if (STACK_TOP_P (operands[1]))
16042 #if SYSV386_COMPAT
16043 p = "{\t%1, %0|r\t%0, %1}";
16044 #else
16045 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16046 #endif
16048 else
16050 #if SYSV386_COMPAT
16051 p = "{r\t%2, %0|\t%0, %2}";
16052 #else
16053 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16054 #endif
16056 break;
16058 default:
16059 gcc_unreachable ();
16062 strcat (buf, p);
16063 return buf;
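/* Sample templates returned by output_387_binary_op (hedged, derived from
   the cases above): an AVX SFmode PLUS gives
   "vaddss\t{%2, %1, %0|%0, %1, %2}", the non-AVX SSE form is
   "addss\t{%2, %0|%0, %2}", and the x87 form with a memory operands[2] is
   "fadd%Z2\t%2"; the '{att|intel}' braces select the operand order for the
   two assembler dialects.  */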
16066 /* Check if a 256bit AVX register is referenced inside of EXP. */
16068 static int
16069 ix86_check_avx256_register (rtx *pexp, void *data ATTRIBUTE_UNUSED)
16071 rtx exp = *pexp;
16073 if (GET_CODE (exp) == SUBREG)
16074 exp = SUBREG_REG (exp);
16076 if (REG_P (exp)
16077 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
16078 return 1;
16080 return 0;
16083 /* Return needed mode for entity in optimize_mode_switching pass. */
16085 static int
16086 ix86_avx_u128_mode_needed (rtx insn)
16088 if (CALL_P (insn))
16090 rtx link;
16092 /* Needed mode is set to AVX_U128_CLEAN if there are
16093 no 256bit modes used in function arguments. */
16094 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16095 link;
16096 link = XEXP (link, 1))
16098 if (GET_CODE (XEXP (link, 0)) == USE)
16100 rtx arg = XEXP (XEXP (link, 0), 0);
16102 if (ix86_check_avx256_register (&arg, NULL))
16103 return AVX_U128_DIRTY;
16107 return AVX_U128_CLEAN;
16110 /* Require DIRTY mode if a 256bit AVX register is referenced.  Hardware
16111 changes state only when a 256bit register is written to, but we need
16112 to prevent the compiler from moving the optimal insertion point above
16113 a possible read from a 256bit register.  */
16114 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
16115 return AVX_U128_DIRTY;
16117 return AVX_U128_ANY;
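/* Hedged illustration of the three answers above: an insn that reads or
   writes a 256bit register (e.g. a ymm add) needs AVX_U128_DIRTY; a call
   whose argument registers carry no 256bit values needs AVX_U128_CLEAN,
   which is what later lets the mode-switching pass insert a vzeroupper in
   front of such a call; every other insn is AVX_U128_ANY.  */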
16120 /* Return mode that i387 must be switched into
16121 prior to the execution of insn. */
16123 static int
16124 ix86_i387_mode_needed (int entity, rtx insn)
16126 enum attr_i387_cw mode;
16128 /* The mode UNINITIALIZED is used to store the control word after a
16129 function call or ASM pattern.  The mode ANY specifies that the function
16130 has no requirements on the control word and makes no changes in the
16131 bits we are interested in.  */
16133 if (CALL_P (insn)
16134 || (NONJUMP_INSN_P (insn)
16135 && (asm_noperands (PATTERN (insn)) >= 0
16136 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16137 return I387_CW_UNINITIALIZED;
16139 if (recog_memoized (insn) < 0)
16140 return I387_CW_ANY;
16142 mode = get_attr_i387_cw (insn);
16144 switch (entity)
16146 case I387_TRUNC:
16147 if (mode == I387_CW_TRUNC)
16148 return mode;
16149 break;
16151 case I387_FLOOR:
16152 if (mode == I387_CW_FLOOR)
16153 return mode;
16154 break;
16156 case I387_CEIL:
16157 if (mode == I387_CW_CEIL)
16158 return mode;
16159 break;
16161 case I387_MASK_PM:
16162 if (mode == I387_CW_MASK_PM)
16163 return mode;
16164 break;
16166 default:
16167 gcc_unreachable ();
16170 return I387_CW_ANY;
16173 /* Return mode that entity must be switched into
16174 prior to the execution of insn. */
16176 static int
16177 ix86_mode_needed (int entity, rtx insn)
16179 switch (entity)
16181 case AVX_U128:
16182 return ix86_avx_u128_mode_needed (insn);
16183 case I387_TRUNC:
16184 case I387_FLOOR:
16185 case I387_CEIL:
16186 case I387_MASK_PM:
16187 return ix86_i387_mode_needed (entity, insn);
16188 default:
16189 gcc_unreachable ();
16191 return 0;
16194 /* Check if a 256bit AVX register is referenced in stores. */
16196 static void
16197 ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data)
16199 if (ix86_check_avx256_register (&dest, NULL))
16201 bool *used = (bool *) data;
16202 *used = true;
16206 /* Calculate mode of upper 128bit AVX registers after the insn. */
16208 static int
16209 ix86_avx_u128_mode_after (int mode, rtx insn)
16211 rtx pat = PATTERN (insn);
16213 if (vzeroupper_operation (pat, VOIDmode)
16214 || vzeroall_operation (pat, VOIDmode))
16215 return AVX_U128_CLEAN;
16217 /* We know that the state is clean after a CALL insn if no 256bit
16218 register is used in the function return value.  */
16219 if (CALL_P (insn))
16221 bool avx_reg256_found = false;
16222 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16224 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16227 /* Otherwise, return current mode. Remember that if insn
16228 references AVX 256bit registers, the mode was already changed
16229 to DIRTY from MODE_NEEDED. */
16230 return mode;
16233 /* Return the mode that an insn results in. */
16236 ix86_mode_after (int entity, int mode, rtx insn)
16238 switch (entity)
16240 case AVX_U128:
16241 return ix86_avx_u128_mode_after (mode, insn);
16242 case I387_TRUNC:
16243 case I387_FLOOR:
16244 case I387_CEIL:
16245 case I387_MASK_PM:
16246 return mode;
16247 default:
16248 gcc_unreachable ();
16252 static int
16253 ix86_avx_u128_mode_entry (void)
16255 tree arg;
16257 /* Entry mode is set to AVX_U128_DIRTY if there are
16258 256bit modes used in function arguments. */
16259 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16260 arg = TREE_CHAIN (arg))
16262 rtx incoming = DECL_INCOMING_RTL (arg);
16264 if (incoming && ix86_check_avx256_register (&incoming, NULL))
16265 return AVX_U128_DIRTY;
16268 return AVX_U128_CLEAN;
16271 /* Return a mode that ENTITY is assumed to be
16272 switched to at function entry. */
16274 static int
16275 ix86_mode_entry (int entity)
16277 switch (entity)
16279 case AVX_U128:
16280 return ix86_avx_u128_mode_entry ();
16281 case I387_TRUNC:
16282 case I387_FLOOR:
16283 case I387_CEIL:
16284 case I387_MASK_PM:
16285 return I387_CW_ANY;
16286 default:
16287 gcc_unreachable ();
16291 static int
16292 ix86_avx_u128_mode_exit (void)
16294 rtx reg = crtl->return_rtx;
16296 /* Exit mode is set to AVX_U128_DIRTY if there are
16297 256bit modes used in the function return register. */
16298 if (reg && ix86_check_avx256_register (&reg, NULL))
16299 return AVX_U128_DIRTY;
16301 return AVX_U128_CLEAN;
16304 /* Return a mode that ENTITY is assumed to be
16305 switched to at function exit. */
16307 static int
16308 ix86_mode_exit (int entity)
16310 switch (entity)
16312 case AVX_U128:
16313 return ix86_avx_u128_mode_exit ();
16314 case I387_TRUNC:
16315 case I387_FLOOR:
16316 case I387_CEIL:
16317 case I387_MASK_PM:
16318 return I387_CW_ANY;
16319 default:
16320 gcc_unreachable ();
16324 static int
16325 ix86_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
16327 return n;
16330 /* Output code to initialize control word copies used by trunc?f?i and
16331 rounding patterns.  CURRENT_MODE is set to the current control word,
16332 while NEW_MODE is set to the new control word.  */
16334 static void
16335 emit_i387_cw_initialization (int mode)
16337 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16338 rtx new_mode;
16340 enum ix86_stack_slot slot;
16342 rtx reg = gen_reg_rtx (HImode);
16344 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16345 emit_move_insn (reg, copy_rtx (stored_mode));
16347 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16348 || optimize_insn_for_size_p ())
16350 switch (mode)
16352 case I387_CW_TRUNC:
16353 /* round toward zero (truncate) */
16354 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16355 slot = SLOT_CW_TRUNC;
16356 break;
16358 case I387_CW_FLOOR:
16359 /* round down toward -oo */
16360 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16361 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16362 slot = SLOT_CW_FLOOR;
16363 break;
16365 case I387_CW_CEIL:
16366 /* round up toward +oo */
16367 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16368 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16369 slot = SLOT_CW_CEIL;
16370 break;
16372 case I387_CW_MASK_PM:
16373 /* mask precision exception for nearbyint() */
16374 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16375 slot = SLOT_CW_MASK_PM;
16376 break;
16378 default:
16379 gcc_unreachable ();
16382 else
16384 switch (mode)
16386 case I387_CW_TRUNC:
16387 /* round toward zero (truncate) */
16388 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16389 slot = SLOT_CW_TRUNC;
16390 break;
16392 case I387_CW_FLOOR:
16393 /* round down toward -oo */
16394 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16395 slot = SLOT_CW_FLOOR;
16396 break;
16398 case I387_CW_CEIL:
16399 /* round up toward +oo */
16400 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16401 slot = SLOT_CW_CEIL;
16402 break;
16404 case I387_CW_MASK_PM:
16405 /* mask precision exception for nearbyint() */
16406 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16407 slot = SLOT_CW_MASK_PM;
16408 break;
16410 default:
16411 gcc_unreachable ();
16415 gcc_assert (slot < MAX_386_STACK_LOCALS);
16417 new_mode = assign_386_stack_local (HImode, slot);
16418 emit_move_insn (new_mode, reg);
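/* Worked view of the x87 control word bits manipulated above (these
   restate the constants used by the code; FCW layout: bits 10-11 are the
   rounding control, bit 5 masks the precision exception):

       cw | 0x0c00               -> RC = 11, round toward zero (trunc)
       (cw & ~0x0c00) | 0x0400   -> RC = 01, round down (floor)
       (cw & ~0x0c00) | 0x0800   -> RC = 10, round up (ceil)
       cw | 0x0020               -> PM = 1, precision exception masked  */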
16421 /* Emit vzeroupper. */
16423 void
16424 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16426 int i;
16428 /* Cancel automatic vzeroupper insertion if there are
16429 live call-saved SSE registers at the insertion point. */
16431 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16432 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16433 return;
16435 if (TARGET_64BIT)
16436 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16437 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16438 return;
16440 emit_insn (gen_avx_vzeroupper ());
16445 /* Generate one or more insns to set ENTITY to MODE.  REGS_LIVE
16446 is the set of hard registers live at the point where the insn(s)
16447 are to be inserted.  */
16449 static void
16450 ix86_emit_mode_set (int entity, int mode, HARD_REG_SET regs_live)
16452 switch (entity)
16454 case AVX_U128:
16455 if (mode == AVX_U128_CLEAN)
16456 ix86_avx_emit_vzeroupper (regs_live);
16457 break;
16458 case I387_TRUNC:
16459 case I387_FLOOR:
16460 case I387_CEIL:
16461 case I387_MASK_PM:
16462 if (mode != I387_CW_ANY
16463 && mode != I387_CW_UNINITIALIZED)
16464 emit_i387_cw_initialization (mode);
16465 break;
16466 default:
16467 gcc_unreachable ();
16471 /* Output code for INSN to convert a float to a signed int. OPERANDS
16472 are the insn operands. The output may be [HSD]Imode and the input
16473 operand may be [SDX]Fmode. */
16475 const char *
16476 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
16478 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16479 int dimode_p = GET_MODE (operands[0]) == DImode;
16480 int round_mode = get_attr_i387_cw (insn);
16482 /* Jump through a hoop or two for DImode, since the hardware has no
16483 non-popping instruction. We used to do this a different way, but
16484 that was somewhat fragile and broke with post-reload splitters. */
16485 if ((dimode_p || fisttp) && !stack_top_dies)
16486 output_asm_insn ("fld\t%y1", operands);
16488 gcc_assert (STACK_TOP_P (operands[1]));
16489 gcc_assert (MEM_P (operands[0]));
16490 gcc_assert (GET_MODE (operands[1]) != TFmode);
16492 if (fisttp)
16493 output_asm_insn ("fisttp%Z0\t%0", operands);
16494 else
16496 if (round_mode != I387_CW_ANY)
16497 output_asm_insn ("fldcw\t%3", operands);
16498 if (stack_top_dies || dimode_p)
16499 output_asm_insn ("fistp%Z0\t%0", operands);
16500 else
16501 output_asm_insn ("fist%Z0\t%0", operands);
16502 if (round_mode != I387_CW_ANY)
16503 output_asm_insn ("fldcw\t%2", operands);
16506 return "";
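/* Example sequence produced by output_fix_trunc for a DImode result
   without fisttp (illustrative; %0 is the output MEM, %3 the truncating
   and %2 the saved control word; the q/ll suffix depends on assembler
   support):

       fldcw   %3
       fistpll %0
       fldcw   %2

   With SSE3's fisttp the whole dance collapses to "fisttpll %0".  */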
16509 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16510 have the values zero or one, indicates the ffreep insn's operand
16511 from the OPERANDS array. */
16513 static const char *
16514 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16516 if (TARGET_USE_FFREEP)
16517 #ifdef HAVE_AS_IX86_FFREEP
16518 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16519 #else
16521 static char retval[32];
16522 int regno = REGNO (operands[opno]);
16524 gcc_assert (STACK_REGNO_P (regno));
16526 regno -= FIRST_STACK_REG;
16528 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16529 return retval;
16531 #endif
16533 return opno ? "fstp\t%y1" : "fstp\t%y0";
16537 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16538 should be used. UNORDERED_P is true when fucom should be used. */
16540 const char *
16541 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16543 int stack_top_dies;
16544 rtx cmp_op0, cmp_op1;
16545 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16547 if (eflags_p)
16549 cmp_op0 = operands[0];
16550 cmp_op1 = operands[1];
16552 else
16554 cmp_op0 = operands[1];
16555 cmp_op1 = operands[2];
16558 if (is_sse)
16560 if (GET_MODE (operands[0]) == SFmode)
16561 if (unordered_p)
16562 return "%vucomiss\t{%1, %0|%0, %1}";
16563 else
16564 return "%vcomiss\t{%1, %0|%0, %1}";
16565 else
16566 if (unordered_p)
16567 return "%vucomisd\t{%1, %0|%0, %1}";
16568 else
16569 return "%vcomisd\t{%1, %0|%0, %1}";
16572 gcc_assert (STACK_TOP_P (cmp_op0));
16574 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16576 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16578 if (stack_top_dies)
16580 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16581 return output_387_ffreep (operands, 1);
16583 else
16584 return "ftst\n\tfnstsw\t%0";
16587 if (STACK_REG_P (cmp_op1)
16588 && stack_top_dies
16589 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16590 && REGNO (cmp_op1) != FIRST_STACK_REG)
16592 /* If the top of the 387 stack dies, and the other operand
16593 is also a stack register that dies, then this must be a
16594 `fcompp' float compare.  */
16596 if (eflags_p)
16598 /* There is no double popping fcomi variant. Fortunately,
16599 eflags is immune from the fstp's cc clobbering. */
16600 if (unordered_p)
16601 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16602 else
16603 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16604 return output_387_ffreep (operands, 0);
16606 else
16608 if (unordered_p)
16609 return "fucompp\n\tfnstsw\t%0";
16610 else
16611 return "fcompp\n\tfnstsw\t%0";
16614 else
16616 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16618 static const char * const alt[16] =
16620 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16621 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16622 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16623 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16625 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16626 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16627 NULL,
16628 NULL,
16630 "fcomi\t{%y1, %0|%0, %y1}",
16631 "fcomip\t{%y1, %0|%0, %y1}",
16632 "fucomi\t{%y1, %0|%0, %y1}",
16633 "fucomip\t{%y1, %0|%0, %y1}",
16635 NULL,
16636 NULL,
16637 NULL,
16638 NULL
16641 int mask;
16642 const char *ret;
16644 mask = eflags_p << 3;
16645 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16646 mask |= unordered_p << 1;
16647 mask |= stack_top_dies;
16649 gcc_assert (mask < 16);
16650 ret = alt[mask];
16651 gcc_assert (ret);
16653 return ret;
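/* Worked example of the encoding above (for exposition only): a
   register-register fucomip compare has eflags_p = 1, a floating-point
   cmp_op1 (so the intmode bit is 0), unordered_p = 1 and a dying stack
   top, giving mask = (1 << 3) | (0 << 2) | (1 << 1) | 1 = 11, and
   alt[11] is "fucomip\t{%y1, %0|%0, %y1}".  */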
16657 void
16658 ix86_output_addr_vec_elt (FILE *file, int value)
16660 const char *directive = ASM_LONG;
16662 #ifdef ASM_QUAD
16663 if (TARGET_LP64)
16664 directive = ASM_QUAD;
16665 #else
16666 gcc_assert (!TARGET_64BIT);
16667 #endif
16669 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16672 void
16673 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16675 const char *directive = ASM_LONG;
16677 #ifdef ASM_QUAD
16678 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16679 directive = ASM_QUAD;
16680 #else
16681 gcc_assert (!TARGET_64BIT);
16682 #endif
16683 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16684 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16685 fprintf (file, "%s%s%d-%s%d\n",
16686 directive, LPREFIX, value, LPREFIX, rel);
16687 else if (HAVE_AS_GOTOFF_IN_DATA)
16688 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16689 #if TARGET_MACHO
16690 else if (TARGET_MACHO)
16692 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16693 machopic_output_function_base_name (file);
16694 putc ('\n', file);
16696 #endif
16697 else
16698 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16699 GOT_SYMBOL_NAME, LPREFIX, value);
16702 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16703 for the target. */
16705 void
16706 ix86_expand_clear (rtx dest)
16708 rtx tmp;
16710 /* We play register width games, which are only valid after reload. */
16711 gcc_assert (reload_completed);
16713 /* Avoid HImode and its attendant prefix byte. */
16714 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16715 dest = gen_rtx_REG (SImode, REGNO (dest));
16716 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16718 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
16720 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16721 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16724 emit_insn (tmp);
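/* Illustrative example (for exposition, not from the original
   comments): clearing %al after reload is widened to an SImode
   destination, and unless TARGET_USE_MOV0 is set (and we are not
   optimizing for size) the SET is wrapped in a PARALLEL with a flags
   clobber, so the emitted insn is typically "xorl %eax, %eax" rather
   than "movb $0, %al".  */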
16727 /* X is an unchanging MEM. If it is a constant pool reference, return
16728 the constant pool rtx, else NULL. */
16731 maybe_get_pool_constant (rtx x)
16733 x = ix86_delegitimize_address (XEXP (x, 0));
16735 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16736 return get_pool_constant (x);
16738 return NULL_RTX;
16741 void
16742 ix86_expand_move (enum machine_mode mode, rtx operands[])
16744 rtx op0, op1;
16745 enum tls_model model;
16747 op0 = operands[0];
16748 op1 = operands[1];
16750 if (GET_CODE (op1) == SYMBOL_REF)
16752 rtx tmp;
16754 model = SYMBOL_REF_TLS_MODEL (op1);
16755 if (model)
16757 op1 = legitimize_tls_address (op1, model, true);
16758 op1 = force_operand (op1, op0);
16759 if (op1 == op0)
16760 return;
16761 op1 = convert_to_mode (mode, op1, 1);
16763 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16764 op1 = tmp;
16766 else if (GET_CODE (op1) == CONST
16767 && GET_CODE (XEXP (op1, 0)) == PLUS
16768 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16770 rtx addend = XEXP (XEXP (op1, 0), 1);
16771 rtx symbol = XEXP (XEXP (op1, 0), 0);
16772 rtx tmp;
16774 model = SYMBOL_REF_TLS_MODEL (symbol);
16775 if (model)
16776 tmp = legitimize_tls_address (symbol, model, true);
16777 else
16778 tmp = legitimize_pe_coff_symbol (symbol, true);
16780 if (tmp)
16782 tmp = force_operand (tmp, NULL);
16783 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16784 op0, 1, OPTAB_DIRECT);
16785 if (tmp == op0)
16786 return;
16787 op1 = convert_to_mode (mode, tmp, 1);
16791 if ((flag_pic || MACHOPIC_INDIRECT)
16792 && symbolic_operand (op1, mode))
16794 if (TARGET_MACHO && !TARGET_64BIT)
16796 #if TARGET_MACHO
16797 /* dynamic-no-pic */
16798 if (MACHOPIC_INDIRECT)
16800 rtx temp = ((reload_in_progress
16801 || ((op0 && REG_P (op0))
16802 && mode == Pmode))
16803 ? op0 : gen_reg_rtx (Pmode));
16804 op1 = machopic_indirect_data_reference (op1, temp);
16805 if (MACHOPIC_PURE)
16806 op1 = machopic_legitimize_pic_address (op1, mode,
16807 temp == op1 ? 0 : temp);
16809 if (op0 != op1 && GET_CODE (op0) != MEM)
16811 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16812 emit_insn (insn);
16813 return;
16815 if (GET_CODE (op0) == MEM)
16816 op1 = force_reg (Pmode, op1);
16817 else
16819 rtx temp = op0;
16820 if (GET_CODE (temp) != REG)
16821 temp = gen_reg_rtx (Pmode);
16822 temp = legitimize_pic_address (op1, temp);
16823 if (temp == op0)
16824 return;
16825 op1 = temp;
16827 /* dynamic-no-pic */
16828 #endif
16830 else
16832 if (MEM_P (op0))
16833 op1 = force_reg (mode, op1);
16834 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16836 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16837 op1 = legitimize_pic_address (op1, reg);
16838 if (op0 == op1)
16839 return;
16840 op1 = convert_to_mode (mode, op1, 1);
16844 else
16846 if (MEM_P (op0)
16847 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16848 || !push_operand (op0, mode))
16849 && MEM_P (op1))
16850 op1 = force_reg (mode, op1);
16852 if (push_operand (op0, mode)
16853 && ! general_no_elim_operand (op1, mode))
16854 op1 = copy_to_mode_reg (mode, op1);
16856 /* Force large constants in 64-bit compilation into a register
16857 so that they get CSEd. */
16858 if (can_create_pseudo_p ()
16859 && (mode == DImode) && TARGET_64BIT
16860 && immediate_operand (op1, mode)
16861 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16862 && !register_operand (op0, mode)
16863 && optimize)
16864 op1 = copy_to_mode_reg (mode, op1);
16866 if (can_create_pseudo_p ()
16867 && FLOAT_MODE_P (mode)
16868 && GET_CODE (op1) == CONST_DOUBLE)
16870 /* If we are loading a floating point constant to a register,
16871 force the value to memory now, since we'll get better code
16872 out the back end. */
16874 op1 = validize_mem (force_const_mem (mode, op1));
16875 if (!register_operand (op0, mode))
16877 rtx temp = gen_reg_rtx (mode);
16878 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16879 emit_move_insn (op0, temp);
16880 return;
16885 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16888 void
16889 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16891 rtx op0 = operands[0], op1 = operands[1];
16892 unsigned int align = GET_MODE_ALIGNMENT (mode);
16894 if (push_operand (op0, VOIDmode))
16895 op0 = emit_move_resolve_push (mode, op0);
16897 /* Force constants other than zero into memory. We do not know how
16898 the instructions used to build constants modify the upper 64 bits
16899 of the register; once we have that information we may be able
16900 to handle some of them more efficiently. */
16901 if (can_create_pseudo_p ()
16902 && register_operand (op0, mode)
16903 && (CONSTANT_P (op1)
16904 || (GET_CODE (op1) == SUBREG
16905 && CONSTANT_P (SUBREG_REG (op1))))
16906 && !standard_sse_constant_p (op1))
16907 op1 = validize_mem (force_const_mem (mode, op1));
16909 /* We need to check memory alignment for SSE modes since attributes
16910 can make operands unaligned. */
16911 if (can_create_pseudo_p ()
16912 && SSE_REG_MODE_P (mode)
16913 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16914 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16916 rtx tmp[2];
16918 /* ix86_expand_vector_move_misalign() does not like constants ... */
16919 if (CONSTANT_P (op1)
16920 || (GET_CODE (op1) == SUBREG
16921 && CONSTANT_P (SUBREG_REG (op1))))
16922 op1 = validize_mem (force_const_mem (mode, op1));
16924 /* ... nor both arguments in memory. */
16925 if (!register_operand (op0, mode)
16926 && !register_operand (op1, mode))
16927 op1 = force_reg (mode, op1);
16929 tmp[0] = op0; tmp[1] = op1;
16930 ix86_expand_vector_move_misalign (mode, tmp);
16931 return;
16934 /* Make operand1 a register if it isn't already. */
16935 if (can_create_pseudo_p ()
16936 && !register_operand (op0, mode)
16937 && !register_operand (op1, mode))
16939 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16940 return;
16943 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16946 /* Split 32-byte AVX unaligned load and store if needed. */
16948 static void
16949 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
16951 rtx m;
16952 rtx (*extract) (rtx, rtx, rtx);
16953 rtx (*load_unaligned) (rtx, rtx);
16954 rtx (*store_unaligned) (rtx, rtx);
16955 enum machine_mode mode;
16957 switch (GET_MODE (op0))
16959 default:
16960 gcc_unreachable ();
16961 case V32QImode:
16962 extract = gen_avx_vextractf128v32qi;
16963 load_unaligned = gen_avx_loaddquv32qi;
16964 store_unaligned = gen_avx_storedquv32qi;
16965 mode = V16QImode;
16966 break;
16967 case V8SFmode:
16968 extract = gen_avx_vextractf128v8sf;
16969 load_unaligned = gen_avx_loadups256;
16970 store_unaligned = gen_avx_storeups256;
16971 mode = V4SFmode;
16972 break;
16973 case V4DFmode:
16974 extract = gen_avx_vextractf128v4df;
16975 load_unaligned = gen_avx_loadupd256;
16976 store_unaligned = gen_avx_storeupd256;
16977 mode = V2DFmode;
16978 break;
16981 if (MEM_P (op1))
16983 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
16985 rtx r = gen_reg_rtx (mode);
16986 m = adjust_address (op1, mode, 0);
16987 emit_move_insn (r, m);
16988 m = adjust_address (op1, mode, 16);
16989 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
16990 emit_move_insn (op0, r);
16992 /* Normal *mov<mode>_internal pattern will handle
16993 unaligned loads just fine if misaligned_operand
16994 is true, and without the UNSPEC it can be combined
16995 with arithmetic instructions. */
16996 else if (misaligned_operand (op1, GET_MODE (op1)))
16997 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16998 else
16999 emit_insn (load_unaligned (op0, op1));
17001 else if (MEM_P (op0))
17003 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
17005 m = adjust_address (op0, mode, 0);
17006 emit_insn (extract (m, op1, const0_rtx));
17007 m = adjust_address (op0, mode, 16);
17008 emit_insn (extract (m, op1, const1_rtx));
17010 else
17011 emit_insn (store_unaligned (op0, op1));
17013 else
17014 gcc_unreachable ();
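/* Illustrative example (approximate asm, assuming AT&T syntax and a
   hypothetical address register): with
   TARGET_AVX256_SPLIT_UNALIGNED_LOAD, a misaligned V8SFmode load is
   split into two 16-byte halves glued together by a VEC_CONCAT,
   roughly

	vmovups	(%rsi), %xmm0
	vinsertf128	$1, 16(%rsi), %ymm0, %ymm0

   and the store counterpart uses two vextractf128 instructions to
   write each half separately.  */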
17017 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17018 straight to ix86_expand_vector_move. */
17019 /* Code generation for scalar reg-reg moves of single and double precision data:
17020 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17021 movaps reg, reg
17022 else
17023 movss reg, reg
17024 if (x86_sse_partial_reg_dependency == true)
17025 movapd reg, reg
17026 else
17027 movsd reg, reg
17029 Code generation for scalar loads of double precision data:
17030 if (x86_sse_split_regs == true)
17031 movlpd mem, reg (gas syntax)
17032 else
17033 movsd mem, reg
17035 Code generation for unaligned packed loads of single precision data
17036 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17037 if (x86_sse_unaligned_move_optimal)
17038 movups mem, reg
17040 if (x86_sse_partial_reg_dependency == true)
17042 xorps reg, reg
17043 movlps mem, reg
17044 movhps mem+8, reg
17046 else
17048 movlps mem, reg
17049 movhps mem+8, reg
17052 Code generation for unaligned packed loads of double precision data
17053 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17054 if (x86_sse_unaligned_move_optimal)
17055 movupd mem, reg
17057 if (x86_sse_split_regs == true)
17059 movlpd mem, reg
17060 movhpd mem+8, reg
17062 else
17064 movsd mem, reg
17065 movhpd mem+8, reg
17069 void
17070 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17072 rtx op0, op1, orig_op0 = NULL_RTX, m;
17073 rtx (*load_unaligned) (rtx, rtx);
17074 rtx (*store_unaligned) (rtx, rtx);
17076 op0 = operands[0];
17077 op1 = operands[1];
17079 if (GET_MODE_SIZE (mode) == 64)
17081 switch (GET_MODE_CLASS (mode))
17083 case MODE_VECTOR_INT:
17084 case MODE_INT:
17085 if (GET_MODE (op0) != V16SImode)
17087 if (!MEM_P (op0))
17089 orig_op0 = op0;
17090 op0 = gen_reg_rtx (V16SImode);
17092 else
17093 op0 = gen_lowpart (V16SImode, op0);
17095 op1 = gen_lowpart (V16SImode, op1);
17096 /* FALLTHRU */
17098 case MODE_VECTOR_FLOAT:
17099 switch (GET_MODE (op0))
17101 default:
17102 gcc_unreachable ();
17103 case V16SImode:
17104 load_unaligned = gen_avx512f_loaddquv16si;
17105 store_unaligned = gen_avx512f_storedquv16si;
17106 break;
17107 case V16SFmode:
17108 load_unaligned = gen_avx512f_loadups512;
17109 store_unaligned = gen_avx512f_storeups512;
17110 break;
17111 case V8DFmode:
17112 load_unaligned = gen_avx512f_loadupd512;
17113 store_unaligned = gen_avx512f_storeupd512;
17114 break;
17117 if (MEM_P (op1))
17118 emit_insn (load_unaligned (op0, op1));
17119 else if (MEM_P (op0))
17120 emit_insn (store_unaligned (op0, op1));
17121 else
17122 gcc_unreachable ();
17123 if (orig_op0)
17124 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17125 break;
17127 default:
17128 gcc_unreachable ();
17131 return;
17134 if (TARGET_AVX
17135 && GET_MODE_SIZE (mode) == 32)
17137 switch (GET_MODE_CLASS (mode))
17139 case MODE_VECTOR_INT:
17140 case MODE_INT:
17141 if (GET_MODE (op0) != V32QImode)
17143 if (!MEM_P (op0))
17145 orig_op0 = op0;
17146 op0 = gen_reg_rtx (V32QImode);
17148 else
17149 op0 = gen_lowpart (V32QImode, op0);
17151 op1 = gen_lowpart (V32QImode, op1);
17152 /* FALLTHRU */
17154 case MODE_VECTOR_FLOAT:
17155 ix86_avx256_split_vector_move_misalign (op0, op1);
17156 if (orig_op0)
17157 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17158 break;
17160 default:
17161 gcc_unreachable ();
17164 return;
17167 if (MEM_P (op1))
17169 /* Normal *mov<mode>_internal pattern will handle
17170 unaligned loads just fine if misaligned_operand
17171 is true, and without the UNSPEC it can be combined
17172 with arithmetic instructions. */
17173 if (TARGET_AVX
17174 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17175 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17176 && misaligned_operand (op1, GET_MODE (op1)))
17177 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17178 /* ??? If we have typed data, then it would appear that using
17179 movdqu is the only way to get unaligned data loaded with
17180 integer type. */
17181 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17183 if (GET_MODE (op0) != V16QImode)
17185 orig_op0 = op0;
17186 op0 = gen_reg_rtx (V16QImode);
17188 op1 = gen_lowpart (V16QImode, op1);
17189 /* We will eventually emit movups based on insn attributes. */
17190 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17191 if (orig_op0)
17192 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17194 else if (TARGET_SSE2 && mode == V2DFmode)
17196 rtx zero;
17198 if (TARGET_AVX
17199 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17200 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17201 || optimize_insn_for_size_p ())
17203 /* We will eventually emit movups based on insn attributes. */
17204 emit_insn (gen_sse2_loadupd (op0, op1));
17205 return;
17208 /* When SSE registers are split into halves, we can avoid
17209 writing to the top half twice. */
17210 if (TARGET_SSE_SPLIT_REGS)
17212 emit_clobber (op0);
17213 zero = op0;
17215 else
17217 /* ??? Not sure about the best option for the Intel chips.
17218 The following would seem to satisfy; the register is
17219 entirely cleared, breaking the dependency chain. We
17220 then store to the upper half, with a dependency depth
17221 of one. A rumor has it that Intel recommends two movsd
17222 followed by an unpacklpd, but this is unconfirmed. And
17223 given that the dependency depth of the unpacklpd would
17224 still be one, I'm not sure why this would be better. */
17225 zero = CONST0_RTX (V2DFmode);
17228 m = adjust_address (op1, DFmode, 0);
17229 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17230 m = adjust_address (op1, DFmode, 8);
17231 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17233 else
17235 rtx t;
17237 if (TARGET_AVX
17238 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17239 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17240 || optimize_insn_for_size_p ())
17242 if (GET_MODE (op0) != V4SFmode)
17244 orig_op0 = op0;
17245 op0 = gen_reg_rtx (V4SFmode);
17247 op1 = gen_lowpart (V4SFmode, op1);
17248 emit_insn (gen_sse_loadups (op0, op1));
17249 if (orig_op0)
17250 emit_move_insn (orig_op0,
17251 gen_lowpart (GET_MODE (orig_op0), op0));
17252 return;
17255 if (mode != V4SFmode)
17256 t = gen_reg_rtx (V4SFmode);
17257 else
17258 t = op0;
17260 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17261 emit_move_insn (t, CONST0_RTX (V4SFmode));
17262 else
17263 emit_clobber (t);
17265 m = adjust_address (op1, V2SFmode, 0);
17266 emit_insn (gen_sse_loadlps (t, t, m));
17267 m = adjust_address (op1, V2SFmode, 8);
17268 emit_insn (gen_sse_loadhps (t, t, m));
17269 if (mode != V4SFmode)
17270 emit_move_insn (op0, gen_lowpart (mode, t));
17273 else if (MEM_P (op0))
17275 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17277 op0 = gen_lowpart (V16QImode, op0);
17278 op1 = gen_lowpart (V16QImode, op1);
17279 /* We will eventually emit movups based on insn attributes. */
17280 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17282 else if (TARGET_SSE2 && mode == V2DFmode)
17284 if (TARGET_AVX
17285 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17286 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17287 || optimize_insn_for_size_p ())
17288 /* We will eventually emit movups based on insn attributes. */
17289 emit_insn (gen_sse2_storeupd (op0, op1));
17290 else
17292 m = adjust_address (op0, DFmode, 0);
17293 emit_insn (gen_sse2_storelpd (m, op1));
17294 m = adjust_address (op0, DFmode, 8);
17295 emit_insn (gen_sse2_storehpd (m, op1));
17298 else
17300 if (mode != V4SFmode)
17301 op1 = gen_lowpart (V4SFmode, op1);
17303 if (TARGET_AVX
17304 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17305 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17306 || optimize_insn_for_size_p ())
17308 op0 = gen_lowpart (V4SFmode, op0);
17309 emit_insn (gen_sse_storeups (op0, op1));
17311 else
17313 m = adjust_address (op0, V2SFmode, 0);
17314 emit_insn (gen_sse_storelps (m, op1));
17315 m = adjust_address (op0, V2SFmode, 8);
17316 emit_insn (gen_sse_storehps (m, op1));
17320 else
17321 gcc_unreachable ();
17324 /* Helper function of ix86_fixup_binary_operands to canonicalize
17325 operand order. Returns true if the operands should be swapped. */
17327 static bool
17328 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17329 rtx operands[])
17331 rtx dst = operands[0];
17332 rtx src1 = operands[1];
17333 rtx src2 = operands[2];
17335 /* If the operation is not commutative, we can't do anything. */
17336 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17337 return false;
17339 /* Highest priority is that src1 should match dst. */
17340 if (rtx_equal_p (dst, src1))
17341 return false;
17342 if (rtx_equal_p (dst, src2))
17343 return true;
17345 /* Next highest priority is that immediate constants come second. */
17346 if (immediate_operand (src2, mode))
17347 return false;
17348 if (immediate_operand (src1, mode))
17349 return true;
17351 /* Lowest priority is that memory references should come second. */
17352 if (MEM_P (src2))
17353 return false;
17354 if (MEM_P (src1))
17355 return true;
17357 return false;
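/* Worked example (for exposition): for a commutative "dst = src1 + src2"
   with operands (r1, mem, r1), dst matches src2, so the function
   returns true; after the swap the register operand sits in the src1
   slot and the insn can use the "reg += mem" form without an extra
   copy.  */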
17361 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17362 destination to use for the operation. If different from the true
17363 destination in operands[0], a copy operation will be required. */
17366 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17367 rtx operands[])
17369 rtx dst = operands[0];
17370 rtx src1 = operands[1];
17371 rtx src2 = operands[2];
17373 /* Canonicalize operand order. */
17374 if (ix86_swap_binary_operands_p (code, mode, operands))
17376 rtx temp;
17378 /* It is invalid to swap operands of different modes. */
17379 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17381 temp = src1;
17382 src1 = src2;
17383 src2 = temp;
17386 /* Both source operands cannot be in memory. */
17387 if (MEM_P (src1) && MEM_P (src2))
17389 /* Optimization: Only read from memory once. */
17390 if (rtx_equal_p (src1, src2))
17392 src2 = force_reg (mode, src2);
17393 src1 = src2;
17395 else if (rtx_equal_p (dst, src1))
17396 src2 = force_reg (mode, src2);
17397 else
17398 src1 = force_reg (mode, src1);
17401 /* If the destination is memory, and we do not have matching source
17402 operands, do things in registers. */
17403 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17404 dst = gen_reg_rtx (mode);
17406 /* Source 1 cannot be a constant. */
17407 if (CONSTANT_P (src1))
17408 src1 = force_reg (mode, src1);
17410 /* Source 1 cannot be a non-matching memory. */
17411 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17412 src1 = force_reg (mode, src1);
17414 /* Improve address combine. */
17415 if (code == PLUS
17416 && GET_MODE_CLASS (mode) == MODE_INT
17417 && MEM_P (src2))
17418 src2 = force_reg (mode, src2);
17420 operands[1] = src1;
17421 operands[2] = src2;
17422 return dst;
17425 /* Similarly, but assume that the destination has already been
17426 set up properly. */
17428 void
17429 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17430 enum machine_mode mode, rtx operands[])
17432 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17433 gcc_assert (dst == operands[0]);
17436 /* Attempt to expand a binary operator. Make the expansion closer to the
17437 actual machine than just general_operand, which would allow 3 separate
17438 memory references (one output, two input) in a single insn. */
17440 void
17441 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17442 rtx operands[])
17444 rtx src1, src2, dst, op, clob;
17446 dst = ix86_fixup_binary_operands (code, mode, operands);
17447 src1 = operands[1];
17448 src2 = operands[2];
17450 /* Emit the instruction. */
17452 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17453 if (reload_in_progress)
17455 /* Reload doesn't know about the flags register, and doesn't know that
17456 it doesn't want to clobber it. We can only do this with PLUS. */
17457 gcc_assert (code == PLUS);
17458 emit_insn (op);
17460 else if (reload_completed
17461 && code == PLUS
17462 && !rtx_equal_p (dst, src1))
17464 /* This is going to be an LEA; avoid splitting it later. */
17465 emit_insn (op);
17467 else
17469 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17470 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17473 /* Fix up the destination if needed. */
17474 if (dst != operands[0])
17475 emit_move_insn (operands[0], dst);
17478 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17479 the given OPERANDS. */
17481 void
17482 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17483 rtx operands[])
17485 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17486 if (GET_CODE (operands[1]) == SUBREG)
17488 op1 = operands[1];
17489 op2 = operands[2];
17491 else if (GET_CODE (operands[2]) == SUBREG)
17493 op1 = operands[2];
17494 op2 = operands[1];
17496 /* Optimize (__m128i) d | (__m128i) e and similar code, when d and
17497 e are float vectors, into a float vector logical insn. In C/C++,
17498 without using intrinsics, there is no way to express a vector
17499 logical operation on float vectors other than to cast them
17500 temporarily to integer vectors. */
17501 if (op1
17502 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17503 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17504 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17505 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17506 && SUBREG_BYTE (op1) == 0
17507 && (GET_CODE (op2) == CONST_VECTOR
17508 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17509 && SUBREG_BYTE (op2) == 0))
17510 && can_create_pseudo_p ())
17512 rtx dst;
17513 switch (GET_MODE (SUBREG_REG (op1)))
17515 case V4SFmode:
17516 case V8SFmode:
17517 case V2DFmode:
17518 case V4DFmode:
17519 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17520 if (GET_CODE (op2) == CONST_VECTOR)
17522 op2 = gen_lowpart (GET_MODE (dst), op2);
17523 op2 = force_reg (GET_MODE (dst), op2);
17525 else
17527 op1 = operands[1];
17528 op2 = SUBREG_REG (operands[2]);
17529 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17530 op2 = force_reg (GET_MODE (dst), op2);
17532 op1 = SUBREG_REG (op1);
17533 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17534 op1 = force_reg (GET_MODE (dst), op1);
17535 emit_insn (gen_rtx_SET (VOIDmode, dst,
17536 gen_rtx_fmt_ee (code, GET_MODE (dst),
17537 op1, op2)));
17538 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17539 return;
17540 default:
17541 break;
17544 if (!nonimmediate_operand (operands[1], mode))
17545 operands[1] = force_reg (mode, operands[1]);
17546 if (!nonimmediate_operand (operands[2], mode))
17547 operands[2] = force_reg (mode, operands[2]);
17548 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17549 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17550 gen_rtx_fmt_ee (code, mode, operands[1],
17551 operands[2])));
17554 /* Return TRUE or FALSE depending on whether the binary operator meets the
17555 appropriate constraints. */
17557 bool
17558 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17559 rtx operands[3])
17561 rtx dst = operands[0];
17562 rtx src1 = operands[1];
17563 rtx src2 = operands[2];
17565 /* Both source operands cannot be in memory. */
17566 if (MEM_P (src1) && MEM_P (src2))
17567 return false;
17569 /* Canonicalize operand order for commutative operators. */
17570 if (ix86_swap_binary_operands_p (code, mode, operands))
17572 rtx temp = src1;
17573 src1 = src2;
17574 src2 = temp;
17577 /* If the destination is memory, we must have a matching source operand. */
17578 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17579 return false;
17581 /* Source 1 cannot be a constant. */
17582 if (CONSTANT_P (src1))
17583 return false;
17585 /* Source 1 cannot be a non-matching memory. */
17586 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17587 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17588 return (code == AND
17589 && (mode == HImode
17590 || mode == SImode
17591 || (TARGET_64BIT && mode == DImode))
17592 && satisfies_constraint_L (src2));
17594 return true;
17597 /* Attempt to expand a unary operator. Make the expansion closer to the
17598 actual machine than just general_operand, which would allow 2 separate
17599 memory references (one output, one input) in a single insn. */
17601 void
17602 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17603 rtx operands[])
17605 int matching_memory;
17606 rtx src, dst, op, clob;
17608 dst = operands[0];
17609 src = operands[1];
17611 /* If the destination is memory, and we do not have matching source
17612 operands, do things in registers. */
17613 matching_memory = 0;
17614 if (MEM_P (dst))
17616 if (rtx_equal_p (dst, src))
17617 matching_memory = 1;
17618 else
17619 dst = gen_reg_rtx (mode);
17622 /* When source operand is memory, destination must match. */
17623 if (MEM_P (src) && !matching_memory)
17624 src = force_reg (mode, src);
17626 /* Emit the instruction. */
17628 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17629 if (reload_in_progress || code == NOT)
17631 /* Reload doesn't know about the flags register, and doesn't know that
17632 it doesn't want to clobber it. */
17633 gcc_assert (code == NOT);
17634 emit_insn (op);
17636 else
17638 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17639 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17642 /* Fix up the destination if needed. */
17643 if (dst != operands[0])
17644 emit_move_insn (operands[0], dst);
17647 /* Split a 32-bit/64-bit divmod into an 8-bit unsigned divmod if the
17648 dividend and divisor are both within the range [0, 255]. */
17650 void
17651 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17652 bool signed_p)
17654 rtx end_label, qimode_label;
17655 rtx insn, div, mod;
17656 rtx scratch, tmp0, tmp1, tmp2;
17657 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17658 rtx (*gen_zero_extend) (rtx, rtx);
17659 rtx (*gen_test_ccno_1) (rtx, rtx);
17661 switch (mode)
17663 case SImode:
17664 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17665 gen_test_ccno_1 = gen_testsi_ccno_1;
17666 gen_zero_extend = gen_zero_extendqisi2;
17667 break;
17668 case DImode:
17669 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17670 gen_test_ccno_1 = gen_testdi_ccno_1;
17671 gen_zero_extend = gen_zero_extendqidi2;
17672 break;
17673 default:
17674 gcc_unreachable ();
17677 end_label = gen_label_rtx ();
17678 qimode_label = gen_label_rtx ();
17680 scratch = gen_reg_rtx (mode);
17682 /* Use 8-bit unsigned divmod if the dividend and divisor are within
17683 the range [0, 255]. */
17684 emit_move_insn (scratch, operands[2]);
17685 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17686 scratch, 1, OPTAB_DIRECT);
17687 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17688 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17689 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17690 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17691 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17692 pc_rtx);
17693 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17694 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17695 JUMP_LABEL (insn) = qimode_label;
17697 /* Generate the original signed/unsigned divmod. */
17698 div = gen_divmod4_1 (operands[0], operands[1],
17699 operands[2], operands[3]);
17700 emit_insn (div);
17702 /* Branch to the end. */
17703 emit_jump_insn (gen_jump (end_label));
17704 emit_barrier ();
17706 /* Generate 8bit unsigned divide. */
17707 emit_label (qimode_label);
17708 /* Don't use operands[0] for result of 8bit divide since not all
17709 registers support QImode ZERO_EXTRACT. */
17710 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17711 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17712 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17713 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17715 if (signed_p)
17717 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17718 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17720 else
17722 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17723 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17726 /* Extract remainder from AH. */
17727 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17728 if (REG_P (operands[1]))
17729 insn = emit_move_insn (operands[1], tmp1);
17730 else
17732 /* Need a new scratch register since the old one has result
17733 of 8bit divide. */
17734 scratch = gen_reg_rtx (mode);
17735 emit_move_insn (scratch, tmp1);
17736 insn = emit_move_insn (operands[1], scratch);
17738 set_unique_reg_note (insn, REG_EQUAL, mod);
17740 /* Zero extend quotient from AL. */
17741 tmp1 = gen_lowpart (QImode, tmp0);
17742 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17743 set_unique_reg_note (insn, REG_EQUAL, div);
17745 emit_label (end_label);
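/* Illustrative control flow of the split above (schematic only, not
   the exact emitted assembly):

	mov	dividend, scratch
	or	divisor, scratch
	test	$-0x100, scratch
	je	.Lqimode		(both values fit in [0, 255])
	<full 32-bit/64-bit (i)div sequence>
	jmp	.Lend
   .Lqimode:
	<8-bit divb via udivmodhiqi3; quotient in AL, remainder in AH>
   .Lend:
  */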
17748 /* Whether it is OK to emit CFI directives when emitting asm code. */
17750 bool
17751 ix86_emit_cfi ()
17753 return dwarf2out_do_cfi_asm ();
17756 #define LEA_MAX_STALL (3)
17757 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17759 /* Increase the given DISTANCE in half-cycles according to
17760 dependencies between the PREV and NEXT instructions.
17761 Add 1 half-cycle if there is no dependency and
17762 advance to the next cycle if there is some dependency. */
17764 static unsigned int
17765 increase_distance (rtx prev, rtx next, unsigned int distance)
17767 df_ref def, use;
17769 if (!prev || !next)
17770 return distance + (distance & 1) + 2;
17772 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17773 return distance + 1;
17775 FOR_EACH_INSN_USE (use, next)
17776 FOR_EACH_INSN_DEF (def, prev)
17777 if (!DF_REF_IS_ARTIFICIAL (def)
17778 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
17779 return distance + (distance & 1) + 2;
17781 return distance + 1;
17784 /* Function checks if instruction INSN defines register number
17785 REGNO1 or REGNO2. */
17787 static bool
17788 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17789 rtx insn)
17791 df_ref def;
17793 FOR_EACH_INSN_DEF (def, insn)
17794 if (DF_REF_REG_DEF_P (def)
17795 && !DF_REF_IS_ARTIFICIAL (def)
17796 && (regno1 == DF_REF_REGNO (def)
17797 || regno2 == DF_REF_REGNO (def)))
17798 return true;
17800 return false;
17803 /* Function checks if instruction INSN uses register number
17804 REGNO as a part of address expression. */
17806 static bool
17807 insn_uses_reg_mem (unsigned int regno, rtx insn)
17809 df_ref use;
17811 FOR_EACH_INSN_USE (use, insn)
17812 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
17813 return true;
17815 return false;
17818 /* Search backward for non-agu definition of register number REGNO1
17819 or register number REGNO2 in basic block starting from instruction
17820 START up to head of basic block or instruction INSN.
17822 Function puts true value into *FOUND var if definition was found
17823 and false otherwise.
17825 Distance in half-cycles between START and found instruction or head
17826 of BB is added to DISTANCE and returned. */
17828 static int
17829 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17830 rtx insn, int distance,
17831 rtx start, bool *found)
17833 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17834 rtx prev = start;
17835 rtx next = NULL;
17837 *found = false;
17839 while (prev
17840 && prev != insn
17841 && distance < LEA_SEARCH_THRESHOLD)
17843 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17845 distance = increase_distance (prev, next, distance);
17846 if (insn_defines_reg (regno1, regno2, prev))
17848 if (recog_memoized (prev) < 0
17849 || get_attr_type (prev) != TYPE_LEA)
17851 *found = true;
17852 return distance;
17856 next = prev;
17858 if (prev == BB_HEAD (bb))
17859 break;
17861 prev = PREV_INSN (prev);
17864 return distance;
17867 /* Search backward for non-agu definition of register number REGNO1
17868 or register number REGNO2 in INSN's basic block until
17869 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17870 2. Reach neighbour BBs boundary, or
17871 3. Reach agu definition.
17872 Returns the distance between the non-agu definition point and INSN.
17873 If no definition point, returns -1. */
17875 static int
17876 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17877 rtx insn)
17879 basic_block bb = BLOCK_FOR_INSN (insn);
17880 int distance = 0;
17881 bool found = false;
17883 if (insn != BB_HEAD (bb))
17884 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17885 distance, PREV_INSN (insn),
17886 &found);
17888 if (!found && distance < LEA_SEARCH_THRESHOLD)
17890 edge e;
17891 edge_iterator ei;
17892 bool simple_loop = false;
17894 FOR_EACH_EDGE (e, ei, bb->preds)
17895 if (e->src == bb)
17897 simple_loop = true;
17898 break;
17901 if (simple_loop)
17902 distance = distance_non_agu_define_in_bb (regno1, regno2,
17903 insn, distance,
17904 BB_END (bb), &found);
17905 else
17907 int shortest_dist = -1;
17908 bool found_in_bb = false;
17910 FOR_EACH_EDGE (e, ei, bb->preds)
17912 int bb_dist
17913 = distance_non_agu_define_in_bb (regno1, regno2,
17914 insn, distance,
17915 BB_END (e->src),
17916 &found_in_bb);
17917 if (found_in_bb)
17919 if (shortest_dist < 0)
17920 shortest_dist = bb_dist;
17921 else if (bb_dist > 0)
17922 shortest_dist = MIN (bb_dist, shortest_dist);
17924 found = true;
17928 distance = shortest_dist;
17932 /* get_attr_type may modify recog data. We want to make sure
17933 that recog data is valid for instruction INSN, on which
17934 distance_non_agu_define is called. INSN is unchanged here. */
17935 extract_insn_cached (insn);
17937 if (!found)
17938 return -1;
17940 return distance >> 1;
17943 /* Return the distance in half-cycles between INSN and the next
17944 insn that uses register number REGNO in a memory address, added
17945 to DISTANCE. Return -1 if REGNO is set.
17947 Put true value into *FOUND if register usage was found and
17948 false otherwise.
17949 Put true value into *REDEFINED if register redefinition was
17950 found and false otherwise. */
17952 static int
17953 distance_agu_use_in_bb (unsigned int regno,
17954 rtx insn, int distance, rtx start,
17955 bool *found, bool *redefined)
17957 basic_block bb = NULL;
17958 rtx next = start;
17959 rtx prev = NULL;
17961 *found = false;
17962 *redefined = false;
17964 if (start != NULL_RTX)
17966 bb = BLOCK_FOR_INSN (start);
17967 if (start != BB_HEAD (bb))
17968 /* If insn and start belong to the same bb, set prev to insn,
17969 so the call to increase_distance will increase the distance
17970 between insns by 1. */
17971 prev = insn;
17974 while (next
17975 && next != insn
17976 && distance < LEA_SEARCH_THRESHOLD)
17978 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
17980 distance = increase_distance(prev, next, distance);
17981 if (insn_uses_reg_mem (regno, next))
17983 /* Return DISTANCE if REGNO is used in a memory
17984 address in NEXT. */
17985 *found = true;
17986 return distance;
17989 if (insn_defines_reg (regno, INVALID_REGNUM, next))
17991 /* Return -1 if REGNO is set in NEXT. */
17992 *redefined = true;
17993 return -1;
17996 prev = next;
17999 if (next == BB_END (bb))
18000 break;
18002 next = NEXT_INSN (next);
18005 return distance;
18008 /* Return the distance between INSN and the next insn that uses
18009 register number REGNO0 in memory address. Return -1 if no such
18010 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18012 static int
18013 distance_agu_use (unsigned int regno0, rtx insn)
18015 basic_block bb = BLOCK_FOR_INSN (insn);
18016 int distance = 0;
18017 bool found = false;
18018 bool redefined = false;
18020 if (insn != BB_END (bb))
18021 distance = distance_agu_use_in_bb (regno0, insn, distance,
18022 NEXT_INSN (insn),
18023 &found, &redefined);
18025 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18027 edge e;
18028 edge_iterator ei;
18029 bool simple_loop = false;
18031 FOR_EACH_EDGE (e, ei, bb->succs)
18032 if (e->dest == bb)
18034 simple_loop = true;
18035 break;
18038 if (simple_loop)
18039 distance = distance_agu_use_in_bb (regno0, insn,
18040 distance, BB_HEAD (bb),
18041 &found, &redefined);
18042 else
18044 int shortest_dist = -1;
18045 bool found_in_bb = false;
18046 bool redefined_in_bb = false;
18048 FOR_EACH_EDGE (e, ei, bb->succs)
18050 int bb_dist
18051 = distance_agu_use_in_bb (regno0, insn,
18052 distance, BB_HEAD (e->dest),
18053 &found_in_bb, &redefined_in_bb);
18054 if (found_in_bb)
18056 if (shortest_dist < 0)
18057 shortest_dist = bb_dist;
18058 else if (bb_dist > 0)
18059 shortest_dist = MIN (bb_dist, shortest_dist);
18061 found = true;
18065 distance = shortest_dist;
18069 if (!found || redefined)
18070 return -1;
18072 return distance >> 1;
18075 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18076 there is a choice between LEA and ADD.
18077 Negative value: ADD is preferred over LEA
18078 Zero: Neutral
18079 Positive value: LEA is preferred over ADD.  */
18080 #define IX86_LEA_PRIORITY 0
18082 /* Return true if usage of lea INSN has performance advantage
18083 over a sequence of instructions. Instructions sequence has
18084 SPLIT_COST cycles higher latency than lea latency. */
18086 static bool
18087 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
18088 unsigned int regno2, int split_cost, bool has_scale)
18090 int dist_define, dist_use;
18092 /* For Silvermont, if a 2-source or 3-source LEA is used for a
18093 non-destructive destination, or because we want the ability
18094 to use a SCALE, the use of LEA is justified. */
18095 if (TARGET_SILVERMONT || TARGET_INTEL)
18097 if (has_scale)
18098 return true;
18099 if (split_cost < 1)
18100 return false;
18101 if (regno0 == regno1 || regno0 == regno2)
18102 return false;
18103 return true;
18106 dist_define = distance_non_agu_define (regno1, regno2, insn);
18107 dist_use = distance_agu_use (regno0, insn);
18109 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18111 /* If there is no non-AGU operand definition, no AGU
18112 operand usage and the split cost is 0, then both the lea
18113 and non-lea variants have the same priority. Currently
18114 we prefer lea for 64-bit code and non-lea for 32-bit
18115 code. */
18116 if (dist_use < 0 && split_cost == 0)
18117 return TARGET_64BIT || IX86_LEA_PRIORITY;
18118 else
18119 return true;
18122 /* The longer the distance to the defining insn, the more
18123 preferable lea is. Here we adjust it to take the splitting
18124 cost and lea priority into account. */
18125 dist_define += split_cost + IX86_LEA_PRIORITY;
18127 /* If there is no use in a memory address then we just check
18128 that the split cost exceeds the AGU stall. */
18129 if (dist_use < 0)
18130 return dist_define > LEA_MAX_STALL;
18132 /* If this insn has both a backward non-agu dependence and a forward
18133 agu dependence, the one with the shorter distance takes effect. */
18134 return dist_define >= dist_use;
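/* Worked example (exposition only): if a non-AGU definition of an
   input is one cycle back (dist_define = 1), split_cost is 1 and the
   result is used in an address three cycles later (dist_use = 3),
   then dist_define becomes 1 + 1 + IX86_LEA_PRIORITY = 2; since
   2 >= 3 is false, the function reports that the split sequence is
   preferable to the lea.  */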
18137 /* Return true if it is legal to clobber flags by INSN and
18138 false otherwise. */
18140 static bool
18141 ix86_ok_to_clobber_flags (rtx insn)
18143 basic_block bb = BLOCK_FOR_INSN (insn);
18144 df_ref use;
18145 bitmap live;
18147 while (insn)
18149 if (NONDEBUG_INSN_P (insn))
18151 FOR_EACH_INSN_USE (use, insn)
18152 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18153 return false;
18155 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18156 return true;
18159 if (insn == BB_END (bb))
18160 break;
18162 insn = NEXT_INSN (insn);
18165 live = df_get_live_out(bb);
18166 return !REGNO_REG_SET_P (live, FLAGS_REG);
18169 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18170 move and add to avoid AGU stalls. */
18172 bool
18173 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
18175 unsigned int regno0, regno1, regno2;
18177 /* Check if we need to optimize. */
18178 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18179 return false;
18181 /* Check it is correct to split here. */
18182 if (!ix86_ok_to_clobber_flags(insn))
18183 return false;
18185 regno0 = true_regnum (operands[0]);
18186 regno1 = true_regnum (operands[1]);
18187 regno2 = true_regnum (operands[2]);
18189 /* We only need to split adds with a non-destructive
18190 destination operand. */
18191 if (regno0 == regno1 || regno0 == regno2)
18192 return false;
18193 else
18194 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18197 /* Return true if we should emit lea instruction instead of mov
18198 instruction. */
18200 bool
18201 ix86_use_lea_for_mov (rtx insn, rtx operands[])
18203 unsigned int regno0, regno1;
18205 /* Check if we need to optimize. */
18206 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18207 return false;
18209 /* Use lea for reg to reg moves only. */
18210 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18211 return false;
18213 regno0 = true_regnum (operands[0]);
18214 regno1 = true_regnum (operands[1]);
18216 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18219 /* Return true if we need to split lea into a sequence of
18220 instructions to avoid AGU stalls. */
18222 bool
18223 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
18225 unsigned int regno0, regno1, regno2;
18226 int split_cost;
18227 struct ix86_address parts;
18228 int ok;
18230 /* Check we need to optimize. */
18231 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18232 return false;
18234 /* The "at least two components" test below might not catch simple
18235 move or zero extension insns if parts.base is non-NULL and parts.disp
18236 is const0_rtx as the only components in the address, e.g. if the
18237 register is %rbp or %r13. As this test is much cheaper and moves or
18238 zero extensions are the common case, do this check first. */
18239 if (REG_P (operands[1])
18240 || (SImode_address_operand (operands[1], VOIDmode)
18241 && REG_P (XEXP (operands[1], 0))))
18242 return false;
18244 /* Check if it is OK to split here. */
18245 if (!ix86_ok_to_clobber_flags (insn))
18246 return false;
18248 ok = ix86_decompose_address (operands[1], &parts);
18249 gcc_assert (ok);
18251 /* There should be at least two components in the address. */
18252 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18253 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18254 return false;
18256 /* We should not split into adds if a non-legitimate PIC
18257 operand is used as the displacement. */
18258 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18259 return false;
18261 regno0 = true_regnum (operands[0]) ;
18262 regno1 = INVALID_REGNUM;
18263 regno2 = INVALID_REGNUM;
18265 if (parts.base)
18266 regno1 = true_regnum (parts.base);
18267 if (parts.index)
18268 regno2 = true_regnum (parts.index);
18270 split_cost = 0;
18272 /* Compute how many cycles we will add to the execution time
18273 if we split the lea into a sequence of instructions. */
18274 if (parts.base || parts.index)
18276 /* Have to use a mov instruction if the non-destructive
18277 destination form is used. */
18278 if (regno1 != regno0 && regno2 != regno0)
18279 split_cost += 1;
18281 /* Have to add index to base if both exist. */
18282 if (parts.base && parts.index)
18283 split_cost += 1;
18285 /* Have to use shift and adds if scale is 2 or greater. */
18286 if (parts.scale > 1)
18288 if (regno0 != regno1)
18289 split_cost += 1;
18290 else if (regno2 == regno0)
18291 split_cost += 4;
18292 else
18293 split_cost += parts.scale;
18296 /* Have to use add instruction with immediate if
18297 disp is non zero. */
18298 if (parts.disp && parts.disp != const0_rtx)
18299 split_cost += 1;
18301 /* Subtract the price of lea. */
18302 split_cost -= 1;
18305 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18306 parts.scale > 1);
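/* Worked example of the split_cost computation above (exposition
   only): for "lea 8(%rbx,%rcx,4), %rax", where %rax matches neither
   input, there are a base, an index, a scale > 1 and a non-zero
   displacement, so split_cost = 1 (mov) + 1 (add base)
   + 1 (shift, regno0 != regno1) + 1 (add disp) - 1 (price of the lea)
   = 3.  */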
18309 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18310 matches destination. RTX includes clobber of FLAGS_REG. */
18312 static void
18313 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18314 rtx dst, rtx src)
18316 rtx op, clob;
18318 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18319 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18321 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18324 /* Return true if regno1 def is nearest to the insn. */
18326 static bool
18327 find_nearest_reg_def (rtx insn, int regno1, int regno2)
18329 rtx prev = insn;
18330 rtx start = BB_HEAD (BLOCK_FOR_INSN (insn));
18332 if (insn == start)
18333 return false;
18334 while (prev && prev != start)
18336 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18338 prev = PREV_INSN (prev);
18339 continue;
18341 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18342 return true;
18343 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18344 return false;
18345 prev = PREV_INSN (prev);
18348 /* None of the regs is defined in the bb. */
18349 return false;
18352 /* Split lea instructions into a sequence of instructions
18353 which are executed on the ALU to avoid AGU stalls.
18354 It is assumed that we are allowed to clobber the flags
18355 register at the lea position. */
18357 void
18358 ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode)
18360 unsigned int regno0, regno1, regno2;
18361 struct ix86_address parts;
18362 rtx target, tmp;
18363 int ok, adds;
18365 ok = ix86_decompose_address (operands[1], &parts);
18366 gcc_assert (ok);
18368 target = gen_lowpart (mode, operands[0]);
18370 regno0 = true_regnum (target);
18371 regno1 = INVALID_REGNUM;
18372 regno2 = INVALID_REGNUM;
18374 if (parts.base)
18376 parts.base = gen_lowpart (mode, parts.base);
18377 regno1 = true_regnum (parts.base);
18380 if (parts.index)
18382 parts.index = gen_lowpart (mode, parts.index);
18383 regno2 = true_regnum (parts.index);
18386 if (parts.disp)
18387 parts.disp = gen_lowpart (mode, parts.disp);
18389 if (parts.scale > 1)
18391 /* Case r1 = r1 + ... */
18392 if (regno1 == regno0)
18394 /* If we have the case r1 = r1 + C * r2 then we would
18395 have to use multiplication, which is very
18396 expensive. Assume the cost model is wrong if we
18397 have such a case here. */
18398 gcc_assert (regno2 != regno0);
18400 for (adds = parts.scale; adds > 0; adds--)
18401 ix86_emit_binop (PLUS, mode, target, parts.index);
18403 else
18405 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18406 if (regno0 != regno2)
18407 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18409 /* Use shift for scaling. */
18410 ix86_emit_binop (ASHIFT, mode, target,
18411 GEN_INT (exact_log2 (parts.scale)));
18413 if (parts.base)
18414 ix86_emit_binop (PLUS, mode, target, parts.base);
18416 if (parts.disp && parts.disp != const0_rtx)
18417 ix86_emit_binop (PLUS, mode, target, parts.disp);
18420 else if (!parts.base && !parts.index)
18422 gcc_assert(parts.disp);
18423 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18425 else
18427 if (!parts.base)
18429 if (regno0 != regno2)
18430 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18432 else if (!parts.index)
18434 if (regno0 != regno1)
18435 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18437 else
18439 if (regno0 == regno1)
18440 tmp = parts.index;
18441 else if (regno0 == regno2)
18442 tmp = parts.base;
18443 else
18445 rtx tmp1;
18447 /* Find better operand for SET instruction, depending
18448 on which definition is farther from the insn. */
18449 if (find_nearest_reg_def (insn, regno1, regno2))
18450 tmp = parts.index, tmp1 = parts.base;
18451 else
18452 tmp = parts.base, tmp1 = parts.index;
18454 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18456 if (parts.disp && parts.disp != const0_rtx)
18457 ix86_emit_binop (PLUS, mode, target, parts.disp);
18459 ix86_emit_binop (PLUS, mode, target, tmp1);
18460 return;
18463 ix86_emit_binop (PLUS, mode, target, tmp);
18466 if (parts.disp && parts.disp != const0_rtx)
18467 ix86_emit_binop (PLUS, mode, target, parts.disp);
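/* Illustrative result of the split above (approximate AT&T asm): for
   "lea 8(%rbx,%rcx,4), %rax", where the destination matches neither
   the base nor the index, the sequence emitted is roughly

	movq	%rcx, %rax
	salq	$2, %rax
	addq	%rbx, %rax
	addq	$8, %rax

   all of which executes on the ALU instead of the AGU.  */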
18471 /* Return true if it is OK to optimize an ADD operation to an LEA
18472 operation to avoid flag register consumption. For most processors,
18473 ADD is faster than LEA. For processors like BONNELL, if the
18474 destination register of the LEA holds an actual address which will be
18475 used soon, LEA is better; otherwise ADD is better. */
18477 bool
18478 ix86_lea_for_add_ok (rtx insn, rtx operands[])
18480 unsigned int regno0 = true_regnum (operands[0]);
18481 unsigned int regno1 = true_regnum (operands[1]);
18482 unsigned int regno2 = true_regnum (operands[2]);
18484 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18485 if (regno0 != regno1 && regno0 != regno2)
18486 return true;
18488 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18489 return false;
18491 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18494 /* Return true if destination reg of SET_BODY is shift count of
18495 USE_BODY. */
18497 static bool
18498 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18500 rtx set_dest;
18501 rtx shift_rtx;
18502 int i;
18504 /* Retrieve destination of SET_BODY. */
18505 switch (GET_CODE (set_body))
18507 case SET:
18508 set_dest = SET_DEST (set_body);
18509 if (!set_dest || !REG_P (set_dest))
18510 return false;
18511 break;
18512 case PARALLEL:
18513 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18514 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18515 use_body))
18516 return true;
18517 default:
18518 return false;
18519 break;
18522 /* Retrieve shift count of USE_BODY. */
18523 switch (GET_CODE (use_body))
18525 case SET:
18526 shift_rtx = XEXP (use_body, 1);
18527 break;
18528 case PARALLEL:
18529 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18530 if (ix86_dep_by_shift_count_body (set_body,
18531 XVECEXP (use_body, 0, i)))
18532 return true;
18533 default:
18534 return false;
18535 break;
18538 if (shift_rtx
18539 && (GET_CODE (shift_rtx) == ASHIFT
18540 || GET_CODE (shift_rtx) == LSHIFTRT
18541 || GET_CODE (shift_rtx) == ASHIFTRT
18542 || GET_CODE (shift_rtx) == ROTATE
18543 || GET_CODE (shift_rtx) == ROTATERT))
18545 rtx shift_count = XEXP (shift_rtx, 1);
18547 /* Return true if shift count is dest of SET_BODY. */
18548 if (REG_P (shift_count))
18550 /* Add this check since it can be invoked before register
18551 allocation in the pre-reload scheduler. */
18552 if (reload_completed
18553 && true_regnum (set_dest) == true_regnum (shift_count))
18554 return true;
18555 else if (REGNO(set_dest) == REGNO(shift_count))
18556 return true;
18560 return false;
18563 /* Return true if destination reg of SET_INSN is shift count of
18564 USE_INSN. */
18566 bool
18567 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18569 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18570 PATTERN (use_insn));
18573 /* Return TRUE or FALSE depending on whether the unary operator meets the
18574 appropriate constraints. */
18576 bool
18577 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
18578 enum machine_mode mode ATTRIBUTE_UNUSED,
18579 rtx operands[2])
18581 /* If one of operands is memory, source and destination must match. */
18582 if ((MEM_P (operands[0])
18583 || MEM_P (operands[1]))
18584 && ! rtx_equal_p (operands[0], operands[1]))
18585 return false;
18586 return true;
18589 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18590 are ok, keeping in mind the possible movddup alternative. */
18592 bool
18593 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18595 if (MEM_P (operands[0]))
18596 return rtx_equal_p (operands[0], operands[1 + high]);
18597 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18598 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18599 return true;
18602 /* Post-reload splitter for converting an SF or DFmode value in an
18603 SSE register into an unsigned SImode. */
18605 void
18606 ix86_split_convert_uns_si_sse (rtx operands[])
18608 enum machine_mode vecmode;
18609 rtx value, large, zero_or_two31, input, two31, x;
18611 large = operands[1];
18612 zero_or_two31 = operands[2];
18613 input = operands[3];
18614 two31 = operands[4];
18615 vecmode = GET_MODE (large);
18616 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18618 /* Load up the value into the low element. We must ensure that the other
18619 elements are valid floats -- zero is the easiest such value. */
18620 if (MEM_P (input))
18622 if (vecmode == V4SFmode)
18623 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18624 else
18625 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18627 else
18629 input = gen_rtx_REG (vecmode, REGNO (input));
18630 emit_move_insn (value, CONST0_RTX (vecmode));
18631 if (vecmode == V4SFmode)
18632 emit_insn (gen_sse_movss (value, value, input));
18633 else
18634 emit_insn (gen_sse2_movsd (value, value, input));
18637 emit_move_insn (large, two31);
18638 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18640 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18641 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18643 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18644 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18646 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18647 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18649 large = gen_rtx_REG (V4SImode, REGNO (large));
18650 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18652 x = gen_rtx_REG (V4SImode, REGNO (value));
18653 if (vecmode == V4SFmode)
18654 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18655 else
18656 emit_insn (gen_sse2_cvttpd2dq (x, value));
18657 value = x;
18659 emit_insn (gen_xorv4si3 (value, value, large));
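/* For illustration only: a scalar C sketch (hypothetical helper, not part of
   this port) of the trick used above.  The vector code selects the 2**31 bias
   with a compare mask and stays branch-free; the sketch uses a branch.  */
#if 0
#include <stdint.h>

static uint32_t
double_to_uint32_sketch (double x)
{
  /* Assumes 0 <= x < 2**32 (out-of-range inputs are undefined for the
     conversion anyway).  */
  if (x >= 2147483648.0)
    /* Too large for a signed conversion: subtract 2**31 first and put
       the bit back with an XOR afterwards (the gen_xorv4si3 above).  */
    return ((uint32_t) (int32_t) (x - 2147483648.0)) ^ 0x80000000u;
  return (uint32_t) (int32_t) x;
}
#endif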
18662 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18663 Expects the 64-bit DImode to be supplied in a pair of integral
18664 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18665 -mfpmath=sse, !optimize_size only. */
18667 void
18668 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18670 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18671 rtx int_xmm, fp_xmm;
18672 rtx biases, exponents;
18673 rtx x;
18675 int_xmm = gen_reg_rtx (V4SImode);
18676 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18677 emit_insn (gen_movdi_to_sse (int_xmm, input));
18678 else if (TARGET_SSE_SPLIT_REGS)
18680 emit_clobber (int_xmm);
18681 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18683 else
18685 x = gen_reg_rtx (V2DImode);
18686 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18687 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18690 x = gen_rtx_CONST_VECTOR (V4SImode,
18691 gen_rtvec (4, GEN_INT (0x43300000UL),
18692 GEN_INT (0x45300000UL),
18693 const0_rtx, const0_rtx));
18694 exponents = validize_mem (force_const_mem (V4SImode, x));
18696 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18697 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18699 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_lo_xmm)
18700 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18701 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18702 (0x1.0p84 + double(fp_value_hi_xmm)).
18703 Note these exponents differ by 32. */
18705 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18707 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18708 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18709 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18710 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18711 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18712 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18713 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18714 biases = validize_mem (force_const_mem (V2DFmode, biases));
18715 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18717 /* Add the upper and lower DFmode values together. */
18718 if (TARGET_SSE3)
18719 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18720 else
18722 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18723 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18724 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18727 ix86_expand_vector_extract (false, target, fp_xmm, 0);
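/* For illustration only: a standalone C sketch (hypothetical names, not part
   of this port) of the exponent-bias trick used above, assuming IEEE double
   and C99 hexadecimal float literals.  */
#if 0
#include <stdint.h>
#include <string.h>

static double
uint64_to_double_sketch (uint64_t x)
{
  /* 0x433 and 0x453 are the biased exponents of 0x1.0p52 and 0x1.0p84;
     ORing the halves of X into the mantissa fields yields the exact
     values 2**52 + lo and 2**84 + hi * 2**32.  */
  uint64_t lo_bits = 0x4330000000000000ULL | (x & 0xffffffffULL);
  uint64_t hi_bits = 0x4530000000000000ULL | (x >> 32);
  double lo, hi;
  memcpy (&lo, &lo_bits, sizeof lo);
  memcpy (&hi, &hi_bits, sizeof hi);
  /* Subtracting the biases is exact; only the final addition rounds,
     so the result is the correctly rounded conversion.  */
  return (hi - 0x1.0p84) + (lo - 0x1.0p52);
}
#endif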
18730 /* Not used, but eases macroization of patterns. */
18731 void
18732 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
18733 rtx input ATTRIBUTE_UNUSED)
18735 gcc_unreachable ();
18738 /* Convert an unsigned SImode value into a DFmode. Only currently used
18739 for SSE, but applicable anywhere. */
18741 void
18742 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18744 REAL_VALUE_TYPE TWO31r;
18745 rtx x, fp;
18747 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18748 NULL, 1, OPTAB_DIRECT);
18750 fp = gen_reg_rtx (DFmode);
18751 emit_insn (gen_floatsidf2 (fp, x));
18753 real_ldexp (&TWO31r, &dconst1, 31);
18754 x = const_double_from_real_value (TWO31r, DFmode);
18756 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18757 if (x != target)
18758 emit_move_insn (target, x);
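/* For illustration only: the same transformation in scalar C (hypothetical
   helper, not part of this port); assumes the usual two's-complement
   behaviour of the unsigned-to-signed cast.  */
#if 0
#include <stdint.h>

static double
uint32_to_double_sketch (uint32_t x)
{
  /* Shift into signed range (the PLUS of -2147483648 above), convert,
     then undo the shift in the FP domain where 2**31 is exact.  */
  int32_t biased = (int32_t) (x + 0x80000000u);
  return (double) biased + 2147483648.0;
}
#endif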
18761 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18762 32-bit mode; otherwise we have a direct convert instruction. */
18764 void
18765 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18767 REAL_VALUE_TYPE TWO32r;
18768 rtx fp_lo, fp_hi, x;
18770 fp_lo = gen_reg_rtx (DFmode);
18771 fp_hi = gen_reg_rtx (DFmode);
18773 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18775 real_ldexp (&TWO32r, &dconst1, 32);
18776 x = const_double_from_real_value (TWO32r, DFmode);
18777 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18779 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18781 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18782 0, OPTAB_DIRECT);
18783 if (x != target)
18784 emit_move_insn (target, x);
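/* For illustration only: a scalar sketch (hypothetical helper) of the split
   used above -- the signed high word scaled by 2**32 plus the unsigned low
   word -- assuming an arithmetic right shift of negative values.  */
#if 0
#include <stdint.h>

static double
int64_to_double_sketch (int64_t x)
{
  double hi = (double) (int32_t) (x >> 32);   /* floatsidf2 above        */
  double lo = (double) (uint32_t) x;          /* the uns_sidf_sse helper */
  return hi * 4294967296.0 + lo;              /* hi * 2**32 + lo         */
}
#endif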
18787 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18788 For x86_32, -mfpmath=sse, !optimize_size only. */
18789 void
18790 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18792 REAL_VALUE_TYPE ONE16r;
18793 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18795 real_ldexp (&ONE16r, &dconst1, 16);
18796 x = const_double_from_real_value (ONE16r, SFmode);
18797 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
18798 NULL, 0, OPTAB_DIRECT);
18799 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
18800 NULL, 0, OPTAB_DIRECT);
18801 fp_hi = gen_reg_rtx (SFmode);
18802 fp_lo = gen_reg_rtx (SFmode);
18803 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18804 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18805 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18806 0, OPTAB_DIRECT);
18807 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18808 0, OPTAB_DIRECT);
18809 if (!rtx_equal_p (target, fp_hi))
18810 emit_move_insn (target, fp_hi);
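/* For illustration only: the 16-bit split in scalar C (hypothetical helper).
   Both halves fit in 16 bits, so the two int->float conversions and the
   scaling by 2**16 are exact; the only rounding is in the final addition.  */
#if 0
#include <stdint.h>

static float
uint32_to_float_sketch (uint32_t x)
{
  float hi = (float) (int32_t) (x >> 16);     /* high 16 bits */
  float lo = (float) (int32_t) (x & 0xffff);  /* low 16 bits  */
  return hi * 65536.0f + lo;
}
#endif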
18813 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18814 a vector of unsigned ints VAL to vector of floats TARGET. */
18816 void
18817 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18819 rtx tmp[8];
18820 REAL_VALUE_TYPE TWO16r;
18821 enum machine_mode intmode = GET_MODE (val);
18822 enum machine_mode fltmode = GET_MODE (target);
18823 rtx (*cvt) (rtx, rtx);
18825 if (intmode == V4SImode)
18826 cvt = gen_floatv4siv4sf2;
18827 else
18828 cvt = gen_floatv8siv8sf2;
18829 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18830 tmp[0] = force_reg (intmode, tmp[0]);
18831 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18832 OPTAB_DIRECT);
18833 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18834 NULL_RTX, 1, OPTAB_DIRECT);
18835 tmp[3] = gen_reg_rtx (fltmode);
18836 emit_insn (cvt (tmp[3], tmp[1]));
18837 tmp[4] = gen_reg_rtx (fltmode);
18838 emit_insn (cvt (tmp[4], tmp[2]));
18839 real_ldexp (&TWO16r, &dconst1, 16);
18840 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18841 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18842 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18843 OPTAB_DIRECT);
18844 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18845 OPTAB_DIRECT);
18846 if (tmp[7] != target)
18847 emit_move_insn (target, tmp[7]);
18850 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18851 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18852 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18853 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
18856 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18858 REAL_VALUE_TYPE TWO31r;
18859 rtx two31r, tmp[4];
18860 enum machine_mode mode = GET_MODE (val);
18861 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18862 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18863 rtx (*cmp) (rtx, rtx, rtx, rtx);
18864 int i;
18866 for (i = 0; i < 3; i++)
18867 tmp[i] = gen_reg_rtx (mode);
18868 real_ldexp (&TWO31r, &dconst1, 31);
18869 two31r = const_double_from_real_value (TWO31r, scalarmode);
18870 two31r = ix86_build_const_vector (mode, 1, two31r);
18871 two31r = force_reg (mode, two31r);
18872 switch (mode)
18874 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18875 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18876 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18877 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18878 default: gcc_unreachable ();
18880 tmp[3] = gen_rtx_LE (mode, two31r, val);
18881 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18882 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18883 0, OPTAB_DIRECT);
18884 if (intmode == V4SImode || TARGET_AVX2)
18885 *xorp = expand_simple_binop (intmode, ASHIFT,
18886 gen_lowpart (intmode, tmp[0]),
18887 GEN_INT (31), NULL_RTX, 0,
18888 OPTAB_DIRECT);
18889 else
18891 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18892 two31 = ix86_build_const_vector (intmode, 1, two31);
18893 *xorp = expand_simple_binop (intmode, AND,
18894 gen_lowpart (intmode, tmp[0]),
18895 two31, NULL_RTX, 0,
18896 OPTAB_DIRECT);
18898 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18899 0, OPTAB_DIRECT);
18902 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18903 then replicate the value for all elements of the vector
18904 register. */
18907 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18909 int i, n_elt;
18910 rtvec v;
18911 enum machine_mode scalar_mode;
18913 switch (mode)
18915 case V64QImode:
18916 case V32QImode:
18917 case V16QImode:
18918 case V32HImode:
18919 case V16HImode:
18920 case V8HImode:
18921 case V16SImode:
18922 case V8SImode:
18923 case V4SImode:
18924 case V8DImode:
18925 case V4DImode:
18926 case V2DImode:
18927 gcc_assert (vect);
18928 case V16SFmode:
18929 case V8SFmode:
18930 case V4SFmode:
18931 case V8DFmode:
18932 case V4DFmode:
18933 case V2DFmode:
18934 n_elt = GET_MODE_NUNITS (mode);
18935 v = rtvec_alloc (n_elt);
18936 scalar_mode = GET_MODE_INNER (mode);
18938 RTVEC_ELT (v, 0) = value;
18940 for (i = 1; i < n_elt; ++i)
18941 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18943 return gen_rtx_CONST_VECTOR (mode, v);
18945 default:
18946 gcc_unreachable ();
18950 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18951 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18952 for an SSE register. If VECT is true, then replicate the mask for
18953 all elements of the vector register. If INVERT is true, then create
18954 a mask excluding the sign bit. */
18957 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
18959 enum machine_mode vec_mode, imode;
18960 HOST_WIDE_INT hi, lo;
18961 int shift = 63;
18962 rtx v;
18963 rtx mask;
18965 /* Find the sign bit, sign extended to 2*HWI. */
18966 switch (mode)
18968 case V16SImode:
18969 case V16SFmode:
18970 case V8SImode:
18971 case V4SImode:
18972 case V8SFmode:
18973 case V4SFmode:
18974 vec_mode = mode;
18975 mode = GET_MODE_INNER (mode);
18976 imode = SImode;
18977 lo = 0x80000000, hi = lo < 0;
18978 break;
18980 case V8DImode:
18981 case V4DImode:
18982 case V2DImode:
18983 case V8DFmode:
18984 case V4DFmode:
18985 case V2DFmode:
18986 vec_mode = mode;
18987 mode = GET_MODE_INNER (mode);
18988 imode = DImode;
18989 if (HOST_BITS_PER_WIDE_INT >= 64)
18990 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
18991 else
18992 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18993 break;
18995 case TImode:
18996 case TFmode:
18997 vec_mode = VOIDmode;
18998 if (HOST_BITS_PER_WIDE_INT >= 64)
19000 imode = TImode;
19001 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19003 else
19005 rtvec vec;
19007 imode = DImode;
19008 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19010 if (invert)
19012 lo = ~lo, hi = ~hi;
19013 v = constm1_rtx;
19015 else
19016 v = const0_rtx;
19018 mask = immed_double_const (lo, hi, imode);
19020 vec = gen_rtvec (2, v, mask);
19021 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19022 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19024 return v;
19026 break;
19028 default:
19029 gcc_unreachable ();
19032 if (invert)
19033 lo = ~lo, hi = ~hi;
19035 /* Force this value into the low part of a fp vector constant. */
19036 mask = immed_double_const (lo, hi, imode);
19037 mask = gen_lowpart (mode, mask);
19039 if (vec_mode == VOIDmode)
19040 return force_reg (mode, mask);
19042 v = ix86_build_const_vector (vec_mode, vect, mask);
19043 return force_reg (vec_mode, v);
19046 /* Generate code for floating point ABS or NEG. */
19048 void
19049 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
19050 rtx operands[])
19052 rtx mask, set, dst, src;
19053 bool use_sse = false;
19054 bool vector_mode = VECTOR_MODE_P (mode);
19055 enum machine_mode vmode = mode;
19057 if (vector_mode)
19058 use_sse = true;
19059 else if (mode == TFmode)
19060 use_sse = true;
19061 else if (TARGET_SSE_MATH)
19063 use_sse = SSE_FLOAT_MODE_P (mode);
19064 if (mode == SFmode)
19065 vmode = V4SFmode;
19066 else if (mode == DFmode)
19067 vmode = V2DFmode;
19070 /* NEG and ABS performed with SSE use bitwise mask operations.
19071 Create the appropriate mask now. */
19072 if (use_sse)
19073 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19074 else
19075 mask = NULL_RTX;
19077 dst = operands[0];
19078 src = operands[1];
19080 set = gen_rtx_fmt_e (code, mode, src);
19081 set = gen_rtx_SET (VOIDmode, dst, set);
19083 if (mask)
19085 rtx use, clob;
19086 rtvec par;
19088 use = gen_rtx_USE (VOIDmode, mask);
19089 if (vector_mode)
19090 par = gen_rtvec (2, set, use);
19091 else
19093 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19094 par = gen_rtvec (3, set, use, clob);
19096 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19098 else
19099 emit_insn (set);
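/* For illustration only: what the mask operations amount to on a scalar
   float (hypothetical helpers).  The pattern emitted above is later split
   into an AND with the inverted sign mask for ABS and an XOR with the sign
   mask for NEG.  */
#if 0
#include <stdint.h>
#include <string.h>

static float
fp_abs_sketch (float x)
{
  uint32_t bits;
  memcpy (&bits, &x, sizeof bits);
  bits &= 0x7fffffffu;            /* AND with the inverted sign mask */
  memcpy (&x, &bits, sizeof x);
  return x;
}

static float
fp_neg_sketch (float x)
{
  uint32_t bits;
  memcpy (&bits, &x, sizeof bits);
  bits ^= 0x80000000u;            /* XOR with the sign mask */
  memcpy (&x, &bits, sizeof x);
  return x;
}
#endif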
19102 /* Expand a copysign operation. Special case operand 0 being a constant. */
19104 void
19105 ix86_expand_copysign (rtx operands[])
19107 enum machine_mode mode, vmode;
19108 rtx dest, op0, op1, mask, nmask;
19110 dest = operands[0];
19111 op0 = operands[1];
19112 op1 = operands[2];
19114 mode = GET_MODE (dest);
19116 if (mode == SFmode)
19117 vmode = V4SFmode;
19118 else if (mode == DFmode)
19119 vmode = V2DFmode;
19120 else
19121 vmode = mode;
19123 if (GET_CODE (op0) == CONST_DOUBLE)
19125 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19127 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19128 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19130 if (mode == SFmode || mode == DFmode)
19132 if (op0 == CONST0_RTX (mode))
19133 op0 = CONST0_RTX (vmode);
19134 else
19136 rtx v = ix86_build_const_vector (vmode, false, op0);
19138 op0 = force_reg (vmode, v);
19141 else if (op0 != CONST0_RTX (mode))
19142 op0 = force_reg (mode, op0);
19144 mask = ix86_build_signbit_mask (vmode, 0, 0);
19146 if (mode == SFmode)
19147 copysign_insn = gen_copysignsf3_const;
19148 else if (mode == DFmode)
19149 copysign_insn = gen_copysigndf3_const;
19150 else
19151 copysign_insn = gen_copysigntf3_const;
19153 emit_insn (copysign_insn (dest, op0, op1, mask));
19155 else
19157 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19159 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19160 mask = ix86_build_signbit_mask (vmode, 0, 0);
19162 if (mode == SFmode)
19163 copysign_insn = gen_copysignsf3_var;
19164 else if (mode == DFmode)
19165 copysign_insn = gen_copysigndf3_var;
19166 else
19167 copysign_insn = gen_copysigntf3_var;
19169 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19173 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19174 be a constant, and so has already been expanded into a vector constant. */
19176 void
19177 ix86_split_copysign_const (rtx operands[])
19179 enum machine_mode mode, vmode;
19180 rtx dest, op0, mask, x;
19182 dest = operands[0];
19183 op0 = operands[1];
19184 mask = operands[3];
19186 mode = GET_MODE (dest);
19187 vmode = GET_MODE (mask);
19189 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19190 x = gen_rtx_AND (vmode, dest, mask);
19191 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19193 if (op0 != CONST0_RTX (vmode))
19195 x = gen_rtx_IOR (vmode, dest, op0);
19196 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19200 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19201 so we have to do two masks. */
19203 void
19204 ix86_split_copysign_var (rtx operands[])
19206 enum machine_mode mode, vmode;
19207 rtx dest, scratch, op0, op1, mask, nmask, x;
19209 dest = operands[0];
19210 scratch = operands[1];
19211 op0 = operands[2];
19212 op1 = operands[3];
19213 nmask = operands[4];
19214 mask = operands[5];
19216 mode = GET_MODE (dest);
19217 vmode = GET_MODE (mask);
19219 if (rtx_equal_p (op0, op1))
19221 /* Shouldn't happen often (it's useless, obviously), but when it does
19222 we'd generate incorrect code if we continue below. */
19223 emit_move_insn (dest, op0);
19224 return;
19227 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19229 gcc_assert (REGNO (op1) == REGNO (scratch));
19231 x = gen_rtx_AND (vmode, scratch, mask);
19232 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19234 dest = mask;
19235 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19236 x = gen_rtx_NOT (vmode, dest);
19237 x = gen_rtx_AND (vmode, x, op0);
19238 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19240 else
19242 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19244 x = gen_rtx_AND (vmode, scratch, mask);
19246 else /* alternative 2,4 */
19248 gcc_assert (REGNO (mask) == REGNO (scratch));
19249 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19250 x = gen_rtx_AND (vmode, scratch, op1);
19252 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19254 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19256 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19257 x = gen_rtx_AND (vmode, dest, nmask);
19259 else /* alternative 3,4 */
19261 gcc_assert (REGNO (nmask) == REGNO (dest));
19262 dest = nmask;
19263 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19264 x = gen_rtx_AND (vmode, dest, op0);
19266 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19269 x = gen_rtx_IOR (vmode, dest, scratch);
19270 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
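/* For illustration only: the mask algebra behind the copysign expansion
   (hypothetical helper).  MASK selects the sign bit, NMASK everything
   else.  */
#if 0
#include <stdint.h>
#include <string.h>

static double
copysign_sketch (double mag, double sgn)
{
  uint64_t m, s;
  memcpy (&m, &mag, sizeof m);
  memcpy (&s, &sgn, sizeof s);
  m = (m & 0x7fffffffffffffffULL)     /* magnitude bits of MAG (nmask) */
      | (s & 0x8000000000000000ULL);  /* sign bit of SGN (mask)        */
  memcpy (&mag, &m, sizeof mag);
  return mag;
}
#endif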
19273 /* Return TRUE or FALSE depending on whether the first SET in INSN
19274 has source and destination with matching CC modes, and that the
19275 CC mode is at least as constrained as REQ_MODE. */
19277 bool
19278 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19280 rtx set;
19281 enum machine_mode set_mode;
19283 set = PATTERN (insn);
19284 if (GET_CODE (set) == PARALLEL)
19285 set = XVECEXP (set, 0, 0);
19286 gcc_assert (GET_CODE (set) == SET);
19287 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19289 set_mode = GET_MODE (SET_DEST (set));
19290 switch (set_mode)
19292 case CCNOmode:
19293 if (req_mode != CCNOmode
19294 && (req_mode != CCmode
19295 || XEXP (SET_SRC (set), 1) != const0_rtx))
19296 return false;
19297 break;
19298 case CCmode:
19299 if (req_mode == CCGCmode)
19300 return false;
19301 /* FALLTHRU */
19302 case CCGCmode:
19303 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19304 return false;
19305 /* FALLTHRU */
19306 case CCGOCmode:
19307 if (req_mode == CCZmode)
19308 return false;
19309 /* FALLTHRU */
19310 case CCZmode:
19311 break;
19313 case CCAmode:
19314 case CCCmode:
19315 case CCOmode:
19316 case CCSmode:
19317 if (set_mode != req_mode)
19318 return false;
19319 break;
19321 default:
19322 gcc_unreachable ();
19325 return GET_MODE (SET_SRC (set)) == set_mode;
19328 /* Generate insn patterns to do an integer compare of OPERANDS. */
19330 static rtx
19331 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19333 enum machine_mode cmpmode;
19334 rtx tmp, flags;
19336 cmpmode = SELECT_CC_MODE (code, op0, op1);
19337 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19339 /* This is very simple, but making the interface the same as in the
19340 FP case makes the rest of the code easier. */
19341 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19342 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19344 /* Return the test that should be put into the flags user, i.e.
19345 the bcc, scc, or cmov instruction. */
19346 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19349 /* Figure out whether to use ordered or unordered fp comparisons.
19350 Return the appropriate mode to use. */
19352 enum machine_mode
19353 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
19355 /* ??? In order to make all comparisons reversible, we do all comparisons
19356 non-trapping when compiling for IEEE.  Once gcc is able to distinguish
19357 between all forms of trapping and non-trapping comparisons, we can make
19358 inequality comparisons trapping again, since that results in better code
19359 when using FCOM based compares. */
19360 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19363 enum machine_mode
19364 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19366 enum machine_mode mode = GET_MODE (op0);
19368 if (SCALAR_FLOAT_MODE_P (mode))
19370 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19371 return ix86_fp_compare_mode (code);
19374 switch (code)
19376 /* Only zero flag is needed. */
19377 case EQ: /* ZF=0 */
19378 case NE: /* ZF!=0 */
19379 return CCZmode;
19380 /* Codes needing carry flag. */
19381 case GEU: /* CF=0 */
19382 case LTU: /* CF=1 */
19383 /* Detect overflow checks. They need just the carry flag. */
19384 if (GET_CODE (op0) == PLUS
19385 && rtx_equal_p (op1, XEXP (op0, 0)))
19386 return CCCmode;
19387 else
19388 return CCmode;
19389 case GTU: /* CF=0 & ZF=0 */
19390 case LEU: /* CF=1 | ZF=1 */
19391 return CCmode;
19392 /* Codes possibly doable only with sign flag when
19393 comparing against zero. */
19394 case GE: /* SF=OF or SF=0 */
19395 case LT: /* SF<>OF or SF=1 */
19396 if (op1 == const0_rtx)
19397 return CCGOCmode;
19398 else
19399 /* For other cases Carry flag is not required. */
19400 return CCGCmode;
19401 /* Codes doable only with the sign flag when comparing
19402 against zero, but we lack a jump instruction for it,
19403 so we need to use relational tests against the overflow
19404 flag, which thus needs to be zero. */
19405 case GT: /* ZF=0 & SF=OF */
19406 case LE: /* ZF=1 | SF<>OF */
19407 if (op1 == const0_rtx)
19408 return CCNOmode;
19409 else
19410 return CCGCmode;
19411 /* The strcmp pattern does (use flags), and combine may ask us for the
19412 proper mode. */
19413 case USE:
19414 return CCmode;
19415 default:
19416 gcc_unreachable ();
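/* For illustration only: the CCCmode case above matches the canonical
   unsigned overflow test, where the comparison operand is the PLUS compared
   against one of its own operands -- roughly the shape produced for: */
#if 0
static unsigned int
add_overflows_sketch (unsigned int a, unsigned int b)
{
  unsigned int sum = a + b;
  return sum < a;     /* (ltu (plus a b) a): only the carry flag matters */
}
#endif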
19420 /* Return the fixed registers used for condition codes. */
19422 static bool
19423 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19425 *p1 = FLAGS_REG;
19426 *p2 = FPSR_REG;
19427 return true;
19430 /* If two condition code modes are compatible, return a condition code
19431 mode which is compatible with both. Otherwise, return
19432 VOIDmode. */
19434 static enum machine_mode
19435 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19437 if (m1 == m2)
19438 return m1;
19440 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19441 return VOIDmode;
19443 if ((m1 == CCGCmode && m2 == CCGOCmode)
19444 || (m1 == CCGOCmode && m2 == CCGCmode))
19445 return CCGCmode;
19447 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19448 return m2;
19449 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19450 return m1;
19452 switch (m1)
19454 default:
19455 gcc_unreachable ();
19457 case CCmode:
19458 case CCGCmode:
19459 case CCGOCmode:
19460 case CCNOmode:
19461 case CCAmode:
19462 case CCCmode:
19463 case CCOmode:
19464 case CCSmode:
19465 case CCZmode:
19466 switch (m2)
19468 default:
19469 return VOIDmode;
19471 case CCmode:
19472 case CCGCmode:
19473 case CCGOCmode:
19474 case CCNOmode:
19475 case CCAmode:
19476 case CCCmode:
19477 case CCOmode:
19478 case CCSmode:
19479 case CCZmode:
19480 return CCmode;
19483 case CCFPmode:
19484 case CCFPUmode:
19485 /* These are only compatible with themselves, which we already
19486 checked above. */
19487 return VOIDmode;
19492 /* Return a comparison we can do that is equivalent to
19493 swap_condition (code), apart possibly from orderedness.
19494 Never change orderedness if TARGET_IEEE_FP, returning
19495 UNKNOWN in that case if necessary. */
19497 static enum rtx_code
19498 ix86_fp_swap_condition (enum rtx_code code)
19500 switch (code)
19502 case GT: /* GTU - CF=0 & ZF=0 */
19503 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19504 case GE: /* GEU - CF=0 */
19505 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19506 case UNLT: /* LTU - CF=1 */
19507 return TARGET_IEEE_FP ? UNKNOWN : GT;
19508 case UNLE: /* LEU - CF=1 | ZF=1 */
19509 return TARGET_IEEE_FP ? UNKNOWN : GE;
19510 default:
19511 return swap_condition (code);
19515 /* Return the cost of comparison CODE using the best strategy for performance.
19516 All of the following functions use the number of instructions as the cost metric.
19517 In the future this should be tweaked to compute bytes for optimize_size and to
19518 take into account the performance of various instructions on various CPUs. */
19520 static int
19521 ix86_fp_comparison_cost (enum rtx_code code)
19523 int arith_cost;
19525 /* The cost of code using bit-twiddling on %ah. */
19526 switch (code)
19528 case UNLE:
19529 case UNLT:
19530 case LTGT:
19531 case GT:
19532 case GE:
19533 case UNORDERED:
19534 case ORDERED:
19535 case UNEQ:
19536 arith_cost = 4;
19537 break;
19538 case LT:
19539 case NE:
19540 case EQ:
19541 case UNGE:
19542 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19543 break;
19544 case LE:
19545 case UNGT:
19546 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19547 break;
19548 default:
19549 gcc_unreachable ();
19552 switch (ix86_fp_comparison_strategy (code))
19554 case IX86_FPCMP_COMI:
19555 return arith_cost > 4 ? 3 : 2;
19556 case IX86_FPCMP_SAHF:
19557 return arith_cost > 4 ? 4 : 3;
19558 default:
19559 return arith_cost;
19563 /* Return the strategy to use for a floating-point comparison.  We assume that
19564 fcomi is always preferable where available, since that is also true when
19565 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19567 enum ix86_fpcmp_strategy
19568 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
19570 /* Do fcomi/sahf based test when profitable. */
19572 if (TARGET_CMOVE)
19573 return IX86_FPCMP_COMI;
19575 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19576 return IX86_FPCMP_SAHF;
19578 return IX86_FPCMP_ARITH;
19581 /* Swap, force into registers, or otherwise massage the two operands
19582 to a fp comparison. The operands are updated in place; the new
19583 comparison code is returned. */
19585 static enum rtx_code
19586 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19588 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19589 rtx op0 = *pop0, op1 = *pop1;
19590 enum machine_mode op_mode = GET_MODE (op0);
19591 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19593 /* All of the unordered compare instructions only work on registers.
19594 The same is true of the fcomi compare instructions. The XFmode
19595 compare instructions require registers except when comparing
19596 against zero or when converting operand 1 from fixed point to
19597 floating point. */
19599 if (!is_sse
19600 && (fpcmp_mode == CCFPUmode
19601 || (op_mode == XFmode
19602 && ! (standard_80387_constant_p (op0) == 1
19603 || standard_80387_constant_p (op1) == 1)
19604 && GET_CODE (op1) != FLOAT)
19605 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19607 op0 = force_reg (op_mode, op0);
19608 op1 = force_reg (op_mode, op1);
19610 else
19612 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19613 things around if they appear profitable, otherwise force op0
19614 into a register. */
19616 if (standard_80387_constant_p (op0) == 0
19617 || (MEM_P (op0)
19618 && ! (standard_80387_constant_p (op1) == 0
19619 || MEM_P (op1))))
19621 enum rtx_code new_code = ix86_fp_swap_condition (code);
19622 if (new_code != UNKNOWN)
19624 rtx tmp;
19625 tmp = op0, op0 = op1, op1 = tmp;
19626 code = new_code;
19630 if (!REG_P (op0))
19631 op0 = force_reg (op_mode, op0);
19633 if (CONSTANT_P (op1))
19635 int tmp = standard_80387_constant_p (op1);
19636 if (tmp == 0)
19637 op1 = validize_mem (force_const_mem (op_mode, op1));
19638 else if (tmp == 1)
19640 if (TARGET_CMOVE)
19641 op1 = force_reg (op_mode, op1);
19643 else
19644 op1 = force_reg (op_mode, op1);
19648 /* Try to rearrange the comparison to make it cheaper. */
19649 if (ix86_fp_comparison_cost (code)
19650 > ix86_fp_comparison_cost (swap_condition (code))
19651 && (REG_P (op1) || can_create_pseudo_p ()))
19653 rtx tmp;
19654 tmp = op0, op0 = op1, op1 = tmp;
19655 code = swap_condition (code);
19656 if (!REG_P (op0))
19657 op0 = force_reg (op_mode, op0);
19660 *pop0 = op0;
19661 *pop1 = op1;
19662 return code;
19665 /* Convert a comparison code we use to represent an FP comparison into the
19666 integer code that will result in a proper branch.  Return UNKNOWN if no
19667 such code is available. */
19669 enum rtx_code
19670 ix86_fp_compare_code_to_integer (enum rtx_code code)
19672 switch (code)
19674 case GT:
19675 return GTU;
19676 case GE:
19677 return GEU;
19678 case ORDERED:
19679 case UNORDERED:
19680 return code;
19681 break;
19682 case UNEQ:
19683 return EQ;
19684 break;
19685 case UNLT:
19686 return LTU;
19687 break;
19688 case UNLE:
19689 return LEU;
19690 break;
19691 case LTGT:
19692 return NE;
19693 break;
19694 default:
19695 return UNKNOWN;
19699 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19701 static rtx
19702 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19704 enum machine_mode fpcmp_mode, intcmp_mode;
19705 rtx tmp, tmp2;
19707 fpcmp_mode = ix86_fp_compare_mode (code);
19708 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19710 /* Do fcomi/sahf based test when profitable. */
19711 switch (ix86_fp_comparison_strategy (code))
19713 case IX86_FPCMP_COMI:
19714 intcmp_mode = fpcmp_mode;
19715 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19716 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19717 tmp);
19718 emit_insn (tmp);
19719 break;
19721 case IX86_FPCMP_SAHF:
19722 intcmp_mode = fpcmp_mode;
19723 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19724 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19725 tmp);
19727 if (!scratch)
19728 scratch = gen_reg_rtx (HImode);
19729 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19730 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19731 break;
19733 case IX86_FPCMP_ARITH:
19734 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19735 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19736 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19737 if (!scratch)
19738 scratch = gen_reg_rtx (HImode);
19739 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19741 /* In the unordered case, we have to check C2 for NaN's, which
19742 doesn't happen to work out to anything nice combination-wise.
19743 So do some bit twiddling on the value we've got in AH to come
19744 up with an appropriate set of condition codes. */
19746 intcmp_mode = CCNOmode;
19747 switch (code)
19749 case GT:
19750 case UNGT:
19751 if (code == GT || !TARGET_IEEE_FP)
19753 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19754 code = EQ;
19756 else
19758 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19759 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19760 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19761 intcmp_mode = CCmode;
19762 code = GEU;
19764 break;
19765 case LT:
19766 case UNLT:
19767 if (code == LT && TARGET_IEEE_FP)
19769 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19770 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19771 intcmp_mode = CCmode;
19772 code = EQ;
19774 else
19776 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19777 code = NE;
19779 break;
19780 case GE:
19781 case UNGE:
19782 if (code == GE || !TARGET_IEEE_FP)
19784 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19785 code = EQ;
19787 else
19789 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19790 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19791 code = NE;
19793 break;
19794 case LE:
19795 case UNLE:
19796 if (code == LE && TARGET_IEEE_FP)
19798 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19799 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19800 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19801 intcmp_mode = CCmode;
19802 code = LTU;
19804 else
19806 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19807 code = NE;
19809 break;
19810 case EQ:
19811 case UNEQ:
19812 if (code == EQ && TARGET_IEEE_FP)
19814 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19815 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19816 intcmp_mode = CCmode;
19817 code = EQ;
19819 else
19821 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19822 code = NE;
19824 break;
19825 case NE:
19826 case LTGT:
19827 if (code == NE && TARGET_IEEE_FP)
19829 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19830 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19831 GEN_INT (0x40)));
19832 code = NE;
19834 else
19836 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19837 code = EQ;
19839 break;
19841 case UNORDERED:
19842 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19843 code = NE;
19844 break;
19845 case ORDERED:
19846 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19847 code = EQ;
19848 break;
19850 default:
19851 gcc_unreachable ();
19853 break;
19855 default:
19856 gcc_unreachable();
19859 /* Return the test that should be put into the flags user, i.e.
19860 the bcc, scc, or cmov instruction. */
19861 return gen_rtx_fmt_ee (code, VOIDmode,
19862 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19863 const0_rtx);
19866 static rtx
19867 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19869 rtx ret;
19871 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19872 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19874 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19876 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19877 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19879 else
19880 ret = ix86_expand_int_compare (code, op0, op1);
19882 return ret;
19885 void
19886 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19888 enum machine_mode mode = GET_MODE (op0);
19889 rtx tmp;
19891 switch (mode)
19893 case SFmode:
19894 case DFmode:
19895 case XFmode:
19896 case QImode:
19897 case HImode:
19898 case SImode:
19899 simple:
19900 tmp = ix86_expand_compare (code, op0, op1);
19901 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19902 gen_rtx_LABEL_REF (VOIDmode, label),
19903 pc_rtx);
19904 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19905 return;
19907 case DImode:
19908 if (TARGET_64BIT)
19909 goto simple;
19910 case TImode:
19911 /* Expand a double-word (DImode or TImode) branch into multiple compare+branch. */
19913 rtx lo[2], hi[2], label2;
19914 enum rtx_code code1, code2, code3;
19915 enum machine_mode submode;
19917 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19919 tmp = op0, op0 = op1, op1 = tmp;
19920 code = swap_condition (code);
19923 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19924 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19926 submode = mode == DImode ? SImode : DImode;
19928 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19929 avoid two branches. This costs one extra insn, so disable when
19930 optimizing for size. */
19932 if ((code == EQ || code == NE)
19933 && (!optimize_insn_for_size_p ()
19934 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19936 rtx xor0, xor1;
19938 xor1 = hi[0];
19939 if (hi[1] != const0_rtx)
19940 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19941 NULL_RTX, 0, OPTAB_WIDEN);
19943 xor0 = lo[0];
19944 if (lo[1] != const0_rtx)
19945 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19946 NULL_RTX, 0, OPTAB_WIDEN);
19948 tmp = expand_binop (submode, ior_optab, xor1, xor0,
19949 NULL_RTX, 0, OPTAB_WIDEN);
19951 ix86_expand_branch (code, tmp, const0_rtx, label);
19952 return;
19955 /* Otherwise, if we are doing a less-than or greater-than-or-equal
19956 comparison, op1 is a constant and the low word is zero, then we can
19957 just examine the high word.  Similarly for a low word of -1 and a
19958 less-than-or-equal or greater-than comparison. */
19960 if (CONST_INT_P (hi[1]))
19961 switch (code)
19963 case LT: case LTU: case GE: case GEU:
19964 if (lo[1] == const0_rtx)
19966 ix86_expand_branch (code, hi[0], hi[1], label);
19967 return;
19969 break;
19970 case LE: case LEU: case GT: case GTU:
19971 if (lo[1] == constm1_rtx)
19973 ix86_expand_branch (code, hi[0], hi[1], label);
19974 return;
19976 break;
19977 default:
19978 break;
19981 /* Otherwise, we need two or three jumps. */
19983 label2 = gen_label_rtx ();
19985 code1 = code;
19986 code2 = swap_condition (code);
19987 code3 = unsigned_condition (code);
19989 switch (code)
19991 case LT: case GT: case LTU: case GTU:
19992 break;
19994 case LE: code1 = LT; code2 = GT; break;
19995 case GE: code1 = GT; code2 = LT; break;
19996 case LEU: code1 = LTU; code2 = GTU; break;
19997 case GEU: code1 = GTU; code2 = LTU; break;
19999 case EQ: code1 = UNKNOWN; code2 = NE; break;
20000 case NE: code2 = UNKNOWN; break;
20002 default:
20003 gcc_unreachable ();
20007 * a < b =>
20008 * if (hi(a) < hi(b)) goto true;
20009 * if (hi(a) > hi(b)) goto false;
20010 * if (lo(a) < lo(b)) goto true;
20011 * false:
20014 if (code1 != UNKNOWN)
20015 ix86_expand_branch (code1, hi[0], hi[1], label);
20016 if (code2 != UNKNOWN)
20017 ix86_expand_branch (code2, hi[0], hi[1], label2);
20019 ix86_expand_branch (code3, lo[0], lo[1], label);
20021 if (code2 != UNKNOWN)
20022 emit_label (label2);
20023 return;
20026 default:
20027 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20028 goto simple;
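/* For illustration only: the double-word branch lowering above written as
   plain C for a 32-bit target comparing 64-bit values (hypothetical helper;
   assumes an arithmetic right shift; equality instead uses the
   (hi0^hi1)|(lo0^lo1) trick).  */
#if 0
static int
less_than_ll_sketch (long long a, long long b)
{
  int hi_a = (int) (a >> 32), hi_b = (int) (b >> 32);
  unsigned int lo_a = (unsigned int) a, lo_b = (unsigned int) b;

  if (hi_a < hi_b)
    return 1;               /* signed compare of the high words      */
  if (hi_a > hi_b)
    return 0;
  return lo_a < lo_b;       /* high words equal: unsigned low words  */
}
#endif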
20032 /* Split branch based on floating point condition. */
20033 void
20034 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20035 rtx target1, rtx target2, rtx tmp)
20037 rtx condition;
20038 rtx i;
20040 if (target2 != pc_rtx)
20042 rtx tmp = target2;
20043 code = reverse_condition_maybe_unordered (code);
20044 target2 = target1;
20045 target1 = tmp;
20048 condition = ix86_expand_fp_compare (code, op1, op2,
20049 tmp);
20051 i = emit_jump_insn (gen_rtx_SET
20052 (VOIDmode, pc_rtx,
20053 gen_rtx_IF_THEN_ELSE (VOIDmode,
20054 condition, target1, target2)));
20055 if (split_branch_probability >= 0)
20056 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20059 void
20060 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20062 rtx ret;
20064 gcc_assert (GET_MODE (dest) == QImode);
20066 ret = ix86_expand_compare (code, op0, op1);
20067 PUT_MODE (ret, QImode);
20068 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20071 /* Expand a comparison setting or clearing the carry flag.  Return true
20072 when successful, and set *POP to the operation. */
20073 static bool
20074 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20076 enum machine_mode mode =
20077 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20079 /* Do not handle double-mode compares that go through the special path. */
20080 if (mode == (TARGET_64BIT ? TImode : DImode))
20081 return false;
20083 if (SCALAR_FLOAT_MODE_P (mode))
20085 rtx compare_op, compare_seq;
20087 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20089 /* Shortcut: the following common codes never translate
20090 into carry flag compares. */
20091 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20092 || code == ORDERED || code == UNORDERED)
20093 return false;
20095 /* These comparisons require zero flag; swap operands so they won't. */
20096 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20097 && !TARGET_IEEE_FP)
20099 rtx tmp = op0;
20100 op0 = op1;
20101 op1 = tmp;
20102 code = swap_condition (code);
20105 /* Try to expand the comparison and verify that we end up with a
20106 carry-flag-based comparison.  This fails only when we decide to
20107 expand the comparison using arithmetic, which is not a very
20108 common scenario. */
20109 start_sequence ();
20110 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20111 compare_seq = get_insns ();
20112 end_sequence ();
20114 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20115 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20116 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20117 else
20118 code = GET_CODE (compare_op);
20120 if (code != LTU && code != GEU)
20121 return false;
20123 emit_insn (compare_seq);
20124 *pop = compare_op;
20125 return true;
20128 if (!INTEGRAL_MODE_P (mode))
20129 return false;
20131 switch (code)
20133 case LTU:
20134 case GEU:
20135 break;
20137 /* Convert a==0 into (unsigned)a<1. */
20138 case EQ:
20139 case NE:
20140 if (op1 != const0_rtx)
20141 return false;
20142 op1 = const1_rtx;
20143 code = (code == EQ ? LTU : GEU);
20144 break;
20146 /* Convert a>b into b<a or a>=b+1. */
20147 case GTU:
20148 case LEU:
20149 if (CONST_INT_P (op1))
20151 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20152 /* Bail out on overflow.  We could still swap the operands, but
20153 that would force loading the constant into a register. */
20154 if (op1 == const0_rtx
20155 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20156 return false;
20157 code = (code == GTU ? GEU : LTU);
20159 else
20161 rtx tmp = op1;
20162 op1 = op0;
20163 op0 = tmp;
20164 code = (code == GTU ? LTU : GEU);
20166 break;
20168 /* Convert a>=0 into (unsigned)a<0x80000000. */
20169 case LT:
20170 case GE:
20171 if (mode == DImode || op1 != const0_rtx)
20172 return false;
20173 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20174 code = (code == LT ? GEU : LTU);
20175 break;
20176 case LE:
20177 case GT:
20178 if (mode == DImode || op1 != constm1_rtx)
20179 return false;
20180 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20181 code = (code == LE ? GEU : LTU);
20182 break;
20184 default:
20185 return false;
20187 /* Swapping operands may cause a constant to appear as the first operand. */
20188 if (!nonimmediate_operand (op0, VOIDmode))
20190 if (!can_create_pseudo_p ())
20191 return false;
20192 op0 = force_reg (mode, op0);
20194 *pop = ix86_expand_compare (code, op0, op1);
20195 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20196 return true;
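/* For illustration only: the rewrites above expressed as C identities on
   32-bit values (hypothetical helper); every right-hand side is a bare
   LTU/GEU, i.e. a pure carry-flag test.  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
carry_compare_identities_sketch (uint32_t a, int32_t s)
{
  assert ((a == 0) == (a < 1u));                        /* EQ  -> LTU */
  assert ((a > 7u) == (a >= 8u));                       /* GTU -> GEU */
  assert ((s >= 0) == ((uint32_t) s < 0x80000000u));    /* GE  -> LTU */
  assert ((s <= -1) == ((uint32_t) s >= 0x80000000u));  /* LE  -> GEU */
}
#endif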
20199 bool
20200 ix86_expand_int_movcc (rtx operands[])
20202 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20203 rtx compare_seq, compare_op;
20204 enum machine_mode mode = GET_MODE (operands[0]);
20205 bool sign_bit_compare_p = false;
20206 rtx op0 = XEXP (operands[1], 0);
20207 rtx op1 = XEXP (operands[1], 1);
20209 if (GET_MODE (op0) == TImode
20210 || (GET_MODE (op0) == DImode
20211 && !TARGET_64BIT))
20212 return false;
20214 start_sequence ();
20215 compare_op = ix86_expand_compare (code, op0, op1);
20216 compare_seq = get_insns ();
20217 end_sequence ();
20219 compare_code = GET_CODE (compare_op);
20221 if ((op1 == const0_rtx && (code == GE || code == LT))
20222 || (op1 == constm1_rtx && (code == GT || code == LE)))
20223 sign_bit_compare_p = true;
20225 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20226 HImode insns, we'd be swallowed in word prefix ops. */
20228 if ((mode != HImode || TARGET_FAST_PREFIX)
20229 && (mode != (TARGET_64BIT ? TImode : DImode))
20230 && CONST_INT_P (operands[2])
20231 && CONST_INT_P (operands[3]))
20233 rtx out = operands[0];
20234 HOST_WIDE_INT ct = INTVAL (operands[2]);
20235 HOST_WIDE_INT cf = INTVAL (operands[3]);
20236 HOST_WIDE_INT diff;
20238 diff = ct - cf;
20239 /* Sign bit compares are better done using shifts than by using
20240 sbb. */
20241 if (sign_bit_compare_p
20242 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20244 /* Detect overlap between destination and compare sources. */
20245 rtx tmp = out;
20247 if (!sign_bit_compare_p)
20249 rtx flags;
20250 bool fpcmp = false;
20252 compare_code = GET_CODE (compare_op);
20254 flags = XEXP (compare_op, 0);
20256 if (GET_MODE (flags) == CCFPmode
20257 || GET_MODE (flags) == CCFPUmode)
20259 fpcmp = true;
20260 compare_code
20261 = ix86_fp_compare_code_to_integer (compare_code);
20264 /* To simplify the rest of the code, restrict to the GEU case. */
20265 if (compare_code == LTU)
20267 HOST_WIDE_INT tmp = ct;
20268 ct = cf;
20269 cf = tmp;
20270 compare_code = reverse_condition (compare_code);
20271 code = reverse_condition (code);
20273 else
20275 if (fpcmp)
20276 PUT_CODE (compare_op,
20277 reverse_condition_maybe_unordered
20278 (GET_CODE (compare_op)));
20279 else
20280 PUT_CODE (compare_op,
20281 reverse_condition (GET_CODE (compare_op)));
20283 diff = ct - cf;
20285 if (reg_overlap_mentioned_p (out, op0)
20286 || reg_overlap_mentioned_p (out, op1))
20287 tmp = gen_reg_rtx (mode);
20289 if (mode == DImode)
20290 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20291 else
20292 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20293 flags, compare_op));
20295 else
20297 if (code == GT || code == GE)
20298 code = reverse_condition (code);
20299 else
20301 HOST_WIDE_INT tmp = ct;
20302 ct = cf;
20303 cf = tmp;
20304 diff = ct - cf;
20306 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20309 if (diff == 1)
20312 * cmpl op0,op1
20313 * sbbl dest,dest
20314 * [addl dest, ct]
20316 * Size 5 - 8.
20318 if (ct)
20319 tmp = expand_simple_binop (mode, PLUS,
20320 tmp, GEN_INT (ct),
20321 copy_rtx (tmp), 1, OPTAB_DIRECT);
20323 else if (cf == -1)
20326 * cmpl op0,op1
20327 * sbbl dest,dest
20328 * orl $ct, dest
20330 * Size 8.
20332 tmp = expand_simple_binop (mode, IOR,
20333 tmp, GEN_INT (ct),
20334 copy_rtx (tmp), 1, OPTAB_DIRECT);
20336 else if (diff == -1 && ct)
20339 * cmpl op0,op1
20340 * sbbl dest,dest
20341 * notl dest
20342 * [addl dest, cf]
20344 * Size 8 - 11.
20346 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20347 if (cf)
20348 tmp = expand_simple_binop (mode, PLUS,
20349 copy_rtx (tmp), GEN_INT (cf),
20350 copy_rtx (tmp), 1, OPTAB_DIRECT);
20352 else
20355 * cmpl op0,op1
20356 * sbbl dest,dest
20357 * [notl dest]
20358 * andl cf - ct, dest
20359 * [addl dest, ct]
20361 * Size 8 - 11.
20364 if (cf == 0)
20366 cf = ct;
20367 ct = 0;
20368 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20371 tmp = expand_simple_binop (mode, AND,
20372 copy_rtx (tmp),
20373 gen_int_mode (cf - ct, mode),
20374 copy_rtx (tmp), 1, OPTAB_DIRECT);
20375 if (ct)
20376 tmp = expand_simple_binop (mode, PLUS,
20377 copy_rtx (tmp), GEN_INT (ct),
20378 copy_rtx (tmp), 1, OPTAB_DIRECT);
20381 if (!rtx_equal_p (tmp, out))
20382 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20384 return true;
20387 if (diff < 0)
20389 enum machine_mode cmp_mode = GET_MODE (op0);
20391 HOST_WIDE_INT tmp;
20392 tmp = ct, ct = cf, cf = tmp;
20393 diff = -diff;
20395 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20397 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20399 /* We may be reversing an unordered compare to a normal compare, which
20400 is not valid in general (we may convert a non-trapping condition
20401 into a trapping one); however, on i386 we currently emit all
20402 comparisons unordered. */
20403 compare_code = reverse_condition_maybe_unordered (compare_code);
20404 code = reverse_condition_maybe_unordered (code);
20406 else
20408 compare_code = reverse_condition (compare_code);
20409 code = reverse_condition (code);
20413 compare_code = UNKNOWN;
20414 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20415 && CONST_INT_P (op1))
20417 if (op1 == const0_rtx
20418 && (code == LT || code == GE))
20419 compare_code = code;
20420 else if (op1 == constm1_rtx)
20422 if (code == LE)
20423 compare_code = LT;
20424 else if (code == GT)
20425 compare_code = GE;
20429 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20430 if (compare_code != UNKNOWN
20431 && GET_MODE (op0) == GET_MODE (out)
20432 && (cf == -1 || ct == -1))
20434 /* If the lea code below could be used, only optimize
20435 if it results in a 2-insn sequence. */
20437 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20438 || diff == 3 || diff == 5 || diff == 9)
20439 || (compare_code == LT && ct == -1)
20440 || (compare_code == GE && cf == -1))
20443 * notl op1 (if necessary)
20444 * sarl $31, op1
20445 * orl cf, op1
20447 if (ct != -1)
20449 cf = ct;
20450 ct = -1;
20451 code = reverse_condition (code);
20454 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20456 out = expand_simple_binop (mode, IOR,
20457 out, GEN_INT (cf),
20458 out, 1, OPTAB_DIRECT);
20459 if (out != operands[0])
20460 emit_move_insn (operands[0], out);
20462 return true;
20467 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20468 || diff == 3 || diff == 5 || diff == 9)
20469 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20470 && (mode != DImode
20471 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20474 * xorl dest,dest
20475 * cmpl op1,op2
20476 * setcc dest
20477 * lea cf(dest*(ct-cf)),dest
20479 * Size 14.
20481 * This also catches the degenerate setcc-only case.
20484 rtx tmp;
20485 int nops;
20487 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20489 nops = 0;
20490 /* On x86_64 the lea instruction operates on Pmode, so we need
20491 the arithmetic done in the proper mode to match. */
20492 if (diff == 1)
20493 tmp = copy_rtx (out);
20494 else
20496 rtx out1;
20497 out1 = copy_rtx (out);
20498 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20499 nops++;
20500 if (diff & 1)
20502 tmp = gen_rtx_PLUS (mode, tmp, out1);
20503 nops++;
20506 if (cf != 0)
20508 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20509 nops++;
20511 if (!rtx_equal_p (tmp, out))
20513 if (nops == 1)
20514 out = force_operand (tmp, copy_rtx (out));
20515 else
20516 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20518 if (!rtx_equal_p (out, operands[0]))
20519 emit_move_insn (operands[0], copy_rtx (out));
20521 return true;
20525 * General case:               Jumpful:
20526 *   xorl dest,dest               cmpl op1, op2
20527 *   cmpl op1, op2                movl ct, dest
20528 *   setcc dest                   jcc 1f
20529 *   decl dest                    movl cf, dest
20530 *   andl (cf-ct),dest            1:
20531 *   addl ct,dest
20533 * Size 20.                    Size 14.
20535 * This is reasonably steep, but branch mispredict costs are
20536 * high on modern cpus, so consider failing only if optimizing
20537 * for space.
20540 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20541 && BRANCH_COST (optimize_insn_for_speed_p (),
20542 false) >= 2)
20544 if (cf == 0)
20546 enum machine_mode cmp_mode = GET_MODE (op0);
20548 cf = ct;
20549 ct = 0;
20551 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20553 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20555 /* We may be reversing an unordered compare to a normal compare,
20556 which is not valid in general (we may convert a non-trapping
20557 condition into a trapping one); however, on i386 we currently
20558 emit all comparisons unordered. */
20559 code = reverse_condition_maybe_unordered (code);
20561 else
20563 code = reverse_condition (code);
20564 if (compare_code != UNKNOWN)
20565 compare_code = reverse_condition (compare_code);
20569 if (compare_code != UNKNOWN)
20571 /* notl op1 (if needed)
20572 sarl $31, op1
20573 andl (cf-ct), op1
20574 addl ct, op1
20576 For x < 0 (resp. x <= -1) there will be no notl,
20577 so if possible swap the constants to get rid of the
20578 complement.
20579 True/false will be -1/0 while code below (store flag
20580 followed by decrement) is 0/-1, so the constants need
20581 to be exchanged once more. */
20583 if (compare_code == GE || !cf)
20585 code = reverse_condition (code);
20586 compare_code = LT;
20588 else
20590 HOST_WIDE_INT tmp = cf;
20591 cf = ct;
20592 ct = tmp;
20595 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20597 else
20599 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20601 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20602 constm1_rtx,
20603 copy_rtx (out), 1, OPTAB_DIRECT);
20606 out = expand_simple_binop (mode, AND, copy_rtx (out),
20607 gen_int_mode (cf - ct, mode),
20608 copy_rtx (out), 1, OPTAB_DIRECT);
20609 if (ct)
20610 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20611 copy_rtx (out), 1, OPTAB_DIRECT);
20612 if (!rtx_equal_p (out, operands[0]))
20613 emit_move_insn (operands[0], copy_rtx (out));
20615 return true;
20619 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20621 /* Try a few things more with specific constants and a variable. */
20623 optab op;
20624 rtx var, orig_out, out, tmp;
20626 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20627 return false;
20629 /* If one of the two operands is an interesting constant, load a
20630 constant with the above and mask it in with a logical operation. */
20632 if (CONST_INT_P (operands[2]))
20634 var = operands[3];
20635 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20636 operands[3] = constm1_rtx, op = and_optab;
20637 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20638 operands[3] = const0_rtx, op = ior_optab;
20639 else
20640 return false;
20642 else if (CONST_INT_P (operands[3]))
20644 var = operands[2];
20645 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20646 operands[2] = constm1_rtx, op = and_optab;
20647 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
20648 operands[2] = const0_rtx, op = ior_optab;
20649 else
20650 return false;
20652 else
20653 return false;
20655 orig_out = operands[0];
20656 tmp = gen_reg_rtx (mode);
20657 operands[0] = tmp;
20659 /* Recurse to get the constant loaded. */
20660 if (ix86_expand_int_movcc (operands) == 0)
20661 return false;
20663 /* Mask in the interesting variable. */
20664 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20665 OPTAB_WIDEN);
20666 if (!rtx_equal_p (out, orig_out))
20667 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20669 return true;
20673 * For comparison with above,
20675 * movl cf,dest
20676 * movl ct,tmp
20677 * cmpl op1,op2
20678 * cmovcc tmp,dest
20680 * Size 15.
20683 if (! nonimmediate_operand (operands[2], mode))
20684 operands[2] = force_reg (mode, operands[2]);
20685 if (! nonimmediate_operand (operands[3], mode))
20686 operands[3] = force_reg (mode, operands[3]);
20688 if (! register_operand (operands[2], VOIDmode)
20689 && (mode == QImode
20690 || ! register_operand (operands[3], VOIDmode)))
20691 operands[2] = force_reg (mode, operands[2]);
20693 if (mode == QImode
20694 && ! register_operand (operands[3], VOIDmode))
20695 operands[3] = force_reg (mode, operands[3]);
20697 emit_insn (compare_seq);
20698 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20699 gen_rtx_IF_THEN_ELSE (mode,
20700 compare_op, operands[2],
20701 operands[3])));
20702 return true;
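/* For illustration only: the sbb idiom used above, in portable C
   (hypothetical helper).  "cmp; sbb dest,dest" leaves dest equal to -1 or 0,
   and the two constants are then folded in with and/add (or or/not in the
   special cases).  */
#if 0
#include <stdint.h>

static uint32_t
select_sketch (uint32_t a, uint32_t b, uint32_t ct, uint32_t cf)
{
  uint32_t mask = a < b ? ~0u : 0u;   /* cmpl; sbbl dest,dest */
  return cf + (mask & (ct - cf));     /* andl; addl           */
}
#endif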
20705 /* Swap, force into registers, or otherwise massage the two operands
20706 to an sse comparison with a mask result. Thus we differ a bit from
20707 ix86_prepare_fp_compare_args which expects to produce a flags result.
20709 The DEST operand exists to help determine whether to commute commutative
20710 operators. The POP0/POP1 operands are updated in place. The new
20711 comparison code is returned, or UNKNOWN if not implementable. */
20713 static enum rtx_code
20714 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20715 rtx *pop0, rtx *pop1)
20717 rtx tmp;
20719 switch (code)
20721 case LTGT:
20722 case UNEQ:
20723 /* AVX supports all the needed comparisons. */
20724 if (TARGET_AVX)
20725 break;
20726 /* We have no LTGT as an operator. We could implement it with
20727 NE & ORDERED, but this requires an extra temporary. It's
20728 not clear that it's worth it. */
20729 return UNKNOWN;
20731 case LT:
20732 case LE:
20733 case UNGT:
20734 case UNGE:
20735 /* These are supported directly. */
20736 break;
20738 case EQ:
20739 case NE:
20740 case UNORDERED:
20741 case ORDERED:
20742 /* AVX has 3 operand comparisons, no need to swap anything. */
20743 if (TARGET_AVX)
20744 break;
20745 /* For commutative operators, try to canonicalize the destination
20746 operand to be first in the comparison - this helps reload to
20747 avoid extra moves. */
20748 if (!dest || !rtx_equal_p (dest, *pop1))
20749 break;
20750 /* FALLTHRU */
20752 case GE:
20753 case GT:
20754 case UNLE:
20755 case UNLT:
20756 /* These are not supported directly before AVX, and furthermore
20757 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20758 comparison operands to transform into something that is
20759 supported. */
20760 tmp = *pop0;
20761 *pop0 = *pop1;
20762 *pop1 = tmp;
20763 code = swap_condition (code);
20764 break;
20766 default:
20767 gcc_unreachable ();
20770 return code;
20773 /* Detect conditional moves that exactly match min/max operational
20774 semantics. Note that this is IEEE safe, as long as we don't
20775 interchange the operands.
20777 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20778 and TRUE if the operation is successful and instructions are emitted. */
20780 static bool
20781 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20782 rtx cmp_op1, rtx if_true, rtx if_false)
20784 enum machine_mode mode;
20785 bool is_min;
20786 rtx tmp;
20788 if (code == LT)
20790 else if (code == UNGE)
20792 tmp = if_true;
20793 if_true = if_false;
20794 if_false = tmp;
20796 else
20797 return false;
20799 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20800 is_min = true;
20801 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20802 is_min = false;
20803 else
20804 return false;
20806 mode = GET_MODE (dest);
20808 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20809 but MODE may be a vector mode and thus not appropriate. */
20810 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20812 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20813 rtvec v;
20815 if_true = force_reg (mode, if_true);
20816 v = gen_rtvec (2, if_true, if_false);
20817 tmp = gen_rtx_UNSPEC (mode, v, u);
20819 else
20821 code = is_min ? SMIN : SMAX;
20822 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20825 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20826 return true;
20829 /* Expand an sse vector comparison. Return the register with the result. */
20831 static rtx
20832 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20833 rtx op_true, rtx op_false)
20835 enum machine_mode mode = GET_MODE (dest);
20836 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
20838 /* In the general case the result of a comparison can differ from the operands' type. */
20839 enum machine_mode cmp_mode;
20841 /* In AVX512F the result of comparison is an integer mask. */
20842 bool maskcmp = false;
20843 rtx x;
20845 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
20847 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
20848 gcc_assert (cmp_mode != BLKmode);
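/* E.g. V16SImode or V16SFmode operands give a 16-element compare, so the
   mask is HImode; V8DImode or V8DFmode operands give a QImode mask.  */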
20850 maskcmp = true;
20852 else
20853 cmp_mode = cmp_ops_mode;
20856 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
20857 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
20858 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
20860 if (optimize
20861 || reg_overlap_mentioned_p (dest, op_true)
20862 || reg_overlap_mentioned_p (dest, op_false))
20863 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
20865 /* Compare patterns for int modes are unspec in AVX512F only. */
20866 if (maskcmp && (code == GT || code == EQ))
20868 rtx (*gen)(rtx, rtx, rtx);
20870 switch (cmp_ops_mode)
20872 case V16SImode:
20873 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
20874 break;
20875 case V8DImode:
20876 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
20877 break;
20878 default:
20879 gen = NULL;
20882 if (gen)
20884 emit_insn (gen (dest, cmp_op0, cmp_op1));
20885 return dest;
20888 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20890 if (cmp_mode != mode && !maskcmp)
20892 x = force_reg (cmp_ops_mode, x);
20893 convert_move (dest, x, false);
20895 else
20896 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20898 return dest;
20901 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20902 operations. This is used for both scalar and vector conditional moves. */
20904 static void
20905 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20907 enum machine_mode mode = GET_MODE (dest);
20908 enum machine_mode cmpmode = GET_MODE (cmp);
20910 /* In AVX512F the result of comparison is an integer mask. */
20911 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
20913 rtx t2, t3, x;
20915 if (vector_all_ones_operand (op_true, mode)
20916 && rtx_equal_p (op_false, CONST0_RTX (mode))
20917 && !maskcmp)
20919 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20921 else if (op_false == CONST0_RTX (mode)
20922 && !maskcmp)
20924 op_true = force_reg (mode, op_true);
20925 x = gen_rtx_AND (mode, cmp, op_true);
20926 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20928 else if (op_true == CONST0_RTX (mode)
20929 && !maskcmp)
20931 op_false = force_reg (mode, op_false);
20932 x = gen_rtx_NOT (mode, cmp);
20933 x = gen_rtx_AND (mode, x, op_false);
20934 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20936 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
20937 && !maskcmp)
20939 op_false = force_reg (mode, op_false);
20940 x = gen_rtx_IOR (mode, cmp, op_false);
20941 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20943 else if (TARGET_XOP
20944 && !maskcmp)
20946 op_true = force_reg (mode, op_true);
20948 if (!nonimmediate_operand (op_false, mode))
20949 op_false = force_reg (mode, op_false);
20951 emit_insn (gen_rtx_SET (mode, dest,
20952 gen_rtx_IF_THEN_ELSE (mode, cmp,
20953 op_true,
20954 op_false)));
20956 else
20958 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
20959 rtx d = dest;
20961 if (!nonimmediate_operand (op_true, mode))
20962 op_true = force_reg (mode, op_true);
20964 op_false = force_reg (mode, op_false);
20966 switch (mode)
20968 case V4SFmode:
20969 if (TARGET_SSE4_1)
20970 gen = gen_sse4_1_blendvps;
20971 break;
20972 case V2DFmode:
20973 if (TARGET_SSE4_1)
20974 gen = gen_sse4_1_blendvpd;
20975 break;
20976 case V16QImode:
20977 case V8HImode:
20978 case V4SImode:
20979 case V2DImode:
20980 if (TARGET_SSE4_1)
20982 gen = gen_sse4_1_pblendvb;
20983 if (mode != V16QImode)
20984 d = gen_reg_rtx (V16QImode);
20985 op_false = gen_lowpart (V16QImode, op_false);
20986 op_true = gen_lowpart (V16QImode, op_true);
20987 cmp = gen_lowpart (V16QImode, cmp);
20989 break;
20990 case V8SFmode:
20991 if (TARGET_AVX)
20992 gen = gen_avx_blendvps256;
20993 break;
20994 case V4DFmode:
20995 if (TARGET_AVX)
20996 gen = gen_avx_blendvpd256;
20997 break;
20998 case V32QImode:
20999 case V16HImode:
21000 case V8SImode:
21001 case V4DImode:
21002 if (TARGET_AVX2)
21004 gen = gen_avx2_pblendvb;
21005 if (mode != V32QImode)
21006 d = gen_reg_rtx (V32QImode);
21007 op_false = gen_lowpart (V32QImode, op_false);
21008 op_true = gen_lowpart (V32QImode, op_true);
21009 cmp = gen_lowpart (V32QImode, cmp);
21011 break;
21013 case V16SImode:
21014 gen = gen_avx512f_blendmv16si;
21015 break;
21016 case V8DImode:
21017 gen = gen_avx512f_blendmv8di;
21018 break;
21019 case V8DFmode:
21020 gen = gen_avx512f_blendmv8df;
21021 break;
21022 case V16SFmode:
21023 gen = gen_avx512f_blendmv16sf;
21024 break;
21026 default:
21027 break;
21030 if (gen != NULL)
21032 emit_insn (gen (d, op_false, op_true, cmp));
21033 if (d != dest)
21034 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21036 else
21038 op_true = force_reg (mode, op_true);
21040 t2 = gen_reg_rtx (mode);
21041 if (optimize)
21042 t3 = gen_reg_rtx (mode);
21043 else
21044 t3 = dest;
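/* Without a blend instruction the select is computed as
   dest = (cmp & op_true) | (~cmp & op_false).  */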
21046 x = gen_rtx_AND (mode, op_true, cmp);
21047 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21049 x = gen_rtx_NOT (mode, cmp);
21050 x = gen_rtx_AND (mode, x, op_false);
21051 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21053 x = gen_rtx_IOR (mode, t3, t2);
21054 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21059 /* Expand a floating-point conditional move. Return true if successful. */
21061 bool
21062 ix86_expand_fp_movcc (rtx operands[])
21064 enum machine_mode mode = GET_MODE (operands[0]);
21065 enum rtx_code code = GET_CODE (operands[1]);
21066 rtx tmp, compare_op;
21067 rtx op0 = XEXP (operands[1], 0);
21068 rtx op1 = XEXP (operands[1], 1);
21070 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21072 enum machine_mode cmode;
21074 /* Since we've no cmove for sse registers, don't force bad register
21075 allocation just to gain access to it. Deny movcc when the
21076 comparison mode doesn't match the move mode. */
21077 cmode = GET_MODE (op0);
21078 if (cmode == VOIDmode)
21079 cmode = GET_MODE (op1);
21080 if (cmode != mode)
21081 return false;
21083 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21084 if (code == UNKNOWN)
21085 return false;
21087 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21088 operands[2], operands[3]))
21089 return true;
21091 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21092 operands[2], operands[3]);
21093 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21094 return true;
21097 if (GET_MODE (op0) == TImode
21098 || (GET_MODE (op0) == DImode
21099 && !TARGET_64BIT))
21100 return false;
21102 /* The floating point conditional move instructions don't directly
21103 support conditions resulting from a signed integer comparison. */
21105 compare_op = ix86_expand_compare (code, op0, op1);
21106 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21108 tmp = gen_reg_rtx (QImode);
21109 ix86_expand_setcc (tmp, code, op0, op1);
21111 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21114 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21115 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21116 operands[2], operands[3])));
21118 return true;
21121 /* Expand a floating-point vector conditional move; a vcond operation
21122 rather than a movcc operation. */
21124 bool
21125 ix86_expand_fp_vcond (rtx operands[])
21127 enum rtx_code code = GET_CODE (operands[3]);
21128 rtx cmp;
21130 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21131 &operands[4], &operands[5]);
21132 if (code == UNKNOWN)
21134 rtx temp;
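/* LTGT and UNEQ have no direct comparison here, so they are built from
   two comparisons: LTGT as ORDERED & NE, UNEQ as UNORDERED | EQ.  */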
21135 switch (GET_CODE (operands[3]))
21137 case LTGT:
21138 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21139 operands[5], operands[0], operands[0]);
21140 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21141 operands[5], operands[1], operands[2]);
21142 code = AND;
21143 break;
21144 case UNEQ:
21145 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21146 operands[5], operands[0], operands[0]);
21147 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21148 operands[5], operands[1], operands[2]);
21149 code = IOR;
21150 break;
21151 default:
21152 gcc_unreachable ();
21154 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21155 OPTAB_DIRECT);
21156 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21157 return true;
21160 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21161 operands[5], operands[1], operands[2]))
21162 return true;
21164 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21165 operands[1], operands[2]);
21166 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21167 return true;
21170 /* Expand a signed/unsigned integral vector conditional move. */
21172 bool
21173 ix86_expand_int_vcond (rtx operands[])
21175 enum machine_mode data_mode = GET_MODE (operands[0]);
21176 enum machine_mode mode = GET_MODE (operands[4]);
21177 enum rtx_code code = GET_CODE (operands[3]);
21178 bool negate = false;
21179 rtx x, cop0, cop1;
21181 cop0 = operands[4];
21182 cop1 = operands[5];
21184 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21185 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21186 if ((code == LT || code == GE)
21187 && data_mode == mode
21188 && cop1 == CONST0_RTX (mode)
21189 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21190 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21191 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21192 && (GET_MODE_SIZE (data_mode) == 16
21193 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21195 rtx negop = operands[2 - (code == LT)];
21196 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21197 if (negop == CONST1_RTX (data_mode))
21199 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21200 operands[0], 1, OPTAB_DIRECT);
21201 if (res != operands[0])
21202 emit_move_insn (operands[0], res);
21203 return true;
21205 else if (GET_MODE_INNER (data_mode) != DImode
21206 && vector_all_ones_operand (negop, data_mode))
21208 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21209 operands[0], 0, OPTAB_DIRECT);
21210 if (res != operands[0])
21211 emit_move_insn (operands[0], res);
21212 return true;
21216 if (!nonimmediate_operand (cop1, mode))
21217 cop1 = force_reg (mode, cop1);
21218 if (!general_operand (operands[1], data_mode))
21219 operands[1] = force_reg (data_mode, operands[1]);
21220 if (!general_operand (operands[2], data_mode))
21221 operands[2] = force_reg (data_mode, operands[2]);
21223 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21224 if (TARGET_XOP
21225 && (mode == V16QImode || mode == V8HImode
21226 || mode == V4SImode || mode == V2DImode))
21228 else
21230 /* Canonicalize the comparison to EQ, GT, GTU. */
21231 switch (code)
21233 case EQ:
21234 case GT:
21235 case GTU:
21236 break;
21238 case NE:
21239 case LE:
21240 case LEU:
21241 code = reverse_condition (code);
21242 negate = true;
21243 break;
21245 case GE:
21246 case GEU:
21247 code = reverse_condition (code);
21248 negate = true;
21249 /* FALLTHRU */
21251 case LT:
21252 case LTU:
21253 code = swap_condition (code);
21254 x = cop0, cop0 = cop1, cop1 = x;
21255 break;
21257 default:
21258 gcc_unreachable ();
21261 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21262 if (mode == V2DImode)
21264 switch (code)
21266 case EQ:
21267 /* SSE4.1 supports EQ. */
21268 if (!TARGET_SSE4_1)
21269 return false;
21270 break;
21272 case GT:
21273 case GTU:
21274 /* SSE4.2 supports GT/GTU. */
21275 if (!TARGET_SSE4_2)
21276 return false;
21277 break;
21279 default:
21280 gcc_unreachable ();
21284 /* Unsigned parallel compare is not supported by the hardware.
21285 Play some tricks to turn this into a signed comparison
21286 against 0. */
21287 if (code == GTU)
21289 cop0 = force_reg (mode, cop0);
21291 switch (mode)
21293 case V16SImode:
21294 case V8DImode:
21295 case V8SImode:
21296 case V4DImode:
21297 case V4SImode:
21298 case V2DImode:
21300 rtx t1, t2, mask;
21301 rtx (*gen_sub3) (rtx, rtx, rtx);
21303 switch (mode)
21305 case V16SImode: gen_sub3 = gen_subv16si3; break;
21306 case V8DImode: gen_sub3 = gen_subv8di3; break;
21307 case V8SImode: gen_sub3 = gen_subv8si3; break;
21308 case V4DImode: gen_sub3 = gen_subv4di3; break;
21309 case V4SImode: gen_sub3 = gen_subv4si3; break;
21310 case V2DImode: gen_sub3 = gen_subv2di3; break;
21311 default:
21312 gcc_unreachable ();
21314 /* Subtract (-(INT MAX) - 1) from both operands to make
21315 them signed. */
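/* E.g. for V4SImode the mask elements are 0x80000000; subtracting that
   flips each element's sign bit, so a >u b iff
   (a - 0x80000000) >s (b - 0x80000000).  For instance 0xffffffff >u 1
   becomes 0x7fffffff >s 0x80000001.  */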
21316 mask = ix86_build_signbit_mask (mode, true, false);
21317 t1 = gen_reg_rtx (mode);
21318 emit_insn (gen_sub3 (t1, cop0, mask));
21320 t2 = gen_reg_rtx (mode);
21321 emit_insn (gen_sub3 (t2, cop1, mask));
21323 cop0 = t1;
21324 cop1 = t2;
21325 code = GT;
21327 break;
21329 case V32QImode:
21330 case V16HImode:
21331 case V16QImode:
21332 case V8HImode:
21333 /* Perform a parallel unsigned saturating subtraction. */
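/* a >u b iff the saturating difference a -us b is nonzero; e.g. for bytes,
   200 -us 100 = 100 (so 200 >u 100) while 100 -us 200 saturates to 0.
   Hence the EQ-against-zero comparison below is negated.  */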
21334 x = gen_reg_rtx (mode);
21335 emit_insn (gen_rtx_SET (VOIDmode, x,
21336 gen_rtx_US_MINUS (mode, cop0, cop1)));
21338 cop0 = x;
21339 cop1 = CONST0_RTX (mode);
21340 code = EQ;
21341 negate = !negate;
21342 break;
21344 default:
21345 gcc_unreachable ();
21350 /* Allow the comparison to be done in one mode, but the movcc to
21351 happen in another mode. */
21352 if (data_mode == mode)
21354 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21355 operands[1+negate], operands[2-negate]);
21357 else
21359 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21360 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21361 operands[1+negate], operands[2-negate]);
21362 if (GET_MODE (x) == mode)
21363 x = gen_lowpart (data_mode, x);
21366 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21367 operands[2-negate]);
21368 return true;
21371 static bool
21372 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
21374 enum machine_mode mode = GET_MODE (op0);
21375 switch (mode)
21377 case V16SImode:
21378 emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
21379 force_reg (V16SImode, mask),
21380 op1));
21381 return true;
21382 case V16SFmode:
21383 emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
21384 force_reg (V16SImode, mask),
21385 op1));
21386 return true;
21387 case V8DImode:
21388 emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
21389 force_reg (V8DImode, mask), op1));
21390 return true;
21391 case V8DFmode:
21392 emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
21393 force_reg (V8DImode, mask), op1));
21394 return true;
21395 default:
21396 return false;
21400 /* Expand a variable vector permutation. */
21402 void
21403 ix86_expand_vec_perm (rtx operands[])
21405 rtx target = operands[0];
21406 rtx op0 = operands[1];
21407 rtx op1 = operands[2];
21408 rtx mask = operands[3];
21409 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21410 enum machine_mode mode = GET_MODE (op0);
21411 enum machine_mode maskmode = GET_MODE (mask);
21412 int w, e, i;
21413 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21415 /* Number of elements in the vector. */
21416 w = GET_MODE_NUNITS (mode);
21417 e = GET_MODE_UNIT_SIZE (mode);
21418 gcc_assert (w <= 64);
21420 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
21421 return;
21423 if (TARGET_AVX2)
21425 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21427 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21428 a constant shuffle operand. With a tiny bit of effort we can
21429 use VPERMD instead. A re-interpretation stall for V4DFmode is
21430 unfortunate but there's no avoiding it.
21431 Similarly for V16HImode we don't have instructions for variable
21432 shuffling, while for V32QImode we can use vpshufb; vpshufb; vpermq;
21433 vpor after preparing suitable masks. */
21435 if (mode == V16HImode)
21437 maskmode = mode = V32QImode;
21438 w = 32;
21439 e = 1;
21441 else
21443 maskmode = mode = V8SImode;
21444 w = 8;
21445 e = 4;
21447 t1 = gen_reg_rtx (maskmode);
21449 /* Replicate the low bits of the V4DImode mask into V8SImode:
21450 mask = { A B C D }
21451 t1 = { A A B B C C D D }. */
21452 for (i = 0; i < w / 2; ++i)
21453 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21454 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21455 vt = force_reg (maskmode, vt);
21456 mask = gen_lowpart (maskmode, mask);
21457 if (maskmode == V8SImode)
21458 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21459 else
21460 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21462 /* Multiply the shuffle indices by two. */
21463 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21464 OPTAB_DIRECT);
21466 /* Add one to the odd shuffle indices:
21467 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
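/* E.g. the V4DImode mask { 1 3 0 2 } becomes the V8SImode mask
   { 2 3 6 7 0 1 4 5 }, selecting both SImode halves of each requested
   DImode element.  */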
21468 for (i = 0; i < w / 2; ++i)
21470 vec[i * 2] = const0_rtx;
21471 vec[i * 2 + 1] = const1_rtx;
21473 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21474 vt = validize_mem (force_const_mem (maskmode, vt));
21475 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21476 OPTAB_DIRECT);
21478 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21479 operands[3] = mask = t1;
21480 target = gen_reg_rtx (mode);
21481 op0 = gen_lowpart (mode, op0);
21482 op1 = gen_lowpart (mode, op1);
21485 switch (mode)
21487 case V8SImode:
21488 /* The VPERMD and VPERMPS instructions already properly ignore
21489 the high bits of the shuffle elements. No need for us to
21490 perform an AND ourselves. */
21491 if (one_operand_shuffle)
21493 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21494 if (target != operands[0])
21495 emit_move_insn (operands[0],
21496 gen_lowpart (GET_MODE (operands[0]), target));
21498 else
21500 t1 = gen_reg_rtx (V8SImode);
21501 t2 = gen_reg_rtx (V8SImode);
21502 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21503 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21504 goto merge_two;
21506 return;
21508 case V8SFmode:
21509 mask = gen_lowpart (V8SImode, mask);
21510 if (one_operand_shuffle)
21511 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21512 else
21514 t1 = gen_reg_rtx (V8SFmode);
21515 t2 = gen_reg_rtx (V8SFmode);
21516 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21517 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21518 goto merge_two;
21520 return;
21522 case V4SImode:
21523 /* By combining the two 128-bit input vectors into one 256-bit
21524 input vector, we can use VPERMD and VPERMPS for the full
21525 two-operand shuffle. */
21526 t1 = gen_reg_rtx (V8SImode);
21527 t2 = gen_reg_rtx (V8SImode);
21528 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21529 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21530 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21531 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21532 return;
21534 case V4SFmode:
21535 t1 = gen_reg_rtx (V8SFmode);
21536 t2 = gen_reg_rtx (V8SImode);
21537 mask = gen_lowpart (V4SImode, mask);
21538 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21539 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21540 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21541 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21542 return;
21544 case V32QImode:
21545 t1 = gen_reg_rtx (V32QImode);
21546 t2 = gen_reg_rtx (V32QImode);
21547 t3 = gen_reg_rtx (V32QImode);
21548 vt2 = GEN_INT (-128);
21549 for (i = 0; i < 32; i++)
21550 vec[i] = vt2;
21551 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21552 vt = force_reg (V32QImode, vt);
21553 for (i = 0; i < 32; i++)
21554 vec[i] = i < 16 ? vt2 : const0_rtx;
21555 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21556 vt2 = force_reg (V32QImode, vt2);
21557 /* From mask create two adjusted masks, which contain the same
21558 bits as mask in the low 7 bits of each vector element.
21559 The first mask will have the most significant bit clear
21560 if it requests an element from the same 128-bit lane
21561 and the MSB set if it requests an element from the other 128-bit lane.
21562 The second mask will have the opposite values of the MSB,
21563 and additionally will have its 128-bit lanes swapped.
21564 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21565 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21566 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21567 stands for the other 12 bytes. */
21568 /* The bit that tells whether an element comes from the same lane or the
21569 other lane is bit 4, so shift it up by 3 to the MSB position. */
21570 t5 = gen_reg_rtx (V4DImode);
21571 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21572 GEN_INT (3)));
21573 /* Clear MSB bits from the mask just in case it had them set. */
21574 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21575 /* After this t1 will have MSB set for elements from other lane. */
21576 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21577 /* Clear bits other than MSB. */
21578 emit_insn (gen_andv32qi3 (t1, t1, vt));
21579 /* Or in the lower bits from mask into t3. */
21580 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21581 /* And invert MSB bits in t1, so MSB is set for elements from the same
21582 lane. */
21583 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21584 /* Swap 128-bit lanes in t3. */
21585 t6 = gen_reg_rtx (V4DImode);
21586 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21587 const2_rtx, GEN_INT (3),
21588 const0_rtx, const1_rtx));
21589 /* And or in the lower bits from mask into t1. */
21590 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21591 if (one_operand_shuffle)
21593 /* Each of these shuffles will put 0s in places where an
21594 element from the other 128-bit lane is needed; otherwise it
21595 will shuffle in the requested value. */
21596 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21597 gen_lowpart (V32QImode, t6)));
21598 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21599 /* For t3 the 128-bit lanes are swapped again. */
21600 t7 = gen_reg_rtx (V4DImode);
21601 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21602 const2_rtx, GEN_INT (3),
21603 const0_rtx, const1_rtx));
21604 /* And oring both together leads to the result. */
21605 emit_insn (gen_iorv32qi3 (target, t1,
21606 gen_lowpart (V32QImode, t7)));
21607 if (target != operands[0])
21608 emit_move_insn (operands[0],
21609 gen_lowpart (GET_MODE (operands[0]), target));
21610 return;
21613 t4 = gen_reg_rtx (V32QImode);
21614 /* Similar to the one_operand_shuffle code above, just
21615 repeated twice, once for each operand. The merge_two:
21616 code will merge the two results together. */
21617 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21618 gen_lowpart (V32QImode, t6)));
21619 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21620 gen_lowpart (V32QImode, t6)));
21621 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21622 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21623 t7 = gen_reg_rtx (V4DImode);
21624 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21625 const2_rtx, GEN_INT (3),
21626 const0_rtx, const1_rtx));
21627 t8 = gen_reg_rtx (V4DImode);
21628 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21629 const2_rtx, GEN_INT (3),
21630 const0_rtx, const1_rtx));
21631 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21632 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21633 t1 = t4;
21634 t2 = t3;
21635 goto merge_two;
21637 default:
21638 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21639 break;
21643 if (TARGET_XOP)
21645 /* The XOP VPPERM insn supports three inputs. By ignoring the
21646 one_operand_shuffle special case, we avoid creating another
21647 set of constant vectors in memory. */
21648 one_operand_shuffle = false;
21650 /* mask = mask & {2*w-1, ...} */
21651 vt = GEN_INT (2*w - 1);
21653 else
21655 /* mask = mask & {w-1, ...} */
21656 vt = GEN_INT (w - 1);
21659 for (i = 0; i < w; i++)
21660 vec[i] = vt;
21661 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21662 mask = expand_simple_binop (maskmode, AND, mask, vt,
21663 NULL_RTX, 0, OPTAB_DIRECT);
21665 /* For non-QImode operations, convert the word permutation control
21666 into a byte permutation control. */
21667 if (mode != V16QImode)
21669 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21670 GEN_INT (exact_log2 (e)),
21671 NULL_RTX, 0, OPTAB_DIRECT);
21673 /* Convert mask to vector of chars. */
21674 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21676 /* Replicate each of the input bytes into byte positions:
21677 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21678 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21679 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21680 for (i = 0; i < 16; ++i)
21681 vec[i] = GEN_INT (i/e * e);
21682 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21683 vt = validize_mem (force_const_mem (V16QImode, vt));
21684 if (TARGET_XOP)
21685 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21686 else
21687 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21689 /* Convert it into the byte positions by doing
21690 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
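/* E.g. for V4SImode an element index of 2 has been scaled above to the
   byte pattern { 8,8,8,8 }; adding { 0,1,2,3 } yields the byte indices
   { 8,9,10,11 } of that element.  */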
21691 for (i = 0; i < 16; ++i)
21692 vec[i] = GEN_INT (i % e);
21693 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21694 vt = validize_mem (force_const_mem (V16QImode, vt));
21695 emit_insn (gen_addv16qi3 (mask, mask, vt));
21698 /* The actual shuffle operations all operate on V16QImode. */
21699 op0 = gen_lowpart (V16QImode, op0);
21700 op1 = gen_lowpart (V16QImode, op1);
21702 if (TARGET_XOP)
21704 if (GET_MODE (target) != V16QImode)
21705 target = gen_reg_rtx (V16QImode);
21706 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21707 if (target != operands[0])
21708 emit_move_insn (operands[0],
21709 gen_lowpart (GET_MODE (operands[0]), target));
21711 else if (one_operand_shuffle)
21713 if (GET_MODE (target) != V16QImode)
21714 target = gen_reg_rtx (V16QImode);
21715 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21716 if (target != operands[0])
21717 emit_move_insn (operands[0],
21718 gen_lowpart (GET_MODE (operands[0]), target));
21720 else
21722 rtx xops[6];
21723 bool ok;
21725 /* Shuffle the two input vectors independently. */
21726 t1 = gen_reg_rtx (V16QImode);
21727 t2 = gen_reg_rtx (V16QImode);
21728 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21729 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21731 merge_two:
21732 /* Then merge them together. The key is whether any given control
21733 element contained a bit set that indicates the second word. */
21734 mask = operands[3];
21735 vt = GEN_INT (w);
21736 if (maskmode == V2DImode && !TARGET_SSE4_1)
21738 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21739 more shuffle to convert the V2DI input mask into a V4SI
21740 input mask, at which point the masking that expand_int_vcond
21741 performs will work as desired. */
21742 rtx t3 = gen_reg_rtx (V4SImode);
21743 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21744 const0_rtx, const0_rtx,
21745 const2_rtx, const2_rtx));
21746 mask = t3;
21747 maskmode = V4SImode;
21748 e = w = 4;
21751 for (i = 0; i < w; i++)
21752 vec[i] = vt;
21753 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21754 vt = force_reg (maskmode, vt);
21755 mask = expand_simple_binop (maskmode, AND, mask, vt,
21756 NULL_RTX, 0, OPTAB_DIRECT);
21758 if (GET_MODE (target) != mode)
21759 target = gen_reg_rtx (mode);
21760 xops[0] = target;
21761 xops[1] = gen_lowpart (mode, t2);
21762 xops[2] = gen_lowpart (mode, t1);
21763 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21764 xops[4] = mask;
21765 xops[5] = vt;
21766 ok = ix86_expand_int_vcond (xops);
21767 gcc_assert (ok);
21768 if (target != operands[0])
21769 emit_move_insn (operands[0],
21770 gen_lowpart (GET_MODE (operands[0]), target));
21774 /* Unpack SRC into DEST as the next wider integer vector type. UNSIGNED_P is
21775 true if we should do zero extension, else sign extension. HIGH_P is
21776 true if we want the N/2 high elements, else the low elements. */
21778 void
21779 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21781 enum machine_mode imode = GET_MODE (src);
21782 rtx tmp;
21784 if (TARGET_SSE4_1)
21786 rtx (*unpack)(rtx, rtx);
21787 rtx (*extract)(rtx, rtx) = NULL;
21788 enum machine_mode halfmode = BLKmode;
21790 switch (imode)
21792 case V32QImode:
21793 if (unsigned_p)
21794 unpack = gen_avx2_zero_extendv16qiv16hi2;
21795 else
21796 unpack = gen_avx2_sign_extendv16qiv16hi2;
21797 halfmode = V16QImode;
21798 extract
21799 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
21800 break;
21801 case V32HImode:
21802 if (unsigned_p)
21803 unpack = gen_avx512f_zero_extendv16hiv16si2;
21804 else
21805 unpack = gen_avx512f_sign_extendv16hiv16si2;
21806 halfmode = V16HImode;
21807 extract
21808 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
21809 break;
21810 case V16HImode:
21811 if (unsigned_p)
21812 unpack = gen_avx2_zero_extendv8hiv8si2;
21813 else
21814 unpack = gen_avx2_sign_extendv8hiv8si2;
21815 halfmode = V8HImode;
21816 extract
21817 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
21818 break;
21819 case V16SImode:
21820 if (unsigned_p)
21821 unpack = gen_avx512f_zero_extendv8siv8di2;
21822 else
21823 unpack = gen_avx512f_sign_extendv8siv8di2;
21824 halfmode = V8SImode;
21825 extract
21826 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
21827 break;
21828 case V8SImode:
21829 if (unsigned_p)
21830 unpack = gen_avx2_zero_extendv4siv4di2;
21831 else
21832 unpack = gen_avx2_sign_extendv4siv4di2;
21833 halfmode = V4SImode;
21834 extract
21835 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
21836 break;
21837 case V16QImode:
21838 if (unsigned_p)
21839 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
21840 else
21841 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
21842 break;
21843 case V8HImode:
21844 if (unsigned_p)
21845 unpack = gen_sse4_1_zero_extendv4hiv4si2;
21846 else
21847 unpack = gen_sse4_1_sign_extendv4hiv4si2;
21848 break;
21849 case V4SImode:
21850 if (unsigned_p)
21851 unpack = gen_sse4_1_zero_extendv2siv2di2;
21852 else
21853 unpack = gen_sse4_1_sign_extendv2siv2di2;
21854 break;
21855 default:
21856 gcc_unreachable ();
21859 if (GET_MODE_SIZE (imode) >= 32)
21861 tmp = gen_reg_rtx (halfmode);
21862 emit_insn (extract (tmp, src));
21864 else if (high_p)
21866 /* Shift higher 8 bytes to lower 8 bytes. */
21867 tmp = gen_reg_rtx (V1TImode);
21868 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
21869 GEN_INT (64)));
21870 tmp = gen_lowpart (imode, tmp);
21872 else
21873 tmp = src;
21875 emit_insn (unpack (dest, tmp));
21877 else
21879 rtx (*unpack)(rtx, rtx, rtx);
21881 switch (imode)
21883 case V16QImode:
21884 if (high_p)
21885 unpack = gen_vec_interleave_highv16qi;
21886 else
21887 unpack = gen_vec_interleave_lowv16qi;
21888 break;
21889 case V8HImode:
21890 if (high_p)
21891 unpack = gen_vec_interleave_highv8hi;
21892 else
21893 unpack = gen_vec_interleave_lowv8hi;
21894 break;
21895 case V4SImode:
21896 if (high_p)
21897 unpack = gen_vec_interleave_highv4si;
21898 else
21899 unpack = gen_vec_interleave_lowv4si;
21900 break;
21901 default:
21902 gcc_unreachable ();
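/* Interleaving SRC with zero zero-extends each element, while interleaving
   it with the result of 0 > SRC - which is all ones exactly for negative
   elements - sign-extends it.  */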
21905 if (unsigned_p)
21906 tmp = force_reg (imode, CONST0_RTX (imode));
21907 else
21908 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
21909 src, pc_rtx, pc_rtx);
21911 rtx tmp2 = gen_reg_rtx (imode);
21912 emit_insn (unpack (tmp2, src, tmp));
21913 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
21917 /* Expand conditional increment or decrement using adc/sbb instructions.
21918 The default case using setcc followed by the conditional move can be
21919 done by generic code. */
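/* E.g. 'dest = c + (a <u b ? 1 : 0)' can be emitted as a compare of a and b,
   which sets the carry flag exactly when a <u b, followed by an
   add-with-carry of zero, with no setcc or cmov needed.  */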
21920 bool
21921 ix86_expand_int_addcc (rtx operands[])
21923 enum rtx_code code = GET_CODE (operands[1]);
21924 rtx flags;
21925 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
21926 rtx compare_op;
21927 rtx val = const0_rtx;
21928 bool fpcmp = false;
21929 enum machine_mode mode;
21930 rtx op0 = XEXP (operands[1], 0);
21931 rtx op1 = XEXP (operands[1], 1);
21933 if (operands[3] != const1_rtx
21934 && operands[3] != constm1_rtx)
21935 return false;
21936 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
21937 return false;
21938 code = GET_CODE (compare_op);
21940 flags = XEXP (compare_op, 0);
21942 if (GET_MODE (flags) == CCFPmode
21943 || GET_MODE (flags) == CCFPUmode)
21945 fpcmp = true;
21946 code = ix86_fp_compare_code_to_integer (code);
21949 if (code != LTU)
21951 val = constm1_rtx;
21952 if (fpcmp)
21953 PUT_CODE (compare_op,
21954 reverse_condition_maybe_unordered
21955 (GET_CODE (compare_op)));
21956 else
21957 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
21960 mode = GET_MODE (operands[0]);
21962 /* Construct either adc or sbb insn. */
21963 if ((code == LTU) == (operands[3] == constm1_rtx))
21965 switch (mode)
21967 case QImode:
21968 insn = gen_subqi3_carry;
21969 break;
21970 case HImode:
21971 insn = gen_subhi3_carry;
21972 break;
21973 case SImode:
21974 insn = gen_subsi3_carry;
21975 break;
21976 case DImode:
21977 insn = gen_subdi3_carry;
21978 break;
21979 default:
21980 gcc_unreachable ();
21983 else
21985 switch (mode)
21987 case QImode:
21988 insn = gen_addqi3_carry;
21989 break;
21990 case HImode:
21991 insn = gen_addhi3_carry;
21992 break;
21993 case SImode:
21994 insn = gen_addsi3_carry;
21995 break;
21996 case DImode:
21997 insn = gen_adddi3_carry;
21998 break;
21999 default:
22000 gcc_unreachable ();
22003 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22005 return true;
22009 /* Split OPERAND into half-mode parts stored in PARTS. Similar to
22010 split_double_mode, but works for floating point parameters and
22011 non-offsettable memories. For pushes, it returns just stack offsets;
22012 the values will be saved in the right order. Maximally four parts are generated. */
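/* E.g. on a 32-bit target a DFmode operand is split into two SImode parts
   and an XFmode operand into three; on a 64-bit target XFmode and TFmode
   operands are split into two parts.  */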
22014 static int
22015 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
22017 int size;
22019 if (!TARGET_64BIT)
22020 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22021 else
22022 size = (GET_MODE_SIZE (mode) + 4) / 8;
22024 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22025 gcc_assert (size >= 2 && size <= 4);
22027 /* Optimize constant pool reference to immediates. This is used by fp
22028 moves that force all constants to memory to allow combining. */
22029 if (MEM_P (operand) && MEM_READONLY_P (operand))
22031 rtx tmp = maybe_get_pool_constant (operand);
22032 if (tmp)
22033 operand = tmp;
22036 if (MEM_P (operand) && !offsettable_memref_p (operand))
22038 /* The only non-offsettable memories we handle are pushes. */
22039 int ok = push_operand (operand, VOIDmode);
22041 gcc_assert (ok);
22043 operand = copy_rtx (operand);
22044 PUT_MODE (operand, word_mode);
22045 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22046 return size;
22049 if (GET_CODE (operand) == CONST_VECTOR)
22051 enum machine_mode imode = int_mode_for_mode (mode);
22052 /* Caution: if we looked through a constant pool memory above,
22053 the operand may actually have a different mode now. That's
22054 ok, since we want to pun this all the way back to an integer. */
22055 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22056 gcc_assert (operand != NULL);
22057 mode = imode;
22060 if (!TARGET_64BIT)
22062 if (mode == DImode)
22063 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22064 else
22066 int i;
22068 if (REG_P (operand))
22070 gcc_assert (reload_completed);
22071 for (i = 0; i < size; i++)
22072 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22074 else if (offsettable_memref_p (operand))
22076 operand = adjust_address (operand, SImode, 0);
22077 parts[0] = operand;
22078 for (i = 1; i < size; i++)
22079 parts[i] = adjust_address (operand, SImode, 4 * i);
22081 else if (GET_CODE (operand) == CONST_DOUBLE)
22083 REAL_VALUE_TYPE r;
22084 long l[4];
22086 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22087 switch (mode)
22089 case TFmode:
22090 real_to_target (l, &r, mode);
22091 parts[3] = gen_int_mode (l[3], SImode);
22092 parts[2] = gen_int_mode (l[2], SImode);
22093 break;
22094 case XFmode:
22095 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22096 long double may not be 80-bit. */
22097 real_to_target (l, &r, mode);
22098 parts[2] = gen_int_mode (l[2], SImode);
22099 break;
22100 case DFmode:
22101 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22102 break;
22103 default:
22104 gcc_unreachable ();
22106 parts[1] = gen_int_mode (l[1], SImode);
22107 parts[0] = gen_int_mode (l[0], SImode);
22109 else
22110 gcc_unreachable ();
22113 else
22115 if (mode == TImode)
22116 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22117 if (mode == XFmode || mode == TFmode)
22119 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22120 if (REG_P (operand))
22122 gcc_assert (reload_completed);
22123 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22124 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22126 else if (offsettable_memref_p (operand))
22128 operand = adjust_address (operand, DImode, 0);
22129 parts[0] = operand;
22130 parts[1] = adjust_address (operand, upper_mode, 8);
22132 else if (GET_CODE (operand) == CONST_DOUBLE)
22134 REAL_VALUE_TYPE r;
22135 long l[4];
22137 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22138 real_to_target (l, &r, mode);
22140 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22141 if (HOST_BITS_PER_WIDE_INT >= 64)
22142 parts[0]
22143 = gen_int_mode
22144 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22145 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22146 DImode);
22147 else
22148 parts[0] = immed_double_const (l[0], l[1], DImode);
22150 if (upper_mode == SImode)
22151 parts[1] = gen_int_mode (l[2], SImode);
22152 else if (HOST_BITS_PER_WIDE_INT >= 64)
22153 parts[1]
22154 = gen_int_mode
22155 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22156 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22157 DImode);
22158 else
22159 parts[1] = immed_double_const (l[2], l[3], DImode);
22161 else
22162 gcc_unreachable ();
22166 return size;
22169 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22170 The value is split into half-mode parts; operands 2-5 receive the
22171 destination parts and operands 6-9 the source parts, arranged in the
22172 order in which the moves must be emitted. */
22174 void
22175 ix86_split_long_move (rtx operands[])
22177 rtx part[2][4];
22178 int nparts, i, j;
22179 int push = 0;
22180 int collisions = 0;
22181 enum machine_mode mode = GET_MODE (operands[0]);
22182 bool collisionparts[4];
22184 /* The DFmode expanders may ask us to move a double.
22185 For a 64-bit target this is a single move. By hiding that fact
22186 here we simplify the i386.md splitters. */
22187 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22189 /* Optimize constant pool reference to immediates. This is used by
22190 fp moves that force all constants to memory to allow combining. */
22192 if (MEM_P (operands[1])
22193 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22194 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22195 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22196 if (push_operand (operands[0], VOIDmode))
22198 operands[0] = copy_rtx (operands[0]);
22199 PUT_MODE (operands[0], word_mode);
22201 else
22202 operands[0] = gen_lowpart (DImode, operands[0]);
22203 operands[1] = gen_lowpart (DImode, operands[1]);
22204 emit_move_insn (operands[0], operands[1]);
22205 return;
22208 /* The only non-offsettable memory we handle is push. */
22209 if (push_operand (operands[0], VOIDmode))
22210 push = 1;
22211 else
22212 gcc_assert (!MEM_P (operands[0])
22213 || offsettable_memref_p (operands[0]));
22215 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22216 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22218 /* When emitting push, take care for source operands on the stack. */
22219 if (push && MEM_P (operands[1])
22220 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22222 rtx src_base = XEXP (part[1][nparts - 1], 0);
22224 /* Compensate for the stack decrement by 4. */
22225 if (!TARGET_64BIT && nparts == 3
22226 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22227 src_base = plus_constant (Pmode, src_base, 4);
22229 /* src_base refers to the stack pointer and is
22230 automatically decreased by emitted push. */
22231 for (i = 0; i < nparts; i++)
22232 part[1][i] = change_address (part[1][i],
22233 GET_MODE (part[1][i]), src_base);
22236 /* We need to do copy in the right order in case an address register
22237 of the source overlaps the destination. */
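/* E.g. if the source is loaded through an address based on a register that
   is also the destination of one of the halves, writing that half first
   would corrupt the address used for the remaining loads, so the order is
   adjusted (or, for multiple collisions, the address rebuilt with lea).  */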
22238 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22240 rtx tmp;
22242 for (i = 0; i < nparts; i++)
22244 collisionparts[i]
22245 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22246 if (collisionparts[i])
22247 collisions++;
22250 /* Collision in the middle part can be handled by reordering. */
22251 if (collisions == 1 && nparts == 3 && collisionparts [1])
22253 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22254 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22256 else if (collisions == 1
22257 && nparts == 4
22258 && (collisionparts [1] || collisionparts [2]))
22260 if (collisionparts [1])
22262 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22263 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22265 else
22267 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22268 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22272 /* If there are more collisions, we can't handle it by reordering.
22273 Do an lea to the last part and use only one colliding move. */
22274 else if (collisions > 1)
22276 rtx base;
22278 collisions = 1;
22280 base = part[0][nparts - 1];
22282 /* Handle the case when the last part isn't valid for lea.
22283 Happens in 64-bit mode storing the 12-byte XFmode. */
22284 if (GET_MODE (base) != Pmode)
22285 base = gen_rtx_REG (Pmode, REGNO (base));
22287 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22288 part[1][0] = replace_equiv_address (part[1][0], base);
22289 for (i = 1; i < nparts; i++)
22291 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22292 part[1][i] = replace_equiv_address (part[1][i], tmp);
22297 if (push)
22299 if (!TARGET_64BIT)
22301 if (nparts == 3)
22303 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22304 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22305 stack_pointer_rtx, GEN_INT (-4)));
22306 emit_move_insn (part[0][2], part[1][2]);
22308 else if (nparts == 4)
22310 emit_move_insn (part[0][3], part[1][3]);
22311 emit_move_insn (part[0][2], part[1][2]);
22314 else
22316 /* In 64-bit mode we don't have a 32-bit push available. If this is a
22317 register, that is OK - we will just use the larger counterpart. We also
22318 retype memory - this comes from an attempt to avoid a REX prefix on
22319 moving the second half of a TFmode value. */
22320 if (GET_MODE (part[1][1]) == SImode)
22322 switch (GET_CODE (part[1][1]))
22324 case MEM:
22325 part[1][1] = adjust_address (part[1][1], DImode, 0);
22326 break;
22328 case REG:
22329 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22330 break;
22332 default:
22333 gcc_unreachable ();
22336 if (GET_MODE (part[1][0]) == SImode)
22337 part[1][0] = part[1][1];
22340 emit_move_insn (part[0][1], part[1][1]);
22341 emit_move_insn (part[0][0], part[1][0]);
22342 return;
22345 /* Choose correct order to not overwrite the source before it is copied. */
22346 if ((REG_P (part[0][0])
22347 && REG_P (part[1][1])
22348 && (REGNO (part[0][0]) == REGNO (part[1][1])
22349 || (nparts == 3
22350 && REGNO (part[0][0]) == REGNO (part[1][2]))
22351 || (nparts == 4
22352 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22353 || (collisions > 0
22354 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22356 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22358 operands[2 + i] = part[0][j];
22359 operands[6 + i] = part[1][j];
22362 else
22364 for (i = 0; i < nparts; i++)
22366 operands[2 + i] = part[0][i];
22367 operands[6 + i] = part[1][i];
22371 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22372 if (optimize_insn_for_size_p ())
22374 for (j = 0; j < nparts - 1; j++)
22375 if (CONST_INT_P (operands[6 + j])
22376 && operands[6 + j] != const0_rtx
22377 && REG_P (operands[2 + j]))
22378 for (i = j; i < nparts - 1; i++)
22379 if (CONST_INT_P (operands[7 + i])
22380 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22381 operands[7 + i] = operands[2 + j];
22384 for (i = 0; i < nparts; i++)
22385 emit_move_insn (operands[2 + i], operands[6 + i]);
22387 return;
22390 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22391 left shift by a constant, either using a single shift or
22392 a sequence of add instructions. */
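/* Each self-addition doubles the operand, so a shift left by COUNT can be
   replaced by COUNT adds; e.g. a shift by 2 becomes two adds whenever
   2 * add cost does not exceed the constant-shift cost.  */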
22394 static void
22395 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22397 rtx (*insn)(rtx, rtx, rtx);
22399 if (count == 1
22400 || (count * ix86_cost->add <= ix86_cost->shift_const
22401 && !optimize_insn_for_size_p ()))
22403 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22404 while (count-- > 0)
22405 emit_insn (insn (operand, operand, operand));
22407 else
22409 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22410 emit_insn (insn (operand, operand, GEN_INT (count)));
22414 void
22415 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22417 rtx (*gen_ashl3)(rtx, rtx, rtx);
22418 rtx (*gen_shld)(rtx, rtx, rtx);
22419 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22421 rtx low[2], high[2];
22422 int count;
22424 if (CONST_INT_P (operands[2]))
22426 split_double_mode (mode, operands, 2, low, high);
22427 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22429 if (count >= half_width)
22431 emit_move_insn (high[0], low[1]);
22432 emit_move_insn (low[0], const0_rtx);
22434 if (count > half_width)
22435 ix86_expand_ashl_const (high[0], count - half_width, mode);
22437 else
22439 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22441 if (!rtx_equal_p (operands[0], operands[1]))
22442 emit_move_insn (operands[0], operands[1]);
22444 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22445 ix86_expand_ashl_const (low[0], count, mode);
22447 return;
22450 split_double_mode (mode, operands, 1, low, high);
22452 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22454 if (operands[1] == const1_rtx)
22456 /* Assuming we've chosen QImode-capable registers, then 1 << N
22457 can be done with two 32/64-bit shifts, no branches, no cmoves. */
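/* E.g. for a DImode 1 << N on a 32-bit target with N == 40 at run time:
   bit 5 of the count is set, so the high half receives 1 and the low half 0;
   the hardware masks the 32-bit shift count to 5 bits, so the final shifts
   by 40 & 31 == 8 leave high = 1 << 8 and low = 0, i.e. 2**40.  */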
22458 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22460 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22462 ix86_expand_clear (low[0]);
22463 ix86_expand_clear (high[0]);
22464 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22466 d = gen_lowpart (QImode, low[0]);
22467 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22468 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22469 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22471 d = gen_lowpart (QImode, high[0]);
22472 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22473 s = gen_rtx_NE (QImode, flags, const0_rtx);
22474 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22477 /* Otherwise, we can get the same results by manually performing
22478 a bit extract operation on bit 5/6, and then performing the two
22479 shifts. The two methods of getting 0/1 into low/high are exactly
22480 the same size. Avoiding the shift in the bit extract case helps
22481 pentium4 a bit; no one else seems to care much either way. */
22482 else
22484 enum machine_mode half_mode;
22485 rtx (*gen_lshr3)(rtx, rtx, rtx);
22486 rtx (*gen_and3)(rtx, rtx, rtx);
22487 rtx (*gen_xor3)(rtx, rtx, rtx);
22488 HOST_WIDE_INT bits;
22489 rtx x;
22491 if (mode == DImode)
22493 half_mode = SImode;
22494 gen_lshr3 = gen_lshrsi3;
22495 gen_and3 = gen_andsi3;
22496 gen_xor3 = gen_xorsi3;
22497 bits = 5;
22499 else
22501 half_mode = DImode;
22502 gen_lshr3 = gen_lshrdi3;
22503 gen_and3 = gen_anddi3;
22504 gen_xor3 = gen_xordi3;
22505 bits = 6;
22508 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22509 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22510 else
22511 x = gen_lowpart (half_mode, operands[2]);
22512 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22514 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22515 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22516 emit_move_insn (low[0], high[0]);
22517 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22520 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22521 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22522 return;
22525 if (operands[1] == constm1_rtx)
22527 /* For -1 << N, we can avoid the shld instruction, because we
22528 know that we're shifting 0...31/63 ones into a -1. */
22529 emit_move_insn (low[0], constm1_rtx);
22530 if (optimize_insn_for_size_p ())
22531 emit_move_insn (high[0], low[0]);
22532 else
22533 emit_move_insn (high[0], constm1_rtx);
22535 else
22537 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22539 if (!rtx_equal_p (operands[0], operands[1]))
22540 emit_move_insn (operands[0], operands[1]);
22542 split_double_mode (mode, operands, 1, low, high);
22543 emit_insn (gen_shld (high[0], low[0], operands[2]));
22546 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22548 if (TARGET_CMOVE && scratch)
22550 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22551 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22553 ix86_expand_clear (scratch);
22554 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22556 else
22558 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22559 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22561 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
22565 void
22566 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22568 rtx (*gen_ashr3)(rtx, rtx, rtx)
22569 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22570 rtx (*gen_shrd)(rtx, rtx, rtx);
22571 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22573 rtx low[2], high[2];
22574 int count;
22576 if (CONST_INT_P (operands[2]))
22578 split_double_mode (mode, operands, 2, low, high);
22579 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22581 if (count == GET_MODE_BITSIZE (mode) - 1)
22583 emit_move_insn (high[0], high[1]);
22584 emit_insn (gen_ashr3 (high[0], high[0],
22585 GEN_INT (half_width - 1)));
22586 emit_move_insn (low[0], high[0]);
22589 else if (count >= half_width)
22591 emit_move_insn (low[0], high[1]);
22592 emit_move_insn (high[0], low[0]);
22593 emit_insn (gen_ashr3 (high[0], high[0],
22594 GEN_INT (half_width - 1)));
22596 if (count > half_width)
22597 emit_insn (gen_ashr3 (low[0], low[0],
22598 GEN_INT (count - half_width)));
22600 else
22602 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22604 if (!rtx_equal_p (operands[0], operands[1]))
22605 emit_move_insn (operands[0], operands[1]);
22607 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22608 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22611 else
22613 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22615 if (!rtx_equal_p (operands[0], operands[1]))
22616 emit_move_insn (operands[0], operands[1]);
22618 split_double_mode (mode, operands, 1, low, high);
22620 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22621 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22623 if (TARGET_CMOVE && scratch)
22625 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22626 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22628 emit_move_insn (scratch, high[0]);
22629 emit_insn (gen_ashr3 (scratch, scratch,
22630 GEN_INT (half_width - 1)));
22631 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22632 scratch));
22634 else
22636 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22637 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22639 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22644 void
22645 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22647 rtx (*gen_lshr3)(rtx, rtx, rtx)
22648 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22649 rtx (*gen_shrd)(rtx, rtx, rtx);
22650 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22652 rtx low[2], high[2];
22653 int count;
22655 if (CONST_INT_P (operands[2]))
22657 split_double_mode (mode, operands, 2, low, high);
22658 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22660 if (count >= half_width)
22662 emit_move_insn (low[0], high[1]);
22663 ix86_expand_clear (high[0]);
22665 if (count > half_width)
22666 emit_insn (gen_lshr3 (low[0], low[0],
22667 GEN_INT (count - half_width)));
22669 else
22671 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22673 if (!rtx_equal_p (operands[0], operands[1]))
22674 emit_move_insn (operands[0], operands[1]);
22676 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22677 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22680 else
22682 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22684 if (!rtx_equal_p (operands[0], operands[1]))
22685 emit_move_insn (operands[0], operands[1]);
22687 split_double_mode (mode, operands, 1, low, high);
22689 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22690 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22692 if (TARGET_CMOVE && scratch)
22694 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22695 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22697 ix86_expand_clear (scratch);
22698 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22699 scratch));
22701 else
22703 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22704 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22706 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
22711 /* Predict just emitted jump instruction to be taken with probability PROB. */
22712 static void
22713 predict_jump (int prob)
22715 rtx insn = get_last_insn ();
22716 gcc_assert (JUMP_P (insn));
22717 add_int_reg_note (insn, REG_BR_PROB, prob);
22720 /* Helper function for the string operations below. Test whether VARIABLE
22721 is aligned to VALUE bytes; if so, jump to the returned label. */
22722 static rtx
22723 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22725 rtx label = gen_label_rtx ();
22726 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22727 if (GET_MODE (variable) == DImode)
22728 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22729 else
22730 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22731 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22732 1, label);
22733 if (epilogue)
22734 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22735 else
22736 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22737 return label;
22740 /* Decrease COUNTREG by VALUE. */
22741 static void
22742 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22744 rtx (*gen_add)(rtx, rtx, rtx)
22745 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22747 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22750 /* Zero-extend EXP, which may be SImode, to a Pmode register. */
22751 static rtx
22752 ix86_zero_extend_to_Pmode (rtx exp)
22754 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22757 /* Divide COUNTREG by SCALE. */
22758 static rtx
22759 scale_counter (rtx countreg, int scale)
22761 rtx sc;
22763 if (scale == 1)
22764 return countreg;
22765 if (CONST_INT_P (countreg))
22766 return GEN_INT (INTVAL (countreg) / scale);
22767 gcc_assert (REG_P (countreg));
22769 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22770 GEN_INT (exact_log2 (scale)),
22771 NULL, 1, OPTAB_DIRECT);
22772 return sc;
22775 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22776 DImode for constant loop counts. */
22778 static enum machine_mode
22779 counter_mode (rtx count_exp)
22781 if (GET_MODE (count_exp) != VOIDmode)
22782 return GET_MODE (count_exp);
22783 if (!CONST_INT_P (count_exp))
22784 return Pmode;
22785 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22786 return DImode;
22787 return SImode;
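/* As an illustrative reading of the function above: a non-constant count
   uses Pmode, a constant such as 0x100000000 on a 64-bit target needs
   DImode, and any constant fitting in 32 bits uses SImode even when
   TARGET_64BIT is set.  */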
22790 /* Copy the address to a Pmode register. This is used for x32 to
22791 truncate DImode TLS address to a SImode register. */
22793 static rtx
22794 ix86_copy_addr_to_reg (rtx addr)
22796 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
22797 return copy_addr_to_reg (addr);
22798 else
22800 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
22801 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
22805 /* When ISSETMEM is FALSE, output a simple loop that copies memory pointed to
22806 by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the overall
22807 size is COUNT, specified in bytes. When ISSETMEM is TRUE, output the
22808 equivalent loop that sets memory to VALUE (supposed to be in MODE).
22810 The size is rounded down to a whole number of chunks moved at once.
22811 SRCMEM and DESTMEM provide the MEM rtx to feed proper aliasing info. */
22814 static void
22815 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
22816 rtx destptr, rtx srcptr, rtx value,
22817 rtx count, enum machine_mode mode, int unroll,
22818 int expected_size, bool issetmem)
22820 rtx out_label, top_label, iter, tmp;
22821 enum machine_mode iter_mode = counter_mode (count);
22822 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
22823 rtx piece_size = GEN_INT (piece_size_n);
22824 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
22825 rtx size;
22826 int i;
22828 top_label = gen_label_rtx ();
22829 out_label = gen_label_rtx ();
22830 iter = gen_reg_rtx (iter_mode);
22832 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
22833 NULL, 1, OPTAB_DIRECT);
22834 /* Those two should combine. */
22835 if (piece_size == const1_rtx)
22837 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
22838 true, out_label);
22839 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22841 emit_move_insn (iter, const0_rtx);
22843 emit_label (top_label);
22845 tmp = convert_modes (Pmode, iter_mode, iter, true);
22847 /* This assert could be relaxed - in that case we'll need to compute
22848 the smallest power of two containing PIECE_SIZE_N and pass it to
22849 offset_address. */
22850 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
22851 destmem = offset_address (destmem, tmp, piece_size_n);
22852 destmem = adjust_address (destmem, mode, 0);
22854 if (!issetmem)
22856 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
22857 srcmem = adjust_address (srcmem, mode, 0);
22859 /* When unrolling for chips that reorder memory reads and writes,
22860 we can save registers by using a single temporary.
22861 Also, using 4 temporaries is overkill in 32-bit mode. */
22862 if (!TARGET_64BIT && 0)
22864 for (i = 0; i < unroll; i++)
22866 if (i)
22868 destmem =
22869 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22870 srcmem =
22871 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22873 emit_move_insn (destmem, srcmem);
22876 else
22878 rtx tmpreg[4];
22879 gcc_assert (unroll <= 4);
22880 for (i = 0; i < unroll; i++)
22882 tmpreg[i] = gen_reg_rtx (mode);
22883 if (i)
22885 srcmem =
22886 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22888 emit_move_insn (tmpreg[i], srcmem);
22890 for (i = 0; i < unroll; i++)
22892 if (i)
22894 destmem =
22895 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22897 emit_move_insn (destmem, tmpreg[i]);
22901 else
22902 for (i = 0; i < unroll; i++)
22904 if (i)
22905 destmem =
22906 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22907 emit_move_insn (destmem, value);
22910 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
22911 true, OPTAB_LIB_WIDEN);
22912 if (tmp != iter)
22913 emit_move_insn (iter, tmp);
22915 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
22916 true, top_label);
22917 if (expected_size != -1)
22919 expected_size /= GET_MODE_SIZE (mode) * unroll;
22920 if (expected_size == 0)
22921 predict_jump (0);
22922 else if (expected_size > REG_BR_PROB_BASE)
22923 predict_jump (REG_BR_PROB_BASE - 1);
22924 else
22925 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
22927 else
22928 predict_jump (REG_BR_PROB_BASE * 80 / 100);
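/* Worked example of the probability computed above (REG_BR_PROB_BASE is
   10000): with an expected iteration count of 4 the backward branch gets
   10000 - (10000 + 2) / 4 = 7500, i.e. it is predicted taken 75% of the
   time, matching a loop that runs about four iterations on average.  */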
22929 iter = ix86_zero_extend_to_Pmode (iter);
22930 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
22931 true, OPTAB_LIB_WIDEN);
22932 if (tmp != destptr)
22933 emit_move_insn (destptr, tmp);
22934 if (!issetmem)
22936 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
22937 true, OPTAB_LIB_WIDEN);
22938 if (tmp != srcptr)
22939 emit_move_insn (srcptr, tmp);
22941 emit_label (out_label);
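/* The code emitted by the expander above corresponds roughly to the
   following sketch, where PIECE is GET_MODE_SIZE (mode) * unroll and the
   names are illustrative rather than actual RTL:

     size = count & ~(PIECE - 1);
     if (size == 0) goto out;      -- guard emitted only for 1-byte pieces
     iter = 0;
     do
       {
         copy or set one PIECE-sized chunk at DEST + iter (and SRC + iter);
         iter += PIECE;
       }
     while (iter < size);
     dest += iter;
     src += iter;                  -- memcpy case only
   out:;  */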
22944 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
22945 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
22946 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
22947 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
22948 ORIG_VALUE is the original value passed to memset to fill the memory with.
22949 Other arguments have the same meaning as for the previous function. */
22951 static void
22952 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
22953 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
22954 rtx count,
22955 enum machine_mode mode, bool issetmem)
22957 rtx destexp;
22958 rtx srcexp;
22959 rtx countreg;
22960 HOST_WIDE_INT rounded_count;
22962 /* If possible, it is shorter to use rep movs.
22963 TODO: Maybe it is better to move this logic to decide_alg. */
22964 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
22965 && (!issetmem || orig_value == const0_rtx))
22966 mode = SImode;
22968 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
22969 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
22971 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
22972 GET_MODE_SIZE (mode)));
22973 if (mode != QImode)
22975 destexp = gen_rtx_ASHIFT (Pmode, countreg,
22976 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22977 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
22979 else
22980 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
22981 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
22983 rounded_count = (INTVAL (count)
22984 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22985 destmem = shallow_copy_rtx (destmem);
22986 set_mem_size (destmem, rounded_count);
22988 else if (MEM_SIZE_KNOWN_P (destmem))
22989 clear_mem_size (destmem);
22991 if (issetmem)
22993 value = force_reg (mode, gen_lowpart (mode, value));
22994 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
22996 else
22998 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
22999 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23000 if (mode != QImode)
23002 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23003 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23004 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23006 else
23007 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23008 if (CONST_INT_P (count))
23010 rounded_count = (INTVAL (count)
23011 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23012 srcmem = shallow_copy_rtx (srcmem);
23013 set_mem_size (srcmem, rounded_count);
23015 else
23017 if (MEM_SIZE_KNOWN_P (srcmem))
23018 clear_mem_size (srcmem);
23020 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23021 destexp, srcexp));
23025 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23026 DESTMEM.
23027 SRCMEM is passed by pointer so it can be updated on return.
23028 The return value is the updated DESTMEM. */
23029 static rtx
23030 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23031 HOST_WIDE_INT size_to_move)
23033 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23034 enum insn_code code;
23035 enum machine_mode move_mode;
23036 int piece_size, i;
23038 /* Find the widest mode in which we could perform moves.
23039 Start with the biggest power of 2 not larger than SIZE_TO_MOVE and halve
23040 it until a move of that size is supported. */
23041 piece_size = 1 << floor_log2 (size_to_move);
23042 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23043 code = optab_handler (mov_optab, move_mode);
23044 while (code == CODE_FOR_nothing && piece_size > 1)
23046 piece_size >>= 1;
23047 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23048 code = optab_handler (mov_optab, move_mode);
23051 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23052 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23053 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23055 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23056 move_mode = mode_for_vector (word_mode, nunits);
23057 code = optab_handler (mov_optab, move_mode);
23058 if (code == CODE_FOR_nothing)
23060 move_mode = word_mode;
23061 piece_size = GET_MODE_SIZE (move_mode);
23062 code = optab_handler (mov_optab, move_mode);
23065 gcc_assert (code != CODE_FOR_nothing);
23067 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23068 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23070 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23071 gcc_assert (size_to_move % piece_size == 0);
23072 adjust = GEN_INT (piece_size);
23073 for (i = 0; i < size_to_move; i += piece_size)
23075 /* We move from memory to memory, so we'll need to do it via
23076 a temporary register. */
23077 tempreg = gen_reg_rtx (move_mode);
23078 emit_insn (GEN_FCN (code) (tempreg, src));
23079 emit_insn (GEN_FCN (code) (dst, tempreg));
23081 emit_move_insn (destptr,
23082 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23083 emit_move_insn (srcptr,
23084 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23086 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23087 piece_size);
23088 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23089 piece_size);
23092 /* Update DST and SRC rtx. */
23093 *srcmem = src;
23094 return dst;
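/* For illustration, a power-of-two SIZE_TO_MOVE of 16 first tries a single
   16-byte integer move; if the target has no such move and no matching
   vector move, the loops above fall back to word_mode, emitting two DImode
   moves on a 64-bit target (callers pass power-of-two sizes, which keeps
   the modulo assertion satisfied).  */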
23097 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23098 static void
23099 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23100 rtx destptr, rtx srcptr, rtx count, int max_size)
23102 rtx src, dest;
23103 if (CONST_INT_P (count))
23105 HOST_WIDE_INT countval = INTVAL (count);
23106 HOST_WIDE_INT epilogue_size = countval % max_size;
23107 int i;
23109 /* For now MAX_SIZE should be a power of 2. This assert could be
23110 relaxed, but it'll require a bit more complicated epilogue
23111 expanding. */
23112 gcc_assert ((max_size & (max_size - 1)) == 0);
23113 for (i = max_size; i >= 1; i >>= 1)
23115 if (epilogue_size & i)
23116 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23118 return;
23120 if (max_size > 8)
23122 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23123 count, 1, OPTAB_DIRECT);
23124 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23125 count, QImode, 1, 4, false);
23126 return;
23129 /* When there are stringops, we can cheaply increase dest and src pointers.
23130 Otherwise we save code size by maintaining offset (zero is readily
23131 available from the preceding rep operation) and using x86 addressing modes. */
23133 if (TARGET_SINGLE_STRINGOP)
23135 if (max_size > 4)
23137 rtx label = ix86_expand_aligntest (count, 4, true);
23138 src = change_address (srcmem, SImode, srcptr);
23139 dest = change_address (destmem, SImode, destptr);
23140 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23141 emit_label (label);
23142 LABEL_NUSES (label) = 1;
23144 if (max_size > 2)
23146 rtx label = ix86_expand_aligntest (count, 2, true);
23147 src = change_address (srcmem, HImode, srcptr);
23148 dest = change_address (destmem, HImode, destptr);
23149 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23150 emit_label (label);
23151 LABEL_NUSES (label) = 1;
23153 if (max_size > 1)
23155 rtx label = ix86_expand_aligntest (count, 1, true);
23156 src = change_address (srcmem, QImode, srcptr);
23157 dest = change_address (destmem, QImode, destptr);
23158 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23159 emit_label (label);
23160 LABEL_NUSES (label) = 1;
23163 else
23165 rtx offset = force_reg (Pmode, const0_rtx);
23166 rtx tmp;
23168 if (max_size > 4)
23170 rtx label = ix86_expand_aligntest (count, 4, true);
23171 src = change_address (srcmem, SImode, srcptr);
23172 dest = change_address (destmem, SImode, destptr);
23173 emit_move_insn (dest, src);
23174 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23175 true, OPTAB_LIB_WIDEN);
23176 if (tmp != offset)
23177 emit_move_insn (offset, tmp);
23178 emit_label (label);
23179 LABEL_NUSES (label) = 1;
23181 if (max_size > 2)
23183 rtx label = ix86_expand_aligntest (count, 2, true);
23184 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23185 src = change_address (srcmem, HImode, tmp);
23186 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23187 dest = change_address (destmem, HImode, tmp);
23188 emit_move_insn (dest, src);
23189 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23190 true, OPTAB_LIB_WIDEN);
23191 if (tmp != offset)
23192 emit_move_insn (offset, tmp);
23193 emit_label (label);
23194 LABEL_NUSES (label) = 1;
23196 if (max_size > 1)
23198 rtx label = ix86_expand_aligntest (count, 1, true);
23199 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23200 src = change_address (srcmem, QImode, tmp);
23201 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23202 dest = change_address (destmem, QImode, tmp);
23203 emit_move_insn (dest, src);
23204 emit_label (label);
23205 LABEL_NUSES (label) = 1;
23210 /* This function emits moves to fill SIZE_TO_MOVE bytes starting at DESTMEM
23211 with the value PROMOTED_VAL.
23213 The return value is the updated DESTMEM. */
23214 static rtx
23215 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23216 HOST_WIDE_INT size_to_move)
23218 rtx dst = destmem, adjust;
23219 enum insn_code code;
23220 enum machine_mode move_mode;
23221 int piece_size, i;
23223 /* Find the widest mode in which we could perform moves.
23224 Start with the biggest power of 2 not larger than SIZE_TO_MOVE and halve
23225 it until a move of that size is supported. */
23226 move_mode = GET_MODE (promoted_val);
23227 if (move_mode == VOIDmode)
23228 move_mode = QImode;
23229 if (size_to_move < GET_MODE_SIZE (move_mode))
23231 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23232 promoted_val = gen_lowpart (move_mode, promoted_val);
23234 piece_size = GET_MODE_SIZE (move_mode);
23235 code = optab_handler (mov_optab, move_mode);
23236 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23238 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23240 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23241 gcc_assert (size_to_move % piece_size == 0);
23242 adjust = GEN_INT (piece_size);
23243 for (i = 0; i < size_to_move; i += piece_size)
23245 if (piece_size <= GET_MODE_SIZE (word_mode))
23247 emit_insn (gen_strset (destptr, dst, promoted_val));
23248 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23249 piece_size);
23250 continue;
23253 emit_insn (GEN_FCN (code) (dst, promoted_val));
23255 emit_move_insn (destptr,
23256 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23258 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23259 piece_size);
23262 /* Update DST rtx. */
23263 return dst;
23265 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23266 static void
23267 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23268 rtx count, int max_size)
23270 count =
23271 expand_simple_binop (counter_mode (count), AND, count,
23272 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23273 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23274 gen_lowpart (QImode, value), count, QImode,
23275 1, max_size / 2, true);
23278 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23279 static void
23280 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23281 rtx count, int max_size)
23283 rtx dest;
23285 if (CONST_INT_P (count))
23287 HOST_WIDE_INT countval = INTVAL (count);
23288 HOST_WIDE_INT epilogue_size = countval % max_size;
23289 int i;
23291 /* For now MAX_SIZE should be a power of 2. This assert could be
23292 relaxed, but it'll require a bit more complicated epilogue
23293 expanding. */
23294 gcc_assert ((max_size & (max_size - 1)) == 0);
23295 for (i = max_size; i >= 1; i >>= 1)
23297 if (epilogue_size & i)
23299 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23300 destmem = emit_memset (destmem, destptr, vec_value, i);
23301 else
23302 destmem = emit_memset (destmem, destptr, value, i);
23305 return;
23307 if (max_size > 32)
23309 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23310 return;
23312 if (max_size > 16)
23314 rtx label = ix86_expand_aligntest (count, 16, true);
23315 if (TARGET_64BIT)
23317 dest = change_address (destmem, DImode, destptr);
23318 emit_insn (gen_strset (destptr, dest, value));
23319 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23320 emit_insn (gen_strset (destptr, dest, value));
23322 else
23324 dest = change_address (destmem, SImode, destptr);
23325 emit_insn (gen_strset (destptr, dest, value));
23326 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23327 emit_insn (gen_strset (destptr, dest, value));
23328 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23329 emit_insn (gen_strset (destptr, dest, value));
23330 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23331 emit_insn (gen_strset (destptr, dest, value));
23333 emit_label (label);
23334 LABEL_NUSES (label) = 1;
23336 if (max_size > 8)
23338 rtx label = ix86_expand_aligntest (count, 8, true);
23339 if (TARGET_64BIT)
23341 dest = change_address (destmem, DImode, destptr);
23342 emit_insn (gen_strset (destptr, dest, value));
23344 else
23346 dest = change_address (destmem, SImode, destptr);
23347 emit_insn (gen_strset (destptr, dest, value));
23348 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23349 emit_insn (gen_strset (destptr, dest, value));
23351 emit_label (label);
23352 LABEL_NUSES (label) = 1;
23354 if (max_size > 4)
23356 rtx label = ix86_expand_aligntest (count, 4, true);
23357 dest = change_address (destmem, SImode, destptr);
23358 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23359 emit_label (label);
23360 LABEL_NUSES (label) = 1;
23362 if (max_size > 2)
23364 rtx label = ix86_expand_aligntest (count, 2, true);
23365 dest = change_address (destmem, HImode, destptr);
23366 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23367 emit_label (label);
23368 LABEL_NUSES (label) = 1;
23370 if (max_size > 1)
23372 rtx label = ix86_expand_aligntest (count, 1, true);
23373 dest = change_address (destmem, QImode, destptr);
23374 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23375 emit_label (label);
23376 LABEL_NUSES (label) = 1;
23380 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
23381 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
23382 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23383 ignored.
23384 Return value is updated DESTMEM. */
23385 static rtx
23386 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23387 rtx destptr, rtx srcptr, rtx value,
23388 rtx vec_value, rtx count, int align,
23389 int desired_alignment, bool issetmem)
23391 int i;
23392 for (i = 1; i < desired_alignment; i <<= 1)
23394 if (align <= i)
23396 rtx label = ix86_expand_aligntest (destptr, i, false);
23397 if (issetmem)
23399 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23400 destmem = emit_memset (destmem, destptr, vec_value, i);
23401 else
23402 destmem = emit_memset (destmem, destptr, value, i);
23404 else
23405 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23406 ix86_adjust_counter (count, i);
23407 emit_label (label);
23408 LABEL_NUSES (label) = 1;
23409 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23412 return destmem;
23415 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
23416 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23417 and jump to DONE_LABEL. */
23418 static void
23419 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23420 rtx destptr, rtx srcptr,
23421 rtx value, rtx vec_value,
23422 rtx count, int size,
23423 rtx done_label, bool issetmem)
23425 rtx label = ix86_expand_aligntest (count, size, false);
23426 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23427 rtx modesize;
23428 int n;
23430 /* If we do not have a vector value to copy, we must reduce the size. */
23431 if (issetmem)
23433 if (!vec_value)
23435 if (GET_MODE (value) == VOIDmode && size > 8)
23436 mode = Pmode;
23437 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23438 mode = GET_MODE (value);
23440 else
23441 mode = GET_MODE (vec_value), value = vec_value;
23443 else
23445 /* Choose appropriate vector mode. */
23446 if (size >= 32)
23447 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23448 else if (size >= 16)
23449 mode = TARGET_SSE ? V16QImode : DImode;
23450 srcmem = change_address (srcmem, mode, srcptr);
23452 destmem = change_address (destmem, mode, destptr);
23453 modesize = GEN_INT (GET_MODE_SIZE (mode));
23454 gcc_assert (GET_MODE_SIZE (mode) <= size);
23455 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23457 if (issetmem)
23458 emit_move_insn (destmem, gen_lowpart (mode, value));
23459 else
23461 emit_move_insn (destmem, srcmem);
23462 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23464 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23467 destmem = offset_address (destmem, count, 1);
23468 destmem = offset_address (destmem, GEN_INT (-2 * size),
23469 GET_MODE_SIZE (mode));
23470 if (!issetmem)
23472 srcmem = offset_address (srcmem, count, 1);
23473 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23474 GET_MODE_SIZE (mode));
23476 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23478 if (issetmem)
23479 emit_move_insn (destmem, gen_lowpart (mode, value));
23480 else
23482 emit_move_insn (destmem, srcmem);
23483 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23485 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23487 emit_jump_insn (gen_jump (done_label));
23488 emit_barrier ();
23490 emit_label (label);
23491 LABEL_NUSES (label) = 1;
23494 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
23495 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
23496 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT so that we can
23497 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
23498 DONE_LABEL is a label after the whole copying sequence. The label is created
23499 on demand if *DONE_LABEL is NULL.
23500 MIN_SIZE is the minimal size of the block copied. This value gets adjusted for new
23501 bounds after the initial copies.
23503 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23504 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
23505 we will dispatch to a library call for large blocks.
23507 In pseudocode we do:
23509 if (COUNT < SIZE)
23511 Assume that SIZE is 4. Bigger sizes are handled analogously
23512 if (COUNT & 4)
23514 copy 4 bytes from SRCPTR to DESTPTR
23515 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23516 goto done_label
23518 if (!COUNT)
23519 goto done_label;
23520 copy 1 byte from SRCPTR to DESTPTR
23521 if (COUNT & 2)
23523 copy 2 bytes from SRCPTR to DESTPTR
23524 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23527 else
23529 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23530 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT - SIZE
23532 OLD_DESTPTR = DESTPTR;
23533 Align DESTPTR up to DESIRED_ALIGN
23534 SRCPTR += DESTPTR - OLD_DESTPTR
23535 COUNT -= DESTPTR - OLD_DESTPTR
23536 if (DYNAMIC_CHECK)
23537 Round COUNT down to multiple of SIZE
23538 << optional caller supplied zero size guard is here >>
23539 << optional caller supplied dynamic check is here >>
23540 << caller supplied main copy loop is here >>
23542 done_label:
23543 */
23544 static void
23545 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23546 rtx *destptr, rtx *srcptr,
23547 enum machine_mode mode,
23548 rtx value, rtx vec_value,
23549 rtx *count,
23550 rtx *done_label,
23551 int size,
23552 int desired_align,
23553 int align,
23554 unsigned HOST_WIDE_INT *min_size,
23555 bool dynamic_check,
23556 bool issetmem)
23558 rtx loop_label = NULL, label;
23559 int n;
23560 rtx modesize;
23561 int prolog_size = 0;
23562 rtx mode_value;
23564 /* Choose the proper value to copy. */
23565 if (issetmem && VECTOR_MODE_P (mode))
23566 mode_value = vec_value;
23567 else
23568 mode_value = value;
23569 gcc_assert (GET_MODE_SIZE (mode) <= size);
23571 /* See if block is big or small, handle small blocks. */
23572 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23574 int size2 = size;
23575 loop_label = gen_label_rtx ();
23577 if (!*done_label)
23578 *done_label = gen_label_rtx ();
23580 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23581 1, loop_label);
23582 size2 >>= 1;
23584 /* Handle sizes > 3. */
23585 for (;size2 > 2; size2 >>= 1)
23586 expand_small_movmem_or_setmem (destmem, srcmem,
23587 *destptr, *srcptr,
23588 value, vec_value,
23589 *count,
23590 size2, *done_label, issetmem);
23591 /* Nothing to copy? Jump to DONE_LABEL if so */
23592 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23593 1, *done_label);
23595 /* Do a byte copy. */
23596 destmem = change_address (destmem, QImode, *destptr);
23597 if (issetmem)
23598 emit_move_insn (destmem, gen_lowpart (QImode, value));
23599 else
23601 srcmem = change_address (srcmem, QImode, *srcptr);
23602 emit_move_insn (destmem, srcmem);
23605 /* Handle sizes 2 and 3. */
23606 label = ix86_expand_aligntest (*count, 2, false);
23607 destmem = change_address (destmem, HImode, *destptr);
23608 destmem = offset_address (destmem, *count, 1);
23609 destmem = offset_address (destmem, GEN_INT (-2), 2);
23610 if (issetmem)
23611 emit_move_insn (destmem, gen_lowpart (HImode, value));
23612 else
23614 srcmem = change_address (srcmem, HImode, *srcptr);
23615 srcmem = offset_address (srcmem, *count, 1);
23616 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23617 emit_move_insn (destmem, srcmem);
23620 emit_label (label);
23621 LABEL_NUSES (label) = 1;
23622 emit_jump_insn (gen_jump (*done_label));
23623 emit_barrier ();
23625 else
23626 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23627 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23629 /* Start memcpy for COUNT >= SIZE. */
23630 if (loop_label)
23632 emit_label (loop_label);
23633 LABEL_NUSES (loop_label) = 1;
23636 /* Copy first desired_align bytes. */
23637 if (!issetmem)
23638 srcmem = change_address (srcmem, mode, *srcptr);
23639 destmem = change_address (destmem, mode, *destptr);
23640 modesize = GEN_INT (GET_MODE_SIZE (mode));
23641 for (n = 0; prolog_size < desired_align - align; n++)
23643 if (issetmem)
23644 emit_move_insn (destmem, mode_value);
23645 else
23647 emit_move_insn (destmem, srcmem);
23648 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23650 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23651 prolog_size += GET_MODE_SIZE (mode);
23655 /* Copy last SIZE bytes. */
23656 destmem = offset_address (destmem, *count, 1);
23657 destmem = offset_address (destmem,
23658 GEN_INT (-size - prolog_size),
23659 GET_MODE_SIZE (mode));
23660 if (issetmem)
23661 emit_move_insn (destmem, mode_value);
23662 else
23664 srcmem = offset_address (srcmem, *count, 1);
23665 srcmem = offset_address (srcmem,
23666 GEN_INT (-size - prolog_size),
23667 GET_MODE_SIZE (mode));
23668 emit_move_insn (destmem, srcmem);
23670 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23672 destmem = offset_address (destmem, modesize, 1);
23673 if (issetmem)
23674 emit_move_insn (destmem, mode_value);
23675 else
23677 srcmem = offset_address (srcmem, modesize, 1);
23678 emit_move_insn (destmem, srcmem);
23682 /* Align destination. */
23683 if (desired_align > 1 && desired_align > align)
23685 rtx saveddest = *destptr;
23687 gcc_assert (desired_align <= size);
23688 /* Align destptr up, place it to new register. */
23689 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23690 GEN_INT (prolog_size),
23691 NULL_RTX, 1, OPTAB_DIRECT);
23692 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23693 GEN_INT (-desired_align),
23694 *destptr, 1, OPTAB_DIRECT);
23695 /* See how many bytes we skipped. */
23696 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23697 *destptr,
23698 saveddest, 1, OPTAB_DIRECT);
23699 /* Adjust srcptr and count. */
23700 if (!issetmem)
23701 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
23702 *srcptr, 1, OPTAB_DIRECT);
23703 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23704 saveddest, *count, 1, OPTAB_DIRECT);
23705 /* We copied at most size + prolog_size. */
23706 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23707 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23708 else
23709 *min_size = 0;
23711 /* Our loops always round down the block size, but for dispatch to the library
23712 we need the precise value. */
23713 if (dynamic_check)
23714 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23715 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23717 else
23719 gcc_assert (prolog_size == 0);
23720 /* Decrease count, so we won't end up copying last word twice. */
23721 if (!CONST_INT_P (*count))
23722 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23723 constm1_rtx, *count, 1, OPTAB_DIRECT);
23724 else
23725 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23726 if (*min_size)
23727 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
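/* To make the pseudocode above concrete (hypothetical values): with a main
   chunk SIZE of 8 and a runtime COUNT of 7, the small-block path tests
   COUNT & 4 and emits two possibly overlapping 4-byte moves covering bytes
   0..3 and COUNT-4..COUNT-1 (here 3..6), then jumps to DONE_LABEL, so the
   main copy loop never runs for such a block.  */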
23732 /* This function is like the previous one, except here we know how many bytes
23733 need to be copied. That allows us to update alignment not only of DST, which
23734 is returned, but also of SRC, which is passed as a pointer for that
23735 reason. */
23736 static rtx
23737 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23738 rtx srcreg, rtx value, rtx vec_value,
23739 int desired_align, int align_bytes,
23740 bool issetmem)
23742 rtx src = NULL;
23743 rtx orig_dst = dst;
23744 rtx orig_src = NULL;
23745 int piece_size = 1;
23746 int copied_bytes = 0;
23748 if (!issetmem)
23750 gcc_assert (srcp != NULL);
23751 src = *srcp;
23752 orig_src = src;
23755 for (piece_size = 1;
23756 piece_size <= desired_align && copied_bytes < align_bytes;
23757 piece_size <<= 1)
23759 if (align_bytes & piece_size)
23761 if (issetmem)
23763 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23764 dst = emit_memset (dst, destreg, vec_value, piece_size);
23765 else
23766 dst = emit_memset (dst, destreg, value, piece_size);
23768 else
23769 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23770 copied_bytes += piece_size;
23773 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23774 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23775 if (MEM_SIZE_KNOWN_P (orig_dst))
23776 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23778 if (!issetmem)
23780 int src_align_bytes = get_mem_align_offset (src, desired_align
23781 * BITS_PER_UNIT);
23782 if (src_align_bytes >= 0)
23783 src_align_bytes = desired_align - src_align_bytes;
23784 if (src_align_bytes >= 0)
23786 unsigned int src_align;
23787 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23789 if ((src_align_bytes & (src_align - 1))
23790 == (align_bytes & (src_align - 1)))
23791 break;
23793 if (src_align > (unsigned int) desired_align)
23794 src_align = desired_align;
23795 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
23796 set_mem_align (src, src_align * BITS_PER_UNIT);
23798 if (MEM_SIZE_KNOWN_P (orig_src))
23799 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
23800 *srcp = src;
23803 return dst;
23806 /* Return true if ALG can be used in current context.
23807 Assume we expand memset if MEMSET is true. */
23808 static bool
23809 alg_usable_p (enum stringop_alg alg, bool memset)
23811 if (alg == no_stringop)
23812 return false;
23813 if (alg == vector_loop)
23814 return TARGET_SSE || TARGET_AVX;
23815 /* Algorithms using the rep prefix want at least edi and ecx;
23816 additionally, memset wants eax and memcpy wants esi. Don't
23817 consider such algorithms if the user has appropriated those
23818 registers for their own purposes. */
23819 if (alg == rep_prefix_1_byte
23820 || alg == rep_prefix_4_byte
23821 || alg == rep_prefix_8_byte)
23822 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
23823 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
23824 return true;
23827 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
23828 static enum stringop_alg
23829 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
23830 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
23831 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
23833 const struct stringop_algs * algs;
23834 bool optimize_for_speed;
23835 int max = 0;
23836 const struct processor_costs *cost;
23837 int i;
23838 bool any_alg_usable_p = false;
23840 *noalign = false;
23841 *dynamic_check = -1;
23843 /* Even if the string operation call is cold, we still might spend a lot
23844 of time processing large blocks. */
23845 if (optimize_function_for_size_p (cfun)
23846 || (optimize_insn_for_size_p ()
23847 && (max_size < 256
23848 || (expected_size != -1 && expected_size < 256))))
23849 optimize_for_speed = false;
23850 else
23851 optimize_for_speed = true;
23853 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
23854 if (memset)
23855 algs = &cost->memset[TARGET_64BIT != 0];
23856 else
23857 algs = &cost->memcpy[TARGET_64BIT != 0];
23859 /* Find the maximal size for which an inline (non-libcall) algorithm is defined. */
23860 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23862 enum stringop_alg candidate = algs->size[i].alg;
23863 bool usable = alg_usable_p (candidate, memset);
23864 any_alg_usable_p |= usable;
23866 if (candidate != libcall && candidate && usable)
23867 max = algs->size[i].max;
23870 /* If the expected size is not known but the maximal size is small enough
23871 that the inline version is a win, set the expected size into
23872 the range. */
23873 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
23874 && expected_size == -1)
23875 expected_size = min_size / 2 + max_size / 2;
23877 /* If the user specified the algorithm, honor it if possible. */
23878 if (ix86_stringop_alg != no_stringop
23879 && alg_usable_p (ix86_stringop_alg, memset))
23880 return ix86_stringop_alg;
23881 /* rep; movq or rep; movl is the smallest variant. */
23882 else if (!optimize_for_speed)
23884 *noalign = true;
23885 if (!count || (count & 3) || (memset && !zero_memset))
23886 return alg_usable_p (rep_prefix_1_byte, memset)
23887 ? rep_prefix_1_byte : loop_1_byte;
23888 else
23889 return alg_usable_p (rep_prefix_4_byte, memset)
23890 ? rep_prefix_4_byte : loop;
23892 /* Very tiny blocks are best handled via the loop; REP is expensive to
23893 set up. */
23894 else if (expected_size != -1 && expected_size < 4)
23895 return loop_1_byte;
23896 else if (expected_size != -1)
23898 enum stringop_alg alg = libcall;
23899 bool alg_noalign = false;
23900 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23902 /* We get here if the algorithms that were not libcall-based
23903 were rep-prefix based and we are unable to use rep prefixes
23904 based on global register usage. Break out of the loop and
23905 use the heuristic below. */
23906 if (algs->size[i].max == 0)
23907 break;
23908 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
23910 enum stringop_alg candidate = algs->size[i].alg;
23912 if (candidate != libcall && alg_usable_p (candidate, memset))
23914 alg = candidate;
23915 alg_noalign = algs->size[i].noalign;
23917 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
23918 last non-libcall inline algorithm. */
23919 if (TARGET_INLINE_ALL_STRINGOPS)
23921 /* When the current size is best copied by a libcall,
23922 but we are still forced to inline, run the heuristic below
23923 that will pick code for medium-sized blocks. */
23924 if (alg != libcall)
23926 *noalign = alg_noalign;
23927 return alg;
23929 break;
23931 else if (alg_usable_p (candidate, memset))
23933 *noalign = algs->size[i].noalign;
23934 return candidate;
23939 /* When asked to inline the call anyway, try to pick a meaningful choice.
23940 We look for the maximal size of block that is faster to copy by hand and
23941 take blocks of at most that size, guessing that the average size will
23942 be roughly half of the block.
23944 If this turns out to be bad, we might simply specify the preferred
23945 choice in ix86_costs. */
23946 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23947 && (algs->unknown_size == libcall
23948 || !alg_usable_p (algs->unknown_size, memset)))
23950 enum stringop_alg alg;
23952 /* If there aren't any usable algorithms, then recursing on
23953 smaller sizes isn't going to find anything. Just return the
23954 simple byte-at-a-time copy loop. */
23955 if (!any_alg_usable_p)
23957 /* Pick something reasonable. */
23958 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23959 *dynamic_check = 128;
23960 return loop_1_byte;
23962 if (max <= 0)
23963 max = 4096;
23964 alg = decide_alg (count, max / 2, min_size, max_size, memset,
23965 zero_memset, dynamic_check, noalign);
23966 gcc_assert (*dynamic_check == -1);
23967 gcc_assert (alg != libcall);
23968 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23969 *dynamic_check = max;
23970 return alg;
23972 return (alg_usable_p (algs->unknown_size, memset)
23973 ? algs->unknown_size : libcall);
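/* As a purely hypothetical example of how the cost tables drive the choice
   above, an entry such as

     {libcall, {{256, loop, false}, {-1, libcall, false}}}

   would make decide_alg pick the simple loop whenever the expected size is
   at most 256 bytes, and otherwise fall back to a library call or, with
   -minline-all-stringops, to the recursive heuristic above.  */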
23976 /* Decide on alignment. We know that the operand is already aligned to ALIGN
23977 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
23978 static int
23979 decide_alignment (int align,
23980 enum stringop_alg alg,
23981 int expected_size,
23982 enum machine_mode move_mode)
23984 int desired_align = 0;
23986 gcc_assert (alg != no_stringop);
23988 if (alg == libcall)
23989 return 0;
23990 if (move_mode == VOIDmode)
23991 return 0;
23993 desired_align = GET_MODE_SIZE (move_mode);
23994 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
23995 copying a whole cache line at once. */
23996 if (TARGET_PENTIUMPRO
23997 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
23998 desired_align = 8;
24000 if (optimize_size)
24001 desired_align = 1;
24002 if (desired_align < align)
24003 desired_align = align;
24004 if (expected_size != -1 && expected_size < 4)
24005 desired_align = align;
24007 return desired_align;
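/* Illustrative outcomes of the function above: vector_loop with a V16QImode
   MOVE_MODE requests 16-byte destination alignment, rep_prefix_4_byte on
   TARGET_PENTIUMPRO bumps the request to 8 bytes, and optimizing for size
   or a tiny expected size drops the request back to ALIGN.  */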
24011 /* Helper function for memset. For the QImode value 0xXY produce
24012 0xXYXYXYXY of the width specified by MODE. This is essentially
24013 a multiplication by 0x01010101, but we can do slightly better than
24014 synth_mult by unwinding the sequence by hand on CPUs with
24015 slow multiply. */
24016 static rtx
24017 promote_duplicated_reg (enum machine_mode mode, rtx val)
24019 enum machine_mode valmode = GET_MODE (val);
24020 rtx tmp;
24021 int nops = mode == DImode ? 3 : 2;
24023 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24024 if (val == const0_rtx)
24025 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24026 if (CONST_INT_P (val))
24028 HOST_WIDE_INT v = INTVAL (val) & 255;
24030 v |= v << 8;
24031 v |= v << 16;
24032 if (mode == DImode)
24033 v |= (v << 16) << 16;
24034 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24037 if (valmode == VOIDmode)
24038 valmode = QImode;
24039 if (valmode != QImode)
24040 val = gen_lowpart (QImode, val);
24041 if (mode == QImode)
24042 return val;
24043 if (!TARGET_PARTIAL_REG_STALL)
24044 nops--;
24045 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24046 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24047 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24048 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24050 rtx reg = convert_modes (mode, QImode, val, true);
24051 tmp = promote_duplicated_reg (mode, const1_rtx);
24052 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24053 OPTAB_DIRECT);
24055 else
24057 rtx reg = convert_modes (mode, QImode, val, true);
24059 if (!TARGET_PARTIAL_REG_STALL)
24060 if (mode == SImode)
24061 emit_insn (gen_movsi_insv_1 (reg, reg));
24062 else
24063 emit_insn (gen_movdi_insv_1 (reg, reg));
24064 else
24066 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24067 NULL, 1, OPTAB_DIRECT);
24068 reg =
24069 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24071 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24072 NULL, 1, OPTAB_DIRECT);
24073 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24074 if (mode == SImode)
24075 return reg;
24076 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24077 NULL, 1, OPTAB_DIRECT);
24078 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24079 return reg;
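/* Tracing the shift/ior path above for SImode and a hypothetical VAL of
   0xAB: 0x000000AB becomes 0x0000ABAB after the first shift-by-8 and ior,
   and 0xABABABAB after the shift-by-16 and ior, which equals
   VAL * 0x01010101 without using the multiplier.  */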
24083 /* Duplicate the value VAL using promote_duplicated_reg into the maximal size that
24084 will be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
24085 raising the alignment from ALIGN to DESIRED_ALIGN. */
24086 static rtx
24087 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24088 int align)
24090 rtx promoted_val;
24092 if (TARGET_64BIT
24093 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24094 promoted_val = promote_duplicated_reg (DImode, val);
24095 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24096 promoted_val = promote_duplicated_reg (SImode, val);
24097 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24098 promoted_val = promote_duplicated_reg (HImode, val);
24099 else
24100 promoted_val = val;
24102 return promoted_val;
24105 /* Expand a string move (memcpy) or store (memset) operation. Use i386 string
24106 operations when profitable. The code depends upon architecture, block size
24107 and alignment, but always has one of the following overall structures:
24109 Aligned move sequence:
24111 1) Prologue guard: Conditional that jumps up to epilogues for small
24112 blocks that can be handled by epilogue alone. This is faster
24113 but also needed for correctness, since the prologue assumes the block
24114 is larger than the desired alignment.
24116 Optional dynamic check for size and libcall for large
24117 blocks is emitted here too, with -minline-stringops-dynamically.
24119 2) Prologue: copy first few bytes in order to get destination
24120 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24121 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24122 copied. We emit either a jump tree on power of two sized
24123 blocks, or a byte loop.
24125 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24126 with specified algorithm.
24128 4) Epilogue: code copying tail of the block that is too small to be
24129 handled by main body (or up to size guarded by prologue guard).
24131 Misaligned move sequence
24133 1) misaligned move prologue/epilogue containing:
24134 a) Prologue handling small memory blocks and jumping to done_label
24135 (skipped if blocks are known to be large enough)
24136 b) Single move copying first DESIRED_ALIGN-ALIGN bytes if alignment is
24137 needed by single possibly misaligned move
24138 (skipped if alignment is not needed)
24139 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24141 2) Zero size guard dispatching to done_label, if needed
24143 3) Dispatch to a library call, if needed.
24145 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24146 with specified algorithm. */
24147 bool
24148 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24149 rtx align_exp, rtx expected_align_exp,
24150 rtx expected_size_exp, rtx min_size_exp,
24151 rtx max_size_exp, rtx probable_max_size_exp,
24152 bool issetmem)
24154 rtx destreg;
24155 rtx srcreg = NULL;
24156 rtx label = NULL;
24157 rtx tmp;
24158 rtx jump_around_label = NULL;
24159 HOST_WIDE_INT align = 1;
24160 unsigned HOST_WIDE_INT count = 0;
24161 HOST_WIDE_INT expected_size = -1;
24162 int size_needed = 0, epilogue_size_needed;
24163 int desired_align = 0, align_bytes = 0;
24164 enum stringop_alg alg;
24165 rtx promoted_val = NULL;
24166 rtx vec_promoted_val = NULL;
24167 bool force_loopy_epilogue = false;
24168 int dynamic_check;
24169 bool need_zero_guard = false;
24170 bool noalign;
24171 enum machine_mode move_mode = VOIDmode;
24172 int unroll_factor = 1;
24173 /* TODO: Once value ranges are available, fill in proper data. */
24174 unsigned HOST_WIDE_INT min_size = 0;
24175 unsigned HOST_WIDE_INT max_size = -1;
24176 unsigned HOST_WIDE_INT probable_max_size = -1;
24177 bool misaligned_prologue_used = false;
24179 if (CONST_INT_P (align_exp))
24180 align = INTVAL (align_exp);
24181 /* i386 can do misaligned accesses at a reasonable increase in cost. */
24182 if (CONST_INT_P (expected_align_exp)
24183 && INTVAL (expected_align_exp) > align)
24184 align = INTVAL (expected_align_exp);
24185 /* ALIGN is the minimum of destination and source alignment, but we care here
24186 just about destination alignment. */
24187 else if (!issetmem
24188 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24189 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24191 if (CONST_INT_P (count_exp))
24193 min_size = max_size = probable_max_size = count = expected_size
24194 = INTVAL (count_exp);
24195 /* When COUNT is 0, there is nothing to do. */
24196 if (!count)
24197 return true;
24199 else
24201 if (min_size_exp)
24202 min_size = INTVAL (min_size_exp);
24203 if (max_size_exp)
24204 max_size = INTVAL (max_size_exp);
24205 if (probable_max_size_exp)
24206 probable_max_size = INTVAL (probable_max_size_exp);
24207 if (CONST_INT_P (expected_size_exp))
24208 expected_size = INTVAL (expected_size_exp);
24211 /* Make sure we don't need to care about overflow later on. */
24212 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24213 return false;
24215 /* Step 0: Decide on preferred algorithm, desired alignment and
24216 size of chunks to be copied by main loop. */
24217 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24218 issetmem,
24219 issetmem && val_exp == const0_rtx,
24220 &dynamic_check, &noalign);
24221 if (alg == libcall)
24222 return false;
24223 gcc_assert (alg != no_stringop);
24225 /* For now the vector version of memset is generated only for memory zeroing, as
24226 creating the promoted vector value is very cheap in this case. */
24227 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24228 alg = unrolled_loop;
24230 if (!count)
24231 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24232 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24233 if (!issetmem)
24234 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24236 unroll_factor = 1;
24237 move_mode = word_mode;
24238 switch (alg)
24240 case libcall:
24241 case no_stringop:
24242 case last_alg:
24243 gcc_unreachable ();
24244 case loop_1_byte:
24245 need_zero_guard = true;
24246 move_mode = QImode;
24247 break;
24248 case loop:
24249 need_zero_guard = true;
24250 break;
24251 case unrolled_loop:
24252 need_zero_guard = true;
24253 unroll_factor = (TARGET_64BIT ? 4 : 2);
24254 break;
24255 case vector_loop:
24256 need_zero_guard = true;
24257 unroll_factor = 4;
24258 /* Find the widest supported mode. */
24259 move_mode = word_mode;
24260 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24261 != CODE_FOR_nothing)
24262 move_mode = GET_MODE_WIDER_MODE (move_mode);
24264 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24265 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24266 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24268 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24269 move_mode = mode_for_vector (word_mode, nunits);
24270 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24271 move_mode = word_mode;
24273 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24274 break;
24275 case rep_prefix_8_byte:
24276 move_mode = DImode;
24277 break;
24278 case rep_prefix_4_byte:
24279 move_mode = SImode;
24280 break;
24281 case rep_prefix_1_byte:
24282 move_mode = QImode;
24283 break;
24285 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24286 epilogue_size_needed = size_needed;
24288 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24289 if (!TARGET_ALIGN_STRINGOPS || noalign)
24290 align = desired_align;
24292 /* Step 1: Prologue guard. */
24294 /* Alignment code needs count to be in register. */
24295 if (CONST_INT_P (count_exp) && desired_align > align)
24297 if (INTVAL (count_exp) > desired_align
24298 && INTVAL (count_exp) > size_needed)
24300 align_bytes
24301 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24302 if (align_bytes <= 0)
24303 align_bytes = 0;
24304 else
24305 align_bytes = desired_align - align_bytes;
24307 if (align_bytes == 0)
24308 count_exp = force_reg (counter_mode (count_exp), count_exp);
24310 gcc_assert (desired_align >= 1 && align >= 1);
24312 /* Misaligned move sequences handle both the prologue and the epilogue at once.
24313 Default code generation results in smaller code for large alignments
24314 and also avoids redundant work when sizes are known precisely. */
24315 misaligned_prologue_used
24316 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24317 && MAX (desired_align, epilogue_size_needed) <= 32
24318 && desired_align <= epilogue_size_needed
24319 && ((desired_align > align && !align_bytes)
24320 || (!count && epilogue_size_needed > 1)));
24322 /* Do the cheap promotion to allow better CSE across the
24323 main loop and epilogue (i.e. one load of the big constant in
24324 front of all the code).
24325 For now the misaligned move sequences do not have a fast path
24326 without broadcasting. */
24327 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24329 if (alg == vector_loop)
24331 gcc_assert (val_exp == const0_rtx);
24332 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24333 promoted_val = promote_duplicated_reg_to_size (val_exp,
24334 GET_MODE_SIZE (word_mode),
24335 desired_align, align);
24337 else
24339 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24340 desired_align, align);
24343 /* Misaligned move sequences handle both prologues and epilogues at once.
24344 Default code generation results in smaller code for large alignments and
24345 also avoids redundant work when sizes are known precisely. */
24346 if (misaligned_prologue_used)
24348 /* The misaligned move prologue handles small blocks by itself. */
24349 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24350 (dst, src, &destreg, &srcreg,
24351 move_mode, promoted_val, vec_promoted_val,
24352 &count_exp,
24353 &jump_around_label,
24354 desired_align < align
24355 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24356 desired_align, align, &min_size, dynamic_check, issetmem);
24357 if (!issetmem)
24358 src = change_address (src, BLKmode, srcreg);
24359 dst = change_address (dst, BLKmode, destreg);
24360 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24361 epilogue_size_needed = 0;
24362 if (need_zero_guard && !min_size)
24364 /* It is possible that we copied enough so the main loop will not
24365 execute. */
24366 gcc_assert (size_needed > 1);
24367 if (jump_around_label == NULL_RTX)
24368 jump_around_label = gen_label_rtx ();
24369 emit_cmp_and_jump_insns (count_exp,
24370 GEN_INT (size_needed),
24371 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24372 if (expected_size == -1
24373 || expected_size < (desired_align - align) / 2 + size_needed)
24374 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24375 else
24376 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24379 /* Ensure that alignment prologue won't copy past end of block. */
24380 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24382 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24383 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24384 Make sure it is power of 2. */
24385 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24387 /* To improve performance of small blocks, we jump around the VAL
24388 promoting code. This means that if the promoted VAL is not constant,
24389 we might not use it in the epilogue and have to use the byte
24390 loop variant. */
24391 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24392 force_loopy_epilogue = true;
24393 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24394 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24396 /* If the main algorithm works on QImode, no epilogue is needed.
24397 For small sizes just don't align anything. */
24398 if (size_needed == 1)
24399 desired_align = align;
24400 else
24401 goto epilogue;
24403 else if (!count
24404 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24406 label = gen_label_rtx ();
24407 emit_cmp_and_jump_insns (count_exp,
24408 GEN_INT (epilogue_size_needed),
24409 LTU, 0, counter_mode (count_exp), 1, label);
24410 if (expected_size == -1 || expected_size < epilogue_size_needed)
24411 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24412 else
24413 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24417 /* Emit code to decide at runtime whether a library call or inline code should be
24418 used. */
24419 if (dynamic_check != -1)
24421 if (!issetmem && CONST_INT_P (count_exp))
24423 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24425 emit_block_move_via_libcall (dst, src, count_exp, false);
24426 count_exp = const0_rtx;
24427 goto epilogue;
24430 else
24432 rtx hot_label = gen_label_rtx ();
24433 if (jump_around_label == NULL_RTX)
24434 jump_around_label = gen_label_rtx ();
24435 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24436 LEU, 0, counter_mode (count_exp),
24437 1, hot_label);
24438 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24439 if (issetmem)
24440 set_storage_via_libcall (dst, count_exp, val_exp, false);
24441 else
24442 emit_block_move_via_libcall (dst, src, count_exp, false);
24443 emit_jump (jump_around_label);
24444 emit_label (hot_label);
24448 /* Step 2: Alignment prologue. */
24449 /* Do the expensive promotion once we branched off the small blocks. */
24450 if (issetmem && !promoted_val)
24451 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24452 desired_align, align);
24454 if (desired_align > align && !misaligned_prologue_used)
24456 if (align_bytes == 0)
24458 /* Except for the first move in the prologue, we no longer know
24459 the constant offset in the aliasing info. It doesn't seem worth
24460 the pain to maintain it for the first move, so throw away
24461 the info early. */
24462 dst = change_address (dst, BLKmode, destreg);
24463 if (!issetmem)
24464 src = change_address (src, BLKmode, srcreg);
24465 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24466 promoted_val, vec_promoted_val,
24467 count_exp, align, desired_align,
24468 issetmem);
24469 /* At most desired_align - align bytes are copied. */
24470 if (min_size < (unsigned)(desired_align - align))
24471 min_size = 0;
24472 else
24473 min_size -= desired_align - align;
24475 else
24477 /* If we know how many bytes need to be stored before dst is
24478 sufficiently aligned, maintain aliasing info accurately. */
24479 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24480 srcreg,
24481 promoted_val,
24482 vec_promoted_val,
24483 desired_align,
24484 align_bytes,
24485 issetmem);
24487 count_exp = plus_constant (counter_mode (count_exp),
24488 count_exp, -align_bytes);
24489 count -= align_bytes;
24490 min_size -= align_bytes;
24491 max_size -= align_bytes;
24493 if (need_zero_guard
24494 && !min_size
24495 && (count < (unsigned HOST_WIDE_INT) size_needed
24496 || (align_bytes == 0
24497 && count < ((unsigned HOST_WIDE_INT) size_needed
24498 + desired_align - align))))
24500 /* It is possible that we copied enough so the main loop will not
24501 execute. */
24502 gcc_assert (size_needed > 1);
24503 if (label == NULL_RTX)
24504 label = gen_label_rtx ();
24505 emit_cmp_and_jump_insns (count_exp,
24506 GEN_INT (size_needed),
24507 LTU, 0, counter_mode (count_exp), 1, label);
24508 if (expected_size == -1
24509 || expected_size < (desired_align - align) / 2 + size_needed)
24510 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24511 else
24512 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24515 if (label && size_needed == 1)
24517 emit_label (label);
24518 LABEL_NUSES (label) = 1;
24519 label = NULL;
24520 epilogue_size_needed = 1;
24521 if (issetmem)
24522 promoted_val = val_exp;
24524 else if (label == NULL_RTX && !misaligned_prologue_used)
24525 epilogue_size_needed = size_needed;
24527 /* Step 3: Main loop. */
24529 switch (alg)
24531 case libcall:
24532 case no_stringop:
24533 case last_alg:
24534 gcc_unreachable ();
24535 case loop_1_byte:
24536 case loop:
24537 case unrolled_loop:
24538 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24539 count_exp, move_mode, unroll_factor,
24540 expected_size, issetmem);
24541 break;
24542 case vector_loop:
24543 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24544 vec_promoted_val, count_exp, move_mode,
24545 unroll_factor, expected_size, issetmem);
24546 break;
24547 case rep_prefix_8_byte:
24548 case rep_prefix_4_byte:
24549 case rep_prefix_1_byte:
24550 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24551 val_exp, count_exp, move_mode, issetmem);
24552 break;
24554 /* Properly adjust the offsets of the src and dest memory for aliasing. */
24555 if (CONST_INT_P (count_exp))
24557 if (!issetmem)
24558 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24559 (count / size_needed) * size_needed);
24560 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24561 (count / size_needed) * size_needed);
24563 else
24565 if (!issetmem)
24566 src = change_address (src, BLKmode, srcreg);
24567 dst = change_address (dst, BLKmode, destreg);
24570 /* Step 4: Epilogue to copy the remaining bytes. */
24571 epilogue:
24572 if (label)
24574 /* When the main loop is done, COUNT_EXP might hold original count,
24575 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
24576 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
24577 bytes. Compensate if needed. */
24579 if (size_needed < epilogue_size_needed)
24581 tmp =
24582 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24583 GEN_INT (size_needed - 1), count_exp, 1,
24584 OPTAB_DIRECT);
24585 if (tmp != count_exp)
24586 emit_move_insn (count_exp, tmp);
24588 emit_label (label);
24589 LABEL_NUSES (label) = 1;
24592 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24594 if (force_loopy_epilogue)
24595 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24596 epilogue_size_needed);
24597 else
24599 if (issetmem)
24600 expand_setmem_epilogue (dst, destreg, promoted_val,
24601 vec_promoted_val, count_exp,
24602 epilogue_size_needed);
24603 else
24604 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24605 epilogue_size_needed);
24608 if (jump_around_label)
24609 emit_label (jump_around_label);
24610 return true;
24614 /* Expand the appropriate insns for doing strlen if not just doing
24615 repnz; scasb
24617 out = result, initialized with the start address
24618 align_rtx = alignment of the address.
24619 scratch = scratch register, initialized with the start address when
24620 not aligned, otherwise undefined
24622 This is just the body. It needs the initializations mentioned above and
24623 some address computing at the end. These things are done in i386.md. */
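/* As a rough, illustrative sketch (register names are placeholders), the body
   emitted below corresponds to:
       cmpb $0, (%out); je .done; inc %out        ; up to three times, until
                                                  ; %out is 4-byte aligned
     .aligned:
       mov  (%out), %scratch
       add  $4, %out
       lea  -0x01010101(%scratch), %tmp
       not  %scratch
       and  %scratch, %tmp
       and  $0x80808080, %tmp
       je   .aligned                              ; loop while no byte was zero
       ... then locate the zero byte and back up %out by up to 3 ...  */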
24625 static void
24626 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24628 int align;
24629 rtx tmp;
24630 rtx align_2_label = NULL_RTX;
24631 rtx align_3_label = NULL_RTX;
24632 rtx align_4_label = gen_label_rtx ();
24633 rtx end_0_label = gen_label_rtx ();
24634 rtx mem;
24635 rtx tmpreg = gen_reg_rtx (SImode);
24636 rtx scratch = gen_reg_rtx (SImode);
24637 rtx cmp;
24639 align = 0;
24640 if (CONST_INT_P (align_rtx))
24641 align = INTVAL (align_rtx);
24643 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24645 /* Is there a known alignment and is it less than 4? */
24646 if (align < 4)
24648 rtx scratch1 = gen_reg_rtx (Pmode);
24649 emit_move_insn (scratch1, out);
24650 /* Is there a known alignment and is it not 2? */
24651 if (align != 2)
24653 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24654 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24656 /* Leave just the 3 lower bits. */
24657 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24658 NULL_RTX, 0, OPTAB_WIDEN);
24660 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24661 Pmode, 1, align_4_label);
24662 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24663 Pmode, 1, align_2_label);
24664 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24665 Pmode, 1, align_3_label);
24667 else
24669 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24670 check whether it is aligned to a 4-byte boundary. */
24672 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24673 NULL_RTX, 0, OPTAB_WIDEN);
24675 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24676 Pmode, 1, align_4_label);
24679 mem = change_address (src, QImode, out);
24681 /* Now compare the bytes. */
24683 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
24684 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24685 QImode, 1, end_0_label);
24687 /* Increment the address. */
24688 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24690 /* Not needed with an alignment of 2 */
24691 if (align != 2)
24693 emit_label (align_2_label);
24695 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24696 end_0_label);
24698 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24700 emit_label (align_3_label);
24703 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24704 end_0_label);
24706 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24709 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
24710 align this loop: it only makes the program bigger and does not
24711 help speed. */
24712 emit_label (align_4_label);
24714 mem = change_address (src, SImode, out);
24715 emit_move_insn (scratch, mem);
24716 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24718 /* This formula yields a nonzero result iff one of the bytes is zero.
24719 This saves three branches inside the loop and many cycles. */
24721 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24722 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24723 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24724 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24725 gen_int_mode (0x80808080, SImode)));
24726 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24727 align_4_label);
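/* In other words, for the word X just loaded the sequence above computes
   (X - 0x01010101) & ~X & 0x80808080, which is nonzero exactly when some
   byte of X is zero.  For a hypothetical X of 0x11003344 this gives
   0x0FFF3243 & 0xEEFFCCBB & 0x80808080 == 0x00800000, so the loop falls
   through instead of branching back to align_4_label.  */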
24729 if (TARGET_CMOVE)
24731 rtx reg = gen_reg_rtx (SImode);
24732 rtx reg2 = gen_reg_rtx (Pmode);
24733 emit_move_insn (reg, tmpreg);
24734 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24736 /* If zero is not in the first two bytes, move two bytes forward. */
24737 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24738 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24739 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24740 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24741 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24742 reg,
24743 tmpreg)));
24744 /* Emit lea manually to avoid clobbering of flags. */
24745 emit_insn (gen_rtx_SET (SImode, reg2,
24746 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24748 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24749 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24750 emit_insn (gen_rtx_SET (VOIDmode, out,
24751 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24752 reg2,
24753 out)));
24755 else
24757 rtx end_2_label = gen_label_rtx ();
24758 /* Is zero in the first two bytes? */
24760 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24761 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24762 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24763 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24764 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24765 pc_rtx);
24766 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24767 JUMP_LABEL (tmp) = end_2_label;
24769 /* Not in the first two. Move two bytes forward. */
24770 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24771 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24773 emit_label (end_2_label);
24777 /* Avoid branch in fixing the byte. */
24778 tmpreg = gen_lowpart (QImode, tmpreg);
24779 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24780 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24781 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24782 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
24784 emit_label (end_0_label);
24787 /* Expand strlen. */
24789 bool
24790 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24792 rtx addr, scratch1, scratch2, scratch3, scratch4;
24794 /* The generic case of the strlen expander is long. Avoid expanding
24795 it unless TARGET_INLINE_ALL_STRINGOPS. */
24797 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24798 && !TARGET_INLINE_ALL_STRINGOPS
24799 && !optimize_insn_for_size_p ()
24800 && (!CONST_INT_P (align) || INTVAL (align) < 4))
24801 return false;
24803 addr = force_reg (Pmode, XEXP (src, 0));
24804 scratch1 = gen_reg_rtx (Pmode);
24806 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24807 && !optimize_insn_for_size_p ())
24809 /* Well it seems that some optimizer does not combine a call like
24810 foo(strlen(bar), strlen(bar));
24811 when the move and the subtraction are done here. It does calculate
24812 the length just once when these instructions are done inside of
24813 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
24814 often used and I use one fewer register for the lifetime of
24815 output_strlen_unroll() this is better. */
24817 emit_move_insn (out, addr);
24819 ix86_expand_strlensi_unroll_1 (out, src, align);
24821 /* strlensi_unroll_1 returns the address of the zero at the end of
24822 the string, like memchr(), so compute the length by subtracting
24823 the start address. */
24824 emit_insn (ix86_gen_sub3 (out, out, addr));
24826 else
24828 rtx unspec;
24830 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24831 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24832 return false;
24834 scratch2 = gen_reg_rtx (Pmode);
24835 scratch3 = gen_reg_rtx (Pmode);
24836 scratch4 = force_reg (Pmode, constm1_rtx);
24838 emit_move_insn (scratch3, addr);
24839 eoschar = force_reg (QImode, eoschar);
24841 src = replace_equiv_address_nv (src, scratch3);
24843 /* If .md starts supporting :P, this can be done in .md. */
24844 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
24845 scratch4), UNSPEC_SCAS);
24846 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
24847 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
24848 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
24850 return true;
24853 /* For a given symbol (function), construct code to compute the address of
24854 its PLT entry in the large x86-64 PIC model. */
24855 static rtx
24856 construct_plt_address (rtx symbol)
24858 rtx tmp, unspec;
24860 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
24861 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
24862 gcc_assert (Pmode == DImode);
24864 tmp = gen_reg_rtx (Pmode);
24865 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
24867 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
24868 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
24869 return tmp;
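/* Schematically (illustrative only; the actual register choice depends on
   register allocation and the PIC register setup), the RTL built above
   corresponds to something like:
       movabs $symbol@PLTOFF, %tmp
       add    %pic_base, %tmp
   i.e. the PLT entry address is the symbol's @PLTOFF offset plus the
   PIC base register.  */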
24873 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
24874 rtx callarg2,
24875 rtx pop, bool sibcall)
24877 unsigned int const cregs_size
24878 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
24879 rtx vec[3 + cregs_size];
24880 rtx use = NULL, call;
24881 unsigned int vec_len = 0;
24883 if (pop == const0_rtx)
24884 pop = NULL;
24885 gcc_assert (!TARGET_64BIT || !pop);
24887 if (TARGET_MACHO && !TARGET_64BIT)
24889 #if TARGET_MACHO
24890 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
24891 fnaddr = machopic_indirect_call_target (fnaddr);
24892 #endif
24894 else
24896 /* Static functions and indirect calls don't need the pic register. */
24897 if (flag_pic
24898 && (!TARGET_64BIT
24899 || (ix86_cmodel == CM_LARGE_PIC
24900 && DEFAULT_ABI != MS_ABI))
24901 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24902 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
24903 use_reg (&use, pic_offset_table_rtx);
24906 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
24908 rtx al = gen_rtx_REG (QImode, AX_REG);
24909 emit_move_insn (al, callarg2);
24910 use_reg (&use, al);
24913 if (ix86_cmodel == CM_LARGE_PIC
24914 && !TARGET_PECOFF
24915 && MEM_P (fnaddr)
24916 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24917 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
24918 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
24919 else if (sibcall
24920 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
24921 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
24923 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
24924 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
24927 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
24928 if (retval)
24929 call = gen_rtx_SET (VOIDmode, retval, call);
24930 vec[vec_len++] = call;
24932 if (pop)
24934 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
24935 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
24936 vec[vec_len++] = pop;
24939 if (TARGET_64BIT_MS_ABI
24940 && (!callarg2 || INTVAL (callarg2) != -2))
24942 unsigned i;
24944 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
24945 UNSPEC_MS_TO_SYSV_CALL);
24947 for (i = 0; i < cregs_size; i++)
24949 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
24950 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
24952 vec[vec_len++]
24953 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno));
24957 if (vec_len > 1)
24958 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
24959 call = emit_call_insn (call);
24960 if (use)
24961 CALL_INSN_FUNCTION_USAGE (call) = use;
24963 return call;
24966 /* Output the assembly for a call instruction. */
24968 const char *
24969 ix86_output_call_insn (rtx insn, rtx call_op)
24971 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
24972 bool seh_nop_p = false;
24973 const char *xasm;
24975 if (SIBLING_CALL_P (insn))
24977 if (direct_p)
24978 xasm = "jmp\t%P0";
24979 /* SEH epilogue detection requires the indirect branch case
24980 to include REX.W. */
24981 else if (TARGET_SEH)
24982 xasm = "rex.W jmp %A0";
24983 else
24984 xasm = "jmp\t%A0";
24986 output_asm_insn (xasm, &call_op);
24987 return "";
24990 /* SEH unwinding can require an extra nop to be emitted in several
24991 circumstances. Determine if we have one of those. */
24992 if (TARGET_SEH)
24994 rtx i;
24996 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
24998 /* If we get to another real insn, we don't need the nop. */
24999 if (INSN_P (i))
25000 break;
25002 /* If we get to the epilogue note, prevent a catch region from
25003 being adjacent to the standard epilogue sequence. If non-
25004 call-exceptions, we'll have done this during epilogue emission. */
25005 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25006 && !flag_non_call_exceptions
25007 && !can_throw_internal (insn))
25009 seh_nop_p = true;
25010 break;
25014 /* If we didn't find a real insn following the call, prevent the
25015 unwinder from looking into the next function. */
25016 if (i == NULL)
25017 seh_nop_p = true;
25020 if (direct_p)
25021 xasm = "call\t%P0";
25022 else
25023 xasm = "call\t%A0";
25025 output_asm_insn (xasm, &call_op);
25027 if (seh_nop_p)
25028 return "nop";
25030 return "";
25033 /* Clear stack slot assignments remembered from previous functions.
25034 This is called from INIT_EXPANDERS once before RTL is emitted for each
25035 function. */
25037 static struct machine_function *
25038 ix86_init_machine_status (void)
25040 struct machine_function *f;
25042 f = ggc_cleared_alloc<machine_function> ();
25043 f->use_fast_prologue_epilogue_nregs = -1;
25044 f->call_abi = ix86_abi;
25046 return f;
25049 /* Return a MEM corresponding to a stack slot with mode MODE.
25050 Allocate a new slot if necessary.
25052 The RTL for a function can have several slots available: N is
25053 which slot to use. */
25056 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
25058 struct stack_local_entry *s;
25060 gcc_assert (n < MAX_386_STACK_LOCALS);
25062 for (s = ix86_stack_locals; s; s = s->next)
25063 if (s->mode == mode && s->n == n)
25064 return validize_mem (copy_rtx (s->rtl));
25066 s = ggc_alloc<stack_local_entry> ();
25067 s->n = n;
25068 s->mode = mode;
25069 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25071 s->next = ix86_stack_locals;
25072 ix86_stack_locals = s;
25073 return validize_mem (s->rtl);
25076 static void
25077 ix86_instantiate_decls (void)
25079 struct stack_local_entry *s;
25081 for (s = ix86_stack_locals; s; s = s->next)
25082 if (s->rtl != NULL_RTX)
25083 instantiate_decl_rtl (s->rtl);
25086 /* Check whether x86 address PARTS is a pc-relative address. */
25088 static bool
25089 rip_relative_addr_p (struct ix86_address *parts)
25091 rtx base, index, disp;
25093 base = parts->base;
25094 index = parts->index;
25095 disp = parts->disp;
25097 if (disp && !base && !index)
25099 if (TARGET_64BIT)
25101 rtx symbol = disp;
25103 if (GET_CODE (disp) == CONST)
25104 symbol = XEXP (disp, 0);
25105 if (GET_CODE (symbol) == PLUS
25106 && CONST_INT_P (XEXP (symbol, 1)))
25107 symbol = XEXP (symbol, 0);
25109 if (GET_CODE (symbol) == LABEL_REF
25110 || (GET_CODE (symbol) == SYMBOL_REF
25111 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25112 || (GET_CODE (symbol) == UNSPEC
25113 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25114 || XINT (symbol, 1) == UNSPEC_PCREL
25115 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25116 return true;
25119 return false;
25122 /* Calculate the length of the memory address in the instruction encoding.
25123 Includes the addr32 prefix; does not include the one-byte modrm, opcode,
25124 or other prefixes. We never generate an addr32 prefix for an LEA insn. */
25127 memory_address_length (rtx addr, bool lea)
25129 struct ix86_address parts;
25130 rtx base, index, disp;
25131 int len;
25132 int ok;
25134 if (GET_CODE (addr) == PRE_DEC
25135 || GET_CODE (addr) == POST_INC
25136 || GET_CODE (addr) == PRE_MODIFY
25137 || GET_CODE (addr) == POST_MODIFY)
25138 return 0;
25140 ok = ix86_decompose_address (addr, &parts);
25141 gcc_assert (ok);
25143 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25145 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
25146 if (TARGET_64BIT && !lea
25147 && (SImode_address_operand (addr, VOIDmode)
25148 || (parts.base && GET_MODE (parts.base) == SImode)
25149 || (parts.index && GET_MODE (parts.index) == SImode)))
25150 len++;
25152 base = parts.base;
25153 index = parts.index;
25154 disp = parts.disp;
25156 if (base && GET_CODE (base) == SUBREG)
25157 base = SUBREG_REG (base);
25158 if (index && GET_CODE (index) == SUBREG)
25159 index = SUBREG_REG (index);
25161 gcc_assert (base == NULL_RTX || REG_P (base));
25162 gcc_assert (index == NULL_RTX || REG_P (index));
25164 /* Rule of thumb:
25165 - esp as the base always wants an index,
25166 - ebp as the base always wants a displacement,
25167 - r12 as the base always wants an index,
25168 - r13 as the base always wants a displacement. */
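/* For instance (hypothetical operands): movl (%rsp), %eax needs a SIB byte
   even without an index, and movl (%rbp), %eax must be encoded as 0(%rbp)
   with a one-byte displacement; r12 and r13 behave like esp and ebp
   respectively in 64-bit code.  */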
25170 /* Register Indirect. */
25171 if (base && !index && !disp)
25173 /* esp (for its index) and ebp (for its displacement) need
25174 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25175 code. */
25176 if (base == arg_pointer_rtx
25177 || base == frame_pointer_rtx
25178 || REGNO (base) == SP_REG
25179 || REGNO (base) == BP_REG
25180 || REGNO (base) == R12_REG
25181 || REGNO (base) == R13_REG)
25182 len++;
25185 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25186 is not disp32, but disp32(%rip), so for disp32
25187 a SIB byte is needed, unless print_operand_address
25188 optimizes it into disp32(%rip) or (%rip) is implied
25189 by UNSPEC. */
25190 else if (disp && !base && !index)
25192 len += 4;
25193 if (rip_relative_addr_p (&parts))
25194 len++;
25196 else
25198 /* Find the length of the displacement constant. */
25199 if (disp)
25201 if (base && satisfies_constraint_K (disp))
25202 len += 1;
25203 else
25204 len += 4;
25206 /* ebp always wants a displacement. Similarly r13. */
25207 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25208 len++;
25210 /* An index requires the two-byte modrm form.... */
25211 if (index
25212 /* ...like esp (or r12), which always wants an index. */
25213 || base == arg_pointer_rtx
25214 || base == frame_pointer_rtx
25215 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25216 len++;
25219 return len;
25222 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25223 is set, expect that the insn has an 8-bit immediate alternative. */
25225 ix86_attr_length_immediate_default (rtx insn, bool shortform)
25227 int len = 0;
25228 int i;
25229 extract_insn_cached (insn);
25230 for (i = recog_data.n_operands - 1; i >= 0; --i)
25231 if (CONSTANT_P (recog_data.operand[i]))
25233 enum attr_mode mode = get_attr_mode (insn);
25235 gcc_assert (!len);
25236 if (shortform && CONST_INT_P (recog_data.operand[i]))
25238 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25239 switch (mode)
25241 case MODE_QI:
25242 len = 1;
25243 continue;
25244 case MODE_HI:
25245 ival = trunc_int_for_mode (ival, HImode);
25246 break;
25247 case MODE_SI:
25248 ival = trunc_int_for_mode (ival, SImode);
25249 break;
25250 default:
25251 break;
25253 if (IN_RANGE (ival, -128, 127))
25255 len = 1;
25256 continue;
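/* For example, a hypothetical addl $100, %eax can use the sign-extended
   8-bit immediate form handled above (1 byte), while addl $1000, %eax
   falls through to the full-width encoding below (4 bytes for SImode).  */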
25259 switch (mode)
25261 case MODE_QI:
25262 len = 1;
25263 break;
25264 case MODE_HI:
25265 len = 2;
25266 break;
25267 case MODE_SI:
25268 len = 4;
25269 break;
25270 /* Immediates for DImode instructions are encoded
25271 as 32-bit sign-extended values. */
25272 case MODE_DI:
25273 len = 4;
25274 break;
25275 default:
25276 fatal_insn ("unknown insn mode", insn);
25279 return len;
25282 /* Compute default value for "length_address" attribute. */
25284 ix86_attr_length_address_default (rtx insn)
25286 int i;
25288 if (get_attr_type (insn) == TYPE_LEA)
25290 rtx set = PATTERN (insn), addr;
25292 if (GET_CODE (set) == PARALLEL)
25293 set = XVECEXP (set, 0, 0);
25295 gcc_assert (GET_CODE (set) == SET);
25297 addr = SET_SRC (set);
25299 return memory_address_length (addr, true);
25302 extract_insn_cached (insn);
25303 for (i = recog_data.n_operands - 1; i >= 0; --i)
25304 if (MEM_P (recog_data.operand[i]))
25306 constrain_operands_cached (reload_completed);
25307 if (which_alternative != -1)
25309 const char *constraints = recog_data.constraints[i];
25310 int alt = which_alternative;
25312 while (*constraints == '=' || *constraints == '+')
25313 constraints++;
25314 while (alt-- > 0)
25315 while (*constraints++ != ',')
25317 /* Skip ignored operands. */
25318 if (*constraints == 'X')
25319 continue;
25321 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25323 return 0;
25326 /* Compute default value for "length_vex" attribute. It includes
25327 a 2- or 3-byte VEX prefix and 1 opcode byte. */
25330 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
25332 int i;
25334 /* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX W
25335 bit requires the 3-byte VEX prefix. */
25336 if (!has_0f_opcode || has_vex_w)
25337 return 3 + 1;
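/* For instance, an insn from the 0f3a opcode map or one with VEX.W set
   (say, a hypothetical vpermq) always takes the three-byte C4 prefix,
   while a plain 0f-map insn such as vaddps %xmm1, %xmm2, %xmm3 can use
   the two-byte C5 form unless the operand checks below force REX.W/X/B.  */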
25339 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
25340 if (!TARGET_64BIT)
25341 return 2 + 1;
25343 extract_insn_cached (insn);
25345 for (i = recog_data.n_operands - 1; i >= 0; --i)
25346 if (REG_P (recog_data.operand[i]))
25348 /* The REX.W bit requires the 3-byte VEX prefix. */
25349 if (GET_MODE (recog_data.operand[i]) == DImode
25350 && GENERAL_REG_P (recog_data.operand[i]))
25351 return 3 + 1;
25353 else
25355 /* The REX.X or REX.B bits require the 3-byte VEX prefix. */
25356 if (MEM_P (recog_data.operand[i])
25357 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25358 return 3 + 1;
25361 return 2 + 1;
25364 /* Return the maximum number of instructions a cpu can issue. */
25366 static int
25367 ix86_issue_rate (void)
25369 switch (ix86_tune)
25371 case PROCESSOR_PENTIUM:
25372 case PROCESSOR_BONNELL:
25373 case PROCESSOR_SILVERMONT:
25374 case PROCESSOR_INTEL:
25375 case PROCESSOR_K6:
25376 case PROCESSOR_BTVER2:
25377 case PROCESSOR_PENTIUM4:
25378 case PROCESSOR_NOCONA:
25379 return 2;
25381 case PROCESSOR_PENTIUMPRO:
25382 case PROCESSOR_ATHLON:
25383 case PROCESSOR_K8:
25384 case PROCESSOR_AMDFAM10:
25385 case PROCESSOR_GENERIC:
25386 case PROCESSOR_BTVER1:
25387 return 3;
25389 case PROCESSOR_BDVER1:
25390 case PROCESSOR_BDVER2:
25391 case PROCESSOR_BDVER3:
25392 case PROCESSOR_BDVER4:
25393 case PROCESSOR_CORE2:
25394 case PROCESSOR_NEHALEM:
25395 case PROCESSOR_SANDYBRIDGE:
25396 case PROCESSOR_HASWELL:
25397 return 4;
25399 default:
25400 return 1;
25404 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
25405 by DEP_INSN and nothing set by DEP_INSN. */
25407 static bool
25408 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
25410 rtx set, set2;
25412 /* Simplify the test for uninteresting insns. */
25413 if (insn_type != TYPE_SETCC
25414 && insn_type != TYPE_ICMOV
25415 && insn_type != TYPE_FCMOV
25416 && insn_type != TYPE_IBR)
25417 return false;
25419 if ((set = single_set (dep_insn)) != 0)
25421 set = SET_DEST (set);
25422 set2 = NULL_RTX;
25424 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25425 && XVECLEN (PATTERN (dep_insn), 0) == 2
25426 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25427 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25429 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25430 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25432 else
25433 return false;
25435 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25436 return false;
25438 /* This test is true if the dependent insn reads the flags but
25439 not any other potentially set register. */
25440 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25441 return false;
25443 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25444 return false;
25446 return true;
25449 /* Return true iff USE_INSN has a memory address with operands set by
25450 SET_INSN. */
25452 bool
25453 ix86_agi_dependent (rtx set_insn, rtx use_insn)
25455 int i;
25456 extract_insn_cached (use_insn);
25457 for (i = recog_data.n_operands - 1; i >= 0; --i)
25458 if (MEM_P (recog_data.operand[i]))
25460 rtx addr = XEXP (recog_data.operand[i], 0);
25461 return modified_in_p (addr, set_insn) != 0;
25463 return false;
25466 /* Helper function for exact_store_load_dependency.
25467 Return true if addr is found in insn. */
25468 static bool
25469 exact_dependency_1 (rtx addr, rtx insn)
25471 enum rtx_code code;
25472 const char *format_ptr;
25473 int i, j;
25475 code = GET_CODE (insn);
25476 switch (code)
25478 case MEM:
25479 if (rtx_equal_p (addr, insn))
25480 return true;
25481 break;
25482 case REG:
25483 CASE_CONST_ANY:
25484 case SYMBOL_REF:
25485 case CODE_LABEL:
25486 case PC:
25487 case CC0:
25488 case EXPR_LIST:
25489 return false;
25490 default:
25491 break;
25494 format_ptr = GET_RTX_FORMAT (code);
25495 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25497 switch (*format_ptr++)
25499 case 'e':
25500 if (exact_dependency_1 (addr, XEXP (insn, i)))
25501 return true;
25502 break;
25503 case 'E':
25504 for (j = 0; j < XVECLEN (insn, i); j++)
25505 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25506 return true;
25507 break;
25510 return false;
25513 /* Return true if there exists an exact dependency between the store and the
25514 load, i.e. the same memory address is used in both. */
25515 static bool
25516 exact_store_load_dependency (rtx store, rtx load)
25518 rtx set1, set2;
25520 set1 = single_set (store);
25521 if (!set1)
25522 return false;
25523 if (!MEM_P (SET_DEST (set1)))
25524 return false;
25525 set2 = single_set (load);
25526 if (!set2)
25527 return false;
25528 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25529 return true;
25530 return false;
25533 static int
25534 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
25536 enum attr_type insn_type, dep_insn_type;
25537 enum attr_memory memory;
25538 rtx set, set2;
25539 int dep_insn_code_number;
25541 /* Anti and output dependencies have zero cost on all CPUs. */
25542 if (REG_NOTE_KIND (link) != 0)
25543 return 0;
25545 dep_insn_code_number = recog_memoized (dep_insn);
25547 /* If we can't recognize the insns, we can't really do anything. */
25548 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25549 return cost;
25551 insn_type = get_attr_type (insn);
25552 dep_insn_type = get_attr_type (dep_insn);
25554 switch (ix86_tune)
25556 case PROCESSOR_PENTIUM:
25557 /* Address Generation Interlock adds a cycle of latency. */
25558 if (insn_type == TYPE_LEA)
25560 rtx addr = PATTERN (insn);
25562 if (GET_CODE (addr) == PARALLEL)
25563 addr = XVECEXP (addr, 0, 0);
25565 gcc_assert (GET_CODE (addr) == SET);
25567 addr = SET_SRC (addr);
25568 if (modified_in_p (addr, dep_insn))
25569 cost += 1;
25571 else if (ix86_agi_dependent (dep_insn, insn))
25572 cost += 1;
25574 /* ??? Compares pair with jump/setcc. */
25575 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25576 cost = 0;
25578 /* Floating point stores require value to be ready one cycle earlier. */
25579 if (insn_type == TYPE_FMOV
25580 && get_attr_memory (insn) == MEMORY_STORE
25581 && !ix86_agi_dependent (dep_insn, insn))
25582 cost += 1;
25583 break;
25585 case PROCESSOR_PENTIUMPRO:
25586 /* INT->FP conversion is expensive. */
25587 if (get_attr_fp_int_src (dep_insn))
25588 cost += 5;
25590 /* There is one cycle extra latency between an FP op and a store. */
25591 if (insn_type == TYPE_FMOV
25592 && (set = single_set (dep_insn)) != NULL_RTX
25593 && (set2 = single_set (insn)) != NULL_RTX
25594 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25595 && MEM_P (SET_DEST (set2)))
25596 cost += 1;
25598 memory = get_attr_memory (insn);
25600 /* Show the ability of the reorder buffer to hide the latency of a load by
25601 executing it in parallel with the previous instruction when the
25602 previous instruction is not needed to compute the address. */
25603 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25604 && !ix86_agi_dependent (dep_insn, insn))
25606 /* Claim moves take one cycle, as the core can issue one load
25607 at a time and the next load can start a cycle later. */
25608 if (dep_insn_type == TYPE_IMOV
25609 || dep_insn_type == TYPE_FMOV)
25610 cost = 1;
25611 else if (cost > 1)
25612 cost--;
25614 break;
25616 case PROCESSOR_K6:
25617 /* The esp dependency is resolved before
25618 the instruction is really finished. */
25619 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25620 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25621 return 1;
25623 /* INT->FP conversion is expensive. */
25624 if (get_attr_fp_int_src (dep_insn))
25625 cost += 5;
25627 memory = get_attr_memory (insn);
25629 /* Show the ability of the reorder buffer to hide the latency of a load by
25630 executing it in parallel with the previous instruction when the
25631 previous instruction is not needed to compute the address. */
25632 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25633 && !ix86_agi_dependent (dep_insn, insn))
25636 /* Claim moves take one cycle, as the core can issue one load
25637 at a time and the next load can start a cycle later. */
25637 if (dep_insn_type == TYPE_IMOV
25638 || dep_insn_type == TYPE_FMOV)
25639 cost = 1;
25640 else if (cost > 2)
25641 cost -= 2;
25642 else
25643 cost = 1;
25645 break;
25647 case PROCESSOR_AMDFAM10:
25648 case PROCESSOR_BDVER1:
25649 case PROCESSOR_BDVER2:
25650 case PROCESSOR_BDVER3:
25651 case PROCESSOR_BDVER4:
25652 case PROCESSOR_BTVER1:
25653 case PROCESSOR_BTVER2:
25654 case PROCESSOR_GENERIC:
25655 /* The stack engine allows push and pop instructions to execute in parallel. */
25656 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25657 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25658 return 0;
25659 /* FALLTHRU */
25661 case PROCESSOR_ATHLON:
25662 case PROCESSOR_K8:
25663 memory = get_attr_memory (insn);
25665 /* Show the ability of the reorder buffer to hide the latency of a load by
25666 executing it in parallel with the previous instruction when the
25667 previous instruction is not needed to compute the address. */
25668 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25669 && !ix86_agi_dependent (dep_insn, insn))
25671 enum attr_unit unit = get_attr_unit (insn);
25672 int loadcost = 3;
25674 /* Because of the difference in length between the integer and
25675 floating unit pipeline preparation stages, memory operands
25676 for floating point are cheaper.
25678 ??? For Athlon the difference is most probably 2. */
25679 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25680 loadcost = 3;
25681 else
25682 loadcost = TARGET_ATHLON ? 2 : 0;
25684 if (cost >= loadcost)
25685 cost -= loadcost;
25686 else
25687 cost = 0;
25689 break;
25691 case PROCESSOR_CORE2:
25692 case PROCESSOR_NEHALEM:
25693 case PROCESSOR_SANDYBRIDGE:
25694 case PROCESSOR_HASWELL:
25695 /* The stack engine allows push and pop instructions to execute in parallel. */
25696 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25697 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25698 return 0;
25700 memory = get_attr_memory (insn);
25702 /* Show the ability of the reorder buffer to hide the latency of a load by
25703 executing it in parallel with the previous instruction when the
25704 previous instruction is not needed to compute the address. */
25705 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25706 && !ix86_agi_dependent (dep_insn, insn))
25708 if (cost >= 4)
25709 cost -= 4;
25710 else
25711 cost = 0;
25713 break;
25715 case PROCESSOR_SILVERMONT:
25716 case PROCESSOR_INTEL:
25717 if (!reload_completed)
25718 return cost;
25720 /* Increase cost of integer loads. */
25721 memory = get_attr_memory (dep_insn);
25722 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25724 enum attr_unit unit = get_attr_unit (dep_insn);
25725 if (unit == UNIT_INTEGER && cost == 1)
25727 if (memory == MEMORY_LOAD)
25728 cost = 3;
25729 else
25731 /* Increase the cost of ld/st for short int types only
25732 because of the store-forwarding issue. */
25733 rtx set = single_set (dep_insn);
25734 if (set && (GET_MODE (SET_DEST (set)) == QImode
25735 || GET_MODE (SET_DEST (set)) == HImode))
25737 /* Increase the cost of the store/load insn if an exact
25738 dependence exists and it is a load insn. */
25739 enum attr_memory insn_memory = get_attr_memory (insn);
25740 if (insn_memory == MEMORY_LOAD
25741 && exact_store_load_dependency (dep_insn, insn))
25742 cost = 3;
25748 default:
25749 break;
25752 return cost;
25755 /* How many alternative schedules to try. This should be as wide as the
25756 scheduling freedom in the DFA, but no wider. Making this value too
25757 large results in extra work for the scheduler. */
25759 static int
25760 ia32_multipass_dfa_lookahead (void)
25762 switch (ix86_tune)
25764 case PROCESSOR_PENTIUM:
25765 return 2;
25767 case PROCESSOR_PENTIUMPRO:
25768 case PROCESSOR_K6:
25769 return 1;
25771 case PROCESSOR_BDVER1:
25772 case PROCESSOR_BDVER2:
25773 case PROCESSOR_BDVER3:
25774 case PROCESSOR_BDVER4:
25775 /* We use lookahead value 4 for BD both before and after reload
25776 scheduling. The plan is to include value 8 for -O3. */
25777 return 4;
25779 case PROCESSOR_CORE2:
25780 case PROCESSOR_NEHALEM:
25781 case PROCESSOR_SANDYBRIDGE:
25782 case PROCESSOR_HASWELL:
25783 case PROCESSOR_BONNELL:
25784 case PROCESSOR_SILVERMONT:
25785 case PROCESSOR_INTEL:
25786 /* Generally, we want haifa-sched:max_issue() to look ahead as far
25787 as the number of instructions that can be executed in a cycle, i.e.,
25788 issue_rate. I wonder why tuning for many CPUs does not do this. */
25789 if (reload_completed)
25790 return ix86_issue_rate ();
25791 /* Don't use lookahead for pre-reload schedule to save compile time. */
25792 return 0;
25794 default:
25795 return 0;
25799 /* Return true if target platform supports macro-fusion. */
25801 static bool
25802 ix86_macro_fusion_p ()
25804 return TARGET_FUSE_CMP_AND_BRANCH;
25807 /* Check whether the current microarchitecture supports macro fusion
25808 for insn pair "CONDGEN + CONDJMP". Refer to
25809 "Intel Architectures Optimization Reference Manual". */
25811 static bool
25812 ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
25814 rtx src, dest;
25815 rtx single_set = single_set (condgen);
25816 enum rtx_code ccode;
25817 rtx compare_set = NULL_RTX, test_if, cond;
25818 rtx alu_set = NULL_RTX, addr = NULL_RTX;
25820 if (get_attr_type (condgen) != TYPE_TEST
25821 && get_attr_type (condgen) != TYPE_ICMP
25822 && get_attr_type (condgen) != TYPE_INCDEC
25823 && get_attr_type (condgen) != TYPE_ALU)
25824 return false;
25826 if (single_set == NULL_RTX
25827 && !TARGET_FUSE_ALU_AND_BRANCH)
25828 return false;
25830 if (single_set != NULL_RTX)
25831 compare_set = single_set;
25832 else
25834 int i;
25835 rtx pat = PATTERN (condgen);
25836 for (i = 0; i < XVECLEN (pat, 0); i++)
25837 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25839 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
25840 if (GET_CODE (set_src) == COMPARE)
25841 compare_set = XVECEXP (pat, 0, i);
25842 else
25843 alu_set = XVECEXP (pat, 0, i);
25846 if (compare_set == NULL_RTX)
25847 return false;
25848 src = SET_SRC (compare_set);
25849 if (GET_CODE (src) != COMPARE)
25850 return false;
25852 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
25853 supported. */
25854 if ((MEM_P (XEXP (src, 0))
25855 && CONST_INT_P (XEXP (src, 1)))
25856 || (MEM_P (XEXP (src, 1))
25857 && CONST_INT_P (XEXP (src, 0))))
25858 return false;
25860 /* No fusion for RIP-relative address. */
25861 if (MEM_P (XEXP (src, 0)))
25862 addr = XEXP (XEXP (src, 0), 0);
25863 else if (MEM_P (XEXP (src, 1)))
25864 addr = XEXP (XEXP (src, 1), 0);
25866 if (addr) {
25867 ix86_address parts;
25868 int ok = ix86_decompose_address (addr, &parts);
25869 gcc_assert (ok);
25871 if (rip_relative_addr_p (&parts))
25872 return false;
25875 test_if = SET_SRC (pc_set (condjmp));
25876 cond = XEXP (test_if, 0);
25877 ccode = GET_CODE (cond);
25878 /* Check whether the conditional jump uses the Sign or Overflow flags. */
25879 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
25880 && (ccode == GE
25881 || ccode == GT
25882 || ccode == LE
25883 || ccode == LT))
25884 return false;
25886 /* Return true for TYPE_TEST and TYPE_ICMP. */
25887 if (get_attr_type (condgen) == TYPE_TEST
25888 || get_attr_type (condgen) == TYPE_ICMP)
25889 return true;
25891 /* The following handles the macro-fusion case for alu + jmp. */
25892 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
25893 return false;
25895 /* No fusion for alu op with memory destination operand. */
25896 dest = SET_DEST (alu_set);
25897 if (MEM_P (dest))
25898 return false;
25900 /* Macro-fusion for inc/dec + unsigned conditional jump is not
25901 supported. */
25902 if (get_attr_type (condgen) == TYPE_INCDEC
25903 && (ccode == GEU
25904 || ccode == GTU
25905 || ccode == LEU
25906 || ccode == LTU))
25907 return false;
25909 return true;
25912 /* Try to reorder the ready list to take advantage of Atom's pipelined IMUL
25913 execution. It is applied if
25914 (1) an IMUL instruction is at the top of the list;
25915 (2) there is exactly one producer of an independent IMUL instruction in
25916 the ready list.
25917 Return the index of the IMUL producer if it was found and -1 otherwise. */
25918 static int
25919 do_reorder_for_imul (rtx *ready, int n_ready)
25921 rtx insn, set, insn1, insn2;
25922 sd_iterator_def sd_it;
25923 dep_t dep;
25924 int index = -1;
25925 int i;
25927 if (!TARGET_BONNELL)
25928 return index;
25930 /* Check that an IMUL instruction is at the top of the ready list. */
25931 insn = ready[n_ready - 1];
25932 set = single_set (insn);
25933 if (!set)
25934 return index;
25935 if (!(GET_CODE (SET_SRC (set)) == MULT
25936 && GET_MODE (SET_SRC (set)) == SImode))
25937 return index;
25939 /* Search for a producer of an independent IMUL instruction. */
25940 for (i = n_ready - 2; i >= 0; i--)
25942 insn = ready[i];
25943 if (!NONDEBUG_INSN_P (insn))
25944 continue;
25945 /* Skip IMUL instruction. */
25946 insn2 = PATTERN (insn);
25947 if (GET_CODE (insn2) == PARALLEL)
25948 insn2 = XVECEXP (insn2, 0, 0);
25949 if (GET_CODE (insn2) == SET
25950 && GET_CODE (SET_SRC (insn2)) == MULT
25951 && GET_MODE (SET_SRC (insn2)) == SImode)
25952 continue;
25954 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
25956 rtx con;
25957 con = DEP_CON (dep);
25958 if (!NONDEBUG_INSN_P (con))
25959 continue;
25960 insn1 = PATTERN (con);
25961 if (GET_CODE (insn1) == PARALLEL)
25962 insn1 = XVECEXP (insn1, 0, 0);
25964 if (GET_CODE (insn1) == SET
25965 && GET_CODE (SET_SRC (insn1)) == MULT
25966 && GET_MODE (SET_SRC (insn1)) == SImode)
25968 sd_iterator_def sd_it1;
25969 dep_t dep1;
25970 /* Check that the IMUL has no producer other than this insn. */
25971 index = i;
25972 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
25974 rtx pro;
25975 pro = DEP_PRO (dep1);
25976 if (!NONDEBUG_INSN_P (pro))
25977 continue;
25978 if (pro != insn)
25979 index = -1;
25981 if (index >= 0)
25982 break;
25985 if (index >= 0)
25986 break;
25988 return index;
25991 /* Try to find the best candidate at the top of the ready list if two insns
25992 have the same priority - the candidate is best if its dependees were
25993 scheduled earlier. Applied to Silvermont only.
25994 Return true if the top 2 insns must be interchanged. */
25995 static bool
25996 swap_top_of_ready_list (rtx *ready, int n_ready)
25998 rtx top = ready[n_ready - 1];
25999 rtx next = ready[n_ready - 2];
26000 rtx set;
26001 sd_iterator_def sd_it;
26002 dep_t dep;
26003 int clock1 = -1;
26004 int clock2 = -1;
26005 #define INSN_TICK(INSN) (HID (INSN)->tick)
26007 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26008 return false;
26010 if (!NONDEBUG_INSN_P (top))
26011 return false;
26012 if (!NONJUMP_INSN_P (top))
26013 return false;
26014 if (!NONDEBUG_INSN_P (next))
26015 return false;
26016 if (!NONJUMP_INSN_P (next))
26017 return false;
26018 set = single_set (top);
26019 if (!set)
26020 return false;
26021 set = single_set (next);
26022 if (!set)
26023 return false;
26025 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26027 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26028 return false;
26029 /* Determine the winner more precisely. */
26030 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26032 rtx pro;
26033 pro = DEP_PRO (dep);
26034 if (!NONDEBUG_INSN_P (pro))
26035 continue;
26036 if (INSN_TICK (pro) > clock1)
26037 clock1 = INSN_TICK (pro);
26039 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26041 rtx pro;
26042 pro = DEP_PRO (dep);
26043 if (!NONDEBUG_INSN_P (pro))
26044 continue;
26045 if (INSN_TICK (pro) > clock2)
26046 clock2 = INSN_TICK (pro);
26049 if (clock1 == clock2)
26051 /* Determine the winner - the load must win. */
26052 enum attr_memory memory1, memory2;
26053 memory1 = get_attr_memory (top);
26054 memory2 = get_attr_memory (next);
26055 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26056 return true;
26058 return (bool) (clock2 < clock1);
26060 return false;
26061 #undef INSN_TICK
26064 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26065 Return the issue rate. */
26066 static int
26067 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
26068 int clock_var)
26070 int issue_rate = -1;
26071 int n_ready = *pn_ready;
26072 int i;
26073 rtx insn;
26074 int index = -1;
26076 /* Set up issue rate. */
26077 issue_rate = ix86_issue_rate ();
26079 /* Do reordering for BONNELL/SILVERMONT only. */
26080 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26081 return issue_rate;
26083 /* Nothing to do if ready list contains only 1 instruction. */
26084 if (n_ready <= 1)
26085 return issue_rate;
26087 /* Do reordering for the post-reload scheduler only. */
26088 if (!reload_completed)
26089 return issue_rate;
26091 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26093 if (sched_verbose > 1)
26094 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26095 INSN_UID (ready[index]));
26097 /* Put IMUL producer (ready[index]) at the top of ready list. */
26098 insn = ready[index];
26099 for (i = index; i < n_ready - 1; i++)
26100 ready[i] = ready[i + 1];
26101 ready[n_ready - 1] = insn;
26102 return issue_rate;
26104 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26106 if (sched_verbose > 1)
26107 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26108 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26109 /* Swap 2 top elements of ready list. */
26110 insn = ready[n_ready - 1];
26111 ready[n_ready - 1] = ready[n_ready - 2];
26112 ready[n_ready - 2] = insn;
26114 return issue_rate;
26117 static bool
26118 ix86_class_likely_spilled_p (reg_class_t);
26120 /* Return true if the lhs of insn is a HW function argument register, and set
26121 is_spilled to true if it is a likely-spilled HW register. */
26122 static bool
26123 insn_is_function_arg (rtx insn, bool* is_spilled)
26125 rtx dst;
26127 if (!NONDEBUG_INSN_P (insn))
26128 return false;
26129 /* Call instructions are not movable, so ignore them. */
26130 if (CALL_P (insn))
26131 return false;
26132 insn = PATTERN (insn);
26133 if (GET_CODE (insn) == PARALLEL)
26134 insn = XVECEXP (insn, 0, 0);
26135 if (GET_CODE (insn) != SET)
26136 return false;
26137 dst = SET_DEST (insn);
26138 if (REG_P (dst) && HARD_REGISTER_P (dst)
26139 && ix86_function_arg_regno_p (REGNO (dst)))
26141 /* Is it likely spilled HW register? */
26142 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26143 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26144 *is_spilled = true;
26145 return true;
26147 return false;
26150 /* Add output dependencies for a chain of adjacent function arguments, but
26151 only if there is a move to a likely-spilled HW register. Return the first
26152 argument if at least one dependence was added, or NULL otherwise. */
26153 static rtx
26154 add_parameter_dependencies (rtx call, rtx head)
26156 rtx insn;
26157 rtx last = call;
26158 rtx first_arg = NULL;
26159 bool is_spilled = false;
26161 head = PREV_INSN (head);
26163 /* Find the argument-passing instruction nearest to the call. */
26164 while (true)
26166 last = PREV_INSN (last);
26167 if (last == head)
26168 return NULL;
26169 if (!NONDEBUG_INSN_P (last))
26170 continue;
26171 if (insn_is_function_arg (last, &is_spilled))
26172 break;
26173 return NULL;
26176 first_arg = last;
26177 while (true)
26179 insn = PREV_INSN (last);
26180 if (!INSN_P (insn))
26181 break;
26182 if (insn == head)
26183 break;
26184 if (!NONDEBUG_INSN_P (insn))
26186 last = insn;
26187 continue;
26189 if (insn_is_function_arg (insn, &is_spilled))
26191 /* Add an output dependence between two function arguments if the chain
26192 of output arguments contains likely-spilled HW registers. */
26193 if (is_spilled)
26194 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26195 first_arg = last = insn;
26197 else
26198 break;
26200 if (!is_spilled)
26201 return NULL;
26202 return first_arg;
26205 /* Add output or anti dependency from insn to first_arg to restrict its code
26206 motion. */
26207 static void
26208 avoid_func_arg_motion (rtx first_arg, rtx insn)
26210 rtx set;
26211 rtx tmp;
26213 set = single_set (insn);
26214 if (!set)
26215 return;
26216 tmp = SET_DEST (set);
26217 if (REG_P (tmp))
26219 /* Add output dependency to the first function argument. */
26220 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26221 return;
26223 /* Add anti dependency. */
26224 add_dependence (first_arg, insn, REG_DEP_ANTI);
26227 /* Avoid cross-block motion of a function argument by adding a dependency
26228 from the first non-jump instruction in bb. */
26229 static void
26230 add_dependee_for_func_arg (rtx arg, basic_block bb)
26232 rtx insn = BB_END (bb);
26234 while (insn)
26236 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26238 rtx set = single_set (insn);
26239 if (set)
26241 avoid_func_arg_motion (arg, insn);
26242 return;
26245 if (insn == BB_HEAD (bb))
26246 return;
26247 insn = PREV_INSN (insn);
26251 /* Hook for pre-reload schedule - avoid motion of function arguments
26252 passed in likely spilled HW registers. */
26253 static void
26254 ix86_dependencies_evaluation_hook (rtx head, rtx tail)
26256 rtx insn;
26257 rtx first_arg = NULL;
26258 if (reload_completed)
26259 return;
26260 while (head != tail && DEBUG_INSN_P (head))
26261 head = NEXT_INSN (head);
26262 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26263 if (INSN_P (insn) && CALL_P (insn))
26265 first_arg = add_parameter_dependencies (insn, head);
26266 if (first_arg)
26268 /* Add a dependee for the first argument to predecessors, but only
26269 if the region contains more than one block. */
26270 basic_block bb = BLOCK_FOR_INSN (insn);
26271 int rgn = CONTAINING_RGN (bb->index);
26272 int nr_blks = RGN_NR_BLOCKS (rgn);
26273 /* Skip trivial regions and region head blocks that can have
26274 predecessors outside of region. */
26275 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26277 edge e;
26278 edge_iterator ei;
26280 /* Regions are SCCs with the exception of selective
26281 scheduling with pipelining of outer blocks enabled.
26282 So also check that immediate predecessors of a non-head
26283 block are in the same region. */
26284 FOR_EACH_EDGE (e, ei, bb->preds)
26286 /* Avoid creating loop-carried dependencies by using
26287 the topological ordering in the region. */
26288 if (rgn == CONTAINING_RGN (e->src->index)
26289 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26290 add_dependee_for_func_arg (first_arg, e->src);
26293 insn = first_arg;
26294 if (insn == head)
26295 break;
26298 else if (first_arg)
26299 avoid_func_arg_motion (first_arg, insn);
26302 /* Hook for pre-reload schedule - set the priority of moves from likely-spilled
26303 HW registers to the maximum, to schedule them as soon as possible. These are
26304 moves from function argument registers at the top of the function entry
26305 and moves from function return value registers after a call. */
26306 static int
26307 ix86_adjust_priority (rtx insn, int priority)
26309 rtx set;
26311 if (reload_completed)
26312 return priority;
26314 if (!NONDEBUG_INSN_P (insn))
26315 return priority;
26317 set = single_set (insn);
26318 if (set)
26320 rtx tmp = SET_SRC (set);
26321 if (REG_P (tmp)
26322 && HARD_REGISTER_P (tmp)
26323 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26324 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26325 return current_sched_info->sched_max_insns_priority;
26328 return priority;
26331 /* Model decoder of Core 2/i7.
26332 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
26333 track the instruction fetch block boundaries and make sure that long
26334 (9+ bytes) instructions are assigned to D0. */
26336 /* Maximum length of an insn that can be handled by
26337 a secondary decoder unit. '8' for Core 2/i7. */
26338 static int core2i7_secondary_decoder_max_insn_size;
26340 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26341 '16' for Core 2/i7. */
26342 static int core2i7_ifetch_block_size;
26344 /* Maximum number of instructions decoder can handle per cycle.
26345 '6' for Core 2/i7. */
26346 static int core2i7_ifetch_block_max_insns;
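/* As a worked example of this model (using the Core 2/i7 values set up in
   ix86_sched_init_global below): with a 16-byte fetch block, three 5-byte
   insns fit in one cycle, but a fourth 5-byte insn would exceed the block
   and must wait for the next cycle; an insn longer than 8 bytes can only
   be handled by the first decoder.  */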
26348 typedef struct ix86_first_cycle_multipass_data_ *
26349 ix86_first_cycle_multipass_data_t;
26350 typedef const struct ix86_first_cycle_multipass_data_ *
26351 const_ix86_first_cycle_multipass_data_t;
26353 /* A variable to store target state across calls to max_issue within
26354 one cycle. */
26355 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26356 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26358 /* Initialize DATA. */
26359 static void
26360 core2i7_first_cycle_multipass_init (void *_data)
26362 ix86_first_cycle_multipass_data_t data
26363 = (ix86_first_cycle_multipass_data_t) _data;
26365 data->ifetch_block_len = 0;
26366 data->ifetch_block_n_insns = 0;
26367 data->ready_try_change = NULL;
26368 data->ready_try_change_size = 0;
26371 /* Advancing the cycle; reset ifetch block counts. */
26372 static void
26373 core2i7_dfa_post_advance_cycle (void)
26375 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26377 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26379 data->ifetch_block_len = 0;
26380 data->ifetch_block_n_insns = 0;
26383 static int min_insn_size (rtx);
26385 /* Filter out insns from ready_try that the core will not be able to issue
26386 on the current cycle due to decoder restrictions. */
26387 static void
26388 core2i7_first_cycle_multipass_filter_ready_try
26389 (const_ix86_first_cycle_multipass_data_t data,
26390 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
26392 while (n_ready--)
26394 rtx insn;
26395 int insn_size;
26397 if (ready_try[n_ready])
26398 continue;
26400 insn = get_ready_element (n_ready);
26401 insn_size = min_insn_size (insn);
26403 if (/* If this is too long an insn for a secondary decoder ... */
26404 (!first_cycle_insn_p
26405 && insn_size > core2i7_secondary_decoder_max_insn_size)
26406 /* ... or it would not fit into the ifetch block ... */
26407 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26408 /* ... or the decoder is full already ... */
26409 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26410 /* ... mask the insn out. */
26412 ready_try[n_ready] = 1;
26414 if (data->ready_try_change)
26415 bitmap_set_bit (data->ready_try_change, n_ready);
26420 /* Prepare for a new round of multipass lookahead scheduling. */
26421 static void
26422 core2i7_first_cycle_multipass_begin (void *_data,
26423 signed char *ready_try, int n_ready,
26424 bool first_cycle_insn_p)
26426 ix86_first_cycle_multipass_data_t data
26427 = (ix86_first_cycle_multipass_data_t) _data;
26428 const_ix86_first_cycle_multipass_data_t prev_data
26429 = ix86_first_cycle_multipass_data;
26431 /* Restore the state from the end of the previous round. */
26432 data->ifetch_block_len = prev_data->ifetch_block_len;
26433 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26435 /* Filter instructions that cannot be issued on the current cycle due to
26436 decoder restrictions. */
26437 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26438 first_cycle_insn_p);
26441 /* INSN is being issued in current solution. Account for its impact on
26442 the decoder model. */
26443 static void
26444 core2i7_first_cycle_multipass_issue (void *_data,
26445 signed char *ready_try, int n_ready,
26446 rtx insn, const void *_prev_data)
26448 ix86_first_cycle_multipass_data_t data
26449 = (ix86_first_cycle_multipass_data_t) _data;
26450 const_ix86_first_cycle_multipass_data_t prev_data
26451 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26453 int insn_size = min_insn_size (insn);
26455 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26456 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26457 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26458 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26460 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26461 if (!data->ready_try_change)
26463 data->ready_try_change = sbitmap_alloc (n_ready);
26464 data->ready_try_change_size = n_ready;
26466 else if (data->ready_try_change_size < n_ready)
26468 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26469 n_ready, 0);
26470 data->ready_try_change_size = n_ready;
26472 bitmap_clear (data->ready_try_change);
26474 /* Filter out insns from ready_try that the core will not be able to issue
26475 on the current cycle due to decoder restrictions. */
26476 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26477 false);
26480 /* Revert the effect on ready_try. */
26481 static void
26482 core2i7_first_cycle_multipass_backtrack (const void *_data,
26483 signed char *ready_try,
26484 int n_ready ATTRIBUTE_UNUSED)
26486 const_ix86_first_cycle_multipass_data_t data
26487 = (const_ix86_first_cycle_multipass_data_t) _data;
26488 unsigned int i = 0;
26489 sbitmap_iterator sbi;
26491 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26492 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26494 ready_try[i] = 0;
26498 /* Save the result of multipass lookahead scheduling for the next round. */
26499 static void
26500 core2i7_first_cycle_multipass_end (const void *_data)
26502 const_ix86_first_cycle_multipass_data_t data
26503 = (const_ix86_first_cycle_multipass_data_t) _data;
26504 ix86_first_cycle_multipass_data_t next_data
26505 = ix86_first_cycle_multipass_data;
26507 if (data != NULL)
26509 next_data->ifetch_block_len = data->ifetch_block_len;
26510 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26514 /* Deallocate target data. */
26515 static void
26516 core2i7_first_cycle_multipass_fini (void *_data)
26518 ix86_first_cycle_multipass_data_t data
26519 = (ix86_first_cycle_multipass_data_t) _data;
26521 if (data->ready_try_change)
26523 sbitmap_free (data->ready_try_change);
26524 data->ready_try_change = NULL;
26525 data->ready_try_change_size = 0;
26529 /* Prepare for scheduling pass. */
26530 static void
26531 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
26532 int verbose ATTRIBUTE_UNUSED,
26533 int max_uid ATTRIBUTE_UNUSED)
26535 /* Install scheduling hooks for the current CPU. Some of these hooks are used
26536 in time-critical parts of the scheduler, so we only set them up when
26537 they are actually used. */
26538 switch (ix86_tune)
26540 case PROCESSOR_CORE2:
26541 case PROCESSOR_NEHALEM:
26542 case PROCESSOR_SANDYBRIDGE:
26543 case PROCESSOR_HASWELL:
26544 /* Do not perform multipass scheduling for the pre-reload schedule,
26545 to save compile time. */
26546 if (reload_completed)
26548 targetm.sched.dfa_post_advance_cycle
26549 = core2i7_dfa_post_advance_cycle;
26550 targetm.sched.first_cycle_multipass_init
26551 = core2i7_first_cycle_multipass_init;
26552 targetm.sched.first_cycle_multipass_begin
26553 = core2i7_first_cycle_multipass_begin;
26554 targetm.sched.first_cycle_multipass_issue
26555 = core2i7_first_cycle_multipass_issue;
26556 targetm.sched.first_cycle_multipass_backtrack
26557 = core2i7_first_cycle_multipass_backtrack;
26558 targetm.sched.first_cycle_multipass_end
26559 = core2i7_first_cycle_multipass_end;
26560 targetm.sched.first_cycle_multipass_fini
26561 = core2i7_first_cycle_multipass_fini;
26563 /* Set decoder parameters. */
26564 core2i7_secondary_decoder_max_insn_size = 8;
26565 core2i7_ifetch_block_size = 16;
26566 core2i7_ifetch_block_max_insns = 6;
26567 break;
26569 /* ... Fall through ... */
26570 default:
26571 targetm.sched.dfa_post_advance_cycle = NULL;
26572 targetm.sched.first_cycle_multipass_init = NULL;
26573 targetm.sched.first_cycle_multipass_begin = NULL;
26574 targetm.sched.first_cycle_multipass_issue = NULL;
26575 targetm.sched.first_cycle_multipass_backtrack = NULL;
26576 targetm.sched.first_cycle_multipass_end = NULL;
26577 targetm.sched.first_cycle_multipass_fini = NULL;
26578 break;
26583 /* Compute the alignment given to a constant that is being placed in memory.
26584 EXP is the constant and ALIGN is the alignment that the object would
26585 ordinarily have.
26586 The value of this function is used instead of that alignment to align
26587 the object. */
26590 ix86_constant_alignment (tree exp, int align)
26592 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26593 || TREE_CODE (exp) == INTEGER_CST)
26595 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26596 return 64;
26597 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26598 return 128;
26600 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26601 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26602 return BITS_PER_WORD;
26604 return align;
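/* For example, a DFmode (double) constant whose default alignment is only
   32 bits is bumped to 64-bit alignment above, and a sufficiently long
   string constant (TREE_STRING_LENGTH of at least 31) is word-aligned
   unless we optimize for size.  */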
26607 /* Compute the alignment for a static variable.
26608 TYPE is the data type, and ALIGN is the alignment that
26609 the object would ordinarily have. The value of this function is used
26610 instead of that alignment to align the object. */
26613 ix86_data_alignment (tree type, int align, bool opt)
26615 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26616 for symbols from other compilation units or symbols that don't need
26617 to bind locally. In order to preserve some ABI compatibility with
26618 those compilers, ensure we don't decrease alignment from what we
26619 used to assume. */
26621 int max_align_compat
26622 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
26624 /* A data structure whose size is equal to or greater than a cache line
26625 (64 bytes on the Pentium 4 and other recent Intel processors, including
26626 processors based on the Intel Core microarchitecture) should be aligned
26627 so that its base address is a multiple of the cache line size. */
26629 int max_align
26630 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
26632 if (max_align < BITS_PER_WORD)
26633 max_align = BITS_PER_WORD;
26635 if (opt
26636 && AGGREGATE_TYPE_P (type)
26637 && TYPE_SIZE (type)
26638 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
26640 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
26641 && align < max_align_compat)
26642 align = max_align_compat;
26643 if (wi::geu_p (TYPE_SIZE (type), max_align)
26644 && align < max_align)
26645 align = max_align;
26648 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
26649 to a 16-byte boundary. */
26650 if (TARGET_64BIT)
26652 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26653 && TYPE_SIZE (type)
26654 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26655 && wi::geu_p (TYPE_SIZE (type), 128)
26656 && align < 128)
26657 return 128;
26660 if (!opt)
26661 return align;
26663 if (TREE_CODE (type) == ARRAY_TYPE)
26665 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26666 return 64;
26667 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26668 return 128;
26670 else if (TREE_CODE (type) == COMPLEX_TYPE)
26673 if (TYPE_MODE (type) == DCmode && align < 64)
26674 return 64;
26675 if ((TYPE_MODE (type) == XCmode
26676 || TYPE_MODE (type) == TCmode) && align < 128)
26677 return 128;
26679 else if ((TREE_CODE (type) == RECORD_TYPE
26680 || TREE_CODE (type) == UNION_TYPE
26681 || TREE_CODE (type) == QUAL_UNION_TYPE)
26682 && TYPE_FIELDS (type))
26684 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26685 return 64;
26686 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26687 return 128;
26689 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26690 || TREE_CODE (type) == INTEGER_TYPE)
26692 if (TYPE_MODE (type) == DFmode && align < 64)
26693 return 64;
26694 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26695 return 128;
26698 return align;
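/* Worked example for the checks above, assuming the usual 64-byte prefetch
   block: a static aggregate of 32 bytes (256 bits) or more is given at
   least 256-bit alignment to stay ABI-compatible with GCC 4.8, and one of
   a full cache line (64 bytes) or more is aligned to the cache line; on
   x86-64, arrays of 16 bytes or more additionally get at least 128-bit
   alignment as the psABI requires.  */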
26701 /* Compute the alignment for a local variable or a stack slot. EXP is
26702 the data type or decl itself, MODE is the widest mode available and
26703 ALIGN is the alignment that the object would ordinarily have. The
26704 value of this function is used instead of that alignment to align the
26705 object. */
26707 unsigned int
26708 ix86_local_alignment (tree exp, enum machine_mode mode,
26709 unsigned int align)
26711 tree type, decl;
26713 if (exp && DECL_P (exp))
26715 type = TREE_TYPE (exp);
26716 decl = exp;
26718 else
26720 type = exp;
26721 decl = NULL;
26724 /* Don't do dynamic stack realignment for long long objects with
26725 -mpreferred-stack-boundary=2. */
26726 if (!TARGET_64BIT
26727 && align == 64
26728 && ix86_preferred_stack_boundary < 64
26729 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26730 && (!type || !TYPE_USER_ALIGN (type))
26731 && (!decl || !DECL_USER_ALIGN (decl)))
26732 align = 32;
26734 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
26735 register in MODE. We will return the larger of the XFmode and
26736 DFmode alignments. */
26737 if (!type)
26739 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26740 align = GET_MODE_ALIGNMENT (DFmode);
26741 return align;
26744 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
26745 to a 16-byte boundary. The exact wording is:
26747 An array uses the same alignment as its elements, except that a local or
26748 global array variable of length at least 16 bytes or
26749 a C99 variable-length array variable always has alignment of at least 16 bytes.
26751 This was added to allow use of aligned SSE instructions on arrays. This
26752 rule is meant for static storage (where the compiler cannot do the analysis
26753 by itself). We follow it for automatic variables only when convenient.
26754 We fully control everything in the function being compiled, and functions
26755 from other units cannot rely on the alignment.
26757 Exclude the va_list type. It is the common case of a local array where
26758 we cannot benefit from the alignment.
26760 TODO: Probably one should optimize for size only when var is not escaping. */
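/* For example, when compiling a 64-bit function for speed with SSE
   enabled, a local "char buf[32]" is bumped to 128-bit alignment by the
   check below, while a va_list object is deliberately left alone.  */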
26761 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26762 && TARGET_SSE)
26764 if (AGGREGATE_TYPE_P (type)
26765 && (va_list_type_node == NULL_TREE
26766 || (TYPE_MAIN_VARIANT (type)
26767 != TYPE_MAIN_VARIANT (va_list_type_node)))
26768 && TYPE_SIZE (type)
26769 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26770 && wi::geu_p (TYPE_SIZE (type), 16)
26771 && align < 128)
26772 return 128;
26774 if (TREE_CODE (type) == ARRAY_TYPE)
26776 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26777 return 64;
26778 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26779 return 128;
26781 else if (TREE_CODE (type) == COMPLEX_TYPE)
26783 if (TYPE_MODE (type) == DCmode && align < 64)
26784 return 64;
26785 if ((TYPE_MODE (type) == XCmode
26786 || TYPE_MODE (type) == TCmode) && align < 128)
26787 return 128;
26789 else if ((TREE_CODE (type) == RECORD_TYPE
26790 || TREE_CODE (type) == UNION_TYPE
26791 || TREE_CODE (type) == QUAL_UNION_TYPE)
26792 && TYPE_FIELDS (type))
26794 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26795 return 64;
26796 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26797 return 128;
26799 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26800 || TREE_CODE (type) == INTEGER_TYPE)
26803 if (TYPE_MODE (type) == DFmode && align < 64)
26804 return 64;
26805 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26806 return 128;
26808 return align;
26811 /* Compute the minimum required alignment for dynamic stack realignment
26812 purposes for a local variable, parameter or a stack slot. EXP is
26813 the data type or decl itself, MODE is its mode and ALIGN is the
26814 alignment that the object would ordinarily have. */
26816 unsigned int
26817 ix86_minimum_alignment (tree exp, enum machine_mode mode,
26818 unsigned int align)
26820 tree type, decl;
26822 if (exp && DECL_P (exp))
26824 type = TREE_TYPE (exp);
26825 decl = exp;
26827 else
26829 type = exp;
26830 decl = NULL;
26833 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
26834 return align;
26836 /* Don't do dynamic stack realignment for long long objects with
26837 -mpreferred-stack-boundary=2. */
26838 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
26839 && (!type || !TYPE_USER_ALIGN (type))
26840 && (!decl || !DECL_USER_ALIGN (decl)))
26841 return 32;
26843 return align;
26846 /* Find a location for the static chain incoming to a nested function.
26847 This is a register, unless all free registers are used by arguments. */
26849 static rtx
26850 ix86_static_chain (const_tree fndecl, bool incoming_p)
26852 unsigned regno;
26854 if (!DECL_STATIC_CHAIN (fndecl))
26855 return NULL;
26857 if (TARGET_64BIT)
26859 /* We always use R10 in 64-bit mode. */
26860 regno = R10_REG;
26862 else
26864 tree fntype;
26865 unsigned int ccvt;
26867 /* By default in 32-bit mode we use ECX to pass the static chain. */
26868 regno = CX_REG;
26870 fntype = TREE_TYPE (fndecl);
26871 ccvt = ix86_get_callcvt (fntype);
26872 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
26874 /* Fastcall functions use ecx/edx for arguments, which leaves
26875 us with EAX for the static chain.
26876 Thiscall functions use ecx for arguments, which also
26877 leaves us with EAX for the static chain. */
26878 regno = AX_REG;
26880 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
26882 /* Thiscall functions use ecx for arguments, which leaves
26883 us with EAX and EDX for the static chain.
26884 For ABI compatibility we use EAX. */
26885 regno = AX_REG;
26887 else if (ix86_function_regparm (fntype, fndecl) == 3)
26889 /* For regparm 3, we have no free call-clobbered registers in
26890 which to store the static chain. In order to implement this,
26891 we have the trampoline push the static chain to the stack.
26892 However, we can't push a value below the return address when
26893 we call the nested function directly, so we have to use an
26894 alternate entry point. For this we use ESI, and have the
26895 alternate entry point push ESI, so that things appear the
26896 same once we're executing the nested function. */
26897 if (incoming_p)
26899 if (fndecl == current_function_decl)
26900 ix86_static_chain_on_stack = true;
26901 return gen_frame_mem (SImode,
26902 plus_constant (Pmode,
26903 arg_pointer_rtx, -8));
26905 regno = SI_REG;
26909 return gen_rtx_REG (Pmode, regno);
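/* To summarize the choices above: 64-bit code always passes the static
   chain in R10.  32-bit code uses ECX by default and EAX for fastcall and
   thiscall functions; for regparm-3 functions the chain is passed in ESI
   on direct calls and read back from a stack slot just below the return
   address inside the nested function.  */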
26912 /* Emit RTL insns to initialize the variable parts of a trampoline.
26913 FNDECL is the decl of the target address; M_TRAMP is a MEM for
26914 the trampoline, and CHAIN_VALUE is an RTX for the static chain
26915 to be passed to the target function. */
26917 static void
26918 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
26920 rtx mem, fnaddr;
26921 int opcode;
26922 int offset = 0;
26924 fnaddr = XEXP (DECL_RTL (fndecl), 0);
26926 if (TARGET_64BIT)
26928 int size;
26930 /* Load the function address into r11. Try to load the address using
26931 the shorter movl instead of movabs. We may want to support
26932 movq for kernel mode, but the kernel does not use trampolines at
26933 the moment. FNADDR is a 32-bit address and may not be in
26934 DImode when ptr_mode == SImode. Always use movl in this
26935 case. */
26936 if (ptr_mode == SImode
26937 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
26939 fnaddr = copy_addr_to_reg (fnaddr);
26941 mem = adjust_address (m_tramp, HImode, offset);
26942 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
26944 mem = adjust_address (m_tramp, SImode, offset + 2);
26945 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
26946 offset += 6;
26948 else
26950 mem = adjust_address (m_tramp, HImode, offset);
26951 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
26953 mem = adjust_address (m_tramp, DImode, offset + 2);
26954 emit_move_insn (mem, fnaddr);
26955 offset += 10;
26958 /* Load the static chain into r10 using movabs. Use the shorter movl
26959 instead of movabs when ptr_mode == SImode. */
26960 if (ptr_mode == SImode)
26962 opcode = 0xba41;
26963 size = 6;
26965 else
26967 opcode = 0xba49;
26968 size = 10;
26971 mem = adjust_address (m_tramp, HImode, offset);
26972 emit_move_insn (mem, gen_int_mode (opcode, HImode));
26974 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
26975 emit_move_insn (mem, chain_value);
26976 offset += size;
26978 /* Jump to r11; the last (unused) byte is a nop, only there to
26979 pad the write out to a single 32-bit store. */
26980 mem = adjust_address (m_tramp, SImode, offset);
26981 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
26982 offset += 4;
26984 else
26986 rtx disp, chain;
26988 /* Depending on the static chain location, either load a register
26989 with a constant, or push the constant to the stack. All of the
26990 instructions are the same size. */
26991 chain = ix86_static_chain (fndecl, true);
26992 if (REG_P (chain))
26994 switch (REGNO (chain))
26996 case AX_REG:
26997 opcode = 0xb8; break;
26998 case CX_REG:
26999 opcode = 0xb9; break;
27000 default:
27001 gcc_unreachable ();
27004 else
27005 opcode = 0x68;
27007 mem = adjust_address (m_tramp, QImode, offset);
27008 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27010 mem = adjust_address (m_tramp, SImode, offset + 1);
27011 emit_move_insn (mem, chain_value);
27012 offset += 5;
27014 mem = adjust_address (m_tramp, QImode, offset);
27015 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27017 mem = adjust_address (m_tramp, SImode, offset + 1);
27019 /* Compute the offset from the end of the jmp to the target function.
27020 When the trampoline stores the static chain on
27021 the stack, we need to skip the first insn, which pushes the
27022 (call-saved) static chain register; this push is 1 byte. */
27023 offset += 5;
27024 disp = expand_binop (SImode, sub_optab, fnaddr,
27025 plus_constant (Pmode, XEXP (m_tramp, 0),
27026 offset - (MEM_P (chain) ? 1 : 0)),
27027 NULL_RTX, 1, OPTAB_DIRECT);
27028 emit_move_insn (mem, disp);
27031 gcc_assert (offset <= TRAMPOLINE_SIZE);
27033 #ifdef HAVE_ENABLE_EXECUTE_STACK
27034 #ifdef CHECK_EXECUTE_STACK_ENABLED
27035 if (CHECK_EXECUTE_STACK_ENABLED)
27036 #endif
27037 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27038 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27039 #endif
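/* For reference, the 64-bit trampoline written above is, when 64-bit
   immediates are needed:

       49 bb <imm64>   movabs $fnaddr, %r11
       49 ba <imm64>   movabs $chain,  %r10
       49 ff e3        jmp    *%r11
       90              nop   (pads the write out to a full 32-bit store)

   with 41 bb <imm32> / 41 ba <imm32> (movl) replacing the movabs forms
   when a zero-extended 32-bit immediate suffices.  The 32-bit trampoline
   is a movl to EAX/ECX or a pushl of the static chain (opcode b8, b9 or
   68, followed by <imm32>) and then a relative jmp (e9 <rel32>) to the
   target function.  */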
27042 /* The following file contains several enumerations and data structures
27043 built from the definitions in i386-builtin-types.def. */
27045 #include "i386-builtin-types.inc"
27047 /* Table for the ix86 builtin non-function types. */
27048 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27050 /* Retrieve an element from the above table, building some of
27051 the types lazily. */
27053 static tree
27054 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27056 unsigned int index;
27057 tree type, itype;
27059 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27061 type = ix86_builtin_type_tab[(int) tcode];
27062 if (type != NULL)
27063 return type;
27065 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27066 if (tcode <= IX86_BT_LAST_VECT)
27068 enum machine_mode mode;
27070 index = tcode - IX86_BT_LAST_PRIM - 1;
27071 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27072 mode = ix86_builtin_type_vect_mode[index];
27074 type = build_vector_type_for_mode (itype, mode);
27076 else
27078 int quals;
27080 index = tcode - IX86_BT_LAST_VECT - 1;
27081 if (tcode <= IX86_BT_LAST_PTR)
27082 quals = TYPE_UNQUALIFIED;
27083 else
27084 quals = TYPE_QUAL_CONST;
27086 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27087 if (quals != TYPE_UNQUALIFIED)
27088 itype = build_qualified_type (itype, quals);
27090 type = build_pointer_type (itype);
27093 ix86_builtin_type_tab[(int) tcode] = type;
27094 return type;
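/* For example, a vector type code such as IX86_BT_V4SF is materialized on
   first use from its base element type and mode (taken from the tables
   generated out of i386-builtin-types.def) via build_vector_type_for_mode,
   while pointer codes wrap their pointee type, adding a const qualifier
   for the codes past IX86_BT_LAST_PTR.  */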
27097 /* Table for the ix86 builtin function types. */
27098 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27100 /* Retrieve an element from the above table, building some of
27101 the types lazily. */
27103 static tree
27104 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27106 tree type;
27108 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27110 type = ix86_builtin_func_type_tab[(int) tcode];
27111 if (type != NULL)
27112 return type;
27114 if (tcode <= IX86_BT_LAST_FUNC)
27116 unsigned start = ix86_builtin_func_start[(int) tcode];
27117 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27118 tree rtype, atype, args = void_list_node;
27119 unsigned i;
27121 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27122 for (i = after - 1; i > start; --i)
27124 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27125 args = tree_cons (NULL, atype, args);
27128 type = build_function_type (rtype, args);
27130 else
27132 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27133 enum ix86_builtin_func_type icode;
27135 icode = ix86_builtin_func_alias_base[index];
27136 type = ix86_get_builtin_func_type (icode);
27139 ix86_builtin_func_type_tab[(int) tcode] = type;
27140 return type;
27144 /* Codes for all the SSE/MMX builtins. */
27145 enum ix86_builtins
27147 IX86_BUILTIN_ADDPS,
27148 IX86_BUILTIN_ADDSS,
27149 IX86_BUILTIN_DIVPS,
27150 IX86_BUILTIN_DIVSS,
27151 IX86_BUILTIN_MULPS,
27152 IX86_BUILTIN_MULSS,
27153 IX86_BUILTIN_SUBPS,
27154 IX86_BUILTIN_SUBSS,
27156 IX86_BUILTIN_CMPEQPS,
27157 IX86_BUILTIN_CMPLTPS,
27158 IX86_BUILTIN_CMPLEPS,
27159 IX86_BUILTIN_CMPGTPS,
27160 IX86_BUILTIN_CMPGEPS,
27161 IX86_BUILTIN_CMPNEQPS,
27162 IX86_BUILTIN_CMPNLTPS,
27163 IX86_BUILTIN_CMPNLEPS,
27164 IX86_BUILTIN_CMPNGTPS,
27165 IX86_BUILTIN_CMPNGEPS,
27166 IX86_BUILTIN_CMPORDPS,
27167 IX86_BUILTIN_CMPUNORDPS,
27168 IX86_BUILTIN_CMPEQSS,
27169 IX86_BUILTIN_CMPLTSS,
27170 IX86_BUILTIN_CMPLESS,
27171 IX86_BUILTIN_CMPNEQSS,
27172 IX86_BUILTIN_CMPNLTSS,
27173 IX86_BUILTIN_CMPNLESS,
27174 IX86_BUILTIN_CMPORDSS,
27175 IX86_BUILTIN_CMPUNORDSS,
27177 IX86_BUILTIN_COMIEQSS,
27178 IX86_BUILTIN_COMILTSS,
27179 IX86_BUILTIN_COMILESS,
27180 IX86_BUILTIN_COMIGTSS,
27181 IX86_BUILTIN_COMIGESS,
27182 IX86_BUILTIN_COMINEQSS,
27183 IX86_BUILTIN_UCOMIEQSS,
27184 IX86_BUILTIN_UCOMILTSS,
27185 IX86_BUILTIN_UCOMILESS,
27186 IX86_BUILTIN_UCOMIGTSS,
27187 IX86_BUILTIN_UCOMIGESS,
27188 IX86_BUILTIN_UCOMINEQSS,
27190 IX86_BUILTIN_CVTPI2PS,
27191 IX86_BUILTIN_CVTPS2PI,
27192 IX86_BUILTIN_CVTSI2SS,
27193 IX86_BUILTIN_CVTSI642SS,
27194 IX86_BUILTIN_CVTSS2SI,
27195 IX86_BUILTIN_CVTSS2SI64,
27196 IX86_BUILTIN_CVTTPS2PI,
27197 IX86_BUILTIN_CVTTSS2SI,
27198 IX86_BUILTIN_CVTTSS2SI64,
27200 IX86_BUILTIN_MAXPS,
27201 IX86_BUILTIN_MAXSS,
27202 IX86_BUILTIN_MINPS,
27203 IX86_BUILTIN_MINSS,
27205 IX86_BUILTIN_LOADUPS,
27206 IX86_BUILTIN_STOREUPS,
27207 IX86_BUILTIN_MOVSS,
27209 IX86_BUILTIN_MOVHLPS,
27210 IX86_BUILTIN_MOVLHPS,
27211 IX86_BUILTIN_LOADHPS,
27212 IX86_BUILTIN_LOADLPS,
27213 IX86_BUILTIN_STOREHPS,
27214 IX86_BUILTIN_STORELPS,
27216 IX86_BUILTIN_MASKMOVQ,
27217 IX86_BUILTIN_MOVMSKPS,
27218 IX86_BUILTIN_PMOVMSKB,
27220 IX86_BUILTIN_MOVNTPS,
27221 IX86_BUILTIN_MOVNTQ,
27223 IX86_BUILTIN_LOADDQU,
27224 IX86_BUILTIN_STOREDQU,
27226 IX86_BUILTIN_PACKSSWB,
27227 IX86_BUILTIN_PACKSSDW,
27228 IX86_BUILTIN_PACKUSWB,
27230 IX86_BUILTIN_PADDB,
27231 IX86_BUILTIN_PADDW,
27232 IX86_BUILTIN_PADDD,
27233 IX86_BUILTIN_PADDQ,
27234 IX86_BUILTIN_PADDSB,
27235 IX86_BUILTIN_PADDSW,
27236 IX86_BUILTIN_PADDUSB,
27237 IX86_BUILTIN_PADDUSW,
27238 IX86_BUILTIN_PSUBB,
27239 IX86_BUILTIN_PSUBW,
27240 IX86_BUILTIN_PSUBD,
27241 IX86_BUILTIN_PSUBQ,
27242 IX86_BUILTIN_PSUBSB,
27243 IX86_BUILTIN_PSUBSW,
27244 IX86_BUILTIN_PSUBUSB,
27245 IX86_BUILTIN_PSUBUSW,
27247 IX86_BUILTIN_PAND,
27248 IX86_BUILTIN_PANDN,
27249 IX86_BUILTIN_POR,
27250 IX86_BUILTIN_PXOR,
27252 IX86_BUILTIN_PAVGB,
27253 IX86_BUILTIN_PAVGW,
27255 IX86_BUILTIN_PCMPEQB,
27256 IX86_BUILTIN_PCMPEQW,
27257 IX86_BUILTIN_PCMPEQD,
27258 IX86_BUILTIN_PCMPGTB,
27259 IX86_BUILTIN_PCMPGTW,
27260 IX86_BUILTIN_PCMPGTD,
27262 IX86_BUILTIN_PMADDWD,
27264 IX86_BUILTIN_PMAXSW,
27265 IX86_BUILTIN_PMAXUB,
27266 IX86_BUILTIN_PMINSW,
27267 IX86_BUILTIN_PMINUB,
27269 IX86_BUILTIN_PMULHUW,
27270 IX86_BUILTIN_PMULHW,
27271 IX86_BUILTIN_PMULLW,
27273 IX86_BUILTIN_PSADBW,
27274 IX86_BUILTIN_PSHUFW,
27276 IX86_BUILTIN_PSLLW,
27277 IX86_BUILTIN_PSLLD,
27278 IX86_BUILTIN_PSLLQ,
27279 IX86_BUILTIN_PSRAW,
27280 IX86_BUILTIN_PSRAD,
27281 IX86_BUILTIN_PSRLW,
27282 IX86_BUILTIN_PSRLD,
27283 IX86_BUILTIN_PSRLQ,
27284 IX86_BUILTIN_PSLLWI,
27285 IX86_BUILTIN_PSLLDI,
27286 IX86_BUILTIN_PSLLQI,
27287 IX86_BUILTIN_PSRAWI,
27288 IX86_BUILTIN_PSRADI,
27289 IX86_BUILTIN_PSRLWI,
27290 IX86_BUILTIN_PSRLDI,
27291 IX86_BUILTIN_PSRLQI,
27293 IX86_BUILTIN_PUNPCKHBW,
27294 IX86_BUILTIN_PUNPCKHWD,
27295 IX86_BUILTIN_PUNPCKHDQ,
27296 IX86_BUILTIN_PUNPCKLBW,
27297 IX86_BUILTIN_PUNPCKLWD,
27298 IX86_BUILTIN_PUNPCKLDQ,
27300 IX86_BUILTIN_SHUFPS,
27302 IX86_BUILTIN_RCPPS,
27303 IX86_BUILTIN_RCPSS,
27304 IX86_BUILTIN_RSQRTPS,
27305 IX86_BUILTIN_RSQRTPS_NR,
27306 IX86_BUILTIN_RSQRTSS,
27307 IX86_BUILTIN_RSQRTF,
27308 IX86_BUILTIN_SQRTPS,
27309 IX86_BUILTIN_SQRTPS_NR,
27310 IX86_BUILTIN_SQRTSS,
27312 IX86_BUILTIN_UNPCKHPS,
27313 IX86_BUILTIN_UNPCKLPS,
27315 IX86_BUILTIN_ANDPS,
27316 IX86_BUILTIN_ANDNPS,
27317 IX86_BUILTIN_ORPS,
27318 IX86_BUILTIN_XORPS,
27320 IX86_BUILTIN_EMMS,
27321 IX86_BUILTIN_LDMXCSR,
27322 IX86_BUILTIN_STMXCSR,
27323 IX86_BUILTIN_SFENCE,
27325 IX86_BUILTIN_FXSAVE,
27326 IX86_BUILTIN_FXRSTOR,
27327 IX86_BUILTIN_FXSAVE64,
27328 IX86_BUILTIN_FXRSTOR64,
27330 IX86_BUILTIN_XSAVE,
27331 IX86_BUILTIN_XRSTOR,
27332 IX86_BUILTIN_XSAVE64,
27333 IX86_BUILTIN_XRSTOR64,
27335 IX86_BUILTIN_XSAVEOPT,
27336 IX86_BUILTIN_XSAVEOPT64,
27338 IX86_BUILTIN_XSAVEC,
27339 IX86_BUILTIN_XSAVEC64,
27341 IX86_BUILTIN_XSAVES,
27342 IX86_BUILTIN_XRSTORS,
27343 IX86_BUILTIN_XSAVES64,
27344 IX86_BUILTIN_XRSTORS64,
27346 /* 3DNow! Original */
27347 IX86_BUILTIN_FEMMS,
27348 IX86_BUILTIN_PAVGUSB,
27349 IX86_BUILTIN_PF2ID,
27350 IX86_BUILTIN_PFACC,
27351 IX86_BUILTIN_PFADD,
27352 IX86_BUILTIN_PFCMPEQ,
27353 IX86_BUILTIN_PFCMPGE,
27354 IX86_BUILTIN_PFCMPGT,
27355 IX86_BUILTIN_PFMAX,
27356 IX86_BUILTIN_PFMIN,
27357 IX86_BUILTIN_PFMUL,
27358 IX86_BUILTIN_PFRCP,
27359 IX86_BUILTIN_PFRCPIT1,
27360 IX86_BUILTIN_PFRCPIT2,
27361 IX86_BUILTIN_PFRSQIT1,
27362 IX86_BUILTIN_PFRSQRT,
27363 IX86_BUILTIN_PFSUB,
27364 IX86_BUILTIN_PFSUBR,
27365 IX86_BUILTIN_PI2FD,
27366 IX86_BUILTIN_PMULHRW,
27368 /* 3DNow! Athlon Extensions */
27369 IX86_BUILTIN_PF2IW,
27370 IX86_BUILTIN_PFNACC,
27371 IX86_BUILTIN_PFPNACC,
27372 IX86_BUILTIN_PI2FW,
27373 IX86_BUILTIN_PSWAPDSI,
27374 IX86_BUILTIN_PSWAPDSF,
27376 /* SSE2 */
27377 IX86_BUILTIN_ADDPD,
27378 IX86_BUILTIN_ADDSD,
27379 IX86_BUILTIN_DIVPD,
27380 IX86_BUILTIN_DIVSD,
27381 IX86_BUILTIN_MULPD,
27382 IX86_BUILTIN_MULSD,
27383 IX86_BUILTIN_SUBPD,
27384 IX86_BUILTIN_SUBSD,
27386 IX86_BUILTIN_CMPEQPD,
27387 IX86_BUILTIN_CMPLTPD,
27388 IX86_BUILTIN_CMPLEPD,
27389 IX86_BUILTIN_CMPGTPD,
27390 IX86_BUILTIN_CMPGEPD,
27391 IX86_BUILTIN_CMPNEQPD,
27392 IX86_BUILTIN_CMPNLTPD,
27393 IX86_BUILTIN_CMPNLEPD,
27394 IX86_BUILTIN_CMPNGTPD,
27395 IX86_BUILTIN_CMPNGEPD,
27396 IX86_BUILTIN_CMPORDPD,
27397 IX86_BUILTIN_CMPUNORDPD,
27398 IX86_BUILTIN_CMPEQSD,
27399 IX86_BUILTIN_CMPLTSD,
27400 IX86_BUILTIN_CMPLESD,
27401 IX86_BUILTIN_CMPNEQSD,
27402 IX86_BUILTIN_CMPNLTSD,
27403 IX86_BUILTIN_CMPNLESD,
27404 IX86_BUILTIN_CMPORDSD,
27405 IX86_BUILTIN_CMPUNORDSD,
27407 IX86_BUILTIN_COMIEQSD,
27408 IX86_BUILTIN_COMILTSD,
27409 IX86_BUILTIN_COMILESD,
27410 IX86_BUILTIN_COMIGTSD,
27411 IX86_BUILTIN_COMIGESD,
27412 IX86_BUILTIN_COMINEQSD,
27413 IX86_BUILTIN_UCOMIEQSD,
27414 IX86_BUILTIN_UCOMILTSD,
27415 IX86_BUILTIN_UCOMILESD,
27416 IX86_BUILTIN_UCOMIGTSD,
27417 IX86_BUILTIN_UCOMIGESD,
27418 IX86_BUILTIN_UCOMINEQSD,
27420 IX86_BUILTIN_MAXPD,
27421 IX86_BUILTIN_MAXSD,
27422 IX86_BUILTIN_MINPD,
27423 IX86_BUILTIN_MINSD,
27425 IX86_BUILTIN_ANDPD,
27426 IX86_BUILTIN_ANDNPD,
27427 IX86_BUILTIN_ORPD,
27428 IX86_BUILTIN_XORPD,
27430 IX86_BUILTIN_SQRTPD,
27431 IX86_BUILTIN_SQRTSD,
27433 IX86_BUILTIN_UNPCKHPD,
27434 IX86_BUILTIN_UNPCKLPD,
27436 IX86_BUILTIN_SHUFPD,
27438 IX86_BUILTIN_LOADUPD,
27439 IX86_BUILTIN_STOREUPD,
27440 IX86_BUILTIN_MOVSD,
27442 IX86_BUILTIN_LOADHPD,
27443 IX86_BUILTIN_LOADLPD,
27445 IX86_BUILTIN_CVTDQ2PD,
27446 IX86_BUILTIN_CVTDQ2PS,
27448 IX86_BUILTIN_CVTPD2DQ,
27449 IX86_BUILTIN_CVTPD2PI,
27450 IX86_BUILTIN_CVTPD2PS,
27451 IX86_BUILTIN_CVTTPD2DQ,
27452 IX86_BUILTIN_CVTTPD2PI,
27454 IX86_BUILTIN_CVTPI2PD,
27455 IX86_BUILTIN_CVTSI2SD,
27456 IX86_BUILTIN_CVTSI642SD,
27458 IX86_BUILTIN_CVTSD2SI,
27459 IX86_BUILTIN_CVTSD2SI64,
27460 IX86_BUILTIN_CVTSD2SS,
27461 IX86_BUILTIN_CVTSS2SD,
27462 IX86_BUILTIN_CVTTSD2SI,
27463 IX86_BUILTIN_CVTTSD2SI64,
27465 IX86_BUILTIN_CVTPS2DQ,
27466 IX86_BUILTIN_CVTPS2PD,
27467 IX86_BUILTIN_CVTTPS2DQ,
27469 IX86_BUILTIN_MOVNTI,
27470 IX86_BUILTIN_MOVNTI64,
27471 IX86_BUILTIN_MOVNTPD,
27472 IX86_BUILTIN_MOVNTDQ,
27474 IX86_BUILTIN_MOVQ128,
27476 /* SSE2 MMX */
27477 IX86_BUILTIN_MASKMOVDQU,
27478 IX86_BUILTIN_MOVMSKPD,
27479 IX86_BUILTIN_PMOVMSKB128,
27481 IX86_BUILTIN_PACKSSWB128,
27482 IX86_BUILTIN_PACKSSDW128,
27483 IX86_BUILTIN_PACKUSWB128,
27485 IX86_BUILTIN_PADDB128,
27486 IX86_BUILTIN_PADDW128,
27487 IX86_BUILTIN_PADDD128,
27488 IX86_BUILTIN_PADDQ128,
27489 IX86_BUILTIN_PADDSB128,
27490 IX86_BUILTIN_PADDSW128,
27491 IX86_BUILTIN_PADDUSB128,
27492 IX86_BUILTIN_PADDUSW128,
27493 IX86_BUILTIN_PSUBB128,
27494 IX86_BUILTIN_PSUBW128,
27495 IX86_BUILTIN_PSUBD128,
27496 IX86_BUILTIN_PSUBQ128,
27497 IX86_BUILTIN_PSUBSB128,
27498 IX86_BUILTIN_PSUBSW128,
27499 IX86_BUILTIN_PSUBUSB128,
27500 IX86_BUILTIN_PSUBUSW128,
27502 IX86_BUILTIN_PAND128,
27503 IX86_BUILTIN_PANDN128,
27504 IX86_BUILTIN_POR128,
27505 IX86_BUILTIN_PXOR128,
27507 IX86_BUILTIN_PAVGB128,
27508 IX86_BUILTIN_PAVGW128,
27510 IX86_BUILTIN_PCMPEQB128,
27511 IX86_BUILTIN_PCMPEQW128,
27512 IX86_BUILTIN_PCMPEQD128,
27513 IX86_BUILTIN_PCMPGTB128,
27514 IX86_BUILTIN_PCMPGTW128,
27515 IX86_BUILTIN_PCMPGTD128,
27517 IX86_BUILTIN_PMADDWD128,
27519 IX86_BUILTIN_PMAXSW128,
27520 IX86_BUILTIN_PMAXUB128,
27521 IX86_BUILTIN_PMINSW128,
27522 IX86_BUILTIN_PMINUB128,
27524 IX86_BUILTIN_PMULUDQ,
27525 IX86_BUILTIN_PMULUDQ128,
27526 IX86_BUILTIN_PMULHUW128,
27527 IX86_BUILTIN_PMULHW128,
27528 IX86_BUILTIN_PMULLW128,
27530 IX86_BUILTIN_PSADBW128,
27531 IX86_BUILTIN_PSHUFHW,
27532 IX86_BUILTIN_PSHUFLW,
27533 IX86_BUILTIN_PSHUFD,
27535 IX86_BUILTIN_PSLLDQI128,
27536 IX86_BUILTIN_PSLLWI128,
27537 IX86_BUILTIN_PSLLDI128,
27538 IX86_BUILTIN_PSLLQI128,
27539 IX86_BUILTIN_PSRAWI128,
27540 IX86_BUILTIN_PSRADI128,
27541 IX86_BUILTIN_PSRLDQI128,
27542 IX86_BUILTIN_PSRLWI128,
27543 IX86_BUILTIN_PSRLDI128,
27544 IX86_BUILTIN_PSRLQI128,
27546 IX86_BUILTIN_PSLLDQ128,
27547 IX86_BUILTIN_PSLLW128,
27548 IX86_BUILTIN_PSLLD128,
27549 IX86_BUILTIN_PSLLQ128,
27550 IX86_BUILTIN_PSRAW128,
27551 IX86_BUILTIN_PSRAD128,
27552 IX86_BUILTIN_PSRLW128,
27553 IX86_BUILTIN_PSRLD128,
27554 IX86_BUILTIN_PSRLQ128,
27556 IX86_BUILTIN_PUNPCKHBW128,
27557 IX86_BUILTIN_PUNPCKHWD128,
27558 IX86_BUILTIN_PUNPCKHDQ128,
27559 IX86_BUILTIN_PUNPCKHQDQ128,
27560 IX86_BUILTIN_PUNPCKLBW128,
27561 IX86_BUILTIN_PUNPCKLWD128,
27562 IX86_BUILTIN_PUNPCKLDQ128,
27563 IX86_BUILTIN_PUNPCKLQDQ128,
27565 IX86_BUILTIN_CLFLUSH,
27566 IX86_BUILTIN_MFENCE,
27567 IX86_BUILTIN_LFENCE,
27568 IX86_BUILTIN_PAUSE,
27570 IX86_BUILTIN_FNSTENV,
27571 IX86_BUILTIN_FLDENV,
27572 IX86_BUILTIN_FNSTSW,
27573 IX86_BUILTIN_FNCLEX,
27575 IX86_BUILTIN_BSRSI,
27576 IX86_BUILTIN_BSRDI,
27577 IX86_BUILTIN_RDPMC,
27578 IX86_BUILTIN_RDTSC,
27579 IX86_BUILTIN_RDTSCP,
27580 IX86_BUILTIN_ROLQI,
27581 IX86_BUILTIN_ROLHI,
27582 IX86_BUILTIN_RORQI,
27583 IX86_BUILTIN_RORHI,
27585 /* SSE3. */
27586 IX86_BUILTIN_ADDSUBPS,
27587 IX86_BUILTIN_HADDPS,
27588 IX86_BUILTIN_HSUBPS,
27589 IX86_BUILTIN_MOVSHDUP,
27590 IX86_BUILTIN_MOVSLDUP,
27591 IX86_BUILTIN_ADDSUBPD,
27592 IX86_BUILTIN_HADDPD,
27593 IX86_BUILTIN_HSUBPD,
27594 IX86_BUILTIN_LDDQU,
27596 IX86_BUILTIN_MONITOR,
27597 IX86_BUILTIN_MWAIT,
27599 /* SSSE3. */
27600 IX86_BUILTIN_PHADDW,
27601 IX86_BUILTIN_PHADDD,
27602 IX86_BUILTIN_PHADDSW,
27603 IX86_BUILTIN_PHSUBW,
27604 IX86_BUILTIN_PHSUBD,
27605 IX86_BUILTIN_PHSUBSW,
27606 IX86_BUILTIN_PMADDUBSW,
27607 IX86_BUILTIN_PMULHRSW,
27608 IX86_BUILTIN_PSHUFB,
27609 IX86_BUILTIN_PSIGNB,
27610 IX86_BUILTIN_PSIGNW,
27611 IX86_BUILTIN_PSIGND,
27612 IX86_BUILTIN_PALIGNR,
27613 IX86_BUILTIN_PABSB,
27614 IX86_BUILTIN_PABSW,
27615 IX86_BUILTIN_PABSD,
27617 IX86_BUILTIN_PHADDW128,
27618 IX86_BUILTIN_PHADDD128,
27619 IX86_BUILTIN_PHADDSW128,
27620 IX86_BUILTIN_PHSUBW128,
27621 IX86_BUILTIN_PHSUBD128,
27622 IX86_BUILTIN_PHSUBSW128,
27623 IX86_BUILTIN_PMADDUBSW128,
27624 IX86_BUILTIN_PMULHRSW128,
27625 IX86_BUILTIN_PSHUFB128,
27626 IX86_BUILTIN_PSIGNB128,
27627 IX86_BUILTIN_PSIGNW128,
27628 IX86_BUILTIN_PSIGND128,
27629 IX86_BUILTIN_PALIGNR128,
27630 IX86_BUILTIN_PABSB128,
27631 IX86_BUILTIN_PABSW128,
27632 IX86_BUILTIN_PABSD128,
27634 /* AMDFAM10 - SSE4A New Instructions. */
27635 IX86_BUILTIN_MOVNTSD,
27636 IX86_BUILTIN_MOVNTSS,
27637 IX86_BUILTIN_EXTRQI,
27638 IX86_BUILTIN_EXTRQ,
27639 IX86_BUILTIN_INSERTQI,
27640 IX86_BUILTIN_INSERTQ,
27642 /* SSE4.1. */
27643 IX86_BUILTIN_BLENDPD,
27644 IX86_BUILTIN_BLENDPS,
27645 IX86_BUILTIN_BLENDVPD,
27646 IX86_BUILTIN_BLENDVPS,
27647 IX86_BUILTIN_PBLENDVB128,
27648 IX86_BUILTIN_PBLENDW128,
27650 IX86_BUILTIN_DPPD,
27651 IX86_BUILTIN_DPPS,
27653 IX86_BUILTIN_INSERTPS128,
27655 IX86_BUILTIN_MOVNTDQA,
27656 IX86_BUILTIN_MPSADBW128,
27657 IX86_BUILTIN_PACKUSDW128,
27658 IX86_BUILTIN_PCMPEQQ,
27659 IX86_BUILTIN_PHMINPOSUW128,
27661 IX86_BUILTIN_PMAXSB128,
27662 IX86_BUILTIN_PMAXSD128,
27663 IX86_BUILTIN_PMAXUD128,
27664 IX86_BUILTIN_PMAXUW128,
27666 IX86_BUILTIN_PMINSB128,
27667 IX86_BUILTIN_PMINSD128,
27668 IX86_BUILTIN_PMINUD128,
27669 IX86_BUILTIN_PMINUW128,
27671 IX86_BUILTIN_PMOVSXBW128,
27672 IX86_BUILTIN_PMOVSXBD128,
27673 IX86_BUILTIN_PMOVSXBQ128,
27674 IX86_BUILTIN_PMOVSXWD128,
27675 IX86_BUILTIN_PMOVSXWQ128,
27676 IX86_BUILTIN_PMOVSXDQ128,
27678 IX86_BUILTIN_PMOVZXBW128,
27679 IX86_BUILTIN_PMOVZXBD128,
27680 IX86_BUILTIN_PMOVZXBQ128,
27681 IX86_BUILTIN_PMOVZXWD128,
27682 IX86_BUILTIN_PMOVZXWQ128,
27683 IX86_BUILTIN_PMOVZXDQ128,
27685 IX86_BUILTIN_PMULDQ128,
27686 IX86_BUILTIN_PMULLD128,
27688 IX86_BUILTIN_ROUNDSD,
27689 IX86_BUILTIN_ROUNDSS,
27691 IX86_BUILTIN_ROUNDPD,
27692 IX86_BUILTIN_ROUNDPS,
27694 IX86_BUILTIN_FLOORPD,
27695 IX86_BUILTIN_CEILPD,
27696 IX86_BUILTIN_TRUNCPD,
27697 IX86_BUILTIN_RINTPD,
27698 IX86_BUILTIN_ROUNDPD_AZ,
27700 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27701 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27702 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27704 IX86_BUILTIN_FLOORPS,
27705 IX86_BUILTIN_CEILPS,
27706 IX86_BUILTIN_TRUNCPS,
27707 IX86_BUILTIN_RINTPS,
27708 IX86_BUILTIN_ROUNDPS_AZ,
27710 IX86_BUILTIN_FLOORPS_SFIX,
27711 IX86_BUILTIN_CEILPS_SFIX,
27712 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27714 IX86_BUILTIN_PTESTZ,
27715 IX86_BUILTIN_PTESTC,
27716 IX86_BUILTIN_PTESTNZC,
27718 IX86_BUILTIN_VEC_INIT_V2SI,
27719 IX86_BUILTIN_VEC_INIT_V4HI,
27720 IX86_BUILTIN_VEC_INIT_V8QI,
27721 IX86_BUILTIN_VEC_EXT_V2DF,
27722 IX86_BUILTIN_VEC_EXT_V2DI,
27723 IX86_BUILTIN_VEC_EXT_V4SF,
27724 IX86_BUILTIN_VEC_EXT_V4SI,
27725 IX86_BUILTIN_VEC_EXT_V8HI,
27726 IX86_BUILTIN_VEC_EXT_V2SI,
27727 IX86_BUILTIN_VEC_EXT_V4HI,
27728 IX86_BUILTIN_VEC_EXT_V16QI,
27729 IX86_BUILTIN_VEC_SET_V2DI,
27730 IX86_BUILTIN_VEC_SET_V4SF,
27731 IX86_BUILTIN_VEC_SET_V4SI,
27732 IX86_BUILTIN_VEC_SET_V8HI,
27733 IX86_BUILTIN_VEC_SET_V4HI,
27734 IX86_BUILTIN_VEC_SET_V16QI,
27736 IX86_BUILTIN_VEC_PACK_SFIX,
27737 IX86_BUILTIN_VEC_PACK_SFIX256,
27739 /* SSE4.2. */
27740 IX86_BUILTIN_CRC32QI,
27741 IX86_BUILTIN_CRC32HI,
27742 IX86_BUILTIN_CRC32SI,
27743 IX86_BUILTIN_CRC32DI,
27745 IX86_BUILTIN_PCMPESTRI128,
27746 IX86_BUILTIN_PCMPESTRM128,
27747 IX86_BUILTIN_PCMPESTRA128,
27748 IX86_BUILTIN_PCMPESTRC128,
27749 IX86_BUILTIN_PCMPESTRO128,
27750 IX86_BUILTIN_PCMPESTRS128,
27751 IX86_BUILTIN_PCMPESTRZ128,
27752 IX86_BUILTIN_PCMPISTRI128,
27753 IX86_BUILTIN_PCMPISTRM128,
27754 IX86_BUILTIN_PCMPISTRA128,
27755 IX86_BUILTIN_PCMPISTRC128,
27756 IX86_BUILTIN_PCMPISTRO128,
27757 IX86_BUILTIN_PCMPISTRS128,
27758 IX86_BUILTIN_PCMPISTRZ128,
27760 IX86_BUILTIN_PCMPGTQ,
27762 /* AES instructions */
27763 IX86_BUILTIN_AESENC128,
27764 IX86_BUILTIN_AESENCLAST128,
27765 IX86_BUILTIN_AESDEC128,
27766 IX86_BUILTIN_AESDECLAST128,
27767 IX86_BUILTIN_AESIMC128,
27768 IX86_BUILTIN_AESKEYGENASSIST128,
27770 /* PCLMUL instruction */
27771 IX86_BUILTIN_PCLMULQDQ128,
27773 /* AVX */
27774 IX86_BUILTIN_ADDPD256,
27775 IX86_BUILTIN_ADDPS256,
27776 IX86_BUILTIN_ADDSUBPD256,
27777 IX86_BUILTIN_ADDSUBPS256,
27778 IX86_BUILTIN_ANDPD256,
27779 IX86_BUILTIN_ANDPS256,
27780 IX86_BUILTIN_ANDNPD256,
27781 IX86_BUILTIN_ANDNPS256,
27782 IX86_BUILTIN_BLENDPD256,
27783 IX86_BUILTIN_BLENDPS256,
27784 IX86_BUILTIN_BLENDVPD256,
27785 IX86_BUILTIN_BLENDVPS256,
27786 IX86_BUILTIN_DIVPD256,
27787 IX86_BUILTIN_DIVPS256,
27788 IX86_BUILTIN_DPPS256,
27789 IX86_BUILTIN_HADDPD256,
27790 IX86_BUILTIN_HADDPS256,
27791 IX86_BUILTIN_HSUBPD256,
27792 IX86_BUILTIN_HSUBPS256,
27793 IX86_BUILTIN_MAXPD256,
27794 IX86_BUILTIN_MAXPS256,
27795 IX86_BUILTIN_MINPD256,
27796 IX86_BUILTIN_MINPS256,
27797 IX86_BUILTIN_MULPD256,
27798 IX86_BUILTIN_MULPS256,
27799 IX86_BUILTIN_ORPD256,
27800 IX86_BUILTIN_ORPS256,
27801 IX86_BUILTIN_SHUFPD256,
27802 IX86_BUILTIN_SHUFPS256,
27803 IX86_BUILTIN_SUBPD256,
27804 IX86_BUILTIN_SUBPS256,
27805 IX86_BUILTIN_XORPD256,
27806 IX86_BUILTIN_XORPS256,
27807 IX86_BUILTIN_CMPSD,
27808 IX86_BUILTIN_CMPSS,
27809 IX86_BUILTIN_CMPPD,
27810 IX86_BUILTIN_CMPPS,
27811 IX86_BUILTIN_CMPPD256,
27812 IX86_BUILTIN_CMPPS256,
27813 IX86_BUILTIN_CVTDQ2PD256,
27814 IX86_BUILTIN_CVTDQ2PS256,
27815 IX86_BUILTIN_CVTPD2PS256,
27816 IX86_BUILTIN_CVTPS2DQ256,
27817 IX86_BUILTIN_CVTPS2PD256,
27818 IX86_BUILTIN_CVTTPD2DQ256,
27819 IX86_BUILTIN_CVTPD2DQ256,
27820 IX86_BUILTIN_CVTTPS2DQ256,
27821 IX86_BUILTIN_EXTRACTF128PD256,
27822 IX86_BUILTIN_EXTRACTF128PS256,
27823 IX86_BUILTIN_EXTRACTF128SI256,
27824 IX86_BUILTIN_VZEROALL,
27825 IX86_BUILTIN_VZEROUPPER,
27826 IX86_BUILTIN_VPERMILVARPD,
27827 IX86_BUILTIN_VPERMILVARPS,
27828 IX86_BUILTIN_VPERMILVARPD256,
27829 IX86_BUILTIN_VPERMILVARPS256,
27830 IX86_BUILTIN_VPERMILPD,
27831 IX86_BUILTIN_VPERMILPS,
27832 IX86_BUILTIN_VPERMILPD256,
27833 IX86_BUILTIN_VPERMILPS256,
27834 IX86_BUILTIN_VPERMIL2PD,
27835 IX86_BUILTIN_VPERMIL2PS,
27836 IX86_BUILTIN_VPERMIL2PD256,
27837 IX86_BUILTIN_VPERMIL2PS256,
27838 IX86_BUILTIN_VPERM2F128PD256,
27839 IX86_BUILTIN_VPERM2F128PS256,
27840 IX86_BUILTIN_VPERM2F128SI256,
27841 IX86_BUILTIN_VBROADCASTSS,
27842 IX86_BUILTIN_VBROADCASTSD256,
27843 IX86_BUILTIN_VBROADCASTSS256,
27844 IX86_BUILTIN_VBROADCASTPD256,
27845 IX86_BUILTIN_VBROADCASTPS256,
27846 IX86_BUILTIN_VINSERTF128PD256,
27847 IX86_BUILTIN_VINSERTF128PS256,
27848 IX86_BUILTIN_VINSERTF128SI256,
27849 IX86_BUILTIN_LOADUPD256,
27850 IX86_BUILTIN_LOADUPS256,
27851 IX86_BUILTIN_STOREUPD256,
27852 IX86_BUILTIN_STOREUPS256,
27853 IX86_BUILTIN_LDDQU256,
27854 IX86_BUILTIN_MOVNTDQ256,
27855 IX86_BUILTIN_MOVNTPD256,
27856 IX86_BUILTIN_MOVNTPS256,
27857 IX86_BUILTIN_LOADDQU256,
27858 IX86_BUILTIN_STOREDQU256,
27859 IX86_BUILTIN_MASKLOADPD,
27860 IX86_BUILTIN_MASKLOADPS,
27861 IX86_BUILTIN_MASKSTOREPD,
27862 IX86_BUILTIN_MASKSTOREPS,
27863 IX86_BUILTIN_MASKLOADPD256,
27864 IX86_BUILTIN_MASKLOADPS256,
27865 IX86_BUILTIN_MASKSTOREPD256,
27866 IX86_BUILTIN_MASKSTOREPS256,
27867 IX86_BUILTIN_MOVSHDUP256,
27868 IX86_BUILTIN_MOVSLDUP256,
27869 IX86_BUILTIN_MOVDDUP256,
27871 IX86_BUILTIN_SQRTPD256,
27872 IX86_BUILTIN_SQRTPS256,
27873 IX86_BUILTIN_SQRTPS_NR256,
27874 IX86_BUILTIN_RSQRTPS256,
27875 IX86_BUILTIN_RSQRTPS_NR256,
27877 IX86_BUILTIN_RCPPS256,
27879 IX86_BUILTIN_ROUNDPD256,
27880 IX86_BUILTIN_ROUNDPS256,
27882 IX86_BUILTIN_FLOORPD256,
27883 IX86_BUILTIN_CEILPD256,
27884 IX86_BUILTIN_TRUNCPD256,
27885 IX86_BUILTIN_RINTPD256,
27886 IX86_BUILTIN_ROUNDPD_AZ256,
27888 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
27889 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
27890 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
27892 IX86_BUILTIN_FLOORPS256,
27893 IX86_BUILTIN_CEILPS256,
27894 IX86_BUILTIN_TRUNCPS256,
27895 IX86_BUILTIN_RINTPS256,
27896 IX86_BUILTIN_ROUNDPS_AZ256,
27898 IX86_BUILTIN_FLOORPS_SFIX256,
27899 IX86_BUILTIN_CEILPS_SFIX256,
27900 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
27902 IX86_BUILTIN_UNPCKHPD256,
27903 IX86_BUILTIN_UNPCKLPD256,
27904 IX86_BUILTIN_UNPCKHPS256,
27905 IX86_BUILTIN_UNPCKLPS256,
27907 IX86_BUILTIN_SI256_SI,
27908 IX86_BUILTIN_PS256_PS,
27909 IX86_BUILTIN_PD256_PD,
27910 IX86_BUILTIN_SI_SI256,
27911 IX86_BUILTIN_PS_PS256,
27912 IX86_BUILTIN_PD_PD256,
27914 IX86_BUILTIN_VTESTZPD,
27915 IX86_BUILTIN_VTESTCPD,
27916 IX86_BUILTIN_VTESTNZCPD,
27917 IX86_BUILTIN_VTESTZPS,
27918 IX86_BUILTIN_VTESTCPS,
27919 IX86_BUILTIN_VTESTNZCPS,
27920 IX86_BUILTIN_VTESTZPD256,
27921 IX86_BUILTIN_VTESTCPD256,
27922 IX86_BUILTIN_VTESTNZCPD256,
27923 IX86_BUILTIN_VTESTZPS256,
27924 IX86_BUILTIN_VTESTCPS256,
27925 IX86_BUILTIN_VTESTNZCPS256,
27926 IX86_BUILTIN_PTESTZ256,
27927 IX86_BUILTIN_PTESTC256,
27928 IX86_BUILTIN_PTESTNZC256,
27930 IX86_BUILTIN_MOVMSKPD256,
27931 IX86_BUILTIN_MOVMSKPS256,
27933 /* AVX2 */
27934 IX86_BUILTIN_MPSADBW256,
27935 IX86_BUILTIN_PABSB256,
27936 IX86_BUILTIN_PABSW256,
27937 IX86_BUILTIN_PABSD256,
27938 IX86_BUILTIN_PACKSSDW256,
27939 IX86_BUILTIN_PACKSSWB256,
27940 IX86_BUILTIN_PACKUSDW256,
27941 IX86_BUILTIN_PACKUSWB256,
27942 IX86_BUILTIN_PADDB256,
27943 IX86_BUILTIN_PADDW256,
27944 IX86_BUILTIN_PADDD256,
27945 IX86_BUILTIN_PADDQ256,
27946 IX86_BUILTIN_PADDSB256,
27947 IX86_BUILTIN_PADDSW256,
27948 IX86_BUILTIN_PADDUSB256,
27949 IX86_BUILTIN_PADDUSW256,
27950 IX86_BUILTIN_PALIGNR256,
27951 IX86_BUILTIN_AND256I,
27952 IX86_BUILTIN_ANDNOT256I,
27953 IX86_BUILTIN_PAVGB256,
27954 IX86_BUILTIN_PAVGW256,
27955 IX86_BUILTIN_PBLENDVB256,
27956 IX86_BUILTIN_PBLENDVW256,
27957 IX86_BUILTIN_PCMPEQB256,
27958 IX86_BUILTIN_PCMPEQW256,
27959 IX86_BUILTIN_PCMPEQD256,
27960 IX86_BUILTIN_PCMPEQQ256,
27961 IX86_BUILTIN_PCMPGTB256,
27962 IX86_BUILTIN_PCMPGTW256,
27963 IX86_BUILTIN_PCMPGTD256,
27964 IX86_BUILTIN_PCMPGTQ256,
27965 IX86_BUILTIN_PHADDW256,
27966 IX86_BUILTIN_PHADDD256,
27967 IX86_BUILTIN_PHADDSW256,
27968 IX86_BUILTIN_PHSUBW256,
27969 IX86_BUILTIN_PHSUBD256,
27970 IX86_BUILTIN_PHSUBSW256,
27971 IX86_BUILTIN_PMADDUBSW256,
27972 IX86_BUILTIN_PMADDWD256,
27973 IX86_BUILTIN_PMAXSB256,
27974 IX86_BUILTIN_PMAXSW256,
27975 IX86_BUILTIN_PMAXSD256,
27976 IX86_BUILTIN_PMAXUB256,
27977 IX86_BUILTIN_PMAXUW256,
27978 IX86_BUILTIN_PMAXUD256,
27979 IX86_BUILTIN_PMINSB256,
27980 IX86_BUILTIN_PMINSW256,
27981 IX86_BUILTIN_PMINSD256,
27982 IX86_BUILTIN_PMINUB256,
27983 IX86_BUILTIN_PMINUW256,
27984 IX86_BUILTIN_PMINUD256,
27985 IX86_BUILTIN_PMOVMSKB256,
27986 IX86_BUILTIN_PMOVSXBW256,
27987 IX86_BUILTIN_PMOVSXBD256,
27988 IX86_BUILTIN_PMOVSXBQ256,
27989 IX86_BUILTIN_PMOVSXWD256,
27990 IX86_BUILTIN_PMOVSXWQ256,
27991 IX86_BUILTIN_PMOVSXDQ256,
27992 IX86_BUILTIN_PMOVZXBW256,
27993 IX86_BUILTIN_PMOVZXBD256,
27994 IX86_BUILTIN_PMOVZXBQ256,
27995 IX86_BUILTIN_PMOVZXWD256,
27996 IX86_BUILTIN_PMOVZXWQ256,
27997 IX86_BUILTIN_PMOVZXDQ256,
27998 IX86_BUILTIN_PMULDQ256,
27999 IX86_BUILTIN_PMULHRSW256,
28000 IX86_BUILTIN_PMULHUW256,
28001 IX86_BUILTIN_PMULHW256,
28002 IX86_BUILTIN_PMULLW256,
28003 IX86_BUILTIN_PMULLD256,
28004 IX86_BUILTIN_PMULUDQ256,
28005 IX86_BUILTIN_POR256,
28006 IX86_BUILTIN_PSADBW256,
28007 IX86_BUILTIN_PSHUFB256,
28008 IX86_BUILTIN_PSHUFD256,
28009 IX86_BUILTIN_PSHUFHW256,
28010 IX86_BUILTIN_PSHUFLW256,
28011 IX86_BUILTIN_PSIGNB256,
28012 IX86_BUILTIN_PSIGNW256,
28013 IX86_BUILTIN_PSIGND256,
28014 IX86_BUILTIN_PSLLDQI256,
28015 IX86_BUILTIN_PSLLWI256,
28016 IX86_BUILTIN_PSLLW256,
28017 IX86_BUILTIN_PSLLDI256,
28018 IX86_BUILTIN_PSLLD256,
28019 IX86_BUILTIN_PSLLQI256,
28020 IX86_BUILTIN_PSLLQ256,
28021 IX86_BUILTIN_PSRAWI256,
28022 IX86_BUILTIN_PSRAW256,
28023 IX86_BUILTIN_PSRADI256,
28024 IX86_BUILTIN_PSRAD256,
28025 IX86_BUILTIN_PSRLDQI256,
28026 IX86_BUILTIN_PSRLWI256,
28027 IX86_BUILTIN_PSRLW256,
28028 IX86_BUILTIN_PSRLDI256,
28029 IX86_BUILTIN_PSRLD256,
28030 IX86_BUILTIN_PSRLQI256,
28031 IX86_BUILTIN_PSRLQ256,
28032 IX86_BUILTIN_PSUBB256,
28033 IX86_BUILTIN_PSUBW256,
28034 IX86_BUILTIN_PSUBD256,
28035 IX86_BUILTIN_PSUBQ256,
28036 IX86_BUILTIN_PSUBSB256,
28037 IX86_BUILTIN_PSUBSW256,
28038 IX86_BUILTIN_PSUBUSB256,
28039 IX86_BUILTIN_PSUBUSW256,
28040 IX86_BUILTIN_PUNPCKHBW256,
28041 IX86_BUILTIN_PUNPCKHWD256,
28042 IX86_BUILTIN_PUNPCKHDQ256,
28043 IX86_BUILTIN_PUNPCKHQDQ256,
28044 IX86_BUILTIN_PUNPCKLBW256,
28045 IX86_BUILTIN_PUNPCKLWD256,
28046 IX86_BUILTIN_PUNPCKLDQ256,
28047 IX86_BUILTIN_PUNPCKLQDQ256,
28048 IX86_BUILTIN_PXOR256,
28049 IX86_BUILTIN_MOVNTDQA256,
28050 IX86_BUILTIN_VBROADCASTSS_PS,
28051 IX86_BUILTIN_VBROADCASTSS_PS256,
28052 IX86_BUILTIN_VBROADCASTSD_PD256,
28053 IX86_BUILTIN_VBROADCASTSI256,
28054 IX86_BUILTIN_PBLENDD256,
28055 IX86_BUILTIN_PBLENDD128,
28056 IX86_BUILTIN_PBROADCASTB256,
28057 IX86_BUILTIN_PBROADCASTW256,
28058 IX86_BUILTIN_PBROADCASTD256,
28059 IX86_BUILTIN_PBROADCASTQ256,
28060 IX86_BUILTIN_PBROADCASTB128,
28061 IX86_BUILTIN_PBROADCASTW128,
28062 IX86_BUILTIN_PBROADCASTD128,
28063 IX86_BUILTIN_PBROADCASTQ128,
28064 IX86_BUILTIN_VPERMVARSI256,
28065 IX86_BUILTIN_VPERMDF256,
28066 IX86_BUILTIN_VPERMVARSF256,
28067 IX86_BUILTIN_VPERMDI256,
28068 IX86_BUILTIN_VPERMTI256,
28069 IX86_BUILTIN_VEXTRACT128I256,
28070 IX86_BUILTIN_VINSERT128I256,
28071 IX86_BUILTIN_MASKLOADD,
28072 IX86_BUILTIN_MASKLOADQ,
28073 IX86_BUILTIN_MASKLOADD256,
28074 IX86_BUILTIN_MASKLOADQ256,
28075 IX86_BUILTIN_MASKSTORED,
28076 IX86_BUILTIN_MASKSTOREQ,
28077 IX86_BUILTIN_MASKSTORED256,
28078 IX86_BUILTIN_MASKSTOREQ256,
28079 IX86_BUILTIN_PSLLVV4DI,
28080 IX86_BUILTIN_PSLLVV2DI,
28081 IX86_BUILTIN_PSLLVV8SI,
28082 IX86_BUILTIN_PSLLVV4SI,
28083 IX86_BUILTIN_PSRAVV8SI,
28084 IX86_BUILTIN_PSRAVV4SI,
28085 IX86_BUILTIN_PSRLVV4DI,
28086 IX86_BUILTIN_PSRLVV2DI,
28087 IX86_BUILTIN_PSRLVV8SI,
28088 IX86_BUILTIN_PSRLVV4SI,
28090 IX86_BUILTIN_GATHERSIV2DF,
28091 IX86_BUILTIN_GATHERSIV4DF,
28092 IX86_BUILTIN_GATHERDIV2DF,
28093 IX86_BUILTIN_GATHERDIV4DF,
28094 IX86_BUILTIN_GATHERSIV4SF,
28095 IX86_BUILTIN_GATHERSIV8SF,
28096 IX86_BUILTIN_GATHERDIV4SF,
28097 IX86_BUILTIN_GATHERDIV8SF,
28098 IX86_BUILTIN_GATHERSIV2DI,
28099 IX86_BUILTIN_GATHERSIV4DI,
28100 IX86_BUILTIN_GATHERDIV2DI,
28101 IX86_BUILTIN_GATHERDIV4DI,
28102 IX86_BUILTIN_GATHERSIV4SI,
28103 IX86_BUILTIN_GATHERSIV8SI,
28104 IX86_BUILTIN_GATHERDIV4SI,
28105 IX86_BUILTIN_GATHERDIV8SI,
28107 /* AVX512F */
28108 IX86_BUILTIN_ADDPD512,
28109 IX86_BUILTIN_ADDPS512,
28110 IX86_BUILTIN_ADDSD_ROUND,
28111 IX86_BUILTIN_ADDSS_ROUND,
28112 IX86_BUILTIN_ALIGND512,
28113 IX86_BUILTIN_ALIGNQ512,
28114 IX86_BUILTIN_BLENDMD512,
28115 IX86_BUILTIN_BLENDMPD512,
28116 IX86_BUILTIN_BLENDMPS512,
28117 IX86_BUILTIN_BLENDMQ512,
28118 IX86_BUILTIN_BROADCASTF32X4_512,
28119 IX86_BUILTIN_BROADCASTF64X4_512,
28120 IX86_BUILTIN_BROADCASTI32X4_512,
28121 IX86_BUILTIN_BROADCASTI64X4_512,
28122 IX86_BUILTIN_BROADCASTSD512,
28123 IX86_BUILTIN_BROADCASTSS512,
28124 IX86_BUILTIN_CMPD512,
28125 IX86_BUILTIN_CMPPD512,
28126 IX86_BUILTIN_CMPPS512,
28127 IX86_BUILTIN_CMPQ512,
28128 IX86_BUILTIN_CMPSD_MASK,
28129 IX86_BUILTIN_CMPSS_MASK,
28130 IX86_BUILTIN_COMIDF,
28131 IX86_BUILTIN_COMISF,
28132 IX86_BUILTIN_COMPRESSPD512,
28133 IX86_BUILTIN_COMPRESSPDSTORE512,
28134 IX86_BUILTIN_COMPRESSPS512,
28135 IX86_BUILTIN_COMPRESSPSSTORE512,
28136 IX86_BUILTIN_CVTDQ2PD512,
28137 IX86_BUILTIN_CVTDQ2PS512,
28138 IX86_BUILTIN_CVTPD2DQ512,
28139 IX86_BUILTIN_CVTPD2PS512,
28140 IX86_BUILTIN_CVTPD2UDQ512,
28141 IX86_BUILTIN_CVTPH2PS512,
28142 IX86_BUILTIN_CVTPS2DQ512,
28143 IX86_BUILTIN_CVTPS2PD512,
28144 IX86_BUILTIN_CVTPS2PH512,
28145 IX86_BUILTIN_CVTPS2UDQ512,
28146 IX86_BUILTIN_CVTSD2SS_ROUND,
28147 IX86_BUILTIN_CVTSI2SD64,
28148 IX86_BUILTIN_CVTSI2SS32,
28149 IX86_BUILTIN_CVTSI2SS64,
28150 IX86_BUILTIN_CVTSS2SD_ROUND,
28151 IX86_BUILTIN_CVTTPD2DQ512,
28152 IX86_BUILTIN_CVTTPD2UDQ512,
28153 IX86_BUILTIN_CVTTPS2DQ512,
28154 IX86_BUILTIN_CVTTPS2UDQ512,
28155 IX86_BUILTIN_CVTUDQ2PD512,
28156 IX86_BUILTIN_CVTUDQ2PS512,
28157 IX86_BUILTIN_CVTUSI2SD32,
28158 IX86_BUILTIN_CVTUSI2SD64,
28159 IX86_BUILTIN_CVTUSI2SS32,
28160 IX86_BUILTIN_CVTUSI2SS64,
28161 IX86_BUILTIN_DIVPD512,
28162 IX86_BUILTIN_DIVPS512,
28163 IX86_BUILTIN_DIVSD_ROUND,
28164 IX86_BUILTIN_DIVSS_ROUND,
28165 IX86_BUILTIN_EXPANDPD512,
28166 IX86_BUILTIN_EXPANDPD512Z,
28167 IX86_BUILTIN_EXPANDPDLOAD512,
28168 IX86_BUILTIN_EXPANDPDLOAD512Z,
28169 IX86_BUILTIN_EXPANDPS512,
28170 IX86_BUILTIN_EXPANDPS512Z,
28171 IX86_BUILTIN_EXPANDPSLOAD512,
28172 IX86_BUILTIN_EXPANDPSLOAD512Z,
28173 IX86_BUILTIN_EXTRACTF32X4,
28174 IX86_BUILTIN_EXTRACTF64X4,
28175 IX86_BUILTIN_EXTRACTI32X4,
28176 IX86_BUILTIN_EXTRACTI64X4,
28177 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28178 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28179 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28180 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28181 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28182 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28183 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28184 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28185 IX86_BUILTIN_GETEXPPD512,
28186 IX86_BUILTIN_GETEXPPS512,
28187 IX86_BUILTIN_GETEXPSD128,
28188 IX86_BUILTIN_GETEXPSS128,
28189 IX86_BUILTIN_GETMANTPD512,
28190 IX86_BUILTIN_GETMANTPS512,
28191 IX86_BUILTIN_GETMANTSD128,
28192 IX86_BUILTIN_GETMANTSS128,
28193 IX86_BUILTIN_INSERTF32X4,
28194 IX86_BUILTIN_INSERTF64X4,
28195 IX86_BUILTIN_INSERTI32X4,
28196 IX86_BUILTIN_INSERTI64X4,
28197 IX86_BUILTIN_LOADAPD512,
28198 IX86_BUILTIN_LOADAPS512,
28199 IX86_BUILTIN_LOADDQUDI512,
28200 IX86_BUILTIN_LOADDQUSI512,
28201 IX86_BUILTIN_LOADUPD512,
28202 IX86_BUILTIN_LOADUPS512,
28203 IX86_BUILTIN_MAXPD512,
28204 IX86_BUILTIN_MAXPS512,
28205 IX86_BUILTIN_MAXSD_ROUND,
28206 IX86_BUILTIN_MAXSS_ROUND,
28207 IX86_BUILTIN_MINPD512,
28208 IX86_BUILTIN_MINPS512,
28209 IX86_BUILTIN_MINSD_ROUND,
28210 IX86_BUILTIN_MINSS_ROUND,
28211 IX86_BUILTIN_MOVAPD512,
28212 IX86_BUILTIN_MOVAPS512,
28213 IX86_BUILTIN_MOVDDUP512,
28214 IX86_BUILTIN_MOVDQA32LOAD512,
28215 IX86_BUILTIN_MOVDQA32STORE512,
28216 IX86_BUILTIN_MOVDQA32_512,
28217 IX86_BUILTIN_MOVDQA64LOAD512,
28218 IX86_BUILTIN_MOVDQA64STORE512,
28219 IX86_BUILTIN_MOVDQA64_512,
28220 IX86_BUILTIN_MOVNTDQ512,
28221 IX86_BUILTIN_MOVNTDQA512,
28222 IX86_BUILTIN_MOVNTPD512,
28223 IX86_BUILTIN_MOVNTPS512,
28224 IX86_BUILTIN_MOVSHDUP512,
28225 IX86_BUILTIN_MOVSLDUP512,
28226 IX86_BUILTIN_MULPD512,
28227 IX86_BUILTIN_MULPS512,
28228 IX86_BUILTIN_MULSD_ROUND,
28229 IX86_BUILTIN_MULSS_ROUND,
28230 IX86_BUILTIN_PABSD512,
28231 IX86_BUILTIN_PABSQ512,
28232 IX86_BUILTIN_PADDD512,
28233 IX86_BUILTIN_PADDQ512,
28234 IX86_BUILTIN_PANDD512,
28235 IX86_BUILTIN_PANDND512,
28236 IX86_BUILTIN_PANDNQ512,
28237 IX86_BUILTIN_PANDQ512,
28238 IX86_BUILTIN_PBROADCASTD512,
28239 IX86_BUILTIN_PBROADCASTD512_GPR,
28240 IX86_BUILTIN_PBROADCASTMB512,
28241 IX86_BUILTIN_PBROADCASTMW512,
28242 IX86_BUILTIN_PBROADCASTQ512,
28243 IX86_BUILTIN_PBROADCASTQ512_GPR,
28244 IX86_BUILTIN_PBROADCASTQ512_MEM,
28245 IX86_BUILTIN_PCMPEQD512_MASK,
28246 IX86_BUILTIN_PCMPEQQ512_MASK,
28247 IX86_BUILTIN_PCMPGTD512_MASK,
28248 IX86_BUILTIN_PCMPGTQ512_MASK,
28249 IX86_BUILTIN_PCOMPRESSD512,
28250 IX86_BUILTIN_PCOMPRESSDSTORE512,
28251 IX86_BUILTIN_PCOMPRESSQ512,
28252 IX86_BUILTIN_PCOMPRESSQSTORE512,
28253 IX86_BUILTIN_PEXPANDD512,
28254 IX86_BUILTIN_PEXPANDD512Z,
28255 IX86_BUILTIN_PEXPANDDLOAD512,
28256 IX86_BUILTIN_PEXPANDDLOAD512Z,
28257 IX86_BUILTIN_PEXPANDQ512,
28258 IX86_BUILTIN_PEXPANDQ512Z,
28259 IX86_BUILTIN_PEXPANDQLOAD512,
28260 IX86_BUILTIN_PEXPANDQLOAD512Z,
28261 IX86_BUILTIN_PMAXSD512,
28262 IX86_BUILTIN_PMAXSQ512,
28263 IX86_BUILTIN_PMAXUD512,
28264 IX86_BUILTIN_PMAXUQ512,
28265 IX86_BUILTIN_PMINSD512,
28266 IX86_BUILTIN_PMINSQ512,
28267 IX86_BUILTIN_PMINUD512,
28268 IX86_BUILTIN_PMINUQ512,
28269 IX86_BUILTIN_PMOVDB512,
28270 IX86_BUILTIN_PMOVDB512_MEM,
28271 IX86_BUILTIN_PMOVDW512,
28272 IX86_BUILTIN_PMOVDW512_MEM,
28273 IX86_BUILTIN_PMOVQB512,
28274 IX86_BUILTIN_PMOVQB512_MEM,
28275 IX86_BUILTIN_PMOVQD512,
28276 IX86_BUILTIN_PMOVQD512_MEM,
28277 IX86_BUILTIN_PMOVQW512,
28278 IX86_BUILTIN_PMOVQW512_MEM,
28279 IX86_BUILTIN_PMOVSDB512,
28280 IX86_BUILTIN_PMOVSDB512_MEM,
28281 IX86_BUILTIN_PMOVSDW512,
28282 IX86_BUILTIN_PMOVSDW512_MEM,
28283 IX86_BUILTIN_PMOVSQB512,
28284 IX86_BUILTIN_PMOVSQB512_MEM,
28285 IX86_BUILTIN_PMOVSQD512,
28286 IX86_BUILTIN_PMOVSQD512_MEM,
28287 IX86_BUILTIN_PMOVSQW512,
28288 IX86_BUILTIN_PMOVSQW512_MEM,
28289 IX86_BUILTIN_PMOVSXBD512,
28290 IX86_BUILTIN_PMOVSXBQ512,
28291 IX86_BUILTIN_PMOVSXDQ512,
28292 IX86_BUILTIN_PMOVSXWD512,
28293 IX86_BUILTIN_PMOVSXWQ512,
28294 IX86_BUILTIN_PMOVUSDB512,
28295 IX86_BUILTIN_PMOVUSDB512_MEM,
28296 IX86_BUILTIN_PMOVUSDW512,
28297 IX86_BUILTIN_PMOVUSDW512_MEM,
28298 IX86_BUILTIN_PMOVUSQB512,
28299 IX86_BUILTIN_PMOVUSQB512_MEM,
28300 IX86_BUILTIN_PMOVUSQD512,
28301 IX86_BUILTIN_PMOVUSQD512_MEM,
28302 IX86_BUILTIN_PMOVUSQW512,
28303 IX86_BUILTIN_PMOVUSQW512_MEM,
28304 IX86_BUILTIN_PMOVZXBD512,
28305 IX86_BUILTIN_PMOVZXBQ512,
28306 IX86_BUILTIN_PMOVZXDQ512,
28307 IX86_BUILTIN_PMOVZXWD512,
28308 IX86_BUILTIN_PMOVZXWQ512,
28309 IX86_BUILTIN_PMULDQ512,
28310 IX86_BUILTIN_PMULLD512,
28311 IX86_BUILTIN_PMULUDQ512,
28312 IX86_BUILTIN_PORD512,
28313 IX86_BUILTIN_PORQ512,
28314 IX86_BUILTIN_PROLD512,
28315 IX86_BUILTIN_PROLQ512,
28316 IX86_BUILTIN_PROLVD512,
28317 IX86_BUILTIN_PROLVQ512,
28318 IX86_BUILTIN_PRORD512,
28319 IX86_BUILTIN_PRORQ512,
28320 IX86_BUILTIN_PRORVD512,
28321 IX86_BUILTIN_PRORVQ512,
28322 IX86_BUILTIN_PSHUFD512,
28323 IX86_BUILTIN_PSLLD512,
28324 IX86_BUILTIN_PSLLDI512,
28325 IX86_BUILTIN_PSLLQ512,
28326 IX86_BUILTIN_PSLLQI512,
28327 IX86_BUILTIN_PSLLVV16SI,
28328 IX86_BUILTIN_PSLLVV8DI,
28329 IX86_BUILTIN_PSRAD512,
28330 IX86_BUILTIN_PSRADI512,
28331 IX86_BUILTIN_PSRAQ512,
28332 IX86_BUILTIN_PSRAQI512,
28333 IX86_BUILTIN_PSRAVV16SI,
28334 IX86_BUILTIN_PSRAVV8DI,
28335 IX86_BUILTIN_PSRLD512,
28336 IX86_BUILTIN_PSRLDI512,
28337 IX86_BUILTIN_PSRLQ512,
28338 IX86_BUILTIN_PSRLQI512,
28339 IX86_BUILTIN_PSRLVV16SI,
28340 IX86_BUILTIN_PSRLVV8DI,
28341 IX86_BUILTIN_PSUBD512,
28342 IX86_BUILTIN_PSUBQ512,
28343 IX86_BUILTIN_PTESTMD512,
28344 IX86_BUILTIN_PTESTMQ512,
28345 IX86_BUILTIN_PTESTNMD512,
28346 IX86_BUILTIN_PTESTNMQ512,
28347 IX86_BUILTIN_PUNPCKHDQ512,
28348 IX86_BUILTIN_PUNPCKHQDQ512,
28349 IX86_BUILTIN_PUNPCKLDQ512,
28350 IX86_BUILTIN_PUNPCKLQDQ512,
28351 IX86_BUILTIN_PXORD512,
28352 IX86_BUILTIN_PXORQ512,
28353 IX86_BUILTIN_RCP14PD512,
28354 IX86_BUILTIN_RCP14PS512,
28355 IX86_BUILTIN_RCP14SD,
28356 IX86_BUILTIN_RCP14SS,
28357 IX86_BUILTIN_RNDSCALEPD,
28358 IX86_BUILTIN_RNDSCALEPS,
28359 IX86_BUILTIN_RNDSCALESD,
28360 IX86_BUILTIN_RNDSCALESS,
28361 IX86_BUILTIN_RSQRT14PD512,
28362 IX86_BUILTIN_RSQRT14PS512,
28363 IX86_BUILTIN_RSQRT14SD,
28364 IX86_BUILTIN_RSQRT14SS,
28365 IX86_BUILTIN_SCALEFPD512,
28366 IX86_BUILTIN_SCALEFPS512,
28367 IX86_BUILTIN_SCALEFSD,
28368 IX86_BUILTIN_SCALEFSS,
28369 IX86_BUILTIN_SHUFPD512,
28370 IX86_BUILTIN_SHUFPS512,
28371 IX86_BUILTIN_SHUF_F32x4,
28372 IX86_BUILTIN_SHUF_F64x2,
28373 IX86_BUILTIN_SHUF_I32x4,
28374 IX86_BUILTIN_SHUF_I64x2,
28375 IX86_BUILTIN_SQRTPD512,
28376 IX86_BUILTIN_SQRTPD512_MASK,
28377 IX86_BUILTIN_SQRTPS512_MASK,
28378 IX86_BUILTIN_SQRTPS_NR512,
28379 IX86_BUILTIN_SQRTSD_ROUND,
28380 IX86_BUILTIN_SQRTSS_ROUND,
28381 IX86_BUILTIN_STOREAPD512,
28382 IX86_BUILTIN_STOREAPS512,
28383 IX86_BUILTIN_STOREDQUDI512,
28384 IX86_BUILTIN_STOREDQUSI512,
28385 IX86_BUILTIN_STOREUPD512,
28386 IX86_BUILTIN_STOREUPS512,
28387 IX86_BUILTIN_SUBPD512,
28388 IX86_BUILTIN_SUBPS512,
28389 IX86_BUILTIN_SUBSD_ROUND,
28390 IX86_BUILTIN_SUBSS_ROUND,
28391 IX86_BUILTIN_UCMPD512,
28392 IX86_BUILTIN_UCMPQ512,
28393 IX86_BUILTIN_UNPCKHPD512,
28394 IX86_BUILTIN_UNPCKHPS512,
28395 IX86_BUILTIN_UNPCKLPD512,
28396 IX86_BUILTIN_UNPCKLPS512,
28397 IX86_BUILTIN_VCVTSD2SI32,
28398 IX86_BUILTIN_VCVTSD2SI64,
28399 IX86_BUILTIN_VCVTSD2USI32,
28400 IX86_BUILTIN_VCVTSD2USI64,
28401 IX86_BUILTIN_VCVTSS2SI32,
28402 IX86_BUILTIN_VCVTSS2SI64,
28403 IX86_BUILTIN_VCVTSS2USI32,
28404 IX86_BUILTIN_VCVTSS2USI64,
28405 IX86_BUILTIN_VCVTTSD2SI32,
28406 IX86_BUILTIN_VCVTTSD2SI64,
28407 IX86_BUILTIN_VCVTTSD2USI32,
28408 IX86_BUILTIN_VCVTTSD2USI64,
28409 IX86_BUILTIN_VCVTTSS2SI32,
28410 IX86_BUILTIN_VCVTTSS2SI64,
28411 IX86_BUILTIN_VCVTTSS2USI32,
28412 IX86_BUILTIN_VCVTTSS2USI64,
28413 IX86_BUILTIN_VFMADDPD512_MASK,
28414 IX86_BUILTIN_VFMADDPD512_MASK3,
28415 IX86_BUILTIN_VFMADDPD512_MASKZ,
28416 IX86_BUILTIN_VFMADDPS512_MASK,
28417 IX86_BUILTIN_VFMADDPS512_MASK3,
28418 IX86_BUILTIN_VFMADDPS512_MASKZ,
28419 IX86_BUILTIN_VFMADDSD3_ROUND,
28420 IX86_BUILTIN_VFMADDSS3_ROUND,
28421 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28422 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28423 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28424 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28425 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28426 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28427 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28428 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28429 IX86_BUILTIN_VFMSUBPD512_MASK3,
28430 IX86_BUILTIN_VFMSUBPS512_MASK3,
28431 IX86_BUILTIN_VFMSUBSD3_MASK3,
28432 IX86_BUILTIN_VFMSUBSS3_MASK3,
28433 IX86_BUILTIN_VFNMADDPD512_MASK,
28434 IX86_BUILTIN_VFNMADDPS512_MASK,
28435 IX86_BUILTIN_VFNMSUBPD512_MASK,
28436 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28437 IX86_BUILTIN_VFNMSUBPS512_MASK,
28438 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28439 IX86_BUILTIN_VPCLZCNTD512,
28440 IX86_BUILTIN_VPCLZCNTQ512,
28441 IX86_BUILTIN_VPCONFLICTD512,
28442 IX86_BUILTIN_VPCONFLICTQ512,
28443 IX86_BUILTIN_VPERMDF512,
28444 IX86_BUILTIN_VPERMDI512,
28445 IX86_BUILTIN_VPERMI2VARD512,
28446 IX86_BUILTIN_VPERMI2VARPD512,
28447 IX86_BUILTIN_VPERMI2VARPS512,
28448 IX86_BUILTIN_VPERMI2VARQ512,
28449 IX86_BUILTIN_VPERMILPD512,
28450 IX86_BUILTIN_VPERMILPS512,
28451 IX86_BUILTIN_VPERMILVARPD512,
28452 IX86_BUILTIN_VPERMILVARPS512,
28453 IX86_BUILTIN_VPERMT2VARD512,
28454 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28455 IX86_BUILTIN_VPERMT2VARPD512,
28456 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28457 IX86_BUILTIN_VPERMT2VARPS512,
28458 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28459 IX86_BUILTIN_VPERMT2VARQ512,
28460 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28461 IX86_BUILTIN_VPERMVARDF512,
28462 IX86_BUILTIN_VPERMVARDI512,
28463 IX86_BUILTIN_VPERMVARSF512,
28464 IX86_BUILTIN_VPERMVARSI512,
28465 IX86_BUILTIN_VTERNLOGD512_MASK,
28466 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28467 IX86_BUILTIN_VTERNLOGQ512_MASK,
28468 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28470 /* Mask arithmetic operations */
28471 IX86_BUILTIN_KAND16,
28472 IX86_BUILTIN_KANDN16,
28473 IX86_BUILTIN_KNOT16,
28474 IX86_BUILTIN_KOR16,
28475 IX86_BUILTIN_KORTESTC16,
28476 IX86_BUILTIN_KORTESTZ16,
28477 IX86_BUILTIN_KUNPCKBW,
28478 IX86_BUILTIN_KXNOR16,
28479 IX86_BUILTIN_KXOR16,
28480 IX86_BUILTIN_KMOV16,
28482 /* Alternate 4 and 8 element gather/scatter for the vectorizer
28483 where all operands are 32-byte or 64-byte wide respectively. */
28484 IX86_BUILTIN_GATHERALTSIV4DF,
28485 IX86_BUILTIN_GATHERALTDIV8SF,
28486 IX86_BUILTIN_GATHERALTSIV4DI,
28487 IX86_BUILTIN_GATHERALTDIV8SI,
28488 IX86_BUILTIN_GATHER3ALTDIV16SF,
28489 IX86_BUILTIN_GATHER3ALTDIV16SI,
28490 IX86_BUILTIN_GATHER3ALTSIV8DF,
28491 IX86_BUILTIN_GATHER3ALTSIV8DI,
28492 IX86_BUILTIN_GATHER3DIV16SF,
28493 IX86_BUILTIN_GATHER3DIV16SI,
28494 IX86_BUILTIN_GATHER3DIV8DF,
28495 IX86_BUILTIN_GATHER3DIV8DI,
28496 IX86_BUILTIN_GATHER3SIV16SF,
28497 IX86_BUILTIN_GATHER3SIV16SI,
28498 IX86_BUILTIN_GATHER3SIV8DF,
28499 IX86_BUILTIN_GATHER3SIV8DI,
28500 IX86_BUILTIN_SCATTERDIV16SF,
28501 IX86_BUILTIN_SCATTERDIV16SI,
28502 IX86_BUILTIN_SCATTERDIV8DF,
28503 IX86_BUILTIN_SCATTERDIV8DI,
28504 IX86_BUILTIN_SCATTERSIV16SF,
28505 IX86_BUILTIN_SCATTERSIV16SI,
28506 IX86_BUILTIN_SCATTERSIV8DF,
28507 IX86_BUILTIN_SCATTERSIV8DI,
28509 /* AVX512PF */
28510 IX86_BUILTIN_GATHERPFQPD,
28511 IX86_BUILTIN_GATHERPFDPS,
28512 IX86_BUILTIN_GATHERPFDPD,
28513 IX86_BUILTIN_GATHERPFQPS,
28514 IX86_BUILTIN_SCATTERPFDPD,
28515 IX86_BUILTIN_SCATTERPFDPS,
28516 IX86_BUILTIN_SCATTERPFQPD,
28517 IX86_BUILTIN_SCATTERPFQPS,
28519 /* AVX-512ER */
28520 IX86_BUILTIN_EXP2PD_MASK,
28521 IX86_BUILTIN_EXP2PS_MASK,
28522 IX86_BUILTIN_EXP2PS,
28523 IX86_BUILTIN_RCP28PD,
28524 IX86_BUILTIN_RCP28PS,
28525 IX86_BUILTIN_RCP28SD,
28526 IX86_BUILTIN_RCP28SS,
28527 IX86_BUILTIN_RSQRT28PD,
28528 IX86_BUILTIN_RSQRT28PS,
28529 IX86_BUILTIN_RSQRT28SD,
28530 IX86_BUILTIN_RSQRT28SS,
28532 /* SHA builtins. */
28533 IX86_BUILTIN_SHA1MSG1,
28534 IX86_BUILTIN_SHA1MSG2,
28535 IX86_BUILTIN_SHA1NEXTE,
28536 IX86_BUILTIN_SHA1RNDS4,
28537 IX86_BUILTIN_SHA256MSG1,
28538 IX86_BUILTIN_SHA256MSG2,
28539 IX86_BUILTIN_SHA256RNDS2,
28541 /* CLFLUSHOPT instructions. */
28542 IX86_BUILTIN_CLFLUSHOPT,
28544 /* TFmode support builtins. */
28545 IX86_BUILTIN_INFQ,
28546 IX86_BUILTIN_HUGE_VALQ,
28547 IX86_BUILTIN_FABSQ,
28548 IX86_BUILTIN_COPYSIGNQ,
28550 /* Vectorizer support builtins. */
28551 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
28552 IX86_BUILTIN_CPYSGNPS,
28553 IX86_BUILTIN_CPYSGNPD,
28554 IX86_BUILTIN_CPYSGNPS256,
28555 IX86_BUILTIN_CPYSGNPS512,
28556 IX86_BUILTIN_CPYSGNPD256,
28557 IX86_BUILTIN_CPYSGNPD512,
28558 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
28559 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
28562 /* FMA4 instructions. */
28563 IX86_BUILTIN_VFMADDSS,
28564 IX86_BUILTIN_VFMADDSD,
28565 IX86_BUILTIN_VFMADDPS,
28566 IX86_BUILTIN_VFMADDPD,
28567 IX86_BUILTIN_VFMADDPS256,
28568 IX86_BUILTIN_VFMADDPD256,
28569 IX86_BUILTIN_VFMADDSUBPS,
28570 IX86_BUILTIN_VFMADDSUBPD,
28571 IX86_BUILTIN_VFMADDSUBPS256,
28572 IX86_BUILTIN_VFMADDSUBPD256,
28574 /* FMA3 instructions. */
28575 IX86_BUILTIN_VFMADDSS3,
28576 IX86_BUILTIN_VFMADDSD3,
28578 /* XOP instructions. */
28579 IX86_BUILTIN_VPCMOV,
28580 IX86_BUILTIN_VPCMOV_V2DI,
28581 IX86_BUILTIN_VPCMOV_V4SI,
28582 IX86_BUILTIN_VPCMOV_V8HI,
28583 IX86_BUILTIN_VPCMOV_V16QI,
28584 IX86_BUILTIN_VPCMOV_V4SF,
28585 IX86_BUILTIN_VPCMOV_V2DF,
28586 IX86_BUILTIN_VPCMOV256,
28587 IX86_BUILTIN_VPCMOV_V4DI256,
28588 IX86_BUILTIN_VPCMOV_V8SI256,
28589 IX86_BUILTIN_VPCMOV_V16HI256,
28590 IX86_BUILTIN_VPCMOV_V32QI256,
28591 IX86_BUILTIN_VPCMOV_V8SF256,
28592 IX86_BUILTIN_VPCMOV_V4DF256,
28594 IX86_BUILTIN_VPPERM,
28596 IX86_BUILTIN_VPMACSSWW,
28597 IX86_BUILTIN_VPMACSWW,
28598 IX86_BUILTIN_VPMACSSWD,
28599 IX86_BUILTIN_VPMACSWD,
28600 IX86_BUILTIN_VPMACSSDD,
28601 IX86_BUILTIN_VPMACSDD,
28602 IX86_BUILTIN_VPMACSSDQL,
28603 IX86_BUILTIN_VPMACSSDQH,
28604 IX86_BUILTIN_VPMACSDQL,
28605 IX86_BUILTIN_VPMACSDQH,
28606 IX86_BUILTIN_VPMADCSSWD,
28607 IX86_BUILTIN_VPMADCSWD,
28609 IX86_BUILTIN_VPHADDBW,
28610 IX86_BUILTIN_VPHADDBD,
28611 IX86_BUILTIN_VPHADDBQ,
28612 IX86_BUILTIN_VPHADDWD,
28613 IX86_BUILTIN_VPHADDWQ,
28614 IX86_BUILTIN_VPHADDDQ,
28615 IX86_BUILTIN_VPHADDUBW,
28616 IX86_BUILTIN_VPHADDUBD,
28617 IX86_BUILTIN_VPHADDUBQ,
28618 IX86_BUILTIN_VPHADDUWD,
28619 IX86_BUILTIN_VPHADDUWQ,
28620 IX86_BUILTIN_VPHADDUDQ,
28621 IX86_BUILTIN_VPHSUBBW,
28622 IX86_BUILTIN_VPHSUBWD,
28623 IX86_BUILTIN_VPHSUBDQ,
28625 IX86_BUILTIN_VPROTB,
28626 IX86_BUILTIN_VPROTW,
28627 IX86_BUILTIN_VPROTD,
28628 IX86_BUILTIN_VPROTQ,
28629 IX86_BUILTIN_VPROTB_IMM,
28630 IX86_BUILTIN_VPROTW_IMM,
28631 IX86_BUILTIN_VPROTD_IMM,
28632 IX86_BUILTIN_VPROTQ_IMM,
28634 IX86_BUILTIN_VPSHLB,
28635 IX86_BUILTIN_VPSHLW,
28636 IX86_BUILTIN_VPSHLD,
28637 IX86_BUILTIN_VPSHLQ,
28638 IX86_BUILTIN_VPSHAB,
28639 IX86_BUILTIN_VPSHAW,
28640 IX86_BUILTIN_VPSHAD,
28641 IX86_BUILTIN_VPSHAQ,
28643 IX86_BUILTIN_VFRCZSS,
28644 IX86_BUILTIN_VFRCZSD,
28645 IX86_BUILTIN_VFRCZPS,
28646 IX86_BUILTIN_VFRCZPD,
28647 IX86_BUILTIN_VFRCZPS256,
28648 IX86_BUILTIN_VFRCZPD256,
28650 IX86_BUILTIN_VPCOMEQUB,
28651 IX86_BUILTIN_VPCOMNEUB,
28652 IX86_BUILTIN_VPCOMLTUB,
28653 IX86_BUILTIN_VPCOMLEUB,
28654 IX86_BUILTIN_VPCOMGTUB,
28655 IX86_BUILTIN_VPCOMGEUB,
28656 IX86_BUILTIN_VPCOMFALSEUB,
28657 IX86_BUILTIN_VPCOMTRUEUB,
28659 IX86_BUILTIN_VPCOMEQUW,
28660 IX86_BUILTIN_VPCOMNEUW,
28661 IX86_BUILTIN_VPCOMLTUW,
28662 IX86_BUILTIN_VPCOMLEUW,
28663 IX86_BUILTIN_VPCOMGTUW,
28664 IX86_BUILTIN_VPCOMGEUW,
28665 IX86_BUILTIN_VPCOMFALSEUW,
28666 IX86_BUILTIN_VPCOMTRUEUW,
28668 IX86_BUILTIN_VPCOMEQUD,
28669 IX86_BUILTIN_VPCOMNEUD,
28670 IX86_BUILTIN_VPCOMLTUD,
28671 IX86_BUILTIN_VPCOMLEUD,
28672 IX86_BUILTIN_VPCOMGTUD,
28673 IX86_BUILTIN_VPCOMGEUD,
28674 IX86_BUILTIN_VPCOMFALSEUD,
28675 IX86_BUILTIN_VPCOMTRUEUD,
28677 IX86_BUILTIN_VPCOMEQUQ,
28678 IX86_BUILTIN_VPCOMNEUQ,
28679 IX86_BUILTIN_VPCOMLTUQ,
28680 IX86_BUILTIN_VPCOMLEUQ,
28681 IX86_BUILTIN_VPCOMGTUQ,
28682 IX86_BUILTIN_VPCOMGEUQ,
28683 IX86_BUILTIN_VPCOMFALSEUQ,
28684 IX86_BUILTIN_VPCOMTRUEUQ,
28686 IX86_BUILTIN_VPCOMEQB,
28687 IX86_BUILTIN_VPCOMNEB,
28688 IX86_BUILTIN_VPCOMLTB,
28689 IX86_BUILTIN_VPCOMLEB,
28690 IX86_BUILTIN_VPCOMGTB,
28691 IX86_BUILTIN_VPCOMGEB,
28692 IX86_BUILTIN_VPCOMFALSEB,
28693 IX86_BUILTIN_VPCOMTRUEB,
28695 IX86_BUILTIN_VPCOMEQW,
28696 IX86_BUILTIN_VPCOMNEW,
28697 IX86_BUILTIN_VPCOMLTW,
28698 IX86_BUILTIN_VPCOMLEW,
28699 IX86_BUILTIN_VPCOMGTW,
28700 IX86_BUILTIN_VPCOMGEW,
28701 IX86_BUILTIN_VPCOMFALSEW,
28702 IX86_BUILTIN_VPCOMTRUEW,
28704 IX86_BUILTIN_VPCOMEQD,
28705 IX86_BUILTIN_VPCOMNED,
28706 IX86_BUILTIN_VPCOMLTD,
28707 IX86_BUILTIN_VPCOMLED,
28708 IX86_BUILTIN_VPCOMGTD,
28709 IX86_BUILTIN_VPCOMGED,
28710 IX86_BUILTIN_VPCOMFALSED,
28711 IX86_BUILTIN_VPCOMTRUED,
28713 IX86_BUILTIN_VPCOMEQQ,
28714 IX86_BUILTIN_VPCOMNEQ,
28715 IX86_BUILTIN_VPCOMLTQ,
28716 IX86_BUILTIN_VPCOMLEQ,
28717 IX86_BUILTIN_VPCOMGTQ,
28718 IX86_BUILTIN_VPCOMGEQ,
28719 IX86_BUILTIN_VPCOMFALSEQ,
28720 IX86_BUILTIN_VPCOMTRUEQ,
28722 /* LWP instructions. */
28723 IX86_BUILTIN_LLWPCB,
28724 IX86_BUILTIN_SLWPCB,
28725 IX86_BUILTIN_LWPVAL32,
28726 IX86_BUILTIN_LWPVAL64,
28727 IX86_BUILTIN_LWPINS32,
28728 IX86_BUILTIN_LWPINS64,
28730 IX86_BUILTIN_CLZS,
28732 /* RTM */
28733 IX86_BUILTIN_XBEGIN,
28734 IX86_BUILTIN_XEND,
28735 IX86_BUILTIN_XABORT,
28736 IX86_BUILTIN_XTEST,
28738 /* BMI instructions. */
28739 IX86_BUILTIN_BEXTR32,
28740 IX86_BUILTIN_BEXTR64,
28741 IX86_BUILTIN_CTZS,
28743 /* TBM instructions. */
28744 IX86_BUILTIN_BEXTRI32,
28745 IX86_BUILTIN_BEXTRI64,
28747 /* BMI2 instructions. */
28748 IX86_BUILTIN_BZHI32,
28749 IX86_BUILTIN_BZHI64,
28750 IX86_BUILTIN_PDEP32,
28751 IX86_BUILTIN_PDEP64,
28752 IX86_BUILTIN_PEXT32,
28753 IX86_BUILTIN_PEXT64,
28755 /* ADX instructions. */
28756 IX86_BUILTIN_ADDCARRYX32,
28757 IX86_BUILTIN_ADDCARRYX64,
28759 /* FSGSBASE instructions. */
28760 IX86_BUILTIN_RDFSBASE32,
28761 IX86_BUILTIN_RDFSBASE64,
28762 IX86_BUILTIN_RDGSBASE32,
28763 IX86_BUILTIN_RDGSBASE64,
28764 IX86_BUILTIN_WRFSBASE32,
28765 IX86_BUILTIN_WRFSBASE64,
28766 IX86_BUILTIN_WRGSBASE32,
28767 IX86_BUILTIN_WRGSBASE64,
28769 /* RDRND instructions. */
28770 IX86_BUILTIN_RDRAND16_STEP,
28771 IX86_BUILTIN_RDRAND32_STEP,
28772 IX86_BUILTIN_RDRAND64_STEP,
28774 /* RDSEED instructions. */
28775 IX86_BUILTIN_RDSEED16_STEP,
28776 IX86_BUILTIN_RDSEED32_STEP,
28777 IX86_BUILTIN_RDSEED64_STEP,
28779 /* F16C instructions. */
28780 IX86_BUILTIN_CVTPH2PS,
28781 IX86_BUILTIN_CVTPH2PS256,
28782 IX86_BUILTIN_CVTPS2PH,
28783 IX86_BUILTIN_CVTPS2PH256,
28785 /* CFString built-in for darwin */
28786 IX86_BUILTIN_CFSTRING,
28788 /* Builtins to get CPU type and supported features. */
28789 IX86_BUILTIN_CPU_INIT,
28790 IX86_BUILTIN_CPU_IS,
28791 IX86_BUILTIN_CPU_SUPPORTS,
28793 /* Read/write FLAGS register built-ins. */
28794 IX86_BUILTIN_READ_FLAGS,
28795 IX86_BUILTIN_WRITE_FLAGS,
28797 IX86_BUILTIN_MAX
28798 };
28800 /* Table for the ix86 builtin decls. */
28801 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
28803 /* Table of all of the builtin functions that are possible with different ISAs
28804 but are waiting to be built until a function is declared to use that
28805 ISA. */
28806 struct builtin_isa {
28807 const char *name; /* function name */
28808 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
28809 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
28810 bool const_p; /* true if the declaration is constant */
28811 bool set_and_not_built_p;
28812 };
28814 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
28817 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
28818 of which isa_flags to use in the ix86_builtins_isa array. Stores the
28819 function decl in the ix86_builtins array. Returns the function decl or
28820 NULL_TREE if the builtin was not added.
28822 If the front end has a special hook for builtin functions, delay adding
28823 builtin functions that aren't in the current ISA until the ISA is changed
28824 with function specific optimization. Doing so can save about 300K for the
28825 default compiler. When the builtin is expanded, check at that time whether
28826 it is valid.
28828 If the front end doesn't have a special hook, record all builtins, even if
28829 they aren't in the current ISA, in case the user uses
28830 function specific options for a different ISA, so that we don't get scope
28831 errors if a builtin is added in the middle of a function scope. */
28833 static inline tree
28834 def_builtin (HOST_WIDE_INT mask, const char *name,
28835 enum ix86_builtin_func_type tcode,
28836 enum ix86_builtins code)
28837 {
28838 tree decl = NULL_TREE;
28840 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
28841 {
28842 ix86_builtins_isa[(int) code].isa = mask;
28844 mask &= ~OPTION_MASK_ISA_64BIT;
28845 if (mask == 0
28846 || (mask & ix86_isa_flags) != 0
28847 || (lang_hooks.builtin_function
28848 == lang_hooks.builtin_function_ext_scope))
28850 {
28851 tree type = ix86_get_builtin_func_type (tcode);
28852 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
28853 NULL, NULL_TREE);
28854 ix86_builtins[(int) code] = decl;
28855 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
28856 }
28857 else
28858 {
28859 ix86_builtins[(int) code] = NULL_TREE;
28860 ix86_builtins_isa[(int) code].tcode = tcode;
28861 ix86_builtins_isa[(int) code].name = name;
28862 ix86_builtins_isa[(int) code].const_p = false;
28863 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
28864 }
28865 }
28867 return decl;
28868 }
28870 /* Like def_builtin, but also marks the function decl "const". */
28872 static inline tree
28873 def_builtin_const (HOST_WIDE_INT mask, const char *name,
28874 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
28875 {
28876 tree decl = def_builtin (mask, name, tcode, code);
28877 if (decl)
28878 TREE_READONLY (decl) = 1;
28879 else
28880 ix86_builtins_isa[(int) code].const_p = true;
28882 return decl;
28883 }
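/* Illustrative sketch (not part of the original source): registering one of
   the table entries below by hand would look like

     def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps",
                        V4SF_FTYPE_V4SF, IX86_BUILTIN_SQRTPS);

   When OPTION_MASK_ISA_SSE is not in ix86_isa_flags (and the front end lacks
   the ext-scope hook), the decl is deferred into ix86_builtins_isa and only
   materialized later by ix86_add_new_builtins.  */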
28885 /* Add any new builtin functions for a given ISA that may not have been
28886 declared. This saves a bit of space compared to adding all of the
28887 declarations to the tree, even if we didn't use them. */
28889 static void
28890 ix86_add_new_builtins (HOST_WIDE_INT isa)
28891 {
28892 int i;
28894 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
28895 {
28896 if ((ix86_builtins_isa[i].isa & isa) != 0
28897 && ix86_builtins_isa[i].set_and_not_built_p)
28898 {
28899 tree decl, type;
28901 /* Don't define the builtin again. */
28902 ix86_builtins_isa[i].set_and_not_built_p = false;
28904 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
28905 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
28906 type, i, BUILT_IN_MD, NULL,
28907 NULL_TREE);
28909 ix86_builtins[i] = decl;
28910 if (ix86_builtins_isa[i].const_p)
28911 TREE_READONLY (decl) = 1;
28912 }
28913 }
28914 }
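/* Hypothetical usage sketch (not part of the original source): once the
   option machinery has widened ix86_isa_flags, e.g. for a function marked
   __attribute__((target("avx2"))), a call along the lines of

     ix86_add_new_builtins (ix86_isa_flags);

   builds any deferred builtins whose ISA mask is now satisfied.  */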
28916 /* Bits for builtin_description.flag. */
28918 /* Set when we don't support the comparison natively, and should
28919 swap_comparison in order to support it. */
28920 #define BUILTIN_DESC_SWAP_OPERANDS 1
28922 struct builtin_description
28923 {
28924 const HOST_WIDE_INT mask;
28925 const enum insn_code icode;
28926 const char *const name;
28927 const enum ix86_builtins code;
28928 const enum rtx_code comparison;
28929 const int flag;
28930 };
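/* Illustrative note (not part of the original source): each entry in the
   tables below fills these fields in order; e.g. the first bdesc_comi entry
   binds insn pattern CODE_FOR_sse_comi to "__builtin_ia32_comieq" as
   IX86_BUILTIN_COMIEQSS under OPTION_MASK_ISA_SSE, with comparison code UNEQ
   and flag 0.  */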
28932 static const struct builtin_description bdesc_comi[] =
28933 {
28934 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
28935 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
28936 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
28937 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
28938 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
28939 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
28940 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
28941 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
28942 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
28943 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
28944 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
28945 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
28946 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
28947 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
28948 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
28949 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
28950 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
28951 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
28952 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
28953 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
28954 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
28955 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
28956 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
28957 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
28958 };
28960 static const struct builtin_description bdesc_pcmpestr[] =
28961 {
28962 /* SSE4.2 */
28963 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
28964 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
28965 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
28966 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
28967 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
28968 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
28969 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
28970 };
28972 static const struct builtin_description bdesc_pcmpistr[] =
28973 {
28974 /* SSE4.2 */
28975 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
28976 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
28977 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
28978 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
28979 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
28980 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
28981 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
28982 };
28984 /* Special builtins with variable number of arguments. */
28985 static const struct builtin_description bdesc_special_args[] =
28986 {
28987 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
28988 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
28989 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
28991 /* 80387 (for use internally for atomic compound assignment). */
28992 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
28993 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
28994 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) VOID_FTYPE_PUSHORT },
28995 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
28997 /* MMX */
28998 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29000 /* 3DNow! */
29001 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29003 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
29004 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
29005 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
29006 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29007 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29008 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29009 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29010 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29011 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29013 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29014 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29015 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29016 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29017 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29018 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29019 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29020 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29022 /* SSE */
29023 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29024 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29025 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29027 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29028 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29029 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29030 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29032 /* SSE or 3DNow!A */
29033 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29034 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
29036 /* SSE2 */
29037 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29038 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29039 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29040 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
29041 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29042 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
29043 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
29044 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
29045 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
29046 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29048 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29049 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29051 /* SSE3 */
29052 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29054 /* SSE4.1 */
29055 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
29057 /* SSE4A */
29058 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29059 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29061 /* AVX */
29062 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
29063 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
29065 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29066 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29067 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29068 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
29069 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
29071 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29072 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29073 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29074 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29075 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29076 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
29077 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29079 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
29080 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29081 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29083 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
29084 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
29085 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
29086 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
29087 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
29088 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
29089 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
29090 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
29092 /* AVX2 */
29093 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
29094 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
29095 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
29096 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
29097 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
29098 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
29099 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
29100 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
29101 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
29103 /* AVX512F */
29104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
29125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
29126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
29127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
29128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29152 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
29153 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
29154 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
29155 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
29156 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
29157 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
29159 /* FSGSBASE */
29160 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29161 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29162 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29163 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29164 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29165 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29166 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29167 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29169 /* RTM */
29170 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29171 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
29172 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
29173 };
29175 /* Builtins with variable number of arguments. */
29176 static const struct builtin_description bdesc_args[] =
29177 {
29178 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
29179 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
29180 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
29181 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29182 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29183 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29184 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29186 /* MMX */
29187 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29188 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29189 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29190 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29191 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29192 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29194 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29195 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29196 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29197 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29198 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29199 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29200 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29201 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29203 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29204 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29206 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29207 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29208 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29209 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29211 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29212 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29213 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29214 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29215 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29216 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29218 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29219 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29220 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29221 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29222 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
29223 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
29225 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29226 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
29227 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29229 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
29231 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29232 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29233 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29234 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29235 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29236 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29238 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29239 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29240 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29241 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29242 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29243 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29245 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29246 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29247 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29248 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29250 /* 3DNow! */
29251 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29252 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29253 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29254 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29256 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29257 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29258 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29259 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29260 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29261 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29262 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29263 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29264 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29265 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29266 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29267 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29268 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29269 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29270 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29272 /* 3DNow!A */
29273 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29274 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29275 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29276 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29277 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29278 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29280 /* SSE */
29281 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
29282 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29283 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29284 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29285 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29286 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29287 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29288 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29289 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29290 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29291 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29292 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
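  /* Entries whose mask also includes OPTION_MASK_ISA_64BIT, such as the
     cvtss2si64/cvttss2si64 conversions above, are presumably usable only
     when every bit of the mask is enabled, i.e. when compiling for a
     64-bit target.  */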
29294 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29296 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29297 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29298 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29299 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29300 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29301 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29302 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29303 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29305 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29306 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29307 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29308 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29309 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29310 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29311 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29312 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29313 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29314 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29315 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29316 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29317 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29318 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29319 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29320 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29321 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29322 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29323 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29324 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
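  /* A note on the comparison entries above: the rtx code in the fifth
     field selects the predicate for the single maskcmp/vmmaskcmp pattern,
     and the _SWAP suffix on the prototype apparently tells the expander to
     swap the two operands, which is how cmpgt/cmpge are expressed in terms
     of LT/LE and cmpngt/cmpnge in terms of UNGE/UNGT.  */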
29326 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29327 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29328 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29329 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29331 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29332 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29333 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29334 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29336 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29338 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29339 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29340 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29341 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29342 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29344 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
29345 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
29346 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
29348 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
29350 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29351 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29352 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29354 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
29355 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
29357 /* SSE MMX or 3DNow!A */
29358 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29359 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29360 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29362 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29363 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29364 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29365 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29367 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
29368 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
29370 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
29372 /* SSE2 */
29373 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29375 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
29376 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
29377 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29378 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
29379 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
29381 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29382 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29383 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
29384 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29387 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
29389 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29390 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29391 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29392 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29394 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29395 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
29396 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29398 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29399 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29400 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29401 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29402 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29403 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29404 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29407 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29408 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29412 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29415 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29417 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29420 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29421 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29422 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29424 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29429 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29433 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29434 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29435 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29436 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29438 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29440 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29441 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29442 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29444 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29446 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29447 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29448 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29449 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29450 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29451 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29452 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29453 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29455 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29456 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29457 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29458 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29459 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29460 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29461 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29462 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29464 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29465 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29467 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29468 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29469 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29470 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29472 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29473 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29475 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29476 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29477 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29478 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29479 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29480 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29482 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29483 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29484 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29485 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29487 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29488 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29489 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29490 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29491 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29492 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29493 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29494 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29496 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29497 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29498 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29500 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29501 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
29503 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
29504 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29506 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
29508 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
29509 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
29510 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
29511 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
29513 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29514 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29515 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29516 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29517 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29518 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29519 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29521 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29522 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29523 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29524 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29525 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29526 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29527 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29529 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29530 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29531 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29532 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
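  /* The shift entries above come in pairs: the _SI_COUNT prototypes take an
     immediate/scalar shift count (the *i128 builtins), while the
     _V8HI_COUNT/_V4SI_COUNT/_V2DI_COUNT forms take the count from a vector
     operand.  The _INT_CONVERT forms (pslldqi128, psrldqi128) presumably
     need an extra mode conversion, since the whole register is shifted as a
     single V1TI value by the ashlv1ti3/lshrv1ti3 patterns.  */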
29534 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
29535 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29536 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29538 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
29540 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29542 /* SSE2 MMX */
29543 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29544 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29546 /* SSE3 */
29547 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29548 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29550 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29551 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29552 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29553 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29554 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29555 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29557 /* SSSE3 */
29558 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29559 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
29560 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29561 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
29562 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29563 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29565 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29566 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29567 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29568 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29569 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29570 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29571 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29572 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29573 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29574 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29575 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29576 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29577 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
29578 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
29579 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29580 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29581 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29582 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29583 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29584 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29585 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29586 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29587 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29588 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29590 /* SSSE3 */
29591 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
29592 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
29594 /* SSE4.1 */
29595 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29596 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29597 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
29598 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
29599 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29600 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29601 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29602 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
29603 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
29604 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
29606 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29607 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29608 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29609 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29610 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29611 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29612 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29613 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29614 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29615 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29616 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29617 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29618 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29620 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29621 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29622 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29623 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29624 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29625 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29626 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29627 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29628 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29629 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29630 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29631 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29633 /* SSE4.1 rounding and ptest */
29634 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29635 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29636 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29637 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29639 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
29640 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
29641 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
29642 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
29644 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29645 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29647 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29648 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29650 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
29651 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
29652 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
29653 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
29655 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
29656 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
29658 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29659 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29661 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29662 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29663 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
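  /* For the OPTION_MASK_ISA_ROUND entries above, a single icode such as
     CODE_FOR_sse4_1_roundpd serves floor/ceil/trunc/rint: the rounding
     selector (ROUND_FLOOR and friends) is carried in the comparison field
     via the (enum rtx_code) cast and presumably recovered by the expander
     as the immediate rounding-mode operand.  OPTION_MASK_ISA_ROUND itself
     appears to be an alias for the SSE4.1 mask.  */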
29665 /* SSE4.2 */
29666 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29667 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
29668 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
29669 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29670 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29672 /* SSE4A */
29673 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
29674 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
29675 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
29676 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29678 /* AES */
29679 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
29680 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29682 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29683 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29684 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29685 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29687 /* PCLMUL */
29688 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
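  /* The AES and PCLMUL entries above have a zero name field; their
     user-visible names are presumably registered elsewhere, since the
     OPTION_MASK_ISA_SSE2 mask here cannot by itself express the AES/PCLMUL
     requirement.  For illustration only (an assumption about the intrinsic
     headers, not part of this table), the wmmintrin.h wrapper for the
     PCLMUL builtin looks roughly like:

	extern __inline __m128i
	_mm_clmulepi64_si128 (__m128i __X, __m128i __Y, const int __I)
	{
	  return (__m128i) __builtin_ia32_pclmulqdq128 ((__v2di) __X,
							(__v2di) __Y, __I);
	}
  */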
29690 /* AVX */
29691 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29692 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29693 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29694 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29695 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29696 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29697 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29698 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29699 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29700 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29701 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29702 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29703 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29704 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29705 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29706 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29707 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29708 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29709 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29710 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29711 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29712 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29713 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29714 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29715 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29716 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
29719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
29720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
29721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
29726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
29727 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29732 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29733 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
29737 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
29738 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
29739 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
29740 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
29741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
29742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
29744 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29745 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29746 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29749 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29750 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29751 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29753 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29754 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
29755 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
29756 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
29758 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29759 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29760 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29762 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29763 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29764 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29765 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29766 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29768 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29770 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29771 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
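  /* For the floor/ceil/trunc/rint entries below the comparison slot does not
     hold an RTX comparison code: it carries a ROUND_* constant that supplies
     the rounding-mode immediate for the shared avx_roundpd256/avx_roundps256
     patterns.  */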
29773 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
29774 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
29775 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
29776 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
29778 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29779 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29781 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29782 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29784 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
29785 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
29786 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
29787 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
29789 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
29790 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
29792 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29793 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29795 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29796 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29797 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29798 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29800 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29801 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29802 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29803 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
29804 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
29805 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
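  /* The vtest and ptest builtins share one insn pattern per mode; the EQ,
     LTU and GTU codes in the comparison slot select the Z (ZF set),
     C (CF set) and NZC (neither flag set) forms of the test.  */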
29807 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29808 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29809 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29810 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29811 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29812 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29813 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29814 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29815 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29816 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29817 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29818 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29819 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29820 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29821 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29823 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
29824 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
29826 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29827 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29829 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },

29831 /* AVX2 */
29832 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
29833 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
29834 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
29835 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
29836 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29837 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29838 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29839 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29840 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29841 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29842 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29843 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29844 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29845 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29846 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29847 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29848 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
29849 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29850 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29851 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29852 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29853 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
29854 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
29855 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29856 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29857 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29858 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29859 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29860 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29861 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29862 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29863 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29864 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29865 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29866 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29867 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29868 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29869 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29870 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
29871 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29872 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29873 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29874 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29875 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29876 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29877 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29878 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29879 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29880 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29881 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29882 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29883 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
29884 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29885 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29886 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29887 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29888 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29889 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29890 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29891 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29892 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29893 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29894 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29895 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29896 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29897 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29898 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29899 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29900 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29901 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29902 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29903 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29904 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29905 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29906 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
29907 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29908 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29909 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29910 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29911 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
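  /* Vector shifts.  A _COUNT suffix in the prototype marks the shift-count
     operand (a scalar count for the *i builtins, the low quadword of an XMM
     operand otherwise); _INT_CONVERT marks the whole-register byte shifts,
     whose operands are reinterpreted as V2TI to match the avx2_ashlv2ti3 and
     avx2_lshrv2ti3 patterns.  */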
29912 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29913 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29914 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29915 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29916 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29917 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29918 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29919 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29920 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29921 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29922 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29923 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29924 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29925 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29926 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29927 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29928 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29929 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29930 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29931 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29932 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29933 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29934 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29935 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29936 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29937 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29938 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29939 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29940 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29941 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29942 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29943 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29944 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29945 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29946 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29947 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29948 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29949 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29950 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29951 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
29952 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29953 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
29954 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
29955 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29956 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29957 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29958 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29959 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29960 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29961 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29962 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29963 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29964 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
29965 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
29966 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
29967 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
29968 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29969 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29970 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29971 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29972 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29973 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29974 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29975 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29976 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29977 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29979 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29981 /* BMI */
29982 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29983 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29984 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29986 /* TBM */
29987 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29988 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29990 /* F16C */
29991 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
29992 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
29993 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
29994 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
29996 /* BMI2 */
29997 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29998 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29999 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30000 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30001 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30002 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30004 /* AVX512F */
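  /* In the _mask entries the last two components of the prototype (for
     example the trailing _V16SI_HI or _V8DI_QI) are the merge source and the
     HImode/QImode write-mask; _maskz variants zero unselected elements
     instead of merging into the source operand.  */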
30005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
30013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
30015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
30016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
30023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
30025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
30030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
30031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
30032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
30033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
30034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
30035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
30036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
30037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
30054 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
30055 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
30056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
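  /* Broadcasting a DImode value straight from a general register needs a
     64-bit target, hence OPTION_MASK_ISA_64BIT in the _gpr entry; the _mem
     entry masks that bit out so the broadcast can still be expanded from
     memory when not compiling for 64-bit.  */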
30057 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30058 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30159 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30160 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30161 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30162 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30163 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30166 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30167 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30168 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30169 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30172 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30173 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30175 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30176 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30177 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
30178 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30179 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30180 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
30198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
30199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
30200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30201 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
30203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30206 /* Mask arithmetic operations */
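  /* These act on the 16-bit AVX-512 write masks (__mmask16, i.e. the k registers).  */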
30207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
30210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
30214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30215 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
30218 /* SHA */
30219 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30220 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30221 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30222 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30223 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30224 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30225 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
30226 };
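/* Illustrative sketch, not part of this file: how the mask-arithmetic
   builtins above are normally reached from user code.  It assumes GCC's
   <immintrin.h> wrapper _mm512_kand, which expands to
   __builtin_ia32_kandhi, i.e. the IX86_BUILTIN_KAND16 entry registered
   above with the HI_FTYPE_HI_HI prototype.  Guarded out so it cannot
   affect this file.  */
#if 0
#include <immintrin.h>

/* Compile with -mavx512f.  AND two 16-bit write masks together.  */
static __mmask16
combine_masks (__mmask16 a, __mmask16 b)
{
  return _mm512_kand (a, b);	/* -> __builtin_ia32_kandhi (a, b) */
}
#endif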
30228 /* Builtins with rounding support. */
30229 static const struct builtin_description bdesc_round_args[] =
30230 {
30231 /* AVX512F */
30232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30234 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
30237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
30238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
30239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
30240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
30241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
30242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
30245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
30247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
30249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
30251 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
30252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
30253 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
30254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
30255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30260 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
30261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
30262 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
30263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30309 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30312 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30314 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30315 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30316 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30317 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30318 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30319 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30320 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30321 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30322 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30323 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30324 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30325 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30326 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30327 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30328 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30329 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30330 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30331 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30332 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30333 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
30334 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
30335 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30336 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30337 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30338 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30339 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30340 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30341 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30342 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30343 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30344 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30345 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30346 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30347 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30348 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30349 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30350 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30352 /* AVX512ER */
30353 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30354 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30355 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30356 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30357 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30358 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30359 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30360 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30361 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30362 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30363 };
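/* Illustrative sketch, not part of this file: the trailing _INT in the
   ..._QI_INT / ..._HI_INT function types above is the explicit rounding
   immediate these builtins take.  It assumes GCC's <immintrin.h> wrapper
   _mm512_add_round_pd, which reaches the IX86_BUILTIN_ADDPD512 entry
   registered above.  Guarded out so it cannot affect this file.  */
#if 0
#include <immintrin.h>

/* Compile with -mavx512f.  Add with an explicit rounding mode instead of
   the current MXCSR setting.  */
static __m512d
add_round_nearest (__m512d a, __m512d b)
{
  return _mm512_add_round_pd (a, b,
			      _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
#endif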
30365 /* FMA4 and XOP. */
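/* The MULTI_ARG_* macros below are just shorthand aliases for the builtin
   function-type enumerators, so the bdesc_multi_arg table entries that
   follow stay readable.  */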
30366 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30367 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30368 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30369 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30370 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30371 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30372 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30373 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30374 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30375 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30376 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30377 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30378 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30379 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30380 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30381 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30382 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30383 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30384 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30385 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30386 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30387 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30388 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30389 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30390 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30391 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30392 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30393 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30394 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30395 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30396 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30397 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30398 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30399 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30400 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30401 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30402 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30403 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30404 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30405 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30406 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30407 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30408 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30409 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30410 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30411 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30412 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30413 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30414 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30415 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30416 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30417 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
30419 static const struct builtin_description bdesc_multi_arg[] =
30420 {
30421 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
30422 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
30423 UNKNOWN, (int)MULTI_ARG_3_SF },
30424 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
30425 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
30426 UNKNOWN, (int)MULTI_ARG_3_DF },
30428 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
30429 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
30430 UNKNOWN, (int)MULTI_ARG_3_SF },
30431 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
30432 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
30433 UNKNOWN, (int)MULTI_ARG_3_DF },
30435 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
30436 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
30437 UNKNOWN, (int)MULTI_ARG_3_SF },
30438 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
30439 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
30440 UNKNOWN, (int)MULTI_ARG_3_DF },
30441 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
30442 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
30443 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30444 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
30445 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
30446 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30448 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
30449 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
30450 UNKNOWN, (int)MULTI_ARG_3_SF },
30451 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
30452 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
30453 UNKNOWN, (int)MULTI_ARG_3_DF },
30454 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
30455 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
30456 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30457 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
30458 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
30459 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30461 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
30462 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
30463 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
30464 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
30465 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
30466 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
30467 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
30469 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30470 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30471 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
30472 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
30473 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
30474 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
30475 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
30477 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
30479 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30481 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30482 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30484 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30485 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30486 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30487 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30489 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30492 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
30494 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
30495 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
30496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
30497 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
30498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
30499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
30500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
30502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
30503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
30504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
30506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
30507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
30509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
30510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
30511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
30512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
30513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
30514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
30516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
30533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
30536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
30537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
30538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
30540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
30541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
30544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
30545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
30546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
30548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
30549 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30550 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30551 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
30552 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
30553 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
30554 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
30556 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30557 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30558 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30559 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
30560 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
30561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
30562 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
30564 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
30565 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30566 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30567 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
30568 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
30569 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
30570 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
30572 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
30573 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30574 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30575 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
30576 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
30577 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
30578 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
30580 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
30581 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30582 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30583 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
30584 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
30585 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
30586 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
30588 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30589 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30590 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30591 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
30592 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
30593 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
30594 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
30596 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30597 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30598 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30599 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30600 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30601 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30602 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30603 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30605 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30606 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30607 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30608 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30609 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30610 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30611 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30612 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30614 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
30615 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
30616 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
30617 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
30621 /* TM vector builtins. */
30623 /* Reuse the existing x86-specific `struct builtin_description' because
30624 we're lazy. Add casts to make them fit; see the usage sketch after this table. */
30625 static const struct builtin_description bdesc_tm[] =
30627 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30628 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30629 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30630 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30631 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30632 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30633 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30635 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30636 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30637 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30638 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30639 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30640 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30641 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30643 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30644 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30645 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30646 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30647 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30648 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30649 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30651 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30652 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30653 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
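/* Usage sketch for the TM vector entries above (an illustration only, under
   the assumption that the TM pass picks the matching size): with -fgnu-tm,
   a vector-sized store inside a transaction is instrumented through one of
   these builtins, e.g. a 16-byte store ends up as a call to the libitm
   entry point _ITM_WM128.

       #include <x86intrin.h>
       __m128 g;
       void store_in_txn (__m128 v)
       {
         __transaction_atomic { g = v; }   // instrumented via _ITM_WM128
       }

   The exact instrumentation is decided by the TM lowering pass; this only
   sketches what the table entries are for.  */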
30656 /* TM callbacks. */
30658 /* Return the builtin decl needed to load a vector of TYPE. */
30660 static tree
30661 ix86_builtin_tm_load (tree type)
30663 if (TREE_CODE (type) == VECTOR_TYPE)
30665 switch (tree_to_uhwi (TYPE_SIZE (type)))
30667 case 64:
30668 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
30669 case 128:
30670 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
30671 case 256:
30672 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
30675 return NULL_TREE;
30678 /* Return the builtin decl needed to store a vector of TYPE. */
30680 static tree
30681 ix86_builtin_tm_store (tree type)
30683 if (TREE_CODE (type) == VECTOR_TYPE)
30685 switch (tree_to_uhwi (TYPE_SIZE (type)))
30687 case 64:
30688 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
30689 case 128:
30690 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
30691 case 256:
30692 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
30695 return NULL_TREE;
30698 /* Initialize the transactional memory vector load/store builtins. */
30700 static void
30701 ix86_init_tm_builtins (void)
30703 enum ix86_builtin_func_type ftype;
30704 const struct builtin_description *d;
30705 size_t i;
30706 tree decl;
30707 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30708 tree attrs_log, attrs_type_log;
30710 if (!flag_tm)
30711 return;
30713 /* If there are no builtins defined, we must be compiling in a
30714 language without trans-mem support. */
30715 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30716 return;
30718 /* Use whatever attributes a normal TM load has. */
30719 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30720 attrs_load = DECL_ATTRIBUTES (decl);
30721 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30722 /* Use whatever attributes a normal TM store has. */
30723 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30724 attrs_store = DECL_ATTRIBUTES (decl);
30725 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30726 /* Use whatever attributes a normal TM log has. */
30727 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30728 attrs_log = DECL_ATTRIBUTES (decl);
30729 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30731 for (i = 0, d = bdesc_tm;
30732 i < ARRAY_SIZE (bdesc_tm);
30733 i++, d++)
30735 if ((d->mask & ix86_isa_flags) != 0
30736 || (lang_hooks.builtin_function
30737 == lang_hooks.builtin_function_ext_scope))
30739 tree type, attrs, attrs_type;
30740 enum built_in_function code = (enum built_in_function) d->code;
30742 ftype = (enum ix86_builtin_func_type) d->flag;
30743 type = ix86_get_builtin_func_type (ftype);
30745 if (BUILTIN_TM_LOAD_P (code))
30747 attrs = attrs_load;
30748 attrs_type = attrs_type_load;
30750 else if (BUILTIN_TM_STORE_P (code))
30752 attrs = attrs_store;
30753 attrs_type = attrs_type_store;
30755 else
30757 attrs = attrs_log;
30758 attrs_type = attrs_type_log;
30760 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30761 /* The builtin without the prefix for
30762 calling it directly. */
30763 d->name + strlen ("__builtin_"),
30764 attrs);
30765 /* add_builtin_function () will set the DECL_ATTRIBUTES; now
30766 set the TYPE_ATTRIBUTES. */
30767 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30769 set_builtin_decl (code, decl, false);
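/* The library name passed to add_builtin_function above
   (d->name + strlen ("__builtin_")) is the assembler name used when a call
   to one of these builtins is emitted, so e.g. "__builtin__ITM_WM64" ends
   up calling the _ITM_WM64 entry point provided by libitm.  */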
30774 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
30775 in the current target ISA, to allow the user to compile particular modules
30776 with target-specific options that differ from the command-line
30777 options. */
30778 static void
30779 ix86_init_mmx_sse_builtins (void)
30781 const struct builtin_description * d;
30782 enum ix86_builtin_func_type ftype;
30783 size_t i;
30785 /* Add all special builtins with variable number of operands. */
30786 for (i = 0, d = bdesc_special_args;
30787 i < ARRAY_SIZE (bdesc_special_args);
30788 i++, d++)
30790 if (d->name == 0)
30791 continue;
30793 ftype = (enum ix86_builtin_func_type) d->flag;
30794 def_builtin (d->mask, d->name, ftype, d->code);
30797 /* Add all builtins with variable number of operands. */
30798 for (i = 0, d = bdesc_args;
30799 i < ARRAY_SIZE (bdesc_args);
30800 i++, d++)
30802 if (d->name == 0)
30803 continue;
30805 ftype = (enum ix86_builtin_func_type) d->flag;
30806 def_builtin_const (d->mask, d->name, ftype, d->code);
30809 /* Add all builtins with rounding. */
30810 for (i = 0, d = bdesc_round_args;
30811 i < ARRAY_SIZE (bdesc_round_args);
30812 i++, d++)
30814 if (d->name == 0)
30815 continue;
30817 ftype = (enum ix86_builtin_func_type) d->flag;
30818 def_builtin_const (d->mask, d->name, ftype, d->code);
30821 /* pcmpestr[im] insns. */
30822 for (i = 0, d = bdesc_pcmpestr;
30823 i < ARRAY_SIZE (bdesc_pcmpestr);
30824 i++, d++)
30826 if (d->code == IX86_BUILTIN_PCMPESTRM128)
30827 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
30828 else
30829 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
30830 def_builtin_const (d->mask, d->name, ftype, d->code);
30833 /* pcmpistr[im] insns. */
30834 for (i = 0, d = bdesc_pcmpistr;
30835 i < ARRAY_SIZE (bdesc_pcmpistr);
30836 i++, d++)
30838 if (d->code == IX86_BUILTIN_PCMPISTRM128)
30839 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
30840 else
30841 ftype = INT_FTYPE_V16QI_V16QI_INT;
30842 def_builtin_const (d->mask, d->name, ftype, d->code);
30845 /* comi/ucomi insns. */
30846 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
30848 if (d->mask == OPTION_MASK_ISA_SSE2)
30849 ftype = INT_FTYPE_V2DF_V2DF;
30850 else
30851 ftype = INT_FTYPE_V4SF_V4SF;
30852 def_builtin_const (d->mask, d->name, ftype, d->code);
30855 /* SSE */
30856 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
30857 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
30858 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
30859 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
30861 /* SSE or 3DNow!A */
30862 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
30863 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
30864 IX86_BUILTIN_MASKMOVQ);
30866 /* SSE2 */
30867 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
30868 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
30870 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
30871 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
30872 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
30873 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
30875 /* SSE3. */
30876 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
30877 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
30878 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
30879 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
30881 /* AES */
30882 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
30883 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
30884 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
30885 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
30886 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
30887 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
30888 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
30889 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
30890 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
30891 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
30892 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
30893 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
30895 /* PCLMUL */
30896 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
30897 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
30899 /* RDRND */
30900 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
30901 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
30902 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
30903 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
30904 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
30905 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
30906 IX86_BUILTIN_RDRAND64_STEP);
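/* Usage sketch for the RDRND *_step builtins above (a hedged illustration,
   assuming -mrdrnd): the return value is the carry flag produced by the
   RDRAND instruction, so callers are expected to retry on failure.

       unsigned int
       get_random_u32 (void)
       {
         unsigned int r;
         while (!__builtin_ia32_rdrand32_step (&r))
           ;  // CF == 0: no entropy available yet, retry
         return r;
       }

   The <immintrin.h> wrapper _rdrand32_step is built on the same builtin.  */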
30908 /* AVX2 */
30909 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
30910 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
30911 IX86_BUILTIN_GATHERSIV2DF);
30913 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
30914 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
30915 IX86_BUILTIN_GATHERSIV4DF);
30917 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
30918 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
30919 IX86_BUILTIN_GATHERDIV2DF);
30921 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
30922 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
30923 IX86_BUILTIN_GATHERDIV4DF);
30925 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
30926 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
30927 IX86_BUILTIN_GATHERSIV4SF);
30929 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
30930 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
30931 IX86_BUILTIN_GATHERSIV8SF);
30933 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
30934 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
30935 IX86_BUILTIN_GATHERDIV4SF);
30937 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
30938 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
30939 IX86_BUILTIN_GATHERDIV8SF);
30941 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
30942 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
30943 IX86_BUILTIN_GATHERSIV2DI);
30945 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
30946 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
30947 IX86_BUILTIN_GATHERSIV4DI);
30949 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
30950 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
30951 IX86_BUILTIN_GATHERDIV2DI);
30953 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
30954 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
30955 IX86_BUILTIN_GATHERDIV4DI);
30957 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
30958 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
30959 IX86_BUILTIN_GATHERSIV4SI);
30961 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
30962 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
30963 IX86_BUILTIN_GATHERSIV8SI);
30965 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
30966 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
30967 IX86_BUILTIN_GATHERDIV4SI);
30969 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
30970 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
30971 IX86_BUILTIN_GATHERDIV8SI);
30973 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df",
30974 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
30975 IX86_BUILTIN_GATHERALTSIV4DF);
30977 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256",
30978 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
30979 IX86_BUILTIN_GATHERALTDIV8SF);
30981 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di",
30982 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
30983 IX86_BUILTIN_GATHERALTSIV4DI);
30985 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256",
30986 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
30987 IX86_BUILTIN_GATHERALTDIV8SI);
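/* Usage sketch for the AVX2 gather builtins above (a hedged illustration,
   assuming -mavx2): user code normally reaches them through <immintrin.h>,
   e.g. _mm256_i32gather_pd expands to __builtin_ia32_gathersiv4df with an
   all-ones mask.

       #include <immintrin.h>
       // Gather tbl[idx[0..3]] into a __m256d; scale 8 == sizeof (double).
       __m256d
       gather4 (const double *tbl, __m128i idx)
       {
         return _mm256_i32gather_pd (tbl, idx, 8);
       }
   */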
30989 /* AVX512F */
30990 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
30991 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
30992 IX86_BUILTIN_GATHER3SIV16SF);
30994 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
30995 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
30996 IX86_BUILTIN_GATHER3SIV8DF);
30998 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
30999 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
31000 IX86_BUILTIN_GATHER3DIV16SF);
31002 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
31003 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
31004 IX86_BUILTIN_GATHER3DIV8DF);
31006 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
31007 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
31008 IX86_BUILTIN_GATHER3SIV16SI);
31010 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
31011 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
31012 IX86_BUILTIN_GATHER3SIV8DI);
31014 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
31015 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
31016 IX86_BUILTIN_GATHER3DIV16SI);
31018 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
31019 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
31020 IX86_BUILTIN_GATHER3DIV8DI);
31022 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df",
31023 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
31024 IX86_BUILTIN_GATHER3ALTSIV8DF);
31026 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf",
31027 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
31028 IX86_BUILTIN_GATHER3ALTDIV16SF);
31030 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di",
31031 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
31032 IX86_BUILTIN_GATHER3ALTSIV8DI);
31034 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si",
31035 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
31036 IX86_BUILTIN_GATHER3ALTDIV16SI);
31038 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
31039 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
31040 IX86_BUILTIN_SCATTERSIV16SF);
31042 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
31043 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
31044 IX86_BUILTIN_SCATTERSIV8DF);
31046 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
31047 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
31048 IX86_BUILTIN_SCATTERDIV16SF);
31050 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
31051 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
31052 IX86_BUILTIN_SCATTERDIV8DF);
31054 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
31055 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
31056 IX86_BUILTIN_SCATTERSIV16SI);
31058 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
31059 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
31060 IX86_BUILTIN_SCATTERSIV8DI);
31062 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
31063 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
31064 IX86_BUILTIN_SCATTERDIV16SI);
31066 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
31067 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
31068 IX86_BUILTIN_SCATTERDIV8DI);
31070 /* AVX512PF */
31071 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
31072 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31073 IX86_BUILTIN_GATHERPFDPD);
31074 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31075 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31076 IX86_BUILTIN_GATHERPFDPS);
31077 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31078 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31079 IX86_BUILTIN_GATHERPFQPD);
31080 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31081 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31082 IX86_BUILTIN_GATHERPFQPS);
31083 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31084 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31085 IX86_BUILTIN_SCATTERPFDPD);
31086 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31087 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31088 IX86_BUILTIN_SCATTERPFDPS);
31089 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31090 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31091 IX86_BUILTIN_SCATTERPFQPD);
31092 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31093 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31094 IX86_BUILTIN_SCATTERPFQPS);
31096 /* SHA */
31097 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31098 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31099 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31100 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31101 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31102 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31103 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31104 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31105 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31106 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31107 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31108 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31109 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31110 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31112 /* RTM. */
31113 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31114 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31116 /* MMX access to the vec_init patterns. */
31117 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31118 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31120 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31121 V4HI_FTYPE_HI_HI_HI_HI,
31122 IX86_BUILTIN_VEC_INIT_V4HI);
31124 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31125 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31126 IX86_BUILTIN_VEC_INIT_V8QI);
31128 /* Access to the vec_extract patterns. */
31129 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31130 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31131 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31132 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31133 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31134 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31135 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31136 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31137 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31138 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31140 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31141 "__builtin_ia32_vec_ext_v4hi",
31142 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31144 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31145 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31147 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31148 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31150 /* Access to the vec_set patterns. */
31151 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31152 "__builtin_ia32_vec_set_v2di",
31153 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31155 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31156 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31158 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31159 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31161 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31162 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31164 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31165 "__builtin_ia32_vec_set_v4hi",
31166 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31168 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31169 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31171 /* RDSEED */
31172 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31173 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31174 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31175 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31176 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31177 "__builtin_ia32_rdseed_di_step",
31178 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31180 /* ADCX */
31181 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31182 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31183 def_builtin (OPTION_MASK_ISA_64BIT,
31184 "__builtin_ia32_addcarryx_u64",
31185 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31186 IX86_BUILTIN_ADDCARRYX64);
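/* Usage sketch for the add-with-carry builtins above (a hedged illustration,
   assuming a 64-bit target): chaining the returned carry gives a multi-word
   addition.  The 32-bit variant is registered with mask 0, i.e. it is always
   available and simply falls back to ADC when ADX is not enabled.

       // r = a + b for two 2x64-bit numbers, little-endian limbs.
       static void
       add128 (unsigned long long r[2],
               const unsigned long long a[2], const unsigned long long b[2])
       {
         unsigned char c;
         c = __builtin_ia32_addcarryx_u64 (0, a[0], b[0], &r[0]);
         (void) __builtin_ia32_addcarryx_u64 (c, a[1], b[1], &r[1]);
       }
   */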
31188 /* Read/write FLAGS. */
31189 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
31190 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31191 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31192 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31193 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
31194 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31195 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31196 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
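/* Usage sketch for the EFLAGS builtins above (a hedged illustration,
   assuming a 64-bit target, where the _u64 variants are registered):

       // Return nonzero if the carry flag (bit 0 of EFLAGS) is set.
       static int
       carry_flag_set (void)
       {
         return (__builtin_ia32_readeflags_u64 () & 1) != 0;
       }

   The _u32 variants are registered only for !TARGET_64BIT, via the
   ~OPTION_MASK_ISA_64BIT mask above.  */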
31198 /* CLFLUSHOPT. */
31199 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
31200 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
31202 /* Add FMA4 multi-arg argument instructions */
31203 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31205 if (d->name == 0)
31206 continue;
31208 ftype = (enum ix86_builtin_func_type) d->flag;
31209 def_builtin_const (d->mask, d->name, ftype, d->code);
31213 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31214 to return a pointer to VERSION_DECL if the outcome of the expression
31215 formed by PREDICATE_CHAIN is true. This function will be called during
31216 version dispatch to decide which function version to execute. It returns
31217 the basic block at the end, to which more conditions can be added. */
31219 static basic_block
31220 add_condition_to_bb (tree function_decl, tree version_decl,
31221 tree predicate_chain, basic_block new_bb)
31223 gimple return_stmt;
31224 tree convert_expr, result_var;
31225 gimple convert_stmt;
31226 gimple call_cond_stmt;
31227 gimple if_else_stmt;
31229 basic_block bb1, bb2, bb3;
31230 edge e12, e23;
31232 tree cond_var, and_expr_var = NULL_TREE;
31233 gimple_seq gseq;
31235 tree predicate_decl, predicate_arg;
31237 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31239 gcc_assert (new_bb != NULL);
31240 gseq = bb_seq (new_bb);
31243 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31244 build_fold_addr_expr (version_decl));
31245 result_var = create_tmp_var (ptr_type_node, NULL);
31246 convert_stmt = gimple_build_assign (result_var, convert_expr);
31247 return_stmt = gimple_build_return (result_var);
31249 if (predicate_chain == NULL_TREE)
31251 gimple_seq_add_stmt (&gseq, convert_stmt);
31252 gimple_seq_add_stmt (&gseq, return_stmt);
31253 set_bb_seq (new_bb, gseq);
31254 gimple_set_bb (convert_stmt, new_bb);
31255 gimple_set_bb (return_stmt, new_bb);
31256 pop_cfun ();
31257 return new_bb;
31260 while (predicate_chain != NULL)
31262 cond_var = create_tmp_var (integer_type_node, NULL);
31263 predicate_decl = TREE_PURPOSE (predicate_chain);
31264 predicate_arg = TREE_VALUE (predicate_chain);
31265 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31266 gimple_call_set_lhs (call_cond_stmt, cond_var);
31268 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31269 gimple_set_bb (call_cond_stmt, new_bb);
31270 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31272 predicate_chain = TREE_CHAIN (predicate_chain);
31274 if (and_expr_var == NULL)
31275 and_expr_var = cond_var;
31276 else
31278 gimple assign_stmt;
31279 /* Use MIN_EXPR to check whether any of the integers is zero:
31280 and_expr_var = MIN_EXPR <cond_var, and_expr_var>. */
31281 assign_stmt = gimple_build_assign (and_expr_var,
31282 build2 (MIN_EXPR, integer_type_node,
31283 cond_var, and_expr_var));
31285 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31286 gimple_set_bb (assign_stmt, new_bb);
31287 gimple_seq_add_stmt (&gseq, assign_stmt);
31291 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31292 integer_zero_node,
31293 NULL_TREE, NULL_TREE);
31294 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31295 gimple_set_bb (if_else_stmt, new_bb);
31296 gimple_seq_add_stmt (&gseq, if_else_stmt);
31298 gimple_seq_add_stmt (&gseq, convert_stmt);
31299 gimple_seq_add_stmt (&gseq, return_stmt);
31300 set_bb_seq (new_bb, gseq);
31302 bb1 = new_bb;
31303 e12 = split_block (bb1, if_else_stmt);
31304 bb2 = e12->dest;
31305 e12->flags &= ~EDGE_FALLTHRU;
31306 e12->flags |= EDGE_TRUE_VALUE;
31308 e23 = split_block (bb2, return_stmt);
31310 gimple_set_bb (convert_stmt, bb2);
31311 gimple_set_bb (return_stmt, bb2);
31313 bb3 = e23->dest;
31314 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31316 remove_edge (e23);
31317 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31319 pop_cfun ();
31321 return bb3;
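/* Illustration of what add_condition_to_bb builds (a sketch only, using a
   hypothetical version foo.arch_corei7 guarded by a single predicate):

       cond = __builtin_cpu_is ("corei7");          // call_cond_stmt
       if (cond > 0)                                // if_else_stmt
         return (void *) &foo.arch_corei7;          // convert_stmt + return_stmt
       // otherwise fall through to BB3, where the next version's
       // condition is appended.

   With more than one predicate in PREDICATE_CHAIN, the conditions are
   combined with MIN_EXPR as in the loop above.  */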
31324 /* This parses the attribute arguments to target in DECL and determines
31325 the right builtin to use to match the platform specification.
31326 It returns the priority value for this version decl. If PREDICATE_LIST
31327 is not NULL, it stores the list of cpu features that need to be checked
31328 before dispatching this function. */
31330 static unsigned int
31331 get_builtin_code_for_version (tree decl, tree *predicate_list)
31333 tree attrs;
31334 struct cl_target_option cur_target;
31335 tree target_node;
31336 struct cl_target_option *new_target;
31337 const char *arg_str = NULL;
31338 const char *attrs_str = NULL;
31339 char *tok_str = NULL;
31340 char *token;
31342 /* Priority of i386 features; a greater value means a higher priority. This is
31343 used to decide the order in which function dispatch must happen. For
31344 instance, a version specialized for SSE4.2 should be checked for dispatch
31345 before a version for SSE3, as SSE4.2 implies SSE3. */
31346 enum feature_priority
31348 P_ZERO = 0,
31349 P_MMX,
31350 P_SSE,
31351 P_SSE2,
31352 P_SSE3,
31353 P_SSSE3,
31354 P_PROC_SSSE3,
31355 P_SSE4_A,
31356 P_PROC_SSE4_A,
31357 P_SSE4_1,
31358 P_SSE4_2,
31359 P_PROC_SSE4_2,
31360 P_POPCNT,
31361 P_AVX,
31362 P_PROC_AVX,
31363 P_FMA4,
31364 P_XOP,
31365 P_PROC_XOP,
31366 P_FMA,
31367 P_PROC_FMA,
31368 P_AVX2,
31369 P_PROC_AVX2
31372 enum feature_priority priority = P_ZERO;
31374 /* These are the target attribute strings for which a dispatcher is
31375 available, from fold_builtin_cpu. */
31377 static struct _feature_list
31379 const char *const name;
31380 const enum feature_priority priority;
31382 const feature_list[] =
31384 {"mmx", P_MMX},
31385 {"sse", P_SSE},
31386 {"sse2", P_SSE2},
31387 {"sse3", P_SSE3},
31388 {"sse4a", P_SSE4_A},
31389 {"ssse3", P_SSSE3},
31390 {"sse4.1", P_SSE4_1},
31391 {"sse4.2", P_SSE4_2},
31392 {"popcnt", P_POPCNT},
31393 {"avx", P_AVX},
31394 {"fma4", P_FMA4},
31395 {"xop", P_XOP},
31396 {"fma", P_FMA},
31397 {"avx2", P_AVX2}
31401 static unsigned int NUM_FEATURES
31402 = sizeof (feature_list) / sizeof (struct _feature_list);
31404 unsigned int i;
31406 tree predicate_chain = NULL_TREE;
31407 tree predicate_decl, predicate_arg;
31409 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31410 gcc_assert (attrs != NULL);
31412 attrs = TREE_VALUE (TREE_VALUE (attrs));
31414 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31415 attrs_str = TREE_STRING_POINTER (attrs);
31417 /* Return priority zero for default function. */
31418 if (strcmp (attrs_str, "default") == 0)
31419 return 0;
31421 /* Handle arch= if specified. For priority, set it to be 1 more than
31422 the best instruction set the processor can handle. For instance, if
31423 there is a version for atom and a version for ssse3 (the highest ISA
31424 priority for atom), the atom version must be checked for dispatch
31425 before the ssse3 version. */
31426 if (strstr (attrs_str, "arch=") != NULL)
31428 cl_target_option_save (&cur_target, &global_options);
31429 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31430 &global_options_set);
31432 gcc_assert (target_node);
31433 new_target = TREE_TARGET_OPTION (target_node);
31434 gcc_assert (new_target);
31436 if (new_target->arch_specified && new_target->arch > 0)
31438 switch (new_target->arch)
31440 case PROCESSOR_CORE2:
31441 arg_str = "core2";
31442 priority = P_PROC_SSSE3;
31443 break;
31444 case PROCESSOR_NEHALEM:
31445 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31446 arg_str = "westmere";
31447 else
31448 /* We translate "arch=corei7" and "arch=nehalem" to
31449 "corei7" so that it will be mapped to M_INTEL_COREI7
31450 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31451 arg_str = "corei7";
31452 priority = P_PROC_SSE4_2;
31453 break;
31454 case PROCESSOR_SANDYBRIDGE:
31455 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31456 arg_str = "ivybridge";
31457 else
31458 arg_str = "sandybridge";
31459 priority = P_PROC_AVX;
31460 break;
31461 case PROCESSOR_HASWELL:
31462 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31463 arg_str = "broadwell";
31464 else
31465 arg_str = "haswell";
31466 priority = P_PROC_AVX2;
31467 break;
31468 case PROCESSOR_BONNELL:
31469 arg_str = "bonnell";
31470 priority = P_PROC_SSSE3;
31471 break;
31472 case PROCESSOR_SILVERMONT:
31473 arg_str = "silvermont";
31474 priority = P_PROC_SSE4_2;
31475 break;
31476 case PROCESSOR_AMDFAM10:
31477 arg_str = "amdfam10h";
31478 priority = P_PROC_SSE4_A;
31479 break;
31480 case PROCESSOR_BTVER1:
31481 arg_str = "btver1";
31482 priority = P_PROC_SSE4_A;
31483 break;
31484 case PROCESSOR_BTVER2:
31485 arg_str = "btver2";
31486 priority = P_PROC_AVX;
31487 break;
31488 case PROCESSOR_BDVER1:
31489 arg_str = "bdver1";
31490 priority = P_PROC_XOP;
31491 break;
31492 case PROCESSOR_BDVER2:
31493 arg_str = "bdver2";
31494 priority = P_PROC_FMA;
31495 break;
31496 case PROCESSOR_BDVER3:
31497 arg_str = "bdver3";
31498 priority = P_PROC_FMA;
31499 break;
31500 case PROCESSOR_BDVER4:
31501 arg_str = "bdver4";
31502 priority = P_PROC_AVX2;
31503 break;
31507 cl_target_option_restore (&global_options, &cur_target);
31509 if (predicate_list && arg_str == NULL)
31511 error_at (DECL_SOURCE_LOCATION (decl),
31512 "No dispatcher found for the versioning attributes");
31513 return 0;
31516 if (predicate_list)
31518 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31519 /* For a C string literal the length includes the trailing NULL. */
31520 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31521 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31522 predicate_chain);
31526 /* Process feature name. */
31527 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31528 strcpy (tok_str, attrs_str);
31529 token = strtok (tok_str, ",");
31530 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31532 while (token != NULL)
31534 /* Do not process "arch=" */
31535 if (strncmp (token, "arch=", 5) == 0)
31537 token = strtok (NULL, ",");
31538 continue;
31540 for (i = 0; i < NUM_FEATURES; ++i)
31542 if (strcmp (token, feature_list[i].name) == 0)
31544 if (predicate_list)
31546 predicate_arg = build_string_literal (
31547 strlen (feature_list[i].name) + 1,
31548 feature_list[i].name);
31549 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31550 predicate_chain);
31552 /* Find the maximum priority feature. */
31553 if (feature_list[i].priority > priority)
31554 priority = feature_list[i].priority;
31556 break;
31559 if (predicate_list && i == NUM_FEATURES)
31561 error_at (DECL_SOURCE_LOCATION (decl),
31562 "No dispatcher found for %s", token);
31563 return 0;
31565 token = strtok (NULL, ",");
31567 free (tok_str);
31569 if (predicate_list && predicate_chain == NULL_TREE)
31571 error_at (DECL_SOURCE_LOCATION (decl),
31572 "No dispatcher found for the versioning attributes : %s",
31573 attrs_str);
31574 return 0;
31576 else if (predicate_list)
31578 predicate_chain = nreverse (predicate_chain);
31579 *predicate_list = predicate_chain;
31582 return priority;
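/* Worked example for get_builtin_code_for_version (a sketch with a
   hypothetical set of versions):

     target ("default")      -> priority 0, no predicates
     target ("sse4.2")       -> predicate __builtin_cpu_supports ("sse4.2"),
                                priority P_SSE4_2
     target ("arch=corei7")  -> predicate __builtin_cpu_is ("corei7"),
                                priority P_PROC_SSE4_2

   Because P_PROC_SSE4_2 > P_SSE4_2 in the enum above, the arch=corei7
   version is checked before the plain sse4.2 version when the dispatcher
   is built.  */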
31585 /* This compares the priority of target features in function DECL1
31586 and DECL2. It returns positive value if DECL1 is higher priority,
31587 negative value if DECL2 is higher priority and 0 if they are the
31588 same. */
31590 static int
31591 ix86_compare_version_priority (tree decl1, tree decl2)
31593 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31594 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31596 return (int)priority1 - (int)priority2;
31599 /* V1 and V2 point to function versions with different priorities
31600 based on the target ISA. This function compares their priorities. */
31602 static int
31603 feature_compare (const void *v1, const void *v2)
31605 typedef struct _function_version_info
31607 tree version_decl;
31608 tree predicate_chain;
31609 unsigned int dispatch_priority;
31610 } function_version_info;
31612 const function_version_info c1 = *(const function_version_info *)v1;
31613 const function_version_info c2 = *(const function_version_info *)v2;
31614 return (c2.dispatch_priority - c1.dispatch_priority);
31617 /* This function generates the dispatch function for
31618 multi-versioned functions. DISPATCH_DECL is the function which will
31619 contain the dispatch logic. FNDECLS are the function choices for
31620 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
31621 in DISPATCH_DECL in which the dispatch code is generated. */
31623 static int
31624 dispatch_function_versions (tree dispatch_decl,
31625 void *fndecls_p,
31626 basic_block *empty_bb)
31628 tree default_decl;
31629 gimple ifunc_cpu_init_stmt;
31630 gimple_seq gseq;
31631 int ix;
31632 tree ele;
31633 vec<tree> *fndecls;
31634 unsigned int num_versions = 0;
31635 unsigned int actual_versions = 0;
31636 unsigned int i;
31638 struct _function_version_info
31640 tree version_decl;
31641 tree predicate_chain;
31642 unsigned int dispatch_priority;
31643 }*function_version_info;
31645 gcc_assert (dispatch_decl != NULL
31646 && fndecls_p != NULL
31647 && empty_bb != NULL);
31649 /* fndecls_p is actually a vector. */
31650 fndecls = static_cast<vec<tree> *> (fndecls_p);
31652 /* At least one more version other than the default. */
31653 num_versions = fndecls->length ();
31654 gcc_assert (num_versions >= 2);
31656 function_version_info = (struct _function_version_info *)
31657 XNEWVEC (struct _function_version_info, (num_versions - 1));
31659 /* The first version in the vector is the default decl. */
31660 default_decl = (*fndecls)[0];
31662 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31664 gseq = bb_seq (*empty_bb);
31665 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31666 constructors, so explicitly call __builtin_cpu_init here. */
31667 ifunc_cpu_init_stmt = gimple_build_call_vec (
31668 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31669 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31670 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31671 set_bb_seq (*empty_bb, gseq);
31673 pop_cfun ();
31676 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31678 tree version_decl = ele;
31679 tree predicate_chain = NULL_TREE;
31680 unsigned int priority;
31681 /* Get attribute string, parse it and find the right predicate decl.
31682 The predicate function could be a lengthy combination of many
31683 features, like arch-type and various isa-variants. */
31684 priority = get_builtin_code_for_version (version_decl,
31685 &predicate_chain);
31687 if (predicate_chain == NULL_TREE)
31688 continue;
31690 function_version_info [actual_versions].version_decl = version_decl;
31691 function_version_info [actual_versions].predicate_chain
31692 = predicate_chain;
31693 function_version_info [actual_versions].dispatch_priority = priority;
31694 actual_versions++;
31697 /* Sort the versions according to descending order of dispatch priority. The
31698 priority is based on the ISA. This is not a perfect solution. There
31699 could still be ambiguity. If more than one function version is suitable
31700 to execute, which one should be dispatched? In future, allow the user
31701 to specify a dispatch priority next to the version. */
31702 qsort (function_version_info, actual_versions,
31703 sizeof (struct _function_version_info), feature_compare);
31705 for (i = 0; i < actual_versions; ++i)
31706 *empty_bb = add_condition_to_bb (dispatch_decl,
31707 function_version_info[i].version_decl,
31708 function_version_info[i].predicate_chain,
31709 *empty_bb);
31711 /* Dispatch the default version at the end. */
31712 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31713 NULL, *empty_bb);
31715 free (function_version_info);
31716 return 0;
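/* User-level illustration of the dispatching implemented here (a sketch;
   at the time of this code, function multiversioning is a C++ front-end
   feature):

       __attribute__ ((target ("default"))) int foo () { return 0; }
       __attribute__ ((target ("sse4.2")))  int foo () { return 1; }
       __attribute__ ((target ("avx2")))    int foo () { return 2; }

   Calls to foo go through the IFUNC resolver built here, which tests the
   versions in decreasing priority order (avx2, then sse4.2) and falls back
   to the default version.  */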
31719 /* Comparator function to be used in qsort routine to sort attribute
31720 specification strings to "target". */
31722 static int
31723 attr_strcmp (const void *v1, const void *v2)
31725 const char *c1 = *(char *const*)v1;
31726 const char *c2 = *(char *const*)v2;
31727 return strcmp (c1, c2);
31730 /* ARGLIST is the argument to target attribute. This function tokenizes
31731 the comma separated arguments, sorts them and returns a string which
31732 is a unique identifier for the comma separated arguments. It also
31733 replaces non-identifier characters "=,-" with "_". */
31735 static char *
31736 sorted_attr_string (tree arglist)
31738 tree arg;
31739 size_t str_len_sum = 0;
31740 char **args = NULL;
31741 char *attr_str, *ret_str;
31742 char *attr = NULL;
31743 unsigned int argnum = 1;
31744 unsigned int i;
31746 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31748 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31749 size_t len = strlen (str);
31750 str_len_sum += len + 1;
31751 if (arg != arglist)
31752 argnum++;
31753 for (i = 0; i < strlen (str); i++)
31754 if (str[i] == ',')
31755 argnum++;
31758 attr_str = XNEWVEC (char, str_len_sum);
31759 str_len_sum = 0;
31760 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31762 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31763 size_t len = strlen (str);
31764 memcpy (attr_str + str_len_sum, str, len);
31765 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
31766 str_len_sum += len + 1;
31769 /* Replace "=,-" with "_". */
31770 for (i = 0; i < strlen (attr_str); i++)
31771 if (attr_str[i] == '=' || attr_str[i]== '-')
31772 attr_str[i] = '_';
31774 if (argnum == 1)
31775 return attr_str;
31777 args = XNEWVEC (char *, argnum);
31779 i = 0;
31780 attr = strtok (attr_str, ",");
31781 while (attr != NULL)
31783 args[i] = attr;
31784 i++;
31785 attr = strtok (NULL, ",");
31788 qsort (args, argnum, sizeof (char *), attr_strcmp);
31790 ret_str = XNEWVEC (char, str_len_sum);
31791 str_len_sum = 0;
31792 for (i = 0; i < argnum; i++)
31794 size_t len = strlen (args[i]);
31795 memcpy (ret_str + str_len_sum, args[i], len);
31796 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
31797 str_len_sum += len + 1;
31800 XDELETEVEC (args);
31801 XDELETEVEC (attr_str);
31802 return ret_str;
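/* Worked example for sorted_attr_string (a sketch with hypothetical
   arguments): target ("avx2,arch=haswell") is split into the tokens "avx2"
   and "arch_haswell" (after '=' and '-' are replaced by '_'), sorted to
   "arch_haswell", "avx2", and joined as "arch_haswell_avx2".  The
   assembler-name mangling below therefore turns such a version of foo
   into foo.arch_haswell_avx2.  */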
31805 /* This function changes the assembler name for functions that are
31806 versions. If DECL is a function version and has a "target"
31807 attribute, it appends the attribute string to its assembler name. */
31809 static tree
31810 ix86_mangle_function_version_assembler_name (tree decl, tree id)
31812 tree version_attr;
31813 const char *orig_name, *version_string;
31814 char *attr_str, *assembler_name;
31816 if (DECL_DECLARED_INLINE_P (decl)
31817 && lookup_attribute ("gnu_inline",
31818 DECL_ATTRIBUTES (decl)))
31819 error_at (DECL_SOURCE_LOCATION (decl),
31820 "Function versions cannot be marked as gnu_inline,"
31821 " bodies have to be generated");
31823 if (DECL_VIRTUAL_P (decl)
31824 || DECL_VINDEX (decl))
31825 sorry ("Virtual function multiversioning not supported");
31827 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31829 /* target attribute string cannot be NULL. */
31830 gcc_assert (version_attr != NULL_TREE);
31832 orig_name = IDENTIFIER_POINTER (id);
31833 version_string
31834 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
31836 if (strcmp (version_string, "default") == 0)
31837 return id;
31839 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
31840 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
31842 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
31844 /* Allow assembler name to be modified if already set. */
31845 if (DECL_ASSEMBLER_NAME_SET_P (decl))
31846 SET_DECL_RTL (decl, NULL);
31848 tree ret = get_identifier (assembler_name);
31849 XDELETEVEC (attr_str);
31850 XDELETEVEC (assembler_name);
31851 return ret;
31854 /* This function returns true if FN1 and FN2 are versions of the same function,
31855 that is, the target strings of the function decls are different. This assumes
31856 that FN1 and FN2 have the same signature. */
31858 static bool
31859 ix86_function_versions (tree fn1, tree fn2)
31861 tree attr1, attr2;
31862 char *target1, *target2;
31863 bool result;
31865 if (TREE_CODE (fn1) != FUNCTION_DECL
31866 || TREE_CODE (fn2) != FUNCTION_DECL)
31867 return false;
31869 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
31870 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
31872 /* At least one function decl should have the target attribute specified. */
31873 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
31874 return false;
31876 /* Diagnose missing target attribute if one of the decls is already
31877 multi-versioned. */
31878 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
31880 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
31882 if (attr2 != NULL_TREE)
31884 tree tem = fn1;
31885 fn1 = fn2;
31886 fn2 = tem;
31887 attr1 = attr2;
31889 error_at (DECL_SOURCE_LOCATION (fn2),
31890 "missing %<target%> attribute for multi-versioned %D",
31891 fn2);
31892 inform (DECL_SOURCE_LOCATION (fn1),
31893 "previous declaration of %D", fn1);
31894 /* Prevent diagnosing of the same error multiple times. */
31895 DECL_ATTRIBUTES (fn2)
31896 = tree_cons (get_identifier ("target"),
31897 copy_node (TREE_VALUE (attr1)),
31898 DECL_ATTRIBUTES (fn2));
31900 return false;
31903 target1 = sorted_attr_string (TREE_VALUE (attr1));
31904 target2 = sorted_attr_string (TREE_VALUE (attr2));
31906 /* The sorted target strings must be different for fn1 and fn2
31907 to be versions. */
31908 if (strcmp (target1, target2) == 0)
31909 result = false;
31910 else
31911 result = true;
31913 XDELETEVEC (target1);
31914 XDELETEVEC (target2);
31916 return result;
31919 static tree
31920 ix86_mangle_decl_assembler_name (tree decl, tree id)
31922 /* For function version, add the target suffix to the assembler name. */
31923 if (TREE_CODE (decl) == FUNCTION_DECL
31924 && DECL_FUNCTION_VERSIONED (decl))
31925 id = ix86_mangle_function_version_assembler_name (decl, id);
31926 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
31927 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
31928 #endif
31930 return id;
31933 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
31934 is true, append the full path name of the source file. */
31936 static char *
31937 make_name (tree decl, const char *suffix, bool make_unique)
31939 char *global_var_name;
31940 int name_len;
31941 const char *name;
31942 const char *unique_name = NULL;
31944 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
31946 /* Get a unique name that can be used globally without any chances
31947 of collision at link time. */
31948 if (make_unique)
31949 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
31951 name_len = strlen (name) + strlen (suffix) + 2;
31953 if (make_unique)
31954 name_len += strlen (unique_name) + 1;
31955 global_var_name = XNEWVEC (char, name_len);
31957 /* Use '.' to concatenate names as it is demangler friendly. */
31958 if (make_unique)
31959 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
31960 suffix);
31961 else
31962 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
31964 return global_var_name;
31967 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
31969 /* Make a dispatcher declaration for the multi-versioned function DECL.
31970 Calls to DECL function will be replaced with calls to the dispatcher
31971 by the front-end. Return the decl created. */
31973 static tree
31974 make_dispatcher_decl (const tree decl)
31976 tree func_decl;
31977 char *func_name;
31978 tree fn_type, func_type;
31979 bool is_uniq = false;
31981 if (TREE_PUBLIC (decl) == 0)
31982 is_uniq = true;
31984 func_name = make_name (decl, "ifunc", is_uniq);
31986 fn_type = TREE_TYPE (decl);
31987 func_type = build_function_type (TREE_TYPE (fn_type),
31988 TYPE_ARG_TYPES (fn_type));
31990 func_decl = build_fn_decl (func_name, func_type);
31991 XDELETEVEC (func_name);
31992 TREE_USED (func_decl) = 1;
31993 DECL_CONTEXT (func_decl) = NULL_TREE;
31994 DECL_INITIAL (func_decl) = error_mark_node;
31995 DECL_ARTIFICIAL (func_decl) = 1;
31996 /* Mark this func as external; the resolver will flip it again if
31997 it gets generated. */
31998 DECL_EXTERNAL (func_decl) = 1;
31999 /* This will be an IFUNC; IFUNCs have to be externally visible. */
32000 TREE_PUBLIC (func_decl) = 1;
32002 return func_decl;
32005 #endif
32007 /* Returns true if DECL is multi-versioned and is the default function,
32008 that is, it is not tagged with a target-specific optimization. */
32010 static bool
32011 is_function_default_version (const tree decl)
32013 if (TREE_CODE (decl) != FUNCTION_DECL
32014 || !DECL_FUNCTION_VERSIONED (decl))
32015 return false;
32016 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32017 gcc_assert (attr);
32018 attr = TREE_VALUE (TREE_VALUE (attr));
32019 return (TREE_CODE (attr) == STRING_CST
32020 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
32023 /* Make a dispatcher declaration for the multi-versioned function DECL.
32024 Calls to DECL function will be replaced with calls to the dispatcher
32025 by the front-end. Returns the decl of the dispatcher function. */
32027 static tree
32028 ix86_get_function_versions_dispatcher (void *decl)
32030 tree fn = (tree) decl;
32031 struct cgraph_node *node = NULL;
32032 struct cgraph_node *default_node = NULL;
32033 struct cgraph_function_version_info *node_v = NULL;
32034 struct cgraph_function_version_info *first_v = NULL;
32036 tree dispatch_decl = NULL;
32038 struct cgraph_function_version_info *default_version_info = NULL;
32040 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
32042 node = cgraph_get_node (fn);
32043 gcc_assert (node != NULL);
32045 node_v = get_cgraph_node_version (node);
32046 gcc_assert (node_v != NULL);
32048 if (node_v->dispatcher_resolver != NULL)
32049 return node_v->dispatcher_resolver;
32051 /* Find the default version and make it the first node. */
32052 first_v = node_v;
32053 /* Go to the beginning of the chain. */
32054 while (first_v->prev != NULL)
32055 first_v = first_v->prev;
32056 default_version_info = first_v;
32057 while (default_version_info != NULL)
32059 if (is_function_default_version
32060 (default_version_info->this_node->decl))
32061 break;
32062 default_version_info = default_version_info->next;
32065 /* If there is no default node, just return NULL. */
32066 if (default_version_info == NULL)
32067 return NULL;
32069 /* Make default info the first node. */
32070 if (first_v != default_version_info)
32072 default_version_info->prev->next = default_version_info->next;
32073 if (default_version_info->next)
32074 default_version_info->next->prev = default_version_info->prev;
32075 first_v->prev = default_version_info;
32076 default_version_info->next = first_v;
32077 default_version_info->prev = NULL;
32080 default_node = default_version_info->this_node;
32082 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32083 if (targetm.has_ifunc_p ())
32085 struct cgraph_function_version_info *it_v = NULL;
32086 struct cgraph_node *dispatcher_node = NULL;
32087 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32089 /* Right now, the dispatching is done via ifunc. */
32090 dispatch_decl = make_dispatcher_decl (default_node->decl);
32092 dispatcher_node = cgraph_get_create_node (dispatch_decl);
32093 gcc_assert (dispatcher_node != NULL);
32094 dispatcher_node->dispatcher_function = 1;
32095 dispatcher_version_info
32096 = insert_new_cgraph_node_version (dispatcher_node);
32097 dispatcher_version_info->next = default_version_info;
32098 dispatcher_node->definition = 1;
32100 /* Set the dispatcher for all the versions. */
32101 it_v = default_version_info;
32102 while (it_v != NULL)
32104 it_v->dispatcher_resolver = dispatch_decl;
32105 it_v = it_v->next;
32108 else
32109 #endif
32111 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32112 "multiversioning needs ifunc which is not supported "
32113 "on this target");
32116 return dispatch_decl;
32119 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
32120 it to CHAIN. */
32122 static tree
32123 make_attribute (const char *name, const char *arg_name, tree chain)
32125 tree attr_name;
32126 tree attr_arg_name;
32127 tree attr_args;
32128 tree attr;
32130 attr_name = get_identifier (name);
32131 attr_arg_name = build_string (strlen (arg_name), arg_name);
32132 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
32133 attr = tree_cons (attr_name, attr_args, chain);
32134 return attr;
32137 /* Make the resolver function decl to dispatch the versions of
32138 a multi-versioned function, DEFAULT_DECL. Create an
32139 empty basic block in the resolver and store the pointer in
32140 EMPTY_BB. Return the decl of the resolver function. */
32142 static tree
32143 make_resolver_func (const tree default_decl,
32144 const tree dispatch_decl,
32145 basic_block *empty_bb)
32147 char *resolver_name;
32148 tree decl, type, decl_name, t;
32149 bool is_uniq = false;
32151 /* IFUNCs have to be globally visible. So, if the default_decl is
32152 not, then the name of the IFUNC should be made unique. */
32153 if (TREE_PUBLIC (default_decl) == 0)
32154 is_uniq = true;
32156 /* Append the filename to the resolver function if the versions are
32157 not externally visible. This is because the resolver function has
32158 to be externally visible for the loader to find it. So, appending
32159 the filename will prevent conflicts with a resolver function from
32160 another module which is based on the same version name. */
32161 resolver_name = make_name (default_decl, "resolver", is_uniq);
32163 /* The resolver function should return a (void *). */
32164 type = build_function_type_list (ptr_type_node, NULL_TREE);
32166 decl = build_fn_decl (resolver_name, type);
32167 decl_name = get_identifier (resolver_name);
32168 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32170 DECL_NAME (decl) = decl_name;
32171 TREE_USED (decl) = 1;
32172 DECL_ARTIFICIAL (decl) = 1;
32173 DECL_IGNORED_P (decl) = 0;
32174 /* IFUNC resolvers have to be externally visible. */
32175 TREE_PUBLIC (decl) = 1;
32176 DECL_UNINLINABLE (decl) = 1;
32178 /* Resolver is not external, body is generated. */
32179 DECL_EXTERNAL (decl) = 0;
32180 DECL_EXTERNAL (dispatch_decl) = 0;
32182 DECL_CONTEXT (decl) = NULL_TREE;
32183 DECL_INITIAL (decl) = make_node (BLOCK);
32184 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32186 if (DECL_COMDAT_GROUP (default_decl)
32187 || TREE_PUBLIC (default_decl))
32189 /* In this case, each translation unit with a call to this
32190 versioned function will put out a resolver. Ensure it
32191 is comdat to keep just one copy. */
32192 DECL_COMDAT (decl) = 1;
32193 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32195 /* Build result decl and add to function_decl. */
32196 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32197 DECL_ARTIFICIAL (t) = 1;
32198 DECL_IGNORED_P (t) = 1;
32199 DECL_RESULT (decl) = t;
32201 gimplify_function_tree (decl);
32202 push_cfun (DECL_STRUCT_FUNCTION (decl));
32203 *empty_bb = init_lowered_empty_function (decl, false);
32205 cgraph_add_new_function (decl, true);
32206 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl));
32208 pop_cfun ();
32210 gcc_assert (dispatch_decl != NULL);
32211 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
32212 DECL_ATTRIBUTES (dispatch_decl)
32213 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
32215 /* Create the alias for dispatch to resolver here. */
32216 /*cgraph_create_function_alias (dispatch_decl, decl);*/
32217 cgraph_same_body_alias (NULL, dispatch_decl, decl);
32218 XDELETEVEC (resolver_name);
32219 return decl;

32222 /* Generate the dispatching code body to dispatch multi-versioned function
32223 DECL. The target hook is called to process the "target" attributes and
32224 provide the code to dispatch the right function at run-time. NODE points
32225 to the dispatcher decl whose body will be created. */
32227 static tree
32228 ix86_generate_version_dispatcher_body (void *node_p)
32230 tree resolver_decl;
32231 basic_block empty_bb;
32232 tree default_ver_decl;
32233 struct cgraph_node *versn;
32234 struct cgraph_node *node;
32236 struct cgraph_function_version_info *node_version_info = NULL;
32237 struct cgraph_function_version_info *versn_info = NULL;
32239 node = (cgraph_node *)node_p;
32241 node_version_info = get_cgraph_node_version (node);
32242 gcc_assert (node->dispatcher_function
32243 && node_version_info != NULL);
32245 if (node_version_info->dispatcher_resolver)
32246 return node_version_info->dispatcher_resolver;
32248 /* The first version in the chain corresponds to the default version. */
32249 default_ver_decl = node_version_info->next->this_node->decl;
32251 /* node is going to be an alias, so remove the finalized bit. */
32252 node->definition = false;
32254 resolver_decl = make_resolver_func (default_ver_decl,
32255 node->decl, &empty_bb);
32257 node_version_info->dispatcher_resolver = resolver_decl;
32259 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32261 auto_vec<tree, 2> fn_ver_vec;
32263 for (versn_info = node_version_info->next; versn_info;
32264 versn_info = versn_info->next)
32266 versn = versn_info->this_node;
32267 /* Check for virtual functions here again, as by this time it should
32268 have been determined if this function needs a vtable index or
32269 not. This happens for methods in derived classes that override
32270 virtual methods in base classes but are not explicitly marked as
32271 virtual. */
32272 if (DECL_VINDEX (versn->decl))
32273 sorry ("Virtual function multiversioning not supported");
32275 fn_ver_vec.safe_push (versn->decl);
32278 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32279 rebuild_cgraph_edges ();
32280 pop_cfun ();
32281 return resolver_decl;
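/* A rough sketch of what the generated resolver amounts to in C terms
   (illustrative only; the real body is built by dispatch_function_versions
   from each version's "target" attribute):

     void *foo_resolver (void)
     {
       __builtin_cpu_init ();
       if (cpu_matches_most_specific_version)
	 return foo_avx2;
       return foo_default;
     }

   where foo_avx2, foo_default and cpu_matches_most_specific_version stand
   in for the real version decls and the checks against __cpu_model.  */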
32283 /* This builds the processor_model struct type defined in
32284 libgcc/config/i386/cpuinfo.c. */
32286 static tree
32287 build_processor_model_struct (void)
32289 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32290 "__cpu_features"};
32291 tree field = NULL_TREE, field_chain = NULL_TREE;
32292 int i;
32293 tree type = make_node (RECORD_TYPE);
32295 /* The first 3 fields are unsigned int. */
32296 for (i = 0; i < 3; ++i)
32298 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32299 get_identifier (field_name[i]), unsigned_type_node);
32300 if (field_chain != NULL_TREE)
32301 DECL_CHAIN (field) = field_chain;
32302 field_chain = field;
32305 /* The last field is an array of unsigned integers of size one. */
32306 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32307 get_identifier (field_name[3]),
32308 build_array_type (unsigned_type_node,
32309 build_index_type (size_one_node)));
32310 if (field_chain != NULL_TREE)
32311 DECL_CHAIN (field) = field_chain;
32312 field_chain = field;
32314 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32315 return type;
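/* For reference, the layout built above corresponds to the C declaration
   in libgcc/config/i386/cpuinfo.c (shown here only as a sketch):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };
*/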
32318 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
32320 static tree
32321 make_var_decl (tree type, const char *name)
32323 tree new_decl;
32325 new_decl = build_decl (UNKNOWN_LOCATION,
32326 VAR_DECL,
32327 get_identifier (name),
32328 type);
32330 DECL_EXTERNAL (new_decl) = 1;
32331 TREE_STATIC (new_decl) = 1;
32332 TREE_PUBLIC (new_decl) = 1;
32333 DECL_INITIAL (new_decl) = 0;
32334 DECL_ARTIFICIAL (new_decl) = 0;
32335 DECL_PRESERVE_P (new_decl) = 1;
32337 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32338 assemble_variable (new_decl, 0, 0, 0);
32340 return new_decl;
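/* In effect, the call below produces the equivalent of

     extern struct __processor_model __cpu_model;

   i.e. a one-only reference to the object defined in
   libgcc/config/i386/cpuinfo.c (sketch only).  */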
32343 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
32344 into a test against the __cpu_model data defined in libgcc/config/i386/cpuinfo.c. */
32346 static tree
32347 fold_builtin_cpu (tree fndecl, tree *args)
32349 unsigned int i;
32350 enum ix86_builtins fn_code = (enum ix86_builtins)
32351 DECL_FUNCTION_CODE (fndecl);
32352 tree param_string_cst = NULL;
32354 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
32355 enum processor_features
32357 F_CMOV = 0,
32358 F_MMX,
32359 F_POPCNT,
32360 F_SSE,
32361 F_SSE2,
32362 F_SSE3,
32363 F_SSSE3,
32364 F_SSE4_1,
32365 F_SSE4_2,
32366 F_AVX,
32367 F_AVX2,
32368 F_SSE4_A,
32369 F_FMA4,
32370 F_XOP,
32371 F_FMA,
32372 F_MAX
32375 /* These are the values for vendor types and cpu types and subtypes
32376 in cpuinfo.c. Cpu types and subtypes should be subtracted by
32377 the corresponding start value. */
32378 enum processor_model
32380 M_INTEL = 1,
32381 M_AMD,
32382 M_CPU_TYPE_START,
32383 M_INTEL_BONNELL,
32384 M_INTEL_CORE2,
32385 M_INTEL_COREI7,
32386 M_AMDFAM10H,
32387 M_AMDFAM15H,
32388 M_INTEL_SILVERMONT,
32389 M_AMD_BTVER1,
32390 M_AMD_BTVER2,
32391 M_CPU_SUBTYPE_START,
32392 M_INTEL_COREI7_NEHALEM,
32393 M_INTEL_COREI7_WESTMERE,
32394 M_INTEL_COREI7_SANDYBRIDGE,
32395 M_AMDFAM10H_BARCELONA,
32396 M_AMDFAM10H_SHANGHAI,
32397 M_AMDFAM10H_ISTANBUL,
32398 M_AMDFAM15H_BDVER1,
32399 M_AMDFAM15H_BDVER2,
32400 M_AMDFAM15H_BDVER3,
32401 M_AMDFAM15H_BDVER4,
32402 M_INTEL_COREI7_IVYBRIDGE,
32403 M_INTEL_COREI7_HASWELL
32406 static struct _arch_names_table
32408 const char *const name;
32409 const enum processor_model model;
32411 const arch_names_table[] =
32413 {"amd", M_AMD},
32414 {"intel", M_INTEL},
32415 {"atom", M_INTEL_BONNELL},
32416 {"slm", M_INTEL_SILVERMONT},
32417 {"core2", M_INTEL_CORE2},
32418 {"corei7", M_INTEL_COREI7},
32419 {"nehalem", M_INTEL_COREI7_NEHALEM},
32420 {"westmere", M_INTEL_COREI7_WESTMERE},
32421 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32422 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32423 {"haswell", M_INTEL_COREI7_HASWELL},
32424 {"bonnell", M_INTEL_BONNELL},
32425 {"silvermont", M_INTEL_SILVERMONT},
32426 {"amdfam10h", M_AMDFAM10H},
32427 {"barcelona", M_AMDFAM10H_BARCELONA},
32428 {"shanghai", M_AMDFAM10H_SHANGHAI},
32429 {"istanbul", M_AMDFAM10H_ISTANBUL},
32430 {"btver1", M_AMD_BTVER1},
32431 {"amdfam15h", M_AMDFAM15H},
32432 {"bdver1", M_AMDFAM15H_BDVER1},
32433 {"bdver2", M_AMDFAM15H_BDVER2},
32434 {"bdver3", M_AMDFAM15H_BDVER3},
32435 {"bdver4", M_AMDFAM15H_BDVER4},
32436 {"btver2", M_AMD_BTVER2},
32439 static struct _isa_names_table
32441 const char *const name;
32442 const enum processor_features feature;
32444 const isa_names_table[] =
32446 {"cmov", F_CMOV},
32447 {"mmx", F_MMX},
32448 {"popcnt", F_POPCNT},
32449 {"sse", F_SSE},
32450 {"sse2", F_SSE2},
32451 {"sse3", F_SSE3},
32452 {"ssse3", F_SSSE3},
32453 {"sse4a", F_SSE4_A},
32454 {"sse4.1", F_SSE4_1},
32455 {"sse4.2", F_SSE4_2},
32456 {"avx", F_AVX},
32457 {"fma4", F_FMA4},
32458 {"xop", F_XOP},
32459 {"fma", F_FMA},
32460 {"avx2", F_AVX2}
32463 tree __processor_model_type = build_processor_model_struct ();
32464 tree __cpu_model_var = make_var_decl (__processor_model_type,
32465 "__cpu_model");
32468 varpool_add_new_variable (__cpu_model_var);
32470 gcc_assert ((args != NULL) && (*args != NULL));
32472 param_string_cst = *args;
32473 while (param_string_cst
32474 && TREE_CODE (param_string_cst) != STRING_CST)
32476 /* *args must be an expr that can contain other EXPRs leading to a
32477 STRING_CST. */
32478 if (!EXPR_P (param_string_cst))
32480 error ("Parameter to builtin must be a string constant or literal");
32481 return integer_zero_node;
32483 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32486 gcc_assert (param_string_cst);
32488 if (fn_code == IX86_BUILTIN_CPU_IS)
32490 tree ref;
32491 tree field;
32492 tree final;
32494 unsigned int field_val = 0;
32495 unsigned int NUM_ARCH_NAMES
32496 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32498 for (i = 0; i < NUM_ARCH_NAMES; i++)
32499 if (strcmp (arch_names_table[i].name,
32500 TREE_STRING_POINTER (param_string_cst)) == 0)
32501 break;
32503 if (i == NUM_ARCH_NAMES)
32505 error ("Parameter to builtin not valid: %s",
32506 TREE_STRING_POINTER (param_string_cst));
32507 return integer_zero_node;
32510 field = TYPE_FIELDS (__processor_model_type);
32511 field_val = arch_names_table[i].model;
32513 /* CPU types are stored in the next field. */
32514 if (field_val > M_CPU_TYPE_START
32515 && field_val < M_CPU_SUBTYPE_START)
32517 field = DECL_CHAIN (field);
32518 field_val -= M_CPU_TYPE_START;
32521 /* CPU subtypes are stored in the next field. */
32522 if (field_val > M_CPU_SUBTYPE_START)
32524 field = DECL_CHAIN (DECL_CHAIN (field));
32525 field_val -= M_CPU_SUBTYPE_START;
32528 /* Get the appropriate field in __cpu_model. */
32529 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32530 field, NULL_TREE);
32532 /* Check the value. */
32533 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32534 build_int_cstu (unsigned_type_node, field_val));
32535 return build1 (CONVERT_EXPR, integer_type_node, final);
32537 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32539 tree ref;
32540 tree array_elt;
32541 tree field;
32542 tree final;
32544 unsigned int field_val = 0;
32545 unsigned int NUM_ISA_NAMES
32546 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32548 for (i = 0; i < NUM_ISA_NAMES; i++)
32549 if (strcmp (isa_names_table[i].name,
32550 TREE_STRING_POINTER (param_string_cst)) == 0)
32551 break;
32553 if (i == NUM_ISA_NAMES)
32555 error ("Parameter to builtin not valid: %s",
32556 TREE_STRING_POINTER (param_string_cst));
32557 return integer_zero_node;
32560 field = TYPE_FIELDS (__processor_model_type);
32561 /* Get the last field, which is __cpu_features. */
32562 while (DECL_CHAIN (field))
32563 field = DECL_CHAIN (field);
32565 /* Get the appropriate field: __cpu_model.__cpu_features */
32566 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32567 field, NULL_TREE);
32569 /* Access the 0th element of __cpu_features array. */
32570 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32571 integer_zero_node, NULL_TREE, NULL_TREE);
32573 field_val = (1 << isa_names_table[i].feature);
32574 /* Return __cpu_model.__cpu_features[0] & field_val */
32575 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32576 build_int_cstu (unsigned_type_node, field_val));
32577 return build1 (CONVERT_EXPR, integer_type_node, final);
32579 gcc_unreachable ();
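/* Concretely (a sketch of the folded form), the two builtins handled above
   reduce to field tests on __cpu_model, e.g.

     __builtin_cpu_is ("amd")        ->  __cpu_model.__cpu_vendor == M_AMD
     __builtin_cpu_supports ("avx2") ->  __cpu_model.__cpu_features[0]
					   & (1 << F_AVX2)

   with the result converted to int.  */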
32582 static tree
32583 ix86_fold_builtin (tree fndecl, int n_args,
32584 tree *args, bool ignore ATTRIBUTE_UNUSED)
32586 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32588 enum ix86_builtins fn_code = (enum ix86_builtins)
32589 DECL_FUNCTION_CODE (fndecl);
32590 if (fn_code == IX86_BUILTIN_CPU_IS
32591 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32593 gcc_assert (n_args == 1);
32594 return fold_builtin_cpu (fndecl, args);
32598 #ifdef SUBTARGET_FOLD_BUILTIN
32599 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32600 #endif
32602 return NULL_TREE;
32605 /* Make builtins to detect cpu type and features supported. NAME is
32606 the builtin name, CODE is the builtin code, and FTYPE is the function
32607 type of the builtin. */
32609 static void
32610 make_cpu_type_builtin (const char* name, int code,
32611 enum ix86_builtin_func_type ftype, bool is_const)
32613 tree decl;
32614 tree type;
32616 type = ix86_get_builtin_func_type (ftype);
32617 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32618 NULL, NULL_TREE);
32619 gcc_assert (decl != NULL_TREE);
32620 ix86_builtins[(int) code] = decl;
32621 TREE_READONLY (decl) = is_const;
32624 /* Make builtins to get CPU type and features supported. The created
32625 builtins are:
32627 __builtin_cpu_init (), to detect cpu type and features,
32628 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32629 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>. */
32632 static void
32633 ix86_init_platform_type_builtins (void)
32635 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32636 INT_FTYPE_VOID, false);
32637 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32638 INT_FTYPE_PCCHAR, true);
32639 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32640 INT_FTYPE_PCCHAR, true);
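/* Typical use of these builtins in user code (illustrative only):

     int
     pick_impl (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_is ("corei7"))
	 return 1;
       if (__builtin_cpu_supports ("avx2"))
	 return 2;
       return 0;
     }
*/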
32643 /* Internal method for ix86_init_builtins. */
32645 static void
32646 ix86_init_builtins_va_builtins_abi (void)
32648 tree ms_va_ref, sysv_va_ref;
32649 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32650 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32651 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32652 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32654 if (!TARGET_64BIT)
32655 return;
32656 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32657 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32658 ms_va_ref = build_reference_type (ms_va_list_type_node);
32659 sysv_va_ref =
32660 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32662 fnvoid_va_end_ms =
32663 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32664 fnvoid_va_start_ms =
32665 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32666 fnvoid_va_end_sysv =
32667 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32668 fnvoid_va_start_sysv =
32669 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32670 NULL_TREE);
32671 fnvoid_va_copy_ms =
32672 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32673 NULL_TREE);
32674 fnvoid_va_copy_sysv =
32675 build_function_type_list (void_type_node, sysv_va_ref,
32676 sysv_va_ref, NULL_TREE);
32678 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32679 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32680 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32681 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32682 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32683 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32684 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32685 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32686 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32687 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32688 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32689 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
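/* Illustrative sketch: these builtins let 64-bit code handle variadic
   arguments in the other ABI, e.g.

     __attribute__ ((ms_abi)) int
     sum (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;
       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
	 s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }
*/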
32692 static void
32693 ix86_init_builtin_types (void)
32695 tree float128_type_node, float80_type_node;
32697 /* The __float80 type. */
32698 float80_type_node = long_double_type_node;
32699 if (TYPE_MODE (float80_type_node) != XFmode)
32701 /* The __float80 type. */
32702 float80_type_node = make_node (REAL_TYPE);
32704 TYPE_PRECISION (float80_type_node) = 80;
32705 layout_type (float80_type_node);
32707 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32709 /* The __float128 type. */
32710 float128_type_node = make_node (REAL_TYPE);
32711 TYPE_PRECISION (float128_type_node) = 128;
32712 layout_type (float128_type_node);
32713 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32715 /* This macro is built by i386-builtin-types.awk. */
32716 DEFINE_BUILTIN_PRIMITIVE_TYPES;
32719 static void
32720 ix86_init_builtins (void)
32722 tree t;
32724 ix86_init_builtin_types ();
32726 /* Builtins to get CPU type and features. */
32727 ix86_init_platform_type_builtins ();
32729 /* TFmode support builtins. */
32730 def_builtin_const (0, "__builtin_infq",
32731 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32732 def_builtin_const (0, "__builtin_huge_valq",
32733 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32735 /* We will expand them to a normal call if SSE isn't available, since
32736 they are used by libgcc. */
32737 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32738 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
32739 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32740 TREE_READONLY (t) = 1;
32741 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
32743 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32744 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
32745 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
32746 TREE_READONLY (t) = 1;
32747 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
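/* Illustrative use of the TFmode builtins registered above (sketch only):

     __float128 x = __builtin_huge_valq ();
     __float128 a = __builtin_fabsq (-x);
     __float128 b = __builtin_copysignq (a, (__float128) -1.0);
*/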
32749 ix86_init_tm_builtins ();
32750 ix86_init_mmx_sse_builtins ();
32752 if (TARGET_LP64)
32753 ix86_init_builtins_va_builtins_abi ();
32755 #ifdef SUBTARGET_INIT_BUILTINS
32756 SUBTARGET_INIT_BUILTINS;
32757 #endif
32760 /* Return the ix86 builtin for CODE. */
32762 static tree
32763 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
32765 if (code >= IX86_BUILTIN_MAX)
32766 return error_mark_node;
32768 return ix86_builtins[code];
32771 /* Errors in the source file can cause expand_expr to return const0_rtx
32772 where we expect a vector. To avoid crashing, use one of the vector
32773 clear instructions. */
32774 static rtx
32775 safe_vector_operand (rtx x, enum machine_mode mode)
32777 if (x == const0_rtx)
32778 x = CONST0_RTX (mode);
32779 return x;
32782 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
32784 static rtx
32785 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32787 rtx pat;
32788 tree arg0 = CALL_EXPR_ARG (exp, 0);
32789 tree arg1 = CALL_EXPR_ARG (exp, 1);
32790 rtx op0 = expand_normal (arg0);
32791 rtx op1 = expand_normal (arg1);
32792 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32793 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32794 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
32796 if (VECTOR_MODE_P (mode0))
32797 op0 = safe_vector_operand (op0, mode0);
32798 if (VECTOR_MODE_P (mode1))
32799 op1 = safe_vector_operand (op1, mode1);
32801 if (optimize || !target
32802 || GET_MODE (target) != tmode
32803 || !insn_data[icode].operand[0].predicate (target, tmode))
32804 target = gen_reg_rtx (tmode);
32806 if (GET_MODE (op1) == SImode && mode1 == TImode)
32808 rtx x = gen_reg_rtx (V4SImode);
32809 emit_insn (gen_sse2_loadd (x, op1));
32810 op1 = gen_lowpart (TImode, x);
32813 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32814 op0 = copy_to_mode_reg (mode0, op0);
32815 if (!insn_data[icode].operand[2].predicate (op1, mode1))
32816 op1 = copy_to_mode_reg (mode1, op1);
32818 pat = GEN_FCN (icode) (target, op0, op1);
32819 if (! pat)
32820 return 0;
32822 emit_insn (pat);
32824 return target;
32827 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
32829 static rtx
32830 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
32831 enum ix86_builtin_func_type m_type,
32832 enum rtx_code sub_code)
32834 rtx pat;
32835 int i;
32836 int nargs;
32837 bool comparison_p = false;
32838 bool tf_p = false;
32839 bool last_arg_constant = false;
32840 int num_memory = 0;
32841 struct {
32842 rtx op;
32843 enum machine_mode mode;
32844 } args[4];
32846 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32848 switch (m_type)
32850 case MULTI_ARG_4_DF2_DI_I:
32851 case MULTI_ARG_4_DF2_DI_I1:
32852 case MULTI_ARG_4_SF2_SI_I:
32853 case MULTI_ARG_4_SF2_SI_I1:
32854 nargs = 4;
32855 last_arg_constant = true;
32856 break;
32858 case MULTI_ARG_3_SF:
32859 case MULTI_ARG_3_DF:
32860 case MULTI_ARG_3_SF2:
32861 case MULTI_ARG_3_DF2:
32862 case MULTI_ARG_3_DI:
32863 case MULTI_ARG_3_SI:
32864 case MULTI_ARG_3_SI_DI:
32865 case MULTI_ARG_3_HI:
32866 case MULTI_ARG_3_HI_SI:
32867 case MULTI_ARG_3_QI:
32868 case MULTI_ARG_3_DI2:
32869 case MULTI_ARG_3_SI2:
32870 case MULTI_ARG_3_HI2:
32871 case MULTI_ARG_3_QI2:
32872 nargs = 3;
32873 break;
32875 case MULTI_ARG_2_SF:
32876 case MULTI_ARG_2_DF:
32877 case MULTI_ARG_2_DI:
32878 case MULTI_ARG_2_SI:
32879 case MULTI_ARG_2_HI:
32880 case MULTI_ARG_2_QI:
32881 nargs = 2;
32882 break;
32884 case MULTI_ARG_2_DI_IMM:
32885 case MULTI_ARG_2_SI_IMM:
32886 case MULTI_ARG_2_HI_IMM:
32887 case MULTI_ARG_2_QI_IMM:
32888 nargs = 2;
32889 last_arg_constant = true;
32890 break;
32892 case MULTI_ARG_1_SF:
32893 case MULTI_ARG_1_DF:
32894 case MULTI_ARG_1_SF2:
32895 case MULTI_ARG_1_DF2:
32896 case MULTI_ARG_1_DI:
32897 case MULTI_ARG_1_SI:
32898 case MULTI_ARG_1_HI:
32899 case MULTI_ARG_1_QI:
32900 case MULTI_ARG_1_SI_DI:
32901 case MULTI_ARG_1_HI_DI:
32902 case MULTI_ARG_1_HI_SI:
32903 case MULTI_ARG_1_QI_DI:
32904 case MULTI_ARG_1_QI_SI:
32905 case MULTI_ARG_1_QI_HI:
32906 nargs = 1;
32907 break;
32909 case MULTI_ARG_2_DI_CMP:
32910 case MULTI_ARG_2_SI_CMP:
32911 case MULTI_ARG_2_HI_CMP:
32912 case MULTI_ARG_2_QI_CMP:
32913 nargs = 2;
32914 comparison_p = true;
32915 break;
32917 case MULTI_ARG_2_SF_TF:
32918 case MULTI_ARG_2_DF_TF:
32919 case MULTI_ARG_2_DI_TF:
32920 case MULTI_ARG_2_SI_TF:
32921 case MULTI_ARG_2_HI_TF:
32922 case MULTI_ARG_2_QI_TF:
32923 nargs = 2;
32924 tf_p = true;
32925 break;
32927 default:
32928 gcc_unreachable ();
32931 if (optimize || !target
32932 || GET_MODE (target) != tmode
32933 || !insn_data[icode].operand[0].predicate (target, tmode))
32934 target = gen_reg_rtx (tmode);
32936 gcc_assert (nargs <= 4);
32938 for (i = 0; i < nargs; i++)
32940 tree arg = CALL_EXPR_ARG (exp, i);
32941 rtx op = expand_normal (arg);
32942 int adjust = (comparison_p) ? 1 : 0;
32943 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
32945 if (last_arg_constant && i == nargs - 1)
32947 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
32949 enum insn_code new_icode = icode;
32950 switch (icode)
32952 case CODE_FOR_xop_vpermil2v2df3:
32953 case CODE_FOR_xop_vpermil2v4sf3:
32954 case CODE_FOR_xop_vpermil2v4df3:
32955 case CODE_FOR_xop_vpermil2v8sf3:
32956 error ("the last argument must be a 2-bit immediate");
32957 return gen_reg_rtx (tmode);
32958 case CODE_FOR_xop_rotlv2di3:
32959 new_icode = CODE_FOR_rotlv2di3;
32960 goto xop_rotl;
32961 case CODE_FOR_xop_rotlv4si3:
32962 new_icode = CODE_FOR_rotlv4si3;
32963 goto xop_rotl;
32964 case CODE_FOR_xop_rotlv8hi3:
32965 new_icode = CODE_FOR_rotlv8hi3;
32966 goto xop_rotl;
32967 case CODE_FOR_xop_rotlv16qi3:
32968 new_icode = CODE_FOR_rotlv16qi3;
32969 xop_rotl:
32970 if (CONST_INT_P (op))
32972 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
32973 op = GEN_INT (INTVAL (op) & mask);
32974 gcc_checking_assert
32975 (insn_data[icode].operand[i + 1].predicate (op, mode));
32977 else
32979 gcc_checking_assert
32980 (nargs == 2
32981 && insn_data[new_icode].operand[0].mode == tmode
32982 && insn_data[new_icode].operand[1].mode == tmode
32983 && insn_data[new_icode].operand[2].mode == mode
32984 && insn_data[new_icode].operand[0].predicate
32985 == insn_data[icode].operand[0].predicate
32986 && insn_data[new_icode].operand[1].predicate
32987 == insn_data[icode].operand[1].predicate);
32988 icode = new_icode;
32989 goto non_constant;
32991 break;
32992 default:
32993 gcc_unreachable ();
32997 else
32999 non_constant:
33000 if (VECTOR_MODE_P (mode))
33001 op = safe_vector_operand (op, mode);
33003 /* If we aren't optimizing, only allow one memory operand to be
33004 generated. */
33005 if (memory_operand (op, mode))
33006 num_memory++;
33008 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
33010 if (optimize
33011 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
33012 || num_memory > 1)
33013 op = force_reg (mode, op);
33016 args[i].op = op;
33017 args[i].mode = mode;
33020 switch (nargs)
33022 case 1:
33023 pat = GEN_FCN (icode) (target, args[0].op);
33024 break;
33026 case 2:
33027 if (tf_p)
33028 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
33029 GEN_INT ((int)sub_code));
33030 else if (! comparison_p)
33031 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
33032 else
33034 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
33035 args[0].op,
33036 args[1].op);
33038 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
33040 break;
33042 case 3:
33043 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
33044 break;
33046 case 4:
33047 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
33048 break;
33050 default:
33051 gcc_unreachable ();
33054 if (! pat)
33055 return 0;
33057 emit_insn (pat);
33058 return target;
33061 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
33062 insns with vec_merge. */
33064 static rtx
33065 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
33066 rtx target)
33068 rtx pat;
33069 tree arg0 = CALL_EXPR_ARG (exp, 0);
33070 rtx op1, op0 = expand_normal (arg0);
33071 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33072 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
33074 if (optimize || !target
33075 || GET_MODE (target) != tmode
33076 || !insn_data[icode].operand[0].predicate (target, tmode))
33077 target = gen_reg_rtx (tmode);
33079 if (VECTOR_MODE_P (mode0))
33080 op0 = safe_vector_operand (op0, mode0);
33082 if ((optimize && !register_operand (op0, mode0))
33083 || !insn_data[icode].operand[1].predicate (op0, mode0))
33084 op0 = copy_to_mode_reg (mode0, op0);
33086 op1 = op0;
33087 if (!insn_data[icode].operand[2].predicate (op1, mode0))
33088 op1 = copy_to_mode_reg (mode0, op1);
33090 pat = GEN_FCN (icode) (target, op0, op1);
33091 if (! pat)
33092 return 0;
33093 emit_insn (pat);
33094 return target;
33097 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
33099 static rtx
33100 ix86_expand_sse_compare (const struct builtin_description *d,
33101 tree exp, rtx target, bool swap)
33103 rtx pat;
33104 tree arg0 = CALL_EXPR_ARG (exp, 0);
33105 tree arg1 = CALL_EXPR_ARG (exp, 1);
33106 rtx op0 = expand_normal (arg0);
33107 rtx op1 = expand_normal (arg1);
33108 rtx op2;
33109 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33110 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33111 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33112 enum rtx_code comparison = d->comparison;
33114 if (VECTOR_MODE_P (mode0))
33115 op0 = safe_vector_operand (op0, mode0);
33116 if (VECTOR_MODE_P (mode1))
33117 op1 = safe_vector_operand (op1, mode1);
33119 /* Swap operands if we have a comparison that isn't available in
33120 hardware. */
33121 if (swap)
33123 rtx tmp = gen_reg_rtx (mode1);
33124 emit_move_insn (tmp, op1);
33125 op1 = op0;
33126 op0 = tmp;
33129 if (optimize || !target
33130 || GET_MODE (target) != tmode
33131 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33132 target = gen_reg_rtx (tmode);
33134 if ((optimize && !register_operand (op0, mode0))
33135 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
33136 op0 = copy_to_mode_reg (mode0, op0);
33137 if ((optimize && !register_operand (op1, mode1))
33138 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
33139 op1 = copy_to_mode_reg (mode1, op1);
33141 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
33142 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33143 if (! pat)
33144 return 0;
33145 emit_insn (pat);
33146 return target;
33149 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
33151 static rtx
33152 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
33153 rtx target)
33155 rtx pat;
33156 tree arg0 = CALL_EXPR_ARG (exp, 0);
33157 tree arg1 = CALL_EXPR_ARG (exp, 1);
33158 rtx op0 = expand_normal (arg0);
33159 rtx op1 = expand_normal (arg1);
33160 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33161 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33162 enum rtx_code comparison = d->comparison;
33164 if (VECTOR_MODE_P (mode0))
33165 op0 = safe_vector_operand (op0, mode0);
33166 if (VECTOR_MODE_P (mode1))
33167 op1 = safe_vector_operand (op1, mode1);
33169 /* Swap operands if we have a comparison that isn't available in
33170 hardware. */
33171 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
33173 rtx tmp = op1;
33174 op1 = op0;
33175 op0 = tmp;
33178 target = gen_reg_rtx (SImode);
33179 emit_move_insn (target, const0_rtx);
33180 target = gen_rtx_SUBREG (QImode, target, 0);
33182 if ((optimize && !register_operand (op0, mode0))
33183 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33184 op0 = copy_to_mode_reg (mode0, op0);
33185 if ((optimize && !register_operand (op1, mode1))
33186 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33187 op1 = copy_to_mode_reg (mode1, op1);
33189 pat = GEN_FCN (d->icode) (op0, op1);
33190 if (! pat)
33191 return 0;
33192 emit_insn (pat);
33193 emit_insn (gen_rtx_SET (VOIDmode,
33194 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33195 gen_rtx_fmt_ee (comparison, QImode,
33196 SET_DEST (pat),
33197 const0_rtx)));
33199 return SUBREG_REG (target);
33202 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
33204 static rtx
33205 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
33206 rtx target)
33208 rtx pat;
33209 tree arg0 = CALL_EXPR_ARG (exp, 0);
33210 rtx op1, op0 = expand_normal (arg0);
33211 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33212 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33214 if (optimize || target == 0
33215 || GET_MODE (target) != tmode
33216 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33217 target = gen_reg_rtx (tmode);
33219 if (VECTOR_MODE_P (mode0))
33220 op0 = safe_vector_operand (op0, mode0);
33222 if ((optimize && !register_operand (op0, mode0))
33223 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33224 op0 = copy_to_mode_reg (mode0, op0);
33226 op1 = GEN_INT (d->comparison);
33228 pat = GEN_FCN (d->icode) (target, op0, op1);
33229 if (! pat)
33230 return 0;
33231 emit_insn (pat);
33232 return target;
33235 static rtx
33236 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
33237 tree exp, rtx target)
33239 rtx pat;
33240 tree arg0 = CALL_EXPR_ARG (exp, 0);
33241 tree arg1 = CALL_EXPR_ARG (exp, 1);
33242 rtx op0 = expand_normal (arg0);
33243 rtx op1 = expand_normal (arg1);
33244 rtx op2;
33245 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33246 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33247 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33249 if (optimize || target == 0
33250 || GET_MODE (target) != tmode
33251 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33252 target = gen_reg_rtx (tmode);
33254 op0 = safe_vector_operand (op0, mode0);
33255 op1 = safe_vector_operand (op1, mode1);
33257 if ((optimize && !register_operand (op0, mode0))
33258 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33259 op0 = copy_to_mode_reg (mode0, op0);
33260 if ((optimize && !register_operand (op1, mode1))
33261 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33262 op1 = copy_to_mode_reg (mode1, op1);
33264 op2 = GEN_INT (d->comparison);
33266 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33267 if (! pat)
33268 return 0;
33269 emit_insn (pat);
33270 return target;
33273 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
33275 static rtx
33276 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33277 rtx target)
33279 rtx pat;
33280 tree arg0 = CALL_EXPR_ARG (exp, 0);
33281 tree arg1 = CALL_EXPR_ARG (exp, 1);
33282 rtx op0 = expand_normal (arg0);
33283 rtx op1 = expand_normal (arg1);
33284 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33285 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33286 enum rtx_code comparison = d->comparison;
33288 if (VECTOR_MODE_P (mode0))
33289 op0 = safe_vector_operand (op0, mode0);
33290 if (VECTOR_MODE_P (mode1))
33291 op1 = safe_vector_operand (op1, mode1);
33293 target = gen_reg_rtx (SImode);
33294 emit_move_insn (target, const0_rtx);
33295 target = gen_rtx_SUBREG (QImode, target, 0);
33297 if ((optimize && !register_operand (op0, mode0))
33298 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33299 op0 = copy_to_mode_reg (mode0, op0);
33300 if ((optimize && !register_operand (op1, mode1))
33301 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33302 op1 = copy_to_mode_reg (mode1, op1);
33304 pat = GEN_FCN (d->icode) (op0, op1);
33305 if (! pat)
33306 return 0;
33307 emit_insn (pat);
33308 emit_insn (gen_rtx_SET (VOIDmode,
33309 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33310 gen_rtx_fmt_ee (comparison, QImode,
33311 SET_DEST (pat),
33312 const0_rtx)));
33314 return SUBREG_REG (target);
33317 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
33319 static rtx
33320 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33321 tree exp, rtx target)
33323 rtx pat;
33324 tree arg0 = CALL_EXPR_ARG (exp, 0);
33325 tree arg1 = CALL_EXPR_ARG (exp, 1);
33326 tree arg2 = CALL_EXPR_ARG (exp, 2);
33327 tree arg3 = CALL_EXPR_ARG (exp, 3);
33328 tree arg4 = CALL_EXPR_ARG (exp, 4);
33329 rtx scratch0, scratch1;
33330 rtx op0 = expand_normal (arg0);
33331 rtx op1 = expand_normal (arg1);
33332 rtx op2 = expand_normal (arg2);
33333 rtx op3 = expand_normal (arg3);
33334 rtx op4 = expand_normal (arg4);
33335 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33337 tmode0 = insn_data[d->icode].operand[0].mode;
33338 tmode1 = insn_data[d->icode].operand[1].mode;
33339 modev2 = insn_data[d->icode].operand[2].mode;
33340 modei3 = insn_data[d->icode].operand[3].mode;
33341 modev4 = insn_data[d->icode].operand[4].mode;
33342 modei5 = insn_data[d->icode].operand[5].mode;
33343 modeimm = insn_data[d->icode].operand[6].mode;
33345 if (VECTOR_MODE_P (modev2))
33346 op0 = safe_vector_operand (op0, modev2);
33347 if (VECTOR_MODE_P (modev4))
33348 op2 = safe_vector_operand (op2, modev4);
33350 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33351 op0 = copy_to_mode_reg (modev2, op0);
33352 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33353 op1 = copy_to_mode_reg (modei3, op1);
33354 if ((optimize && !register_operand (op2, modev4))
33355 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33356 op2 = copy_to_mode_reg (modev4, op2);
33357 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33358 op3 = copy_to_mode_reg (modei5, op3);
33360 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33362 error ("the fifth argument must be an 8-bit immediate");
33363 return const0_rtx;
33366 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33368 if (optimize || !target
33369 || GET_MODE (target) != tmode0
33370 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33371 target = gen_reg_rtx (tmode0);
33373 scratch1 = gen_reg_rtx (tmode1);
33375 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33377 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33379 if (optimize || !target
33380 || GET_MODE (target) != tmode1
33381 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33382 target = gen_reg_rtx (tmode1);
33384 scratch0 = gen_reg_rtx (tmode0);
33386 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33388 else
33390 gcc_assert (d->flag);
33392 scratch0 = gen_reg_rtx (tmode0);
33393 scratch1 = gen_reg_rtx (tmode1);
33395 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33398 if (! pat)
33399 return 0;
33401 emit_insn (pat);
33403 if (d->flag)
33405 target = gen_reg_rtx (SImode);
33406 emit_move_insn (target, const0_rtx);
33407 target = gen_rtx_SUBREG (QImode, target, 0);
33409 emit_insn
33410 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33411 gen_rtx_fmt_ee (EQ, QImode,
33412 gen_rtx_REG ((enum machine_mode) d->flag,
33413 FLAGS_REG),
33414 const0_rtx)));
33415 return SUBREG_REG (target);
33417 else
33418 return target;
33422 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33424 static rtx
33425 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33426 tree exp, rtx target)
33428 rtx pat;
33429 tree arg0 = CALL_EXPR_ARG (exp, 0);
33430 tree arg1 = CALL_EXPR_ARG (exp, 1);
33431 tree arg2 = CALL_EXPR_ARG (exp, 2);
33432 rtx scratch0, scratch1;
33433 rtx op0 = expand_normal (arg0);
33434 rtx op1 = expand_normal (arg1);
33435 rtx op2 = expand_normal (arg2);
33436 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33438 tmode0 = insn_data[d->icode].operand[0].mode;
33439 tmode1 = insn_data[d->icode].operand[1].mode;
33440 modev2 = insn_data[d->icode].operand[2].mode;
33441 modev3 = insn_data[d->icode].operand[3].mode;
33442 modeimm = insn_data[d->icode].operand[4].mode;
33444 if (VECTOR_MODE_P (modev2))
33445 op0 = safe_vector_operand (op0, modev2);
33446 if (VECTOR_MODE_P (modev3))
33447 op1 = safe_vector_operand (op1, modev3);
33449 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33450 op0 = copy_to_mode_reg (modev2, op0);
33451 if ((optimize && !register_operand (op1, modev3))
33452 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33453 op1 = copy_to_mode_reg (modev3, op1);
33455 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33457 error ("the third argument must be an 8-bit immediate");
33458 return const0_rtx;
33461 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33463 if (optimize || !target
33464 || GET_MODE (target) != tmode0
33465 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33466 target = gen_reg_rtx (tmode0);
33468 scratch1 = gen_reg_rtx (tmode1);
33470 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33472 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33474 if (optimize || !target
33475 || GET_MODE (target) != tmode1
33476 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33477 target = gen_reg_rtx (tmode1);
33479 scratch0 = gen_reg_rtx (tmode0);
33481 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33483 else
33485 gcc_assert (d->flag);
33487 scratch0 = gen_reg_rtx (tmode0);
33488 scratch1 = gen_reg_rtx (tmode1);
33490 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33493 if (! pat)
33494 return 0;
33496 emit_insn (pat);
33498 if (d->flag)
33500 target = gen_reg_rtx (SImode);
33501 emit_move_insn (target, const0_rtx);
33502 target = gen_rtx_SUBREG (QImode, target, 0);
33504 emit_insn
33505 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33506 gen_rtx_fmt_ee (EQ, QImode,
33507 gen_rtx_REG ((enum machine_mode) d->flag,
33508 FLAGS_REG),
33509 const0_rtx)));
33510 return SUBREG_REG (target);
33512 else
33513 return target;
33516 /* Subroutine of ix86_expand_builtin to take care of insns with
33517 variable number of operands. */
33519 static rtx
33520 ix86_expand_args_builtin (const struct builtin_description *d,
33521 tree exp, rtx target)
33523 rtx pat, real_target;
33524 unsigned int i, nargs;
33525 unsigned int nargs_constant = 0;
33526 unsigned int mask_pos = 0;
33527 int num_memory = 0;
33528 struct
33530 rtx op;
33531 enum machine_mode mode;
33532 } args[6];
33533 bool last_arg_count = false;
33534 enum insn_code icode = d->icode;
33535 const struct insn_data_d *insn_p = &insn_data[icode];
33536 enum machine_mode tmode = insn_p->operand[0].mode;
33537 enum machine_mode rmode = VOIDmode;
33538 bool swap = false;
33539 enum rtx_code comparison = d->comparison;
33541 switch ((enum ix86_builtin_func_type) d->flag)
33543 case V2DF_FTYPE_V2DF_ROUND:
33544 case V4DF_FTYPE_V4DF_ROUND:
33545 case V4SF_FTYPE_V4SF_ROUND:
33546 case V8SF_FTYPE_V8SF_ROUND:
33547 case V4SI_FTYPE_V4SF_ROUND:
33548 case V8SI_FTYPE_V8SF_ROUND:
33549 return ix86_expand_sse_round (d, exp, target);
33550 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33551 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33552 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33553 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33554 case INT_FTYPE_V8SF_V8SF_PTEST:
33555 case INT_FTYPE_V4DI_V4DI_PTEST:
33556 case INT_FTYPE_V4DF_V4DF_PTEST:
33557 case INT_FTYPE_V4SF_V4SF_PTEST:
33558 case INT_FTYPE_V2DI_V2DI_PTEST:
33559 case INT_FTYPE_V2DF_V2DF_PTEST:
33560 return ix86_expand_sse_ptest (d, exp, target);
33561 case FLOAT128_FTYPE_FLOAT128:
33562 case FLOAT_FTYPE_FLOAT:
33563 case INT_FTYPE_INT:
33564 case UINT64_FTYPE_INT:
33565 case UINT16_FTYPE_UINT16:
33566 case INT64_FTYPE_INT64:
33567 case INT64_FTYPE_V4SF:
33568 case INT64_FTYPE_V2DF:
33569 case INT_FTYPE_V16QI:
33570 case INT_FTYPE_V8QI:
33571 case INT_FTYPE_V8SF:
33572 case INT_FTYPE_V4DF:
33573 case INT_FTYPE_V4SF:
33574 case INT_FTYPE_V2DF:
33575 case INT_FTYPE_V32QI:
33576 case V16QI_FTYPE_V16QI:
33577 case V8SI_FTYPE_V8SF:
33578 case V8SI_FTYPE_V4SI:
33579 case V8HI_FTYPE_V8HI:
33580 case V8HI_FTYPE_V16QI:
33581 case V8QI_FTYPE_V8QI:
33582 case V8SF_FTYPE_V8SF:
33583 case V8SF_FTYPE_V8SI:
33584 case V8SF_FTYPE_V4SF:
33585 case V8SF_FTYPE_V8HI:
33586 case V4SI_FTYPE_V4SI:
33587 case V4SI_FTYPE_V16QI:
33588 case V4SI_FTYPE_V4SF:
33589 case V4SI_FTYPE_V8SI:
33590 case V4SI_FTYPE_V8HI:
33591 case V4SI_FTYPE_V4DF:
33592 case V4SI_FTYPE_V2DF:
33593 case V4HI_FTYPE_V4HI:
33594 case V4DF_FTYPE_V4DF:
33595 case V4DF_FTYPE_V4SI:
33596 case V4DF_FTYPE_V4SF:
33597 case V4DF_FTYPE_V2DF:
33598 case V4SF_FTYPE_V4SF:
33599 case V4SF_FTYPE_V4SI:
33600 case V4SF_FTYPE_V8SF:
33601 case V4SF_FTYPE_V4DF:
33602 case V4SF_FTYPE_V8HI:
33603 case V4SF_FTYPE_V2DF:
33604 case V2DI_FTYPE_V2DI:
33605 case V2DI_FTYPE_V16QI:
33606 case V2DI_FTYPE_V8HI:
33607 case V2DI_FTYPE_V4SI:
33608 case V2DF_FTYPE_V2DF:
33609 case V2DF_FTYPE_V4SI:
33610 case V2DF_FTYPE_V4DF:
33611 case V2DF_FTYPE_V4SF:
33612 case V2DF_FTYPE_V2SI:
33613 case V2SI_FTYPE_V2SI:
33614 case V2SI_FTYPE_V4SF:
33615 case V2SI_FTYPE_V2SF:
33616 case V2SI_FTYPE_V2DF:
33617 case V2SF_FTYPE_V2SF:
33618 case V2SF_FTYPE_V2SI:
33619 case V32QI_FTYPE_V32QI:
33620 case V32QI_FTYPE_V16QI:
33621 case V16HI_FTYPE_V16HI:
33622 case V16HI_FTYPE_V8HI:
33623 case V8SI_FTYPE_V8SI:
33624 case V16HI_FTYPE_V16QI:
33625 case V8SI_FTYPE_V16QI:
33626 case V4DI_FTYPE_V16QI:
33627 case V8SI_FTYPE_V8HI:
33628 case V4DI_FTYPE_V8HI:
33629 case V4DI_FTYPE_V4SI:
33630 case V4DI_FTYPE_V2DI:
33631 case HI_FTYPE_HI:
33632 case UINT_FTYPE_V2DF:
33633 case UINT_FTYPE_V4SF:
33634 case UINT64_FTYPE_V2DF:
33635 case UINT64_FTYPE_V4SF:
33636 case V16QI_FTYPE_V8DI:
33637 case V16HI_FTYPE_V16SI:
33638 case V16SI_FTYPE_HI:
33639 case V16SI_FTYPE_V16SI:
33640 case V16SI_FTYPE_INT:
33641 case V16SF_FTYPE_FLOAT:
33642 case V16SF_FTYPE_V4SF:
33643 case V16SF_FTYPE_V16SF:
33644 case V8HI_FTYPE_V8DI:
33645 case V8UHI_FTYPE_V8UHI:
33646 case V8SI_FTYPE_V8DI:
33647 case V8USI_FTYPE_V8USI:
33648 case V8SF_FTYPE_V8DF:
33649 case V8DI_FTYPE_QI:
33650 case V8DI_FTYPE_INT64:
33651 case V8DI_FTYPE_V4DI:
33652 case V8DI_FTYPE_V8DI:
33653 case V8DF_FTYPE_DOUBLE:
33654 case V8DF_FTYPE_V4DF:
33655 case V8DF_FTYPE_V8DF:
33656 case V8DF_FTYPE_V8SI:
33657 nargs = 1;
33658 break;
33659 case V4SF_FTYPE_V4SF_VEC_MERGE:
33660 case V2DF_FTYPE_V2DF_VEC_MERGE:
33661 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33662 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33663 case V16QI_FTYPE_V16QI_V16QI:
33664 case V16QI_FTYPE_V8HI_V8HI:
33665 case V16SI_FTYPE_V16SI_V16SI:
33666 case V16SF_FTYPE_V16SF_V16SF:
33667 case V16SF_FTYPE_V16SF_V16SI:
33668 case V8QI_FTYPE_V8QI_V8QI:
33669 case V8QI_FTYPE_V4HI_V4HI:
33670 case V8HI_FTYPE_V8HI_V8HI:
33671 case V8HI_FTYPE_V16QI_V16QI:
33672 case V8HI_FTYPE_V4SI_V4SI:
33673 case V8SF_FTYPE_V8SF_V8SF:
33674 case V8SF_FTYPE_V8SF_V8SI:
33675 case V8DI_FTYPE_V8DI_V8DI:
33676 case V8DF_FTYPE_V8DF_V8DF:
33677 case V8DF_FTYPE_V8DF_V8DI:
33678 case V4SI_FTYPE_V4SI_V4SI:
33679 case V4SI_FTYPE_V8HI_V8HI:
33680 case V4SI_FTYPE_V4SF_V4SF:
33681 case V4SI_FTYPE_V2DF_V2DF:
33682 case V4HI_FTYPE_V4HI_V4HI:
33683 case V4HI_FTYPE_V8QI_V8QI:
33684 case V4HI_FTYPE_V2SI_V2SI:
33685 case V4DF_FTYPE_V4DF_V4DF:
33686 case V4DF_FTYPE_V4DF_V4DI:
33687 case V4SF_FTYPE_V4SF_V4SF:
33688 case V4SF_FTYPE_V4SF_V4SI:
33689 case V4SF_FTYPE_V4SF_V2SI:
33690 case V4SF_FTYPE_V4SF_V2DF:
33691 case V4SF_FTYPE_V4SF_UINT:
33692 case V4SF_FTYPE_V4SF_UINT64:
33693 case V4SF_FTYPE_V4SF_DI:
33694 case V4SF_FTYPE_V4SF_SI:
33695 case V2DI_FTYPE_V2DI_V2DI:
33696 case V2DI_FTYPE_V16QI_V16QI:
33697 case V2DI_FTYPE_V4SI_V4SI:
33698 case V2UDI_FTYPE_V4USI_V4USI:
33699 case V2DI_FTYPE_V2DI_V16QI:
33700 case V2DI_FTYPE_V2DF_V2DF:
33701 case V2SI_FTYPE_V2SI_V2SI:
33702 case V2SI_FTYPE_V4HI_V4HI:
33703 case V2SI_FTYPE_V2SF_V2SF:
33704 case V2DF_FTYPE_V2DF_V2DF:
33705 case V2DF_FTYPE_V2DF_V4SF:
33706 case V2DF_FTYPE_V2DF_V2DI:
33707 case V2DF_FTYPE_V2DF_DI:
33708 case V2DF_FTYPE_V2DF_SI:
33709 case V2DF_FTYPE_V2DF_UINT:
33710 case V2DF_FTYPE_V2DF_UINT64:
33711 case V2SF_FTYPE_V2SF_V2SF:
33712 case V1DI_FTYPE_V1DI_V1DI:
33713 case V1DI_FTYPE_V8QI_V8QI:
33714 case V1DI_FTYPE_V2SI_V2SI:
33715 case V32QI_FTYPE_V16HI_V16HI:
33716 case V16HI_FTYPE_V8SI_V8SI:
33717 case V32QI_FTYPE_V32QI_V32QI:
33718 case V16HI_FTYPE_V32QI_V32QI:
33719 case V16HI_FTYPE_V16HI_V16HI:
33720 case V8SI_FTYPE_V4DF_V4DF:
33721 case V8SI_FTYPE_V8SI_V8SI:
33722 case V8SI_FTYPE_V16HI_V16HI:
33723 case V4DI_FTYPE_V4DI_V4DI:
33724 case V4DI_FTYPE_V8SI_V8SI:
33725 case V4UDI_FTYPE_V8USI_V8USI:
33726 case QI_FTYPE_V8DI_V8DI:
33727 case HI_FTYPE_V16SI_V16SI:
33728 if (comparison == UNKNOWN)
33729 return ix86_expand_binop_builtin (icode, exp, target);
33730 nargs = 2;
33731 break;
33732 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33733 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33734 gcc_assert (comparison != UNKNOWN);
33735 nargs = 2;
33736 swap = true;
33737 break;
33738 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33739 case V16HI_FTYPE_V16HI_SI_COUNT:
33740 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33741 case V8SI_FTYPE_V8SI_SI_COUNT:
33742 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33743 case V4DI_FTYPE_V4DI_INT_COUNT:
33744 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33745 case V8HI_FTYPE_V8HI_SI_COUNT:
33746 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33747 case V4SI_FTYPE_V4SI_SI_COUNT:
33748 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33749 case V4HI_FTYPE_V4HI_SI_COUNT:
33750 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33751 case V2DI_FTYPE_V2DI_SI_COUNT:
33752 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33753 case V2SI_FTYPE_V2SI_SI_COUNT:
33754 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33755 case V1DI_FTYPE_V1DI_SI_COUNT:
33756 nargs = 2;
33757 last_arg_count = true;
33758 break;
33759 case UINT64_FTYPE_UINT64_UINT64:
33760 case UINT_FTYPE_UINT_UINT:
33761 case UINT_FTYPE_UINT_USHORT:
33762 case UINT_FTYPE_UINT_UCHAR:
33763 case UINT16_FTYPE_UINT16_INT:
33764 case UINT8_FTYPE_UINT8_INT:
33765 case HI_FTYPE_HI_HI:
33766 case V16SI_FTYPE_V8DF_V8DF:
33767 nargs = 2;
33768 break;
33769 case V2DI_FTYPE_V2DI_INT_CONVERT:
33770 nargs = 2;
33771 rmode = V1TImode;
33772 nargs_constant = 1;
33773 break;
33774 case V4DI_FTYPE_V4DI_INT_CONVERT:
33775 nargs = 2;
33776 rmode = V2TImode;
33777 nargs_constant = 1;
33778 break;
33779 case V8HI_FTYPE_V8HI_INT:
33780 case V8HI_FTYPE_V8SF_INT:
33781 case V16HI_FTYPE_V16SF_INT:
33782 case V8HI_FTYPE_V4SF_INT:
33783 case V8SF_FTYPE_V8SF_INT:
33784 case V4SF_FTYPE_V16SF_INT:
33785 case V16SF_FTYPE_V16SF_INT:
33786 case V4SI_FTYPE_V4SI_INT:
33787 case V4SI_FTYPE_V8SI_INT:
33788 case V4HI_FTYPE_V4HI_INT:
33789 case V4DF_FTYPE_V4DF_INT:
33790 case V4DF_FTYPE_V8DF_INT:
33791 case V4SF_FTYPE_V4SF_INT:
33792 case V4SF_FTYPE_V8SF_INT:
33793 case V2DI_FTYPE_V2DI_INT:
33794 case V2DF_FTYPE_V2DF_INT:
33795 case V2DF_FTYPE_V4DF_INT:
33796 case V16HI_FTYPE_V16HI_INT:
33797 case V8SI_FTYPE_V8SI_INT:
33798 case V16SI_FTYPE_V16SI_INT:
33799 case V4SI_FTYPE_V16SI_INT:
33800 case V4DI_FTYPE_V4DI_INT:
33801 case V2DI_FTYPE_V4DI_INT:
33802 case V4DI_FTYPE_V8DI_INT:
33803 case HI_FTYPE_HI_INT:
33804 nargs = 2;
33805 nargs_constant = 1;
33806 break;
33807 case V16QI_FTYPE_V16QI_V16QI_V16QI:
33808 case V8SF_FTYPE_V8SF_V8SF_V8SF:
33809 case V4DF_FTYPE_V4DF_V4DF_V4DF:
33810 case V4SF_FTYPE_V4SF_V4SF_V4SF:
33811 case V2DF_FTYPE_V2DF_V2DF_V2DF:
33812 case V32QI_FTYPE_V32QI_V32QI_V32QI:
33813 case HI_FTYPE_V16SI_V16SI_HI:
33814 case QI_FTYPE_V8DI_V8DI_QI:
33815 case V16HI_FTYPE_V16SI_V16HI_HI:
33816 case V16QI_FTYPE_V16SI_V16QI_HI:
33817 case V16QI_FTYPE_V8DI_V16QI_QI:
33818 case V16SF_FTYPE_V16SF_V16SF_HI:
33819 case V16SF_FTYPE_V16SF_V16SF_V16SF:
33820 case V16SF_FTYPE_V16SF_V16SI_V16SF:
33821 case V16SF_FTYPE_V16SI_V16SF_HI:
33822 case V16SF_FTYPE_V16SI_V16SF_V16SF:
33823 case V16SF_FTYPE_V4SF_V16SF_HI:
33824 case V16SI_FTYPE_SI_V16SI_HI:
33825 case V16SI_FTYPE_V16HI_V16SI_HI:
33826 case V16SI_FTYPE_V16QI_V16SI_HI:
33827 case V16SI_FTYPE_V16SF_V16SI_HI:
33828 case V16SI_FTYPE_V16SI_V16SI_HI:
33829 case V16SI_FTYPE_V16SI_V16SI_V16SI:
33830 case V16SI_FTYPE_V4SI_V16SI_HI:
33831 case V2DI_FTYPE_V2DI_V2DI_V2DI:
33832 case V4DI_FTYPE_V4DI_V4DI_V4DI:
33833 case V8DF_FTYPE_V2DF_V8DF_QI:
33834 case V8DF_FTYPE_V4DF_V8DF_QI:
33835 case V8DF_FTYPE_V8DF_V8DF_QI:
33836 case V8DF_FTYPE_V8DF_V8DF_V8DF:
33837 case V8DF_FTYPE_V8DF_V8DI_V8DF:
33838 case V8DF_FTYPE_V8DI_V8DF_V8DF:
33839 case V8DF_FTYPE_V8SF_V8DF_QI:
33840 case V8DF_FTYPE_V8SI_V8DF_QI:
33841 case V8DI_FTYPE_DI_V8DI_QI:
33842 case V8DI_FTYPE_V16QI_V8DI_QI:
33843 case V8DI_FTYPE_V2DI_V8DI_QI:
33844 case V8DI_FTYPE_V4DI_V8DI_QI:
33845 case V8DI_FTYPE_V8DI_V8DI_QI:
33846 case V8DI_FTYPE_V8DI_V8DI_V8DI:
33847 case V8DI_FTYPE_V8HI_V8DI_QI:
33848 case V8DI_FTYPE_V8SI_V8DI_QI:
33849 case V8HI_FTYPE_V8DI_V8HI_QI:
33850 case V8SF_FTYPE_V8DF_V8SF_QI:
33851 case V8SI_FTYPE_V8DF_V8SI_QI:
33852 case V8SI_FTYPE_V8DI_V8SI_QI:
33853 case V4SI_FTYPE_V4SI_V4SI_V4SI:
33854 nargs = 3;
33855 break;
33856 case V32QI_FTYPE_V32QI_V32QI_INT:
33857 case V16HI_FTYPE_V16HI_V16HI_INT:
33858 case V16QI_FTYPE_V16QI_V16QI_INT:
33859 case V4DI_FTYPE_V4DI_V4DI_INT:
33860 case V8HI_FTYPE_V8HI_V8HI_INT:
33861 case V8SI_FTYPE_V8SI_V8SI_INT:
33862 case V8SI_FTYPE_V8SI_V4SI_INT:
33863 case V8SF_FTYPE_V8SF_V8SF_INT:
33864 case V8SF_FTYPE_V8SF_V4SF_INT:
33865 case V4SI_FTYPE_V4SI_V4SI_INT:
33866 case V4DF_FTYPE_V4DF_V4DF_INT:
33867 case V16SF_FTYPE_V16SF_V16SF_INT:
33868 case V16SF_FTYPE_V16SF_V4SF_INT:
33869 case V16SI_FTYPE_V16SI_V4SI_INT:
33870 case V4DF_FTYPE_V4DF_V2DF_INT:
33871 case V4SF_FTYPE_V4SF_V4SF_INT:
33872 case V2DI_FTYPE_V2DI_V2DI_INT:
33873 case V4DI_FTYPE_V4DI_V2DI_INT:
33874 case V2DF_FTYPE_V2DF_V2DF_INT:
33875 case QI_FTYPE_V8DI_V8DI_INT:
33876 case QI_FTYPE_V8DF_V8DF_INT:
33877 case QI_FTYPE_V2DF_V2DF_INT:
33878 case QI_FTYPE_V4SF_V4SF_INT:
33879 case HI_FTYPE_V16SI_V16SI_INT:
33880 case HI_FTYPE_V16SF_V16SF_INT:
33881 nargs = 3;
33882 nargs_constant = 1;
33883 break;
33884 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
33885 nargs = 3;
33886 rmode = V4DImode;
33887 nargs_constant = 1;
33888 break;
33889 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
33890 nargs = 3;
33891 rmode = V2DImode;
33892 nargs_constant = 1;
33893 break;
33894 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
33895 nargs = 3;
33896 rmode = DImode;
33897 nargs_constant = 1;
33898 break;
33899 case V2DI_FTYPE_V2DI_UINT_UINT:
33900 nargs = 3;
33901 nargs_constant = 2;
33902 break;
33903 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
33904 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
33905 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
33906 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
33907 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
33908 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
33909 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
33910 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
33911 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
33912 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
33913 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
33914 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
33915 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
33916 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
33917 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
33918 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
33919 nargs = 4;
33920 break;
33921 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
33922 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
33923 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
33924 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
33925 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
33926 nargs = 4;
33927 nargs_constant = 1;
33928 break;
33929 case QI_FTYPE_V2DF_V2DF_INT_QI:
33930 case QI_FTYPE_V4SF_V4SF_INT_QI:
33931 nargs = 4;
33932 mask_pos = 1;
33933 nargs_constant = 1;
33934 break;
33935 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
33936 nargs = 4;
33937 nargs_constant = 2;
33938 break;
33939 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
33940 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
33941 nargs = 4;
33942 break;
33943 case QI_FTYPE_V8DI_V8DI_INT_QI:
33944 case HI_FTYPE_V16SI_V16SI_INT_HI:
33945 case QI_FTYPE_V8DF_V8DF_INT_QI:
33946 case HI_FTYPE_V16SF_V16SF_INT_HI:
33947 mask_pos = 1;
33948 nargs = 4;
33949 nargs_constant = 1;
33950 break;
33951 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
33952 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
33953 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
33954 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
33955 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
33956 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
33957 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
33958 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
33959 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
33960 nargs = 4;
33961 mask_pos = 2;
33962 nargs_constant = 1;
33963 break;
33964 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
33965 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
33966 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
33967 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
33968 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
33969 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
33970 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
33971 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
33972 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
33973 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
33974 nargs = 5;
33975 mask_pos = 2;
33976 nargs_constant = 1;
33977 break;
33978 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
33979 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
33980 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
33981 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
33982 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
33983 nargs = 5;
33984 mask_pos = 1;
33985 nargs_constant = 1;
33986 break;
33988 default:
33989 gcc_unreachable ();
33992 gcc_assert (nargs <= ARRAY_SIZE (args));
33994 if (comparison != UNKNOWN)
33996 gcc_assert (nargs == 2);
33997 return ix86_expand_sse_compare (d, exp, target, swap);
34000 if (rmode == VOIDmode || rmode == tmode)
34002 if (optimize
34003 || target == 0
34004 || GET_MODE (target) != tmode
34005 || !insn_p->operand[0].predicate (target, tmode))
34006 target = gen_reg_rtx (tmode);
34007 real_target = target;
34009 else
34011 real_target = gen_reg_rtx (tmode);
34012 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
34015 for (i = 0; i < nargs; i++)
34017 tree arg = CALL_EXPR_ARG (exp, i);
34018 rtx op = expand_normal (arg);
34019 enum machine_mode mode = insn_p->operand[i + 1].mode;
34020 bool match = insn_p->operand[i + 1].predicate (op, mode);
34022 if (last_arg_count && (i + 1) == nargs)
34024 /* SIMD shift insns take either an 8-bit immediate or a
34025 register as the count.  But the builtin functions take an int
34026 as the count.  If the count doesn't match, we put it in a register.  */
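/* A minimal sketch of the mismatch, assuming the usual emmintrin.h
   spellings:
     __m128i r1 = _mm_slli_epi32 (a, 3);
     __m128i r2 = _mm_sll_epi32 (a, cnt);
   The int count of the first form is what gets widened to SImode and
   copied into a register below when it fails the insn predicate.  */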
34027 if (!match)
34029 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
34030 if (!insn_p->operand[i + 1].predicate (op, mode))
34031 op = copy_to_reg (op);
34034 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34035 (!mask_pos && (nargs - i) <= nargs_constant))
34037 if (!match)
34038 switch (icode)
34040 case CODE_FOR_avx2_inserti128:
34041 case CODE_FOR_avx2_extracti128:
34042 error ("the last argument must be a 1-bit immediate");
34043 return const0_rtx;
34045 case CODE_FOR_avx512f_cmpv8di3_mask:
34046 case CODE_FOR_avx512f_cmpv16si3_mask:
34047 case CODE_FOR_avx512f_ucmpv8di3_mask:
34048 case CODE_FOR_avx512f_ucmpv16si3_mask:
34049 error ("the last argument must be a 3-bit immediate");
34050 return const0_rtx;
34052 case CODE_FOR_sse4_1_roundsd:
34053 case CODE_FOR_sse4_1_roundss:
34055 case CODE_FOR_sse4_1_roundpd:
34056 case CODE_FOR_sse4_1_roundps:
34057 case CODE_FOR_avx_roundpd256:
34058 case CODE_FOR_avx_roundps256:
34060 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
34061 case CODE_FOR_sse4_1_roundps_sfix:
34062 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
34063 case CODE_FOR_avx_roundps_sfix256:
34065 case CODE_FOR_sse4_1_blendps:
34066 case CODE_FOR_avx_blendpd256:
34067 case CODE_FOR_avx_vpermilv4df:
34068 case CODE_FOR_avx512f_getmantv8df_mask:
34069 case CODE_FOR_avx512f_getmantv16sf_mask:
34070 error ("the last argument must be a 4-bit immediate");
34071 return const0_rtx;
34073 case CODE_FOR_sha1rnds4:
34074 case CODE_FOR_sse4_1_blendpd:
34075 case CODE_FOR_avx_vpermilv2df:
34076 case CODE_FOR_xop_vpermil2v2df3:
34077 case CODE_FOR_xop_vpermil2v4sf3:
34078 case CODE_FOR_xop_vpermil2v4df3:
34079 case CODE_FOR_xop_vpermil2v8sf3:
34080 case CODE_FOR_avx512f_vinsertf32x4_mask:
34081 case CODE_FOR_avx512f_vinserti32x4_mask:
34082 case CODE_FOR_avx512f_vextractf32x4_mask:
34083 case CODE_FOR_avx512f_vextracti32x4_mask:
34084 error ("the last argument must be a 2-bit immediate");
34085 return const0_rtx;
34087 case CODE_FOR_avx_vextractf128v4df:
34088 case CODE_FOR_avx_vextractf128v8sf:
34089 case CODE_FOR_avx_vextractf128v8si:
34090 case CODE_FOR_avx_vinsertf128v4df:
34091 case CODE_FOR_avx_vinsertf128v8sf:
34092 case CODE_FOR_avx_vinsertf128v8si:
34093 case CODE_FOR_avx512f_vinsertf64x4_mask:
34094 case CODE_FOR_avx512f_vinserti64x4_mask:
34095 case CODE_FOR_avx512f_vextractf64x4_mask:
34096 case CODE_FOR_avx512f_vextracti64x4_mask:
34097 error ("the last argument must be a 1-bit immediate");
34098 return const0_rtx;
34100 case CODE_FOR_avx_vmcmpv2df3:
34101 case CODE_FOR_avx_vmcmpv4sf3:
34102 case CODE_FOR_avx_cmpv2df3:
34103 case CODE_FOR_avx_cmpv4sf3:
34104 case CODE_FOR_avx_cmpv4df3:
34105 case CODE_FOR_avx_cmpv8sf3:
34106 case CODE_FOR_avx512f_cmpv8df3_mask:
34107 case CODE_FOR_avx512f_cmpv16sf3_mask:
34108 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34109 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34110 error ("the last argument must be a 5-bit immediate");
34111 return const0_rtx;
34113 default:
34114 switch (nargs_constant)
34116 case 2:
34117 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34118 (!mask_pos && (nargs - i) == nargs_constant))
34120 error ("the next to last argument must be an 8-bit immediate");
34121 break;
34123 case 1:
34124 error ("the last argument must be an 8-bit immediate");
34125 break;
34126 default:
34127 gcc_unreachable ();
34129 return const0_rtx;
34132 else
34134 if (VECTOR_MODE_P (mode))
34135 op = safe_vector_operand (op, mode);
34137 /* If we aren't optimizing, only allow one memory operand to
34138 be generated. */
34139 if (memory_operand (op, mode))
34140 num_memory++;
34142 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34144 if (optimize || !match || num_memory > 1)
34145 op = copy_to_mode_reg (mode, op);
34147 else
34149 op = copy_to_reg (op);
34150 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34154 args[i].op = op;
34155 args[i].mode = mode;
34158 switch (nargs)
34160 case 1:
34161 pat = GEN_FCN (icode) (real_target, args[0].op);
34162 break;
34163 case 2:
34164 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34165 break;
34166 case 3:
34167 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34168 args[2].op);
34169 break;
34170 case 4:
34171 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34172 args[2].op, args[3].op);
34173 break;
34174 case 5:
34175 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34176 args[2].op, args[3].op, args[4].op);
break;
34177 case 6:
34178 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34179 args[2].op, args[3].op, args[4].op,
34180 args[5].op);
34181 break;
34182 default:
34183 gcc_unreachable ();
34186 if (! pat)
34187 return 0;
34189 emit_insn (pat);
34190 return target;
34193 /* Transform a pattern of the following layout:
34194 (parallel [
34195 set (A B)
34196 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
34198 into:
34199 (set (A B))
and a pattern of the layout:
34202 (parallel [ A B
...
34204 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
...
])
34207 into:
34208 (parallel [ A B ... ]) */
34210 static rtx
34211 ix86_erase_embedded_rounding (rtx pat)
34213 if (GET_CODE (pat) == INSN)
34214 pat = PATTERN (pat);
34216 gcc_assert (GET_CODE (pat) == PARALLEL);
34218 if (XVECLEN (pat, 0) == 2)
34220 rtx p0 = XVECEXP (pat, 0, 0);
34221 rtx p1 = XVECEXP (pat, 0, 1);
34223 gcc_assert (GET_CODE (p0) == SET
34224 && GET_CODE (p1) == UNSPEC
34225 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
34227 return p0;
34229 else
34231 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
34232 int i = 0;
34233 int j = 0;
34235 for (; i < XVECLEN (pat, 0); ++i)
34237 rtx elem = XVECEXP (pat, 0, i);
34238 if (GET_CODE (elem) != UNSPEC
34239 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
34240 res [j++] = elem;
34243 /* No more than 1 occurrence was removed. */
34244 gcc_assert (j >= XVECLEN (pat, 0) - 1);
34246 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
34250 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34251 with rounding. */
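/* A sketch of the entry point handled here, assuming the avx512fintrin.h
   spelling:
     int r = _mm_comi_round_ss (a, b, _CMP_GE_OQ, _MM_FROUND_NO_EXC);
   The third argument indexes the comparison tables below and the fourth
   carries the rounding/SAE control.  */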
34252 static rtx
34253 ix86_expand_sse_comi_round (const struct builtin_description *d,
34254 tree exp, rtx target)
34256 rtx pat, set_dst;
34257 tree arg0 = CALL_EXPR_ARG (exp, 0);
34258 tree arg1 = CALL_EXPR_ARG (exp, 1);
34259 tree arg2 = CALL_EXPR_ARG (exp, 2);
34260 tree arg3 = CALL_EXPR_ARG (exp, 3);
34261 rtx op0 = expand_normal (arg0);
34262 rtx op1 = expand_normal (arg1);
34263 rtx op2 = expand_normal (arg2);
34264 rtx op3 = expand_normal (arg3);
34265 enum insn_code icode = d->icode;
34266 const struct insn_data_d *insn_p = &insn_data[icode];
34267 enum machine_mode mode0 = insn_p->operand[0].mode;
34268 enum machine_mode mode1 = insn_p->operand[1].mode;
34269 enum rtx_code comparison = UNEQ;
34270 bool need_ucomi = false;
34272 /* See avxintrin.h for values. */
34273 enum rtx_code comi_comparisons[32] =
34275 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34276 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34277 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34279 bool need_ucomi_values[32] =
34281 true, false, false, true, true, false, false, true,
34282 true, false, false, true, true, false, false, true,
34283 false, true, true, false, false, true, true, false,
34284 false, true, true, false, false, true, true, false
34287 if (!CONST_INT_P (op2))
34289 error ("the third argument must be a comparison constant");
34290 return const0_rtx;
34292 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34294 error ("incorrect comparison mode");
34295 return const0_rtx;
34298 if (!insn_p->operand[2].predicate (op3, SImode))
34300 error ("incorrect rounding operand");
34301 return const0_rtx;
34304 comparison = comi_comparisons[INTVAL (op2)];
34305 need_ucomi = need_ucomi_values[INTVAL (op2)];
34307 if (VECTOR_MODE_P (mode0))
34308 op0 = safe_vector_operand (op0, mode0);
34309 if (VECTOR_MODE_P (mode1))
34310 op1 = safe_vector_operand (op1, mode1);
34312 target = gen_reg_rtx (SImode);
34313 emit_move_insn (target, const0_rtx);
34314 target = gen_rtx_SUBREG (QImode, target, 0);
34316 if ((optimize && !register_operand (op0, mode0))
34317 || !insn_p->operand[0].predicate (op0, mode0))
34318 op0 = copy_to_mode_reg (mode0, op0);
34319 if ((optimize && !register_operand (op1, mode1))
34320 || !insn_p->operand[1].predicate (op1, mode1))
34321 op1 = copy_to_mode_reg (mode1, op1);
34323 if (need_ucomi)
34324 icode = icode == CODE_FOR_sse_comi_round
34325 ? CODE_FOR_sse_ucomi_round
34326 : CODE_FOR_sse2_ucomi_round;
34328 pat = GEN_FCN (icode) (op0, op1, op3);
34329 if (! pat)
34330 return 0;
34332 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34333 if (INTVAL (op3) == NO_ROUND)
34335 pat = ix86_erase_embedded_rounding (pat);
34336 if (! pat)
34337 return 0;
34339 set_dst = SET_DEST (pat);
34341 else
34343 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
34344 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
34347 emit_insn (pat);
34348 emit_insn (gen_rtx_SET (VOIDmode,
34349 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34350 gen_rtx_fmt_ee (comparison, QImode,
34351 set_dst,
34352 const0_rtx)));
34354 return SUBREG_REG (target);
34357 static rtx
34358 ix86_expand_round_builtin (const struct builtin_description *d,
34359 tree exp, rtx target)
34361 rtx pat;
34362 unsigned int i, nargs;
34363 struct
34365 rtx op;
34366 enum machine_mode mode;
34367 } args[6];
34368 enum insn_code icode = d->icode;
34369 const struct insn_data_d *insn_p = &insn_data[icode];
34370 enum machine_mode tmode = insn_p->operand[0].mode;
34371 unsigned int nargs_constant = 0;
34372 unsigned int redundant_embed_rnd = 0;
34374 switch ((enum ix86_builtin_func_type) d->flag)
34376 case UINT64_FTYPE_V2DF_INT:
34377 case UINT64_FTYPE_V4SF_INT:
34378 case UINT_FTYPE_V2DF_INT:
34379 case UINT_FTYPE_V4SF_INT:
34380 case INT64_FTYPE_V2DF_INT:
34381 case INT64_FTYPE_V4SF_INT:
34382 case INT_FTYPE_V2DF_INT:
34383 case INT_FTYPE_V4SF_INT:
34384 nargs = 2;
34385 break;
34386 case V4SF_FTYPE_V4SF_UINT_INT:
34387 case V4SF_FTYPE_V4SF_UINT64_INT:
34388 case V2DF_FTYPE_V2DF_UINT64_INT:
34389 case V4SF_FTYPE_V4SF_INT_INT:
34390 case V4SF_FTYPE_V4SF_INT64_INT:
34391 case V2DF_FTYPE_V2DF_INT64_INT:
34392 case V4SF_FTYPE_V4SF_V4SF_INT:
34393 case V2DF_FTYPE_V2DF_V2DF_INT:
34394 case V4SF_FTYPE_V4SF_V2DF_INT:
34395 case V2DF_FTYPE_V2DF_V4SF_INT:
34396 nargs = 3;
34397 break;
34398 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34399 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34400 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34401 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34402 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34403 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34404 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34405 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34406 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34407 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34408 nargs = 4;
34409 break;
34410 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34411 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34412 nargs_constant = 2;
34413 nargs = 4;
34414 break;
34415 case INT_FTYPE_V4SF_V4SF_INT_INT:
34416 case INT_FTYPE_V2DF_V2DF_INT_INT:
34417 return ix86_expand_sse_comi_round (d, exp, target);
34418 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
34419 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34420 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34421 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34422 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34423 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34424 nargs = 5;
34425 break;
34426 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34427 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34428 nargs_constant = 4;
34429 nargs = 5;
34430 break;
34431 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
34432 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
34433 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
34434 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
34435 nargs_constant = 3;
34436 nargs = 5;
34437 break;
34438 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34439 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34440 nargs = 6;
34441 nargs_constant = 4;
34442 break;
34443 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34444 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34445 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34446 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34447 nargs = 6;
34448 nargs_constant = 3;
34449 break;
34450 default:
34451 gcc_unreachable ();
34453 gcc_assert (nargs <= ARRAY_SIZE (args));
34455 if (optimize
34456 || target == 0
34457 || GET_MODE (target) != tmode
34458 || !insn_p->operand[0].predicate (target, tmode))
34459 target = gen_reg_rtx (tmode);
34461 for (i = 0; i < nargs; i++)
34463 tree arg = CALL_EXPR_ARG (exp, i);
34464 rtx op = expand_normal (arg);
34465 enum machine_mode mode = insn_p->operand[i + 1].mode;
34466 bool match = insn_p->operand[i + 1].predicate (op, mode);
34468 if (i == nargs - nargs_constant)
34470 if (!match)
34472 switch (icode)
34474 case CODE_FOR_avx512f_getmantv8df_mask_round:
34475 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34476 case CODE_FOR_avx512f_getmantv2df_round:
34477 case CODE_FOR_avx512f_getmantv4sf_round:
34478 error ("the immediate argument must be a 4-bit immediate");
34479 return const0_rtx;
34480 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34481 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34482 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34483 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34484 error ("the immediate argument must be a 5-bit immediate");
34485 return const0_rtx;
34486 default:
34487 error ("the immediate argument must be an 8-bit immediate");
34488 return const0_rtx;
34492 else if (i == nargs - 1)
34494 if (!insn_p->operand[nargs].predicate (op, SImode))
34496 error ("incorrect rounding operand");
34497 return const0_rtx;
34500 /* If there is no rounding, use the normal version of the pattern. */
34501 if (INTVAL (op) == NO_ROUND)
34502 redundant_embed_rnd = 1;
34504 else
34506 if (VECTOR_MODE_P (mode))
34507 op = safe_vector_operand (op, mode);
34509 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34511 if (optimize || !match)
34512 op = copy_to_mode_reg (mode, op);
34514 else
34516 op = copy_to_reg (op);
34517 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34521 args[i].op = op;
34522 args[i].mode = mode;
34525 switch (nargs)
34527 case 1:
34528 pat = GEN_FCN (icode) (target, args[0].op);
34529 break;
34530 case 2:
34531 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34532 break;
34533 case 3:
34534 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34535 args[2].op);
34536 break;
34537 case 4:
34538 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34539 args[2].op, args[3].op);
34540 break;
34541 case 5:
34542 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34543 args[2].op, args[3].op, args[4].op);
break;
34544 case 6:
34545 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34546 args[2].op, args[3].op, args[4].op,
34547 args[5].op);
34548 break;
34549 default:
34550 gcc_unreachable ();
34553 if (!pat)
34554 return 0;
34556 if (redundant_embed_rnd)
34557 pat = ix86_erase_embedded_rounding (pat);
34559 emit_insn (pat);
34560 return target;
34563 /* Subroutine of ix86_expand_builtin to take care of special insns
34564 with variable number of operands. */
34566 static rtx
34567 ix86_expand_special_args_builtin (const struct builtin_description *d,
34568 tree exp, rtx target)
34570 tree arg;
34571 rtx pat, op;
34572 unsigned int i, nargs, arg_adjust, memory;
34573 bool aligned_mem = false;
34574 struct
34576 rtx op;
34577 enum machine_mode mode;
34578 } args[3];
34579 enum insn_code icode = d->icode;
34580 bool last_arg_constant = false;
34581 const struct insn_data_d *insn_p = &insn_data[icode];
34582 enum machine_mode tmode = insn_p->operand[0].mode;
34583 enum { load, store } klass;
34585 switch ((enum ix86_builtin_func_type) d->flag)
34587 case VOID_FTYPE_VOID:
34588 emit_insn (GEN_FCN (icode) (target));
34589 return 0;
34590 case VOID_FTYPE_UINT64:
34591 case VOID_FTYPE_UNSIGNED:
34592 nargs = 0;
34593 klass = store;
34594 memory = 0;
34595 break;
34597 case INT_FTYPE_VOID:
34598 case UINT64_FTYPE_VOID:
34599 case UNSIGNED_FTYPE_VOID:
34600 nargs = 0;
34601 klass = load;
34602 memory = 0;
34603 break;
34604 case UINT64_FTYPE_PUNSIGNED:
34605 case V2DI_FTYPE_PV2DI:
34606 case V4DI_FTYPE_PV4DI:
34607 case V32QI_FTYPE_PCCHAR:
34608 case V16QI_FTYPE_PCCHAR:
34609 case V8SF_FTYPE_PCV4SF:
34610 case V8SF_FTYPE_PCFLOAT:
34611 case V4SF_FTYPE_PCFLOAT:
34612 case V4DF_FTYPE_PCV2DF:
34613 case V4DF_FTYPE_PCDOUBLE:
34614 case V2DF_FTYPE_PCDOUBLE:
34615 case VOID_FTYPE_PVOID:
34616 case V16SI_FTYPE_PV4SI:
34617 case V16SF_FTYPE_PV4SF:
34618 case V8DI_FTYPE_PV4DI:
34619 case V8DI_FTYPE_PV8DI:
34620 case V8DF_FTYPE_PV4DF:
34621 nargs = 1;
34622 klass = load;
34623 memory = 0;
34624 switch (icode)
34626 case CODE_FOR_sse4_1_movntdqa:
34627 case CODE_FOR_avx2_movntdqa:
34628 case CODE_FOR_avx512f_movntdqa:
34629 aligned_mem = true;
34630 break;
34631 default:
34632 break;
34634 break;
34635 case VOID_FTYPE_PV2SF_V4SF:
34636 case VOID_FTYPE_PV8DI_V8DI:
34637 case VOID_FTYPE_PV4DI_V4DI:
34638 case VOID_FTYPE_PV2DI_V2DI:
34639 case VOID_FTYPE_PCHAR_V32QI:
34640 case VOID_FTYPE_PCHAR_V16QI:
34641 case VOID_FTYPE_PFLOAT_V16SF:
34642 case VOID_FTYPE_PFLOAT_V8SF:
34643 case VOID_FTYPE_PFLOAT_V4SF:
34644 case VOID_FTYPE_PDOUBLE_V8DF:
34645 case VOID_FTYPE_PDOUBLE_V4DF:
34646 case VOID_FTYPE_PDOUBLE_V2DF:
34647 case VOID_FTYPE_PLONGLONG_LONGLONG:
34648 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34649 case VOID_FTYPE_PINT_INT:
34650 nargs = 1;
34651 klass = store;
34652 /* Reserve memory operand for target. */
34653 memory = ARRAY_SIZE (args);
34654 switch (icode)
34656 /* These builtins and instructions require the memory
34657 to be properly aligned. */
34658 case CODE_FOR_avx_movntv4di:
34659 case CODE_FOR_sse2_movntv2di:
34660 case CODE_FOR_avx_movntv8sf:
34661 case CODE_FOR_sse_movntv4sf:
34662 case CODE_FOR_sse4a_vmmovntv4sf:
34663 case CODE_FOR_avx_movntv4df:
34664 case CODE_FOR_sse2_movntv2df:
34665 case CODE_FOR_sse4a_vmmovntv2df:
34666 case CODE_FOR_sse2_movntidi:
34667 case CODE_FOR_sse_movntq:
34668 case CODE_FOR_sse2_movntisi:
34669 case CODE_FOR_avx512f_movntv16sf:
34670 case CODE_FOR_avx512f_movntv8df:
34671 case CODE_FOR_avx512f_movntv8di:
34672 aligned_mem = true;
34673 break;
34674 default:
34675 break;
34677 break;
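/* A sketch of why the alignment matters, assuming the immintrin.h
   spelling:
     float buf[8] __attribute__ ((aligned (32)));
     _mm256_stream_ps (buf, v);
   vmovntps faults on an unaligned address, so the MEM built for the
   target below must carry the real alignment via set_mem_align.  */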
34678 case V4SF_FTYPE_V4SF_PCV2SF:
34679 case V2DF_FTYPE_V2DF_PCDOUBLE:
34680 nargs = 2;
34681 klass = load;
34682 memory = 1;
34683 break;
34684 case V8SF_FTYPE_PCV8SF_V8SI:
34685 case V4DF_FTYPE_PCV4DF_V4DI:
34686 case V4SF_FTYPE_PCV4SF_V4SI:
34687 case V2DF_FTYPE_PCV2DF_V2DI:
34688 case V8SI_FTYPE_PCV8SI_V8SI:
34689 case V4DI_FTYPE_PCV4DI_V4DI:
34690 case V4SI_FTYPE_PCV4SI_V4SI:
34691 case V2DI_FTYPE_PCV2DI_V2DI:
34692 nargs = 2;
34693 klass = load;
34694 memory = 0;
34695 break;
34696 case VOID_FTYPE_PV8DF_V8DF_QI:
34697 case VOID_FTYPE_PV16SF_V16SF_HI:
34698 case VOID_FTYPE_PV8DI_V8DI_QI:
34699 case VOID_FTYPE_PV16SI_V16SI_HI:
34700 switch (icode)
34702 /* These builtins and instructions require the memory
34703 to be properly aligned. */
34704 case CODE_FOR_avx512f_storev16sf_mask:
34705 case CODE_FOR_avx512f_storev16si_mask:
34706 case CODE_FOR_avx512f_storev8df_mask:
34707 case CODE_FOR_avx512f_storev8di_mask:
34708 aligned_mem = true;
34709 break;
34710 default:
34711 break;
34713 /* FALLTHRU */
34714 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34715 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34716 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34717 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34718 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34719 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34720 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34721 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34722 case VOID_FTYPE_PDOUBLE_V2DF_QI:
34723 case VOID_FTYPE_PFLOAT_V4SF_QI:
34724 case VOID_FTYPE_PV8SI_V8DI_QI:
34725 case VOID_FTYPE_PV8HI_V8DI_QI:
34726 case VOID_FTYPE_PV16HI_V16SI_HI:
34727 case VOID_FTYPE_PV16QI_V8DI_QI:
34728 case VOID_FTYPE_PV16QI_V16SI_HI:
34729 nargs = 2;
34730 klass = store;
34731 /* Reserve memory operand for target. */
34732 memory = ARRAY_SIZE (args);
34733 break;
34734 case V16SF_FTYPE_PCV16SF_V16SF_HI:
34735 case V16SI_FTYPE_PCV16SI_V16SI_HI:
34736 case V8DF_FTYPE_PCV8DF_V8DF_QI:
34737 case V8DI_FTYPE_PCV8DI_V8DI_QI:
34738 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
34739 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
34740 nargs = 3;
34741 klass = load;
34742 memory = 0;
34743 switch (icode)
34745 /* These builtins and instructions require the memory
34746 to be properly aligned. */
34747 case CODE_FOR_avx512f_loadv16sf_mask:
34748 case CODE_FOR_avx512f_loadv16si_mask:
34749 case CODE_FOR_avx512f_loadv8df_mask:
34750 case CODE_FOR_avx512f_loadv8di_mask:
34751 aligned_mem = true;
34752 break;
34753 default:
34754 break;
34756 break;
34757 case VOID_FTYPE_UINT_UINT_UINT:
34758 case VOID_FTYPE_UINT64_UINT_UINT:
34759 case UCHAR_FTYPE_UINT_UINT_UINT:
34760 case UCHAR_FTYPE_UINT64_UINT_UINT:
34761 nargs = 3;
34762 klass = load;
34763 memory = ARRAY_SIZE (args);
34764 last_arg_constant = true;
34765 break;
34766 default:
34767 gcc_unreachable ();
34770 gcc_assert (nargs <= ARRAY_SIZE (args));
34772 if (klass == store)
34774 arg = CALL_EXPR_ARG (exp, 0);
34775 op = expand_normal (arg);
34776 gcc_assert (target == 0);
34777 if (memory)
34779 op = ix86_zero_extend_to_Pmode (op);
34780 target = gen_rtx_MEM (tmode, op);
34781 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
34782 on it. Try to improve it using get_pointer_alignment,
34783 and if the special builtin is one that requires strict
34784 mode alignment, also from its GET_MODE_ALIGNMENT.
34785 Failure to do so could lead to ix86_legitimate_combined_insn
34786 rejecting all changes to such insns. */
34787 unsigned int align = get_pointer_alignment (arg);
34788 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
34789 align = GET_MODE_ALIGNMENT (tmode);
34790 if (MEM_ALIGN (target) < align)
34791 set_mem_align (target, align);
34793 else
34794 target = force_reg (tmode, op);
34795 arg_adjust = 1;
34797 else
34799 arg_adjust = 0;
34800 if (optimize
34801 || target == 0
34802 || !register_operand (target, tmode)
34803 || GET_MODE (target) != tmode)
34804 target = gen_reg_rtx (tmode);
34807 for (i = 0; i < nargs; i++)
34809 enum machine_mode mode = insn_p->operand[i + 1].mode;
34810 bool match;
34812 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
34813 op = expand_normal (arg);
34814 match = insn_p->operand[i + 1].predicate (op, mode);
34816 if (last_arg_constant && (i + 1) == nargs)
34818 if (!match)
34820 if (icode == CODE_FOR_lwp_lwpvalsi3
34821 || icode == CODE_FOR_lwp_lwpinssi3
34822 || icode == CODE_FOR_lwp_lwpvaldi3
34823 || icode == CODE_FOR_lwp_lwpinsdi3)
34824 error ("the last argument must be a 32-bit immediate");
34825 else
34826 error ("the last argument must be an 8-bit immediate");
34827 return const0_rtx;
34830 else
34832 if (i == memory)
34834 /* This must be the memory operand. */
34835 op = ix86_zero_extend_to_Pmode (op);
34836 op = gen_rtx_MEM (mode, op);
34837 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
34838 on it. Try to improve it using get_pointer_alignment,
34839 and if the special builtin is one that requires strict
34840 mode alignment, also from its GET_MODE_ALIGNMENT.
34841 Failure to do so could lead to ix86_legitimate_combined_insn
34842 rejecting all changes to such insns. */
34843 unsigned int align = get_pointer_alignment (arg);
34844 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
34845 align = GET_MODE_ALIGNMENT (mode);
34846 if (MEM_ALIGN (op) < align)
34847 set_mem_align (op, align);
34849 else
34851 /* This must be a register. */
34852 if (VECTOR_MODE_P (mode))
34853 op = safe_vector_operand (op, mode);
34855 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34856 op = copy_to_mode_reg (mode, op);
34857 else
34859 op = copy_to_reg (op);
34860 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34865 args[i].op = op;
34866 args[i].mode = mode;
34869 switch (nargs)
34871 case 0:
34872 pat = GEN_FCN (icode) (target);
34873 break;
34874 case 1:
34875 pat = GEN_FCN (icode) (target, args[0].op);
34876 break;
34877 case 2:
34878 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34879 break;
34880 case 3:
34881 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34882 break;
34883 default:
34884 gcc_unreachable ();
34887 if (! pat)
34888 return 0;
34889 emit_insn (pat);
34890 return klass == store ? 0 : target;
34893 /* Return the integer constant in ARG. Constrain it to be in the range
34894 of the subparts of VEC_TYPE; issue an error if not. */
34896 static int
34897 get_element_number (tree vec_type, tree arg)
34899 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
34901 if (!tree_fits_uhwi_p (arg)
34902 || (elt = tree_to_uhwi (arg), elt > max))
34904 error ("selector must be an integer constant in the range 0..%wi", max);
34905 return 0;
34908 return elt;
34911 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34912 ix86_expand_vector_init. We DO have language-level syntax for this, in
34913 the form of (type){ init-list }. Except that since we can't place emms
34914 instructions from inside the compiler, we can't allow the use of MMX
34915 registers unless the user explicitly asks for it. So we do *not* define
34916 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
34917 we have builtins invoked by mmintrin.h that give us license to emit
34918 these sorts of instructions. */
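/* A sketch of the wrapper path, assuming the mmintrin.h definition:
     __m64 v = _mm_set_pi32 (hi, lo);
   which mmintrin.h implements roughly as
     (__m64) __builtin_ia32_vec_init_v2si (lo, hi);
   and that builtin reaches ix86_expand_vec_init_builtin below.  */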
34920 static rtx
34921 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
34923 enum machine_mode tmode = TYPE_MODE (type);
34924 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
34925 int i, n_elt = GET_MODE_NUNITS (tmode);
34926 rtvec v = rtvec_alloc (n_elt);
34928 gcc_assert (VECTOR_MODE_P (tmode));
34929 gcc_assert (call_expr_nargs (exp) == n_elt);
34931 for (i = 0; i < n_elt; ++i)
34933 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
34934 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
34937 if (!target || !register_operand (target, tmode))
34938 target = gen_reg_rtx (tmode);
34940 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
34941 return target;
34944 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34945 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
34946 had a language-level syntax for referencing vector elements. */
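/* For example, assuming the emmintrin.h definition,
     int w = _mm_extract_epi16 (v, 3);
   is roughly (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi) v, 3)
   and is expanded here.  */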
34948 static rtx
34949 ix86_expand_vec_ext_builtin (tree exp, rtx target)
34951 enum machine_mode tmode, mode0;
34952 tree arg0, arg1;
34953 int elt;
34954 rtx op0;
34956 arg0 = CALL_EXPR_ARG (exp, 0);
34957 arg1 = CALL_EXPR_ARG (exp, 1);
34959 op0 = expand_normal (arg0);
34960 elt = get_element_number (TREE_TYPE (arg0), arg1);
34962 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34963 mode0 = TYPE_MODE (TREE_TYPE (arg0));
34964 gcc_assert (VECTOR_MODE_P (mode0));
34966 op0 = force_reg (mode0, op0);
34968 if (optimize || !target || !register_operand (target, tmode))
34969 target = gen_reg_rtx (tmode);
34971 ix86_expand_vector_extract (true, target, op0, elt);
34973 return target;
34976 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34977 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
34978 a language-level syntax for referencing vector elements. */
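/* For example, assuming the emmintrin.h definition,
     __m128i r = _mm_insert_epi16 (v, x, 5);
   is roughly (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi) v, x, 5)
   and is expanded here.  */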
34980 static rtx
34981 ix86_expand_vec_set_builtin (tree exp)
34983 enum machine_mode tmode, mode1;
34984 tree arg0, arg1, arg2;
34985 int elt;
34986 rtx op0, op1, target;
34988 arg0 = CALL_EXPR_ARG (exp, 0);
34989 arg1 = CALL_EXPR_ARG (exp, 1);
34990 arg2 = CALL_EXPR_ARG (exp, 2);
34992 tmode = TYPE_MODE (TREE_TYPE (arg0));
34993 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34994 gcc_assert (VECTOR_MODE_P (tmode));
34996 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
34997 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
34998 elt = get_element_number (TREE_TYPE (arg0), arg2);
35000 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
35001 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
35003 op0 = force_reg (tmode, op0);
35004 op1 = force_reg (mode1, op1);
35006 /* OP0 is the source of these builtin functions and shouldn't be
35007 modified. Create a copy, use it and return it as target. */
35008 target = gen_reg_rtx (tmode);
35009 emit_move_insn (target, op0);
35010 ix86_expand_vector_set (true, target, op1, elt);
35012 return target;
35015 /* Expand an expression EXP that calls a built-in function,
35016 with result going to TARGET if that's convenient
35017 (and in mode MODE if that's convenient).
35018 SUBTARGET may be used as the target for computing one of EXP's operands.
35019 IGNORE is nonzero if the value is to be ignored. */
35021 static rtx
35022 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
35023 enum machine_mode mode, int ignore)
35025 const struct builtin_description *d;
35026 size_t i;
35027 enum insn_code icode;
35028 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
35029 tree arg0, arg1, arg2, arg3, arg4;
35030 rtx op0, op1, op2, op3, op4, pat, insn;
35031 enum machine_mode mode0, mode1, mode2, mode3, mode4;
35032 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
35034 /* For CPU builtins that can be folded, fold first and expand the fold. */
35035 switch (fcode)
35037 case IX86_BUILTIN_CPU_INIT:
35039 /* Make it call __cpu_indicator_init in libgcc. */
35040 tree call_expr, fndecl, type;
35041 type = build_function_type_list (integer_type_node, NULL_TREE);
35042 fndecl = build_fn_decl ("__cpu_indicator_init", type);
35043 call_expr = build_call_expr (fndecl, 0);
35044 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
35046 case IX86_BUILTIN_CPU_IS:
35047 case IX86_BUILTIN_CPU_SUPPORTS:
35049 tree arg0 = CALL_EXPR_ARG (exp, 0);
35050 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
35051 gcc_assert (fold_expr != NULL_TREE);
35052 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
35056 /* Determine whether the builtin function is available under the current ISA.
35057 Originally the builtin was not created if it wasn't applicable to the
35058 current ISA based on the command line switches. With function specific
35059 options, we need to check in the context of the function making the call
35060 whether it is supported. */
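/* A sketch of the situation this check covers, assuming the attribute
   spelling:
     __attribute__ ((target ("avx2")))
     __m256i f (__m256i a, __m256i b) { return _mm256_add_epi32 (a, b); }
   The AVX2 builtin is valid inside F even when the translation unit is
   compiled without -mavx2, so the test must use the caller's ISA flags.  */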
35061 if (ix86_builtins_isa[fcode].isa
35062 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
35064 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
35065 NULL, (enum fpmath_unit) 0, false);
35067 if (!opts)
35068 error ("%qE needs unknown isa option", fndecl);
35069 else
35071 gcc_assert (opts != NULL);
35072 error ("%qE needs isa option %s", fndecl, opts);
35073 free (opts);
35075 return const0_rtx;
35078 switch (fcode)
35080 case IX86_BUILTIN_MASKMOVQ:
35081 case IX86_BUILTIN_MASKMOVDQU:
35082 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35083 ? CODE_FOR_mmx_maskmovq
35084 : CODE_FOR_sse2_maskmovdqu);
35085 /* Note the arg order is different from the operand order. */
35086 arg1 = CALL_EXPR_ARG (exp, 0);
35087 arg2 = CALL_EXPR_ARG (exp, 1);
35088 arg0 = CALL_EXPR_ARG (exp, 2);
35089 op0 = expand_normal (arg0);
35090 op1 = expand_normal (arg1);
35091 op2 = expand_normal (arg2);
35092 mode0 = insn_data[icode].operand[0].mode;
35093 mode1 = insn_data[icode].operand[1].mode;
35094 mode2 = insn_data[icode].operand[2].mode;
35096 op0 = ix86_zero_extend_to_Pmode (op0);
35097 op0 = gen_rtx_MEM (mode1, op0);
35099 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35100 op0 = copy_to_mode_reg (mode0, op0);
35101 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35102 op1 = copy_to_mode_reg (mode1, op1);
35103 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35104 op2 = copy_to_mode_reg (mode2, op2);
35105 pat = GEN_FCN (icode) (op0, op1, op2);
35106 if (! pat)
35107 return 0;
35108 emit_insn (pat);
35109 return 0;
35111 case IX86_BUILTIN_LDMXCSR:
35112 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35113 target = assign_386_stack_local (SImode, SLOT_TEMP);
35114 emit_move_insn (target, op0);
35115 emit_insn (gen_sse_ldmxcsr (target));
35116 return 0;
35118 case IX86_BUILTIN_STMXCSR:
35119 target = assign_386_stack_local (SImode, SLOT_TEMP);
35120 emit_insn (gen_sse_stmxcsr (target));
35121 return copy_to_mode_reg (SImode, target);
35123 case IX86_BUILTIN_CLFLUSH:
35124 arg0 = CALL_EXPR_ARG (exp, 0);
35125 op0 = expand_normal (arg0);
35126 icode = CODE_FOR_sse2_clflush;
35127 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35128 op0 = ix86_zero_extend_to_Pmode (op0);
35130 emit_insn (gen_sse2_clflush (op0));
35131 return 0;
35133 case IX86_BUILTIN_CLFLUSHOPT:
35134 arg0 = CALL_EXPR_ARG (exp, 0);
35135 op0 = expand_normal (arg0);
35136 icode = CODE_FOR_clflushopt;
35137 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35138 op0 = ix86_zero_extend_to_Pmode (op0);
35140 emit_insn (gen_clflushopt (op0));
35141 return 0;
35143 case IX86_BUILTIN_MONITOR:
35144 arg0 = CALL_EXPR_ARG (exp, 0);
35145 arg1 = CALL_EXPR_ARG (exp, 1);
35146 arg2 = CALL_EXPR_ARG (exp, 2);
35147 op0 = expand_normal (arg0);
35148 op1 = expand_normal (arg1);
35149 op2 = expand_normal (arg2);
35150 if (!REG_P (op0))
35151 op0 = ix86_zero_extend_to_Pmode (op0);
35152 if (!REG_P (op1))
35153 op1 = copy_to_mode_reg (SImode, op1);
35154 if (!REG_P (op2))
35155 op2 = copy_to_mode_reg (SImode, op2);
35156 emit_insn (ix86_gen_monitor (op0, op1, op2));
35157 return 0;
35159 case IX86_BUILTIN_MWAIT:
35160 arg0 = CALL_EXPR_ARG (exp, 0);
35161 arg1 = CALL_EXPR_ARG (exp, 1);
35162 op0 = expand_normal (arg0);
35163 op1 = expand_normal (arg1);
35164 if (!REG_P (op0))
35165 op0 = copy_to_mode_reg (SImode, op0);
35166 if (!REG_P (op1))
35167 op1 = copy_to_mode_reg (SImode, op1);
35168 emit_insn (gen_sse3_mwait (op0, op1));
35169 return 0;
35171 case IX86_BUILTIN_VEC_INIT_V2SI:
35172 case IX86_BUILTIN_VEC_INIT_V4HI:
35173 case IX86_BUILTIN_VEC_INIT_V8QI:
35174 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35176 case IX86_BUILTIN_VEC_EXT_V2DF:
35177 case IX86_BUILTIN_VEC_EXT_V2DI:
35178 case IX86_BUILTIN_VEC_EXT_V4SF:
35179 case IX86_BUILTIN_VEC_EXT_V4SI:
35180 case IX86_BUILTIN_VEC_EXT_V8HI:
35181 case IX86_BUILTIN_VEC_EXT_V2SI:
35182 case IX86_BUILTIN_VEC_EXT_V4HI:
35183 case IX86_BUILTIN_VEC_EXT_V16QI:
35184 return ix86_expand_vec_ext_builtin (exp, target);
35186 case IX86_BUILTIN_VEC_SET_V2DI:
35187 case IX86_BUILTIN_VEC_SET_V4SF:
35188 case IX86_BUILTIN_VEC_SET_V4SI:
35189 case IX86_BUILTIN_VEC_SET_V8HI:
35190 case IX86_BUILTIN_VEC_SET_V4HI:
35191 case IX86_BUILTIN_VEC_SET_V16QI:
35192 return ix86_expand_vec_set_builtin (exp);
35194 case IX86_BUILTIN_INFQ:
35195 case IX86_BUILTIN_HUGE_VALQ:
35197 REAL_VALUE_TYPE inf;
35198 rtx tmp;
35200 real_inf (&inf);
35201 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
35203 tmp = validize_mem (force_const_mem (mode, tmp));
35205 if (target == 0)
35206 target = gen_reg_rtx (mode);
35208 emit_move_insn (target, tmp);
35209 return target;
35212 case IX86_BUILTIN_RDPMC:
35213 case IX86_BUILTIN_RDTSC:
35214 case IX86_BUILTIN_RDTSCP:
35216 op0 = gen_reg_rtx (DImode);
35217 op1 = gen_reg_rtx (DImode);
35219 if (fcode == IX86_BUILTIN_RDPMC)
35221 arg0 = CALL_EXPR_ARG (exp, 0);
35222 op2 = expand_normal (arg0);
35223 if (!register_operand (op2, SImode))
35224 op2 = copy_to_mode_reg (SImode, op2);
35226 insn = (TARGET_64BIT
35227 ? gen_rdpmc_rex64 (op0, op1, op2)
35228 : gen_rdpmc (op0, op2));
35229 emit_insn (insn);
35231 else if (fcode == IX86_BUILTIN_RDTSC)
35233 insn = (TARGET_64BIT
35234 ? gen_rdtsc_rex64 (op0, op1)
35235 : gen_rdtsc (op0));
35236 emit_insn (insn);
35238 else
35240 op2 = gen_reg_rtx (SImode);
35242 insn = (TARGET_64BIT
35243 ? gen_rdtscp_rex64 (op0, op1, op2)
35244 : gen_rdtscp (op0, op2));
35245 emit_insn (insn);
35247 arg0 = CALL_EXPR_ARG (exp, 0);
35248 op4 = expand_normal (arg0);
35249 if (!address_operand (op4, VOIDmode))
35251 op4 = convert_memory_address (Pmode, op4);
35252 op4 = copy_addr_to_reg (op4);
35254 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35257 if (target == 0)
35259 /* mode is VOIDmode if __builtin_rd* has been called
35260 without lhs. */
35261 if (mode == VOIDmode)
35262 return target;
35263 target = gen_reg_rtx (mode);
35266 if (TARGET_64BIT)
35268 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35269 op1, 1, OPTAB_DIRECT);
35270 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35271 op0, 1, OPTAB_DIRECT);
35274 emit_move_insn (target, op0);
35275 return target;
35277 case IX86_BUILTIN_FXSAVE:
35278 case IX86_BUILTIN_FXRSTOR:
35279 case IX86_BUILTIN_FXSAVE64:
35280 case IX86_BUILTIN_FXRSTOR64:
35281 case IX86_BUILTIN_FNSTENV:
35282 case IX86_BUILTIN_FLDENV:
35283 case IX86_BUILTIN_FNSTSW:
35284 mode0 = BLKmode;
35285 switch (fcode)
35287 case IX86_BUILTIN_FXSAVE:
35288 icode = CODE_FOR_fxsave;
35289 break;
35290 case IX86_BUILTIN_FXRSTOR:
35291 icode = CODE_FOR_fxrstor;
35292 break;
35293 case IX86_BUILTIN_FXSAVE64:
35294 icode = CODE_FOR_fxsave64;
35295 break;
35296 case IX86_BUILTIN_FXRSTOR64:
35297 icode = CODE_FOR_fxrstor64;
35298 break;
35299 case IX86_BUILTIN_FNSTENV:
35300 icode = CODE_FOR_fnstenv;
35301 break;
35302 case IX86_BUILTIN_FLDENV:
35303 icode = CODE_FOR_fldenv;
35304 break;
35305 case IX86_BUILTIN_FNSTSW:
35306 icode = CODE_FOR_fnstsw;
35307 mode0 = HImode;
35308 break;
35309 default:
35310 gcc_unreachable ();
35313 arg0 = CALL_EXPR_ARG (exp, 0);
35314 op0 = expand_normal (arg0);
35316 if (!address_operand (op0, VOIDmode))
35318 op0 = convert_memory_address (Pmode, op0);
35319 op0 = copy_addr_to_reg (op0);
35321 op0 = gen_rtx_MEM (mode0, op0);
35323 pat = GEN_FCN (icode) (op0);
35324 if (pat)
35325 emit_insn (pat);
35326 return 0;
35328 case IX86_BUILTIN_XSAVE:
35329 case IX86_BUILTIN_XRSTOR:
35330 case IX86_BUILTIN_XSAVE64:
35331 case IX86_BUILTIN_XRSTOR64:
35332 case IX86_BUILTIN_XSAVEOPT:
35333 case IX86_BUILTIN_XSAVEOPT64:
35334 case IX86_BUILTIN_XSAVES:
35335 case IX86_BUILTIN_XRSTORS:
35336 case IX86_BUILTIN_XSAVES64:
35337 case IX86_BUILTIN_XRSTORS64:
35338 case IX86_BUILTIN_XSAVEC:
35339 case IX86_BUILTIN_XSAVEC64:
35340 arg0 = CALL_EXPR_ARG (exp, 0);
35341 arg1 = CALL_EXPR_ARG (exp, 1);
35342 op0 = expand_normal (arg0);
35343 op1 = expand_normal (arg1);
35345 if (!address_operand (op0, VOIDmode))
35347 op0 = convert_memory_address (Pmode, op0);
35348 op0 = copy_addr_to_reg (op0);
35350 op0 = gen_rtx_MEM (BLKmode, op0);
35352 op1 = force_reg (DImode, op1);
35354 if (TARGET_64BIT)
35356 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35357 NULL, 1, OPTAB_DIRECT);
35358 switch (fcode)
35360 case IX86_BUILTIN_XSAVE:
35361 icode = CODE_FOR_xsave_rex64;
35362 break;
35363 case IX86_BUILTIN_XRSTOR:
35364 icode = CODE_FOR_xrstor_rex64;
35365 break;
35366 case IX86_BUILTIN_XSAVE64:
35367 icode = CODE_FOR_xsave64;
35368 break;
35369 case IX86_BUILTIN_XRSTOR64:
35370 icode = CODE_FOR_xrstor64;
35371 break;
35372 case IX86_BUILTIN_XSAVEOPT:
35373 icode = CODE_FOR_xsaveopt_rex64;
35374 break;
35375 case IX86_BUILTIN_XSAVEOPT64:
35376 icode = CODE_FOR_xsaveopt64;
35377 break;
35378 case IX86_BUILTIN_XSAVES:
35379 icode = CODE_FOR_xsaves_rex64;
35380 break;
35381 case IX86_BUILTIN_XRSTORS:
35382 icode = CODE_FOR_xrstors_rex64;
35383 break;
35384 case IX86_BUILTIN_XSAVES64:
35385 icode = CODE_FOR_xsaves64;
35386 break;
35387 case IX86_BUILTIN_XRSTORS64:
35388 icode = CODE_FOR_xrstors64;
35389 break;
35390 case IX86_BUILTIN_XSAVEC:
35391 icode = CODE_FOR_xsavec_rex64;
35392 break;
35393 case IX86_BUILTIN_XSAVEC64:
35394 icode = CODE_FOR_xsavec64;
35395 break;
35396 default:
35397 gcc_unreachable ();
35400 op2 = gen_lowpart (SImode, op2);
35401 op1 = gen_lowpart (SImode, op1);
35402 pat = GEN_FCN (icode) (op0, op1, op2);
35404 else
35406 switch (fcode)
35408 case IX86_BUILTIN_XSAVE:
35409 icode = CODE_FOR_xsave;
35410 break;
35411 case IX86_BUILTIN_XRSTOR:
35412 icode = CODE_FOR_xrstor;
35413 break;
35414 case IX86_BUILTIN_XSAVEOPT:
35415 icode = CODE_FOR_xsaveopt;
35416 break;
35417 case IX86_BUILTIN_XSAVES:
35418 icode = CODE_FOR_xsaves;
35419 break;
35420 case IX86_BUILTIN_XRSTORS:
35421 icode = CODE_FOR_xrstors;
35422 break;
35423 case IX86_BUILTIN_XSAVEC:
35424 icode = CODE_FOR_xsavec;
35425 break;
35426 default:
35427 gcc_unreachable ();
35429 pat = GEN_FCN (icode) (op0, op1);
35432 if (pat)
35433 emit_insn (pat);
35434 return 0;
35436 case IX86_BUILTIN_LLWPCB:
35437 arg0 = CALL_EXPR_ARG (exp, 0);
35438 op0 = expand_normal (arg0);
35439 icode = CODE_FOR_lwp_llwpcb;
35440 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35441 op0 = ix86_zero_extend_to_Pmode (op0);
35442 emit_insn (gen_lwp_llwpcb (op0));
35443 return 0;
35445 case IX86_BUILTIN_SLWPCB:
35446 icode = CODE_FOR_lwp_slwpcb;
35447 if (!target
35448 || !insn_data[icode].operand[0].predicate (target, Pmode))
35449 target = gen_reg_rtx (Pmode);
35450 emit_insn (gen_lwp_slwpcb (target));
35451 return target;
35453 case IX86_BUILTIN_BEXTRI32:
35454 case IX86_BUILTIN_BEXTRI64:
35455 arg0 = CALL_EXPR_ARG (exp, 0);
35456 arg1 = CALL_EXPR_ARG (exp, 1);
35457 op0 = expand_normal (arg0);
35458 op1 = expand_normal (arg1);
35459 icode = (fcode == IX86_BUILTIN_BEXTRI32
35460 ? CODE_FOR_tbm_bextri_si
35461 : CODE_FOR_tbm_bextri_di);
35462 if (!CONST_INT_P (op1))
35464 error ("last argument must be an immediate");
35465 return const0_rtx;
35467 else
35469 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35470 unsigned char lsb_index = INTVAL (op1) & 0xFF;
35471 op1 = GEN_INT (length);
35472 op2 = GEN_INT (lsb_index);
35473 pat = GEN_FCN (icode) (target, op0, op1, op2);
35474 if (pat)
35475 emit_insn (pat);
35476 return target;
35479 case IX86_BUILTIN_RDRAND16_STEP:
35480 icode = CODE_FOR_rdrandhi_1;
35481 mode0 = HImode;
35482 goto rdrand_step;
35484 case IX86_BUILTIN_RDRAND32_STEP:
35485 icode = CODE_FOR_rdrandsi_1;
35486 mode0 = SImode;
35487 goto rdrand_step;
35489 case IX86_BUILTIN_RDRAND64_STEP:
35490 icode = CODE_FOR_rdranddi_1;
35491 mode0 = DImode;
35493 rdrand_step:
35494 op0 = gen_reg_rtx (mode0);
35495 emit_insn (GEN_FCN (icode) (op0));
35497 arg0 = CALL_EXPR_ARG (exp, 0);
35498 op1 = expand_normal (arg0);
35499 if (!address_operand (op1, VOIDmode))
35501 op1 = convert_memory_address (Pmode, op1);
35502 op1 = copy_addr_to_reg (op1);
35504 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35506 op1 = gen_reg_rtx (SImode);
35507 emit_move_insn (op1, CONST1_RTX (SImode));
35509 /* Emit SImode conditional move. */
35510 if (mode0 == HImode)
35512 op2 = gen_reg_rtx (SImode);
35513 emit_insn (gen_zero_extendhisi2 (op2, op0));
35515 else if (mode0 == SImode)
35516 op2 = op0;
35517 else
35518 op2 = gen_rtx_SUBREG (SImode, op0, 0);
35520 if (target == 0
35521 || !register_operand (target, SImode))
35522 target = gen_reg_rtx (SImode);
35524 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
35525 const0_rtx);
35526 emit_insn (gen_rtx_SET (VOIDmode, target,
35527 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
35528 return target;
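/* A sketch of the user-level protocol implemented above, assuming the
   immintrin.h spelling:
     unsigned int val;
     int ok = _rdrand32_step (&val);
   OK is 1 when the hardware set the carry flag (VAL holds fresh random
   bits) and 0 otherwise; the conditional move above produces that 0/1.  */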
35530 case IX86_BUILTIN_RDSEED16_STEP:
35531 icode = CODE_FOR_rdseedhi_1;
35532 mode0 = HImode;
35533 goto rdseed_step;
35535 case IX86_BUILTIN_RDSEED32_STEP:
35536 icode = CODE_FOR_rdseedsi_1;
35537 mode0 = SImode;
35538 goto rdseed_step;
35540 case IX86_BUILTIN_RDSEED64_STEP:
35541 icode = CODE_FOR_rdseeddi_1;
35542 mode0 = DImode;
35544 rdseed_step:
35545 op0 = gen_reg_rtx (mode0);
35546 emit_insn (GEN_FCN (icode) (op0));
35548 arg0 = CALL_EXPR_ARG (exp, 0);
35549 op1 = expand_normal (arg0);
35550 if (!address_operand (op1, VOIDmode))
35552 op1 = convert_memory_address (Pmode, op1);
35553 op1 = copy_addr_to_reg (op1);
35555 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35557 op2 = gen_reg_rtx (QImode);
35559 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
35560 const0_rtx);
35561 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
35563 if (target == 0
35564 || !register_operand (target, SImode))
35565 target = gen_reg_rtx (SImode);
35567 emit_insn (gen_zero_extendqisi2 (target, op2));
35568 return target;
35570 case IX86_BUILTIN_ADDCARRYX32:
35571 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
35572 mode0 = SImode;
35573 goto addcarryx;
35575 case IX86_BUILTIN_ADDCARRYX64:
35576 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
35577 mode0 = DImode;
35579 addcarryx:
35580 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
35581 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
35582 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
35583 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
35585 op0 = gen_reg_rtx (QImode);
35587 /* Generate CF from input operand. */
35588 op1 = expand_normal (arg0);
35589 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
35590 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
35592 /* Gen ADCX instruction to compute X+Y+CF. */
35593 op2 = expand_normal (arg1);
35594 op3 = expand_normal (arg2);
35596 if (!REG_P (op2))
35597 op2 = copy_to_mode_reg (mode0, op2);
35598 if (!REG_P (op3))
35599 op3 = copy_to_mode_reg (mode0, op3);
35601 op0 = gen_reg_rtx (mode0);
35603 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
35604 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
35605 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
35607 /* Store the result. */
35608 op4 = expand_normal (arg3);
35609 if (!address_operand (op4, VOIDmode))
35611 op4 = convert_memory_address (Pmode, op4);
35612 op4 = copy_addr_to_reg (op4);
35614 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
35616 /* Return current CF value. */
35617 if (target == 0)
35618 target = gen_reg_rtx (QImode);
35620 PUT_MODE (pat, QImode);
35621 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
35622 return target;
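/* A sketch of the corresponding intrinsic, assuming the adxintrin.h
   spelling:
     unsigned int sum;
     unsigned char c_out = _addcarryx_u32 (c_in, x, y, &sum);
   C_IN is turned into CF by the addqi3_cc above, the adc/adcx pattern adds
   X + Y + CF into SUM, and the returned value is the new carry.  */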
35624 case IX86_BUILTIN_READ_FLAGS:
35625 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
35627 if (optimize
35628 || target == NULL_RTX
35629 || !nonimmediate_operand (target, word_mode)
35630 || GET_MODE (target) != word_mode)
35631 target = gen_reg_rtx (word_mode);
35633 emit_insn (gen_pop (target));
35634 return target;
35636 case IX86_BUILTIN_WRITE_FLAGS:
35638 arg0 = CALL_EXPR_ARG (exp, 0);
35639 op0 = expand_normal (arg0);
35640 if (!general_no_elim_operand (op0, word_mode))
35641 op0 = copy_to_mode_reg (word_mode, op0);
35643 emit_insn (gen_push (op0));
35644 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
35645 return 0;
35647 case IX86_BUILTIN_KORTESTC16:
35648 icode = CODE_FOR_kortestchi;
35649 mode0 = HImode;
35650 mode1 = CCCmode;
35651 goto kortest;
35653 case IX86_BUILTIN_KORTESTZ16:
35654 icode = CODE_FOR_kortestzhi;
35655 mode0 = HImode;
35656 mode1 = CCZmode;
35658 kortest:
35659 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
35660 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
35661 op0 = expand_normal (arg0);
35662 op1 = expand_normal (arg1);
35664 op0 = copy_to_reg (op0);
35665 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35666 op1 = copy_to_reg (op1);
35667 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
35669 target = gen_reg_rtx (QImode);
35670 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
35672 /* Emit kortest. */
35673 emit_insn (GEN_FCN (icode) (op0, op1));
35674 /* And use setcc to return result from flags. */
35675 ix86_expand_setcc (target, EQ,
35676 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
35677 return target;
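/* A sketch of the intrinsics handled here, assuming the avx512fintrin.h
   spellings:
     int all_ones  = _mm512_kortestc (m1, m2);
     int all_zeros = _mm512_kortestz (m1, m2);
   kortestw sets CF when the OR of the masks is all ones and ZF when it is
   zero; the setcc above converts the selected flag into the result.  */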
35679 case IX86_BUILTIN_GATHERSIV2DF:
35680 icode = CODE_FOR_avx2_gathersiv2df;
35681 goto gather_gen;
35682 case IX86_BUILTIN_GATHERSIV4DF:
35683 icode = CODE_FOR_avx2_gathersiv4df;
35684 goto gather_gen;
35685 case IX86_BUILTIN_GATHERDIV2DF:
35686 icode = CODE_FOR_avx2_gatherdiv2df;
35687 goto gather_gen;
35688 case IX86_BUILTIN_GATHERDIV4DF:
35689 icode = CODE_FOR_avx2_gatherdiv4df;
35690 goto gather_gen;
35691 case IX86_BUILTIN_GATHERSIV4SF:
35692 icode = CODE_FOR_avx2_gathersiv4sf;
35693 goto gather_gen;
35694 case IX86_BUILTIN_GATHERSIV8SF:
35695 icode = CODE_FOR_avx2_gathersiv8sf;
35696 goto gather_gen;
35697 case IX86_BUILTIN_GATHERDIV4SF:
35698 icode = CODE_FOR_avx2_gatherdiv4sf;
35699 goto gather_gen;
35700 case IX86_BUILTIN_GATHERDIV8SF:
35701 icode = CODE_FOR_avx2_gatherdiv8sf;
35702 goto gather_gen;
35703 case IX86_BUILTIN_GATHERSIV2DI:
35704 icode = CODE_FOR_avx2_gathersiv2di;
35705 goto gather_gen;
35706 case IX86_BUILTIN_GATHERSIV4DI:
35707 icode = CODE_FOR_avx2_gathersiv4di;
35708 goto gather_gen;
35709 case IX86_BUILTIN_GATHERDIV2DI:
35710 icode = CODE_FOR_avx2_gatherdiv2di;
35711 goto gather_gen;
35712 case IX86_BUILTIN_GATHERDIV4DI:
35713 icode = CODE_FOR_avx2_gatherdiv4di;
35714 goto gather_gen;
35715 case IX86_BUILTIN_GATHERSIV4SI:
35716 icode = CODE_FOR_avx2_gathersiv4si;
35717 goto gather_gen;
35718 case IX86_BUILTIN_GATHERSIV8SI:
35719 icode = CODE_FOR_avx2_gathersiv8si;
35720 goto gather_gen;
35721 case IX86_BUILTIN_GATHERDIV4SI:
35722 icode = CODE_FOR_avx2_gatherdiv4si;
35723 goto gather_gen;
35724 case IX86_BUILTIN_GATHERDIV8SI:
35725 icode = CODE_FOR_avx2_gatherdiv8si;
35726 goto gather_gen;
35727 case IX86_BUILTIN_GATHERALTSIV4DF:
35728 icode = CODE_FOR_avx2_gathersiv4df;
35729 goto gather_gen;
35730 case IX86_BUILTIN_GATHERALTDIV8SF:
35731 icode = CODE_FOR_avx2_gatherdiv8sf;
35732 goto gather_gen;
35733 case IX86_BUILTIN_GATHERALTSIV4DI:
35734 icode = CODE_FOR_avx2_gathersiv4di;
35735 goto gather_gen;
35736 case IX86_BUILTIN_GATHERALTDIV8SI:
35737 icode = CODE_FOR_avx2_gatherdiv8si;
35738 goto gather_gen;
35739 case IX86_BUILTIN_GATHER3SIV16SF:
35740 icode = CODE_FOR_avx512f_gathersiv16sf;
35741 goto gather_gen;
35742 case IX86_BUILTIN_GATHER3SIV8DF:
35743 icode = CODE_FOR_avx512f_gathersiv8df;
35744 goto gather_gen;
35745 case IX86_BUILTIN_GATHER3DIV16SF:
35746 icode = CODE_FOR_avx512f_gatherdiv16sf;
35747 goto gather_gen;
35748 case IX86_BUILTIN_GATHER3DIV8DF:
35749 icode = CODE_FOR_avx512f_gatherdiv8df;
35750 goto gather_gen;
35751 case IX86_BUILTIN_GATHER3SIV16SI:
35752 icode = CODE_FOR_avx512f_gathersiv16si;
35753 goto gather_gen;
35754 case IX86_BUILTIN_GATHER3SIV8DI:
35755 icode = CODE_FOR_avx512f_gathersiv8di;
35756 goto gather_gen;
35757 case IX86_BUILTIN_GATHER3DIV16SI:
35758 icode = CODE_FOR_avx512f_gatherdiv16si;
35759 goto gather_gen;
35760 case IX86_BUILTIN_GATHER3DIV8DI:
35761 icode = CODE_FOR_avx512f_gatherdiv8di;
35762 goto gather_gen;
35763 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35764 icode = CODE_FOR_avx512f_gathersiv8df;
35765 goto gather_gen;
35766 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35767 icode = CODE_FOR_avx512f_gatherdiv16sf;
35768 goto gather_gen;
35769 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35770 icode = CODE_FOR_avx512f_gathersiv8di;
35771 goto gather_gen;
35772 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35773 icode = CODE_FOR_avx512f_gatherdiv16si;
35774 goto gather_gen;
35775 case IX86_BUILTIN_SCATTERSIV16SF:
35776 icode = CODE_FOR_avx512f_scattersiv16sf;
35777 goto scatter_gen;
35778 case IX86_BUILTIN_SCATTERSIV8DF:
35779 icode = CODE_FOR_avx512f_scattersiv8df;
35780 goto scatter_gen;
35781 case IX86_BUILTIN_SCATTERDIV16SF:
35782 icode = CODE_FOR_avx512f_scatterdiv16sf;
35783 goto scatter_gen;
35784 case IX86_BUILTIN_SCATTERDIV8DF:
35785 icode = CODE_FOR_avx512f_scatterdiv8df;
35786 goto scatter_gen;
35787 case IX86_BUILTIN_SCATTERSIV16SI:
35788 icode = CODE_FOR_avx512f_scattersiv16si;
35789 goto scatter_gen;
35790 case IX86_BUILTIN_SCATTERSIV8DI:
35791 icode = CODE_FOR_avx512f_scattersiv8di;
35792 goto scatter_gen;
35793 case IX86_BUILTIN_SCATTERDIV16SI:
35794 icode = CODE_FOR_avx512f_scatterdiv16si;
35795 goto scatter_gen;
35796 case IX86_BUILTIN_SCATTERDIV8DI:
35797 icode = CODE_FOR_avx512f_scatterdiv8di;
35798 goto scatter_gen;
35800 case IX86_BUILTIN_GATHERPFDPD:
35801 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
35802 goto vec_prefetch_gen;
35803 case IX86_BUILTIN_GATHERPFDPS:
35804 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
35805 goto vec_prefetch_gen;
35806 case IX86_BUILTIN_GATHERPFQPD:
35807 icode = CODE_FOR_avx512pf_gatherpfv8didf;
35808 goto vec_prefetch_gen;
35809 case IX86_BUILTIN_GATHERPFQPS:
35810 icode = CODE_FOR_avx512pf_gatherpfv8disf;
35811 goto vec_prefetch_gen;
35812 case IX86_BUILTIN_SCATTERPFDPD:
35813 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
35814 goto vec_prefetch_gen;
35815 case IX86_BUILTIN_SCATTERPFDPS:
35816 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
35817 goto vec_prefetch_gen;
35818 case IX86_BUILTIN_SCATTERPFQPD:
35819 icode = CODE_FOR_avx512pf_scatterpfv8didf;
35820 goto vec_prefetch_gen;
35821 case IX86_BUILTIN_SCATTERPFQPS:
35822 icode = CODE_FOR_avx512pf_scatterpfv8disf;
35823 goto vec_prefetch_gen;
35825 gather_gen:
35826 rtx half;
35827 rtx (*gen) (rtx, rtx);
35829 arg0 = CALL_EXPR_ARG (exp, 0);
35830 arg1 = CALL_EXPR_ARG (exp, 1);
35831 arg2 = CALL_EXPR_ARG (exp, 2);
35832 arg3 = CALL_EXPR_ARG (exp, 3);
35833 arg4 = CALL_EXPR_ARG (exp, 4);
35834 op0 = expand_normal (arg0);
35835 op1 = expand_normal (arg1);
35836 op2 = expand_normal (arg2);
35837 op3 = expand_normal (arg3);
35838 op4 = expand_normal (arg4);
35839 /* Note the arg order is different from the operand order. */
35840 mode0 = insn_data[icode].operand[1].mode;
35841 mode2 = insn_data[icode].operand[3].mode;
35842 mode3 = insn_data[icode].operand[4].mode;
35843 mode4 = insn_data[icode].operand[5].mode;
35845 if (target == NULL_RTX
35846 || GET_MODE (target) != insn_data[icode].operand[0].mode
35847 || !insn_data[icode].operand[0].predicate (target,
35848 GET_MODE (target)))
35849 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
35850 else
35851 subtarget = target;
35853 switch (fcode)
35855 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35856 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35857 half = gen_reg_rtx (V8SImode);
35858 if (!nonimmediate_operand (op2, V16SImode))
35859 op2 = copy_to_mode_reg (V16SImode, op2);
35860 emit_insn (gen_vec_extract_lo_v16si (half, op2));
35861 op2 = half;
35862 break;
35863 case IX86_BUILTIN_GATHERALTSIV4DF:
35864 case IX86_BUILTIN_GATHERALTSIV4DI:
35865 half = gen_reg_rtx (V4SImode);
35866 if (!nonimmediate_operand (op2, V8SImode))
35867 op2 = copy_to_mode_reg (V8SImode, op2);
35868 emit_insn (gen_vec_extract_lo_v8si (half, op2));
35869 op2 = half;
35870 break;
35871 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35872 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35873 half = gen_reg_rtx (mode0);
35874 if (mode0 == V8SFmode)
35875 gen = gen_vec_extract_lo_v16sf;
35876 else
35877 gen = gen_vec_extract_lo_v16si;
35878 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35879 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35880 emit_insn (gen (half, op0));
35881 op0 = half;
35882 if (GET_MODE (op3) != VOIDmode)
35884 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35885 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35886 emit_insn (gen (half, op3));
35887 op3 = half;
35889 break;
35890 case IX86_BUILTIN_GATHERALTDIV8SF:
35891 case IX86_BUILTIN_GATHERALTDIV8SI:
35892 half = gen_reg_rtx (mode0);
35893 if (mode0 == V4SFmode)
35894 gen = gen_vec_extract_lo_v8sf;
35895 else
35896 gen = gen_vec_extract_lo_v8si;
35897 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35898 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35899 emit_insn (gen (half, op0));
35900 op0 = half;
35901 if (GET_MODE (op3) != VOIDmode)
35903 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35904 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35905 emit_insn (gen (half, op3));
35906 op3 = half;
35908 break;
35909 default:
35910 break;
35913 /* Force the memory operand to use only a base register here. We
35914 don't want to do this to the memory operands of other builtin
35915 functions. */
35916 op1 = ix86_zero_extend_to_Pmode (op1);
35918 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35919 op0 = copy_to_mode_reg (mode0, op0);
35920 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
35921 op1 = copy_to_mode_reg (Pmode, op1);
35922 if (!insn_data[icode].operand[3].predicate (op2, mode2))
35923 op2 = copy_to_mode_reg (mode2, op2);
35924 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
35926 if (!insn_data[icode].operand[4].predicate (op3, mode3))
35927 op3 = copy_to_mode_reg (mode3, op3);
35929 else
35931 op3 = copy_to_reg (op3);
35932 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
35934 if (!insn_data[icode].operand[5].predicate (op4, mode4))
35936 error ("the last argument must be scale 1, 2, 4, 8");
35937 return const0_rtx;
35940 /* Optimize. If mask is known to have all high bits set,
35941 replace op0 with pc_rtx to signal that the instruction
35942 overwrites the whole destination and doesn't use its
35943 previous contents. */
35944 if (optimize)
35946 if (TREE_CODE (arg3) == INTEGER_CST)
35948 if (integer_all_onesp (arg3))
35949 op0 = pc_rtx;
35951 else if (TREE_CODE (arg3) == VECTOR_CST)
35953 unsigned int negative = 0;
35954 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
35956 tree cst = VECTOR_CST_ELT (arg3, i);
35957 if (TREE_CODE (cst) == INTEGER_CST
35958 && tree_int_cst_sign_bit (cst))
35959 negative++;
35960 else if (TREE_CODE (cst) == REAL_CST
35961 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
35962 negative++;
35964 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
35965 op0 = pc_rtx;
35967 else if (TREE_CODE (arg3) == SSA_NAME
35968 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
35970 /* Also recognize when the mask is like:
35971 __v2df src = _mm_setzero_pd ();
35972 __v2df mask = _mm_cmpeq_pd (src, src);
35974 __v8sf src = _mm256_setzero_ps ();
35975 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
35976 as that is a cheaper way to load all ones into
35977 a register than having to load a constant from
35978 memory. */
35979 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
35980 if (is_gimple_call (def_stmt))
35982 tree fndecl = gimple_call_fndecl (def_stmt);
35983 if (fndecl
35984 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35985 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
35987 case IX86_BUILTIN_CMPPD:
35988 case IX86_BUILTIN_CMPPS:
35989 case IX86_BUILTIN_CMPPD256:
35990 case IX86_BUILTIN_CMPPS256:
35991 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
35992 break;
35993 /* FALLTHRU */
35994 case IX86_BUILTIN_CMPEQPD:
35995 case IX86_BUILTIN_CMPEQPS:
35996 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
35997 && initializer_zerop (gimple_call_arg (def_stmt,
35998 1)))
35999 op0 = pc_rtx;
36000 break;
36001 default:
36002 break;
36008 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
36009 if (! pat)
36010 return const0_rtx;
36011 emit_insn (pat);
36013 switch (fcode)
36015 case IX86_BUILTIN_GATHER3DIV16SF:
36016 if (target == NULL_RTX)
36017 target = gen_reg_rtx (V8SFmode);
36018 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
36019 break;
36020 case IX86_BUILTIN_GATHER3DIV16SI:
36021 if (target == NULL_RTX)
36022 target = gen_reg_rtx (V8SImode);
36023 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
36024 break;
36025 case IX86_BUILTIN_GATHERDIV8SF:
36026 if (target == NULL_RTX)
36027 target = gen_reg_rtx (V4SFmode);
36028 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
36029 break;
36030 case IX86_BUILTIN_GATHERDIV8SI:
36031 if (target == NULL_RTX)
36032 target = gen_reg_rtx (V4SImode);
36033 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
36034 break;
36035 default:
36036 target = subtarget;
36037 break;
36039 return target;
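/* An illustrative summary of the scatter_gen path below: the base
   address operand is zero-extended to Pmode and forced into a register
   so the memory operand uses only a base register, the remaining
   operands are matched against the insn predicates, and the final
   argument must be a scale of 1, 2, 4 or 8.  */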
36041 scatter_gen:
36042 arg0 = CALL_EXPR_ARG (exp, 0);
36043 arg1 = CALL_EXPR_ARG (exp, 1);
36044 arg2 = CALL_EXPR_ARG (exp, 2);
36045 arg3 = CALL_EXPR_ARG (exp, 3);
36046 arg4 = CALL_EXPR_ARG (exp, 4);
36047 op0 = expand_normal (arg0);
36048 op1 = expand_normal (arg1);
36049 op2 = expand_normal (arg2);
36050 op3 = expand_normal (arg3);
36051 op4 = expand_normal (arg4);
36052 mode1 = insn_data[icode].operand[1].mode;
36053 mode2 = insn_data[icode].operand[2].mode;
36054 mode3 = insn_data[icode].operand[3].mode;
36055 mode4 = insn_data[icode].operand[4].mode;
36057 /* Force the memory operand to use only a base register here.  We
36058 don't want to do this for the memory operands of other builtin
36059 functions. */
36060 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
36062 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36063 op0 = copy_to_mode_reg (Pmode, op0);
36065 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
36067 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36068 op1 = copy_to_mode_reg (mode1, op1);
36070 else
36072 op1 = copy_to_reg (op1);
36073 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
36076 if (!insn_data[icode].operand[2].predicate (op2, mode2))
36077 op2 = copy_to_mode_reg (mode2, op2);
36079 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36080 op3 = copy_to_mode_reg (mode3, op3);
36082 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36084 error ("the last argument must be scale 1, 2, 4, 8");
36085 return const0_rtx;
36088 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36089 if (! pat)
36090 return const0_rtx;
36092 emit_insn (pat);
36093 return 0;
36095 vec_prefetch_gen:
36096 arg0 = CALL_EXPR_ARG (exp, 0);
36097 arg1 = CALL_EXPR_ARG (exp, 1);
36098 arg2 = CALL_EXPR_ARG (exp, 2);
36099 arg3 = CALL_EXPR_ARG (exp, 3);
36100 arg4 = CALL_EXPR_ARG (exp, 4);
36101 op0 = expand_normal (arg0);
36102 op1 = expand_normal (arg1);
36103 op2 = expand_normal (arg2);
36104 op3 = expand_normal (arg3);
36105 op4 = expand_normal (arg4);
36106 mode0 = insn_data[icode].operand[0].mode;
36107 mode1 = insn_data[icode].operand[1].mode;
36108 mode3 = insn_data[icode].operand[3].mode;
36109 mode4 = insn_data[icode].operand[4].mode;
36111 if (GET_MODE (op0) == mode0
36112 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
36114 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36115 op0 = copy_to_mode_reg (mode0, op0);
36117 else if (op0 != constm1_rtx)
36119 op0 = copy_to_reg (op0);
36120 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
36123 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36124 op1 = copy_to_mode_reg (mode1, op1);
36126 /* Force the memory operand to use only a base register here.  We
36127 don't want to do this for the memory operands of other builtin
36128 functions. */
36129 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36131 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36132 op2 = copy_to_mode_reg (Pmode, op2);
36134 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36136 error ("the fourth argument must be scale 1, 2, 4, 8");
36137 return const0_rtx;
36140 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36142 error ("incorrect hint operand");
36143 return const0_rtx;
36146 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36147 if (! pat)
36148 return const0_rtx;
36150 emit_insn (pat);
36152 return 0;
36154 case IX86_BUILTIN_XABORT:
36155 icode = CODE_FOR_xabort;
36156 arg0 = CALL_EXPR_ARG (exp, 0);
36157 op0 = expand_normal (arg0);
36158 mode0 = insn_data[icode].operand[0].mode;
36159 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36161 error ("the argument to xabort must be an 8-bit immediate");
36162 return const0_rtx;
36164 emit_insn (gen_xabort (op0));
36165 return 0;
36167 default:
36168 break;
36171 for (i = 0, d = bdesc_special_args;
36172 i < ARRAY_SIZE (bdesc_special_args);
36173 i++, d++)
36174 if (d->code == fcode)
36175 return ix86_expand_special_args_builtin (d, exp, target);
36177 for (i = 0, d = bdesc_args;
36178 i < ARRAY_SIZE (bdesc_args);
36179 i++, d++)
36180 if (d->code == fcode)
36181 switch (fcode)
36183 case IX86_BUILTIN_FABSQ:
36184 case IX86_BUILTIN_COPYSIGNQ:
36185 if (!TARGET_SSE)
36186 /* Emit a normal call if SSE isn't available. */
36187 return expand_call (exp, target, ignore);
36188 default:
36189 return ix86_expand_args_builtin (d, exp, target);
36192 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
36193 if (d->code == fcode)
36194 return ix86_expand_sse_comi (d, exp, target);
36196 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
36197 if (d->code == fcode)
36198 return ix86_expand_round_builtin (d, exp, target);
36200 for (i = 0, d = bdesc_pcmpestr;
36201 i < ARRAY_SIZE (bdesc_pcmpestr);
36202 i++, d++)
36203 if (d->code == fcode)
36204 return ix86_expand_sse_pcmpestr (d, exp, target);
36206 for (i = 0, d = bdesc_pcmpistr;
36207 i < ARRAY_SIZE (bdesc_pcmpistr);
36208 i++, d++)
36209 if (d->code == fcode)
36210 return ix86_expand_sse_pcmpistr (d, exp, target);
36212 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36213 if (d->code == fcode)
36214 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
36215 (enum ix86_builtin_func_type)
36216 d->flag, d->comparison);
36218 gcc_unreachable ();
36221 /* This returns the target-specific builtin with code CODE if
36222 current_function_decl has visibility on this builtin, which is checked
36223 using isa flags. Returns NULL_TREE otherwise. */
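/* An illustrative sketch of the check below, assuming the usual ISA
   flag encoding: a builtin whose required ISA bits (e.g.
   OPTION_MASK_ISA_AVX for the 256-bit variants) are not enabled in the
   target options of current_function_decl is treated as not visible
   and NULL_TREE is returned instead of its decl.  */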
36225 static tree ix86_get_builtin (enum ix86_builtins code)
36227 struct cl_target_option *opts;
36228 tree target_tree = NULL_TREE;
36230 /* Determine the isa flags of current_function_decl. */
36232 if (current_function_decl)
36233 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
36235 if (target_tree == NULL)
36236 target_tree = target_option_default_node;
36238 opts = TREE_TARGET_OPTION (target_tree);
36240 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
36241 return ix86_builtin_decl (code, true);
36242 else
36243 return NULL_TREE;
36246 /* Returns a function decl for a vectorized version of the builtin function
36247 with builtin function code FN and the result vector type TYPE, or NULL_TREE
36248 if it is not available. */
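/* Illustrative examples of the mapping below: a request for
   BUILT_IN_SQRT with V2DF result and argument yields the decl of
   IX86_BUILTIN_SQRTPD, and BUILT_IN_SQRTF with 16-element SFmode
   vectors yields IX86_BUILTIN_SQRTPS_NR512, provided ix86_get_builtin
   says the corresponding ISA is enabled.  */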
36250 static tree
36251 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
36252 tree type_in)
36254 enum machine_mode in_mode, out_mode;
36255 int in_n, out_n;
36256 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
36258 if (TREE_CODE (type_out) != VECTOR_TYPE
36259 || TREE_CODE (type_in) != VECTOR_TYPE
36260 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
36261 return NULL_TREE;
36263 out_mode = TYPE_MODE (TREE_TYPE (type_out));
36264 out_n = TYPE_VECTOR_SUBPARTS (type_out);
36265 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36266 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36268 switch (fn)
36270 case BUILT_IN_SQRT:
36271 if (out_mode == DFmode && in_mode == DFmode)
36273 if (out_n == 2 && in_n == 2)
36274 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
36275 else if (out_n == 4 && in_n == 4)
36276 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
36277 else if (out_n == 8 && in_n == 8)
36278 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
36280 break;
36282 case BUILT_IN_EXP2F:
36283 if (out_mode == SFmode && in_mode == SFmode)
36285 if (out_n == 16 && in_n == 16)
36286 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
36288 break;
36290 case BUILT_IN_SQRTF:
36291 if (out_mode == SFmode && in_mode == SFmode)
36293 if (out_n == 4 && in_n == 4)
36294 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
36295 else if (out_n == 8 && in_n == 8)
36296 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
36297 else if (out_n == 16 && in_n == 16)
36298 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
36300 break;
36302 case BUILT_IN_IFLOOR:
36303 case BUILT_IN_LFLOOR:
36304 case BUILT_IN_LLFLOOR:
36305 /* The round insn does not trap on denormals. */
36306 if (flag_trapping_math || !TARGET_ROUND)
36307 break;
36309 if (out_mode == SImode && in_mode == DFmode)
36311 if (out_n == 4 && in_n == 2)
36312 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
36313 else if (out_n == 8 && in_n == 4)
36314 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
36315 else if (out_n == 16 && in_n == 8)
36316 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
36318 break;
36320 case BUILT_IN_IFLOORF:
36321 case BUILT_IN_LFLOORF:
36322 case BUILT_IN_LLFLOORF:
36323 /* The round insn does not trap on denormals. */
36324 if (flag_trapping_math || !TARGET_ROUND)
36325 break;
36327 if (out_mode == SImode && in_mode == SFmode)
36329 if (out_n == 4 && in_n == 4)
36330 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
36331 else if (out_n == 8 && in_n == 8)
36332 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
36334 break;
36336 case BUILT_IN_ICEIL:
36337 case BUILT_IN_LCEIL:
36338 case BUILT_IN_LLCEIL:
36339 /* The round insn does not trap on denormals. */
36340 if (flag_trapping_math || !TARGET_ROUND)
36341 break;
36343 if (out_mode == SImode && in_mode == DFmode)
36345 if (out_n == 4 && in_n == 2)
36346 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
36347 else if (out_n == 8 && in_n == 4)
36348 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
36349 else if (out_n == 16 && in_n == 8)
36350 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
36352 break;
36354 case BUILT_IN_ICEILF:
36355 case BUILT_IN_LCEILF:
36356 case BUILT_IN_LLCEILF:
36357 /* The round insn does not trap on denormals. */
36358 if (flag_trapping_math || !TARGET_ROUND)
36359 break;
36361 if (out_mode == SImode && in_mode == SFmode)
36363 if (out_n == 4 && in_n == 4)
36364 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
36365 else if (out_n == 8 && in_n == 8)
36366 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
36368 break;
36370 case BUILT_IN_IRINT:
36371 case BUILT_IN_LRINT:
36372 case BUILT_IN_LLRINT:
36373 if (out_mode == SImode && in_mode == DFmode)
36375 if (out_n == 4 && in_n == 2)
36376 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
36377 else if (out_n == 8 && in_n == 4)
36378 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
36380 break;
36382 case BUILT_IN_IRINTF:
36383 case BUILT_IN_LRINTF:
36384 case BUILT_IN_LLRINTF:
36385 if (out_mode == SImode && in_mode == SFmode)
36387 if (out_n == 4 && in_n == 4)
36388 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
36389 else if (out_n == 8 && in_n == 8)
36390 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
36392 break;
36394 case BUILT_IN_IROUND:
36395 case BUILT_IN_LROUND:
36396 case BUILT_IN_LLROUND:
36397 /* The round insn does not trap on denormals. */
36398 if (flag_trapping_math || !TARGET_ROUND)
36399 break;
36401 if (out_mode == SImode && in_mode == DFmode)
36403 if (out_n == 4 && in_n == 2)
36404 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
36405 else if (out_n == 8 && in_n == 4)
36406 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
36407 else if (out_n == 16 && in_n == 8)
36408 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
36410 break;
36412 case BUILT_IN_IROUNDF:
36413 case BUILT_IN_LROUNDF:
36414 case BUILT_IN_LLROUNDF:
36415 /* The round insn does not trap on denormals. */
36416 if (flag_trapping_math || !TARGET_ROUND)
36417 break;
36419 if (out_mode == SImode && in_mode == SFmode)
36421 if (out_n == 4 && in_n == 4)
36422 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
36423 else if (out_n == 8 && in_n == 8)
36424 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
36426 break;
36428 case BUILT_IN_COPYSIGN:
36429 if (out_mode == DFmode && in_mode == DFmode)
36431 if (out_n == 2 && in_n == 2)
36432 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
36433 else if (out_n == 4 && in_n == 4)
36434 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
36435 else if (out_n == 8 && in_n == 8)
36436 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
36438 break;
36440 case BUILT_IN_COPYSIGNF:
36441 if (out_mode == SFmode && in_mode == SFmode)
36443 if (out_n == 4 && in_n == 4)
36444 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
36445 else if (out_n == 8 && in_n == 8)
36446 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
36447 else if (out_n == 16 && in_n == 16)
36448 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
36450 break;
36452 case BUILT_IN_FLOOR:
36453 /* The round insn does not trap on denormals. */
36454 if (flag_trapping_math || !TARGET_ROUND)
36455 break;
36457 if (out_mode == DFmode && in_mode == DFmode)
36459 if (out_n == 2 && in_n == 2)
36460 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
36461 else if (out_n == 4 && in_n == 4)
36462 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
36464 break;
36466 case BUILT_IN_FLOORF:
36467 /* The round insn does not trap on denormals. */
36468 if (flag_trapping_math || !TARGET_ROUND)
36469 break;
36471 if (out_mode == SFmode && in_mode == SFmode)
36473 if (out_n == 4 && in_n == 4)
36474 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
36475 else if (out_n == 8 && in_n == 8)
36476 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
36478 break;
36480 case BUILT_IN_CEIL:
36481 /* The round insn does not trap on denormals. */
36482 if (flag_trapping_math || !TARGET_ROUND)
36483 break;
36485 if (out_mode == DFmode && in_mode == DFmode)
36487 if (out_n == 2 && in_n == 2)
36488 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
36489 else if (out_n == 4 && in_n == 4)
36490 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
36492 break;
36494 case BUILT_IN_CEILF:
36495 /* The round insn does not trap on denormals. */
36496 if (flag_trapping_math || !TARGET_ROUND)
36497 break;
36499 if (out_mode == SFmode && in_mode == SFmode)
36501 if (out_n == 4 && in_n == 4)
36502 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
36503 else if (out_n == 8 && in_n == 8)
36504 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
36506 break;
36508 case BUILT_IN_TRUNC:
36509 /* The round insn does not trap on denormals. */
36510 if (flag_trapping_math || !TARGET_ROUND)
36511 break;
36513 if (out_mode == DFmode && in_mode == DFmode)
36515 if (out_n == 2 && in_n == 2)
36516 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
36517 else if (out_n == 4 && in_n == 4)
36518 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
36520 break;
36522 case BUILT_IN_TRUNCF:
36523 /* The round insn does not trap on denormals. */
36524 if (flag_trapping_math || !TARGET_ROUND)
36525 break;
36527 if (out_mode == SFmode && in_mode == SFmode)
36529 if (out_n == 4 && in_n == 4)
36530 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
36531 else if (out_n == 8 && in_n == 8)
36532 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
36534 break;
36536 case BUILT_IN_RINT:
36537 /* The round insn does not trap on denormals. */
36538 if (flag_trapping_math || !TARGET_ROUND)
36539 break;
36541 if (out_mode == DFmode && in_mode == DFmode)
36543 if (out_n == 2 && in_n == 2)
36544 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
36545 else if (out_n == 4 && in_n == 4)
36546 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
36548 break;
36550 case BUILT_IN_RINTF:
36551 /* The round insn does not trap on denormals. */
36552 if (flag_trapping_math || !TARGET_ROUND)
36553 break;
36555 if (out_mode == SFmode && in_mode == SFmode)
36557 if (out_n == 4 && in_n == 4)
36558 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
36559 else if (out_n == 8 && in_n == 8)
36560 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
36562 break;
36564 case BUILT_IN_ROUND:
36565 /* The round insn does not trap on denormals. */
36566 if (flag_trapping_math || !TARGET_ROUND)
36567 break;
36569 if (out_mode == DFmode && in_mode == DFmode)
36571 if (out_n == 2 && in_n == 2)
36572 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
36573 else if (out_n == 4 && in_n == 4)
36574 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
36576 break;
36578 case BUILT_IN_ROUNDF:
36579 /* The round insn does not trap on denormals. */
36580 if (flag_trapping_math || !TARGET_ROUND)
36581 break;
36583 if (out_mode == SFmode && in_mode == SFmode)
36585 if (out_n == 4 && in_n == 4)
36586 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
36587 else if (out_n == 8 && in_n == 8)
36588 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
36590 break;
36592 case BUILT_IN_FMA:
36593 if (out_mode == DFmode && in_mode == DFmode)
36595 if (out_n == 2 && in_n == 2)
36596 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
36597 if (out_n == 4 && in_n == 4)
36598 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
36600 break;
36602 case BUILT_IN_FMAF:
36603 if (out_mode == SFmode && in_mode == SFmode)
36605 if (out_n == 4 && in_n == 4)
36606 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
36607 if (out_n == 8 && in_n == 8)
36608 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
36610 break;
36612 default:
36613 break;
36616 /* Dispatch to a handler for a vectorization library. */
36617 if (ix86_veclib_handler)
36618 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
36619 type_in);
36621 return NULL_TREE;
36624 /* Handler for an SVML-style interface to
36625 a library with vectorized intrinsics. */
36627 static tree
36628 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
36630 char name[20];
36631 tree fntype, new_fndecl, args;
36632 unsigned arity;
36633 const char *bname;
36634 enum machine_mode el_mode, in_mode;
36635 int n, in_n;
36637 /* The SVML library is suitable for unsafe math only. */
36638 if (!flag_unsafe_math_optimizations)
36639 return NULL_TREE;
36641 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36642 n = TYPE_VECTOR_SUBPARTS (type_out);
36643 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36644 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36645 if (el_mode != in_mode
36646 || n != in_n)
36647 return NULL_TREE;
36649 switch (fn)
36651 case BUILT_IN_EXP:
36652 case BUILT_IN_LOG:
36653 case BUILT_IN_LOG10:
36654 case BUILT_IN_POW:
36655 case BUILT_IN_TANH:
36656 case BUILT_IN_TAN:
36657 case BUILT_IN_ATAN:
36658 case BUILT_IN_ATAN2:
36659 case BUILT_IN_ATANH:
36660 case BUILT_IN_CBRT:
36661 case BUILT_IN_SINH:
36662 case BUILT_IN_SIN:
36663 case BUILT_IN_ASINH:
36664 case BUILT_IN_ASIN:
36665 case BUILT_IN_COSH:
36666 case BUILT_IN_COS:
36667 case BUILT_IN_ACOSH:
36668 case BUILT_IN_ACOS:
36669 if (el_mode != DFmode || n != 2)
36670 return NULL_TREE;
36671 break;
36673 case BUILT_IN_EXPF:
36674 case BUILT_IN_LOGF:
36675 case BUILT_IN_LOG10F:
36676 case BUILT_IN_POWF:
36677 case BUILT_IN_TANHF:
36678 case BUILT_IN_TANF:
36679 case BUILT_IN_ATANF:
36680 case BUILT_IN_ATAN2F:
36681 case BUILT_IN_ATANHF:
36682 case BUILT_IN_CBRTF:
36683 case BUILT_IN_SINHF:
36684 case BUILT_IN_SINF:
36685 case BUILT_IN_ASINHF:
36686 case BUILT_IN_ASINF:
36687 case BUILT_IN_COSHF:
36688 case BUILT_IN_COSF:
36689 case BUILT_IN_ACOSHF:
36690 case BUILT_IN_ACOSF:
36691 if (el_mode != SFmode || n != 4)
36692 return NULL_TREE;
36693 break;
36695 default:
36696 return NULL_TREE;
36699 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36701 if (fn == BUILT_IN_LOGF)
36702 strcpy (name, "vmlsLn4");
36703 else if (fn == BUILT_IN_LOG)
36704 strcpy (name, "vmldLn2");
36705 else if (n == 4)
36707 sprintf (name, "vmls%s", bname+10);
36708 name[strlen (name)-1] = '4';
36710 else
36711 sprintf (name, "vmld%s2", bname+10);
36713 /* Uppercase the first letter of the math function part of the name. */
36714 name[4] &= ~0x20;
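/* Illustrative results of the mangling above: BUILT_IN_SINF becomes
   "vmlsSin4" and BUILT_IN_SIN becomes "vmldSin2"; the log functions
   were special-cased earlier as "vmlsLn4" and "vmldLn2".  */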
36716 arity = 0;
36717 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36718 args;
36719 args = TREE_CHAIN (args))
36720 arity++;
36722 if (arity == 1)
36723 fntype = build_function_type_list (type_out, type_in, NULL);
36724 else
36725 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36727 /* Build a function declaration for the vectorized function. */
36728 new_fndecl = build_decl (BUILTINS_LOCATION,
36729 FUNCTION_DECL, get_identifier (name), fntype);
36730 TREE_PUBLIC (new_fndecl) = 1;
36731 DECL_EXTERNAL (new_fndecl) = 1;
36732 DECL_IS_NOVOPS (new_fndecl) = 1;
36733 TREE_READONLY (new_fndecl) = 1;
36735 return new_fndecl;
36738 /* Handler for an ACML-style interface to
36739 a library with vectorized intrinsics. */
36741 static tree
36742 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
36744 char name[20] = "__vr.._";
36745 tree fntype, new_fndecl, args;
36746 unsigned arity;
36747 const char *bname;
36748 enum machine_mode el_mode, in_mode;
36749 int n, in_n;
36751 /* The ACML library is 64-bit only and suitable for unsafe math only,
36752 as it does not correctly support parts of IEEE arithmetic, such as
36753 denormals, with the required precision. */
36754 if (!TARGET_64BIT
36755 || !flag_unsafe_math_optimizations)
36756 return NULL_TREE;
36758 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36759 n = TYPE_VECTOR_SUBPARTS (type_out);
36760 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36761 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36762 if (el_mode != in_mode
36763 || n != in_n)
36764 return NULL_TREE;
36766 switch (fn)
36768 case BUILT_IN_SIN:
36769 case BUILT_IN_COS:
36770 case BUILT_IN_EXP:
36771 case BUILT_IN_LOG:
36772 case BUILT_IN_LOG2:
36773 case BUILT_IN_LOG10:
36774 name[4] = 'd';
36775 name[5] = '2';
36776 if (el_mode != DFmode
36777 || n != 2)
36778 return NULL_TREE;
36779 break;
36781 case BUILT_IN_SINF:
36782 case BUILT_IN_COSF:
36783 case BUILT_IN_EXPF:
36784 case BUILT_IN_POWF:
36785 case BUILT_IN_LOGF:
36786 case BUILT_IN_LOG2F:
36787 case BUILT_IN_LOG10F:
36788 name[4] = 's';
36789 name[5] = '4';
36790 if (el_mode != SFmode
36791 || n != 4)
36792 return NULL_TREE;
36793 break;
36795 default:
36796 return NULL_TREE;
36799 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36800 sprintf (name + 7, "%s", bname+10);
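/* Illustrative results of the mangling above: BUILT_IN_SIN becomes
   "__vrd2_sin" and BUILT_IN_SINF becomes "__vrs4_sinf".  */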
36802 arity = 0;
36803 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36804 args;
36805 args = TREE_CHAIN (args))
36806 arity++;
36808 if (arity == 1)
36809 fntype = build_function_type_list (type_out, type_in, NULL);
36810 else
36811 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36813 /* Build a function declaration for the vectorized function. */
36814 new_fndecl = build_decl (BUILTINS_LOCATION,
36815 FUNCTION_DECL, get_identifier (name), fntype);
36816 TREE_PUBLIC (new_fndecl) = 1;
36817 DECL_EXTERNAL (new_fndecl) = 1;
36818 DECL_IS_NOVOPS (new_fndecl) = 1;
36819 TREE_READONLY (new_fndecl) = 1;
36821 return new_fndecl;
36824 /* Returns a decl of a function that implements gather load with
36825 memory type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
36826 Return NULL_TREE if it is not available. */
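/* An illustrative example of the mapping below: a gather of V2DF
   elements with a SImode index and a valid power-of-two scale maps to
   IX86_BUILTIN_GATHERSIV2DF, the same memory type with a DImode index
   maps to IX86_BUILTIN_GATHERDIV2DF, and the 512-bit modes additionally
   require TARGET_AVX512F.  */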
36828 static tree
36829 ix86_vectorize_builtin_gather (const_tree mem_vectype,
36830 const_tree index_type, int scale)
36832 bool si;
36833 enum ix86_builtins code;
36835 if (! TARGET_AVX2)
36836 return NULL_TREE;
36838 if ((TREE_CODE (index_type) != INTEGER_TYPE
36839 && !POINTER_TYPE_P (index_type))
36840 || (TYPE_MODE (index_type) != SImode
36841 && TYPE_MODE (index_type) != DImode))
36842 return NULL_TREE;
36844 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
36845 return NULL_TREE;
36847 /* The v*gather* insns sign-extend the index to pointer mode. */
36848 if (TYPE_PRECISION (index_type) < POINTER_SIZE
36849 && TYPE_UNSIGNED (index_type))
36850 return NULL_TREE;
36852 if (scale <= 0
36853 || scale > 8
36854 || (scale & (scale - 1)) != 0)
36855 return NULL_TREE;
36857 si = TYPE_MODE (index_type) == SImode;
36858 switch (TYPE_MODE (mem_vectype))
36860 case V2DFmode:
36861 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
36862 break;
36863 case V4DFmode:
36864 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
36865 break;
36866 case V2DImode:
36867 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
36868 break;
36869 case V4DImode:
36870 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
36871 break;
36872 case V4SFmode:
36873 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
36874 break;
36875 case V8SFmode:
36876 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
36877 break;
36878 case V4SImode:
36879 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
36880 break;
36881 case V8SImode:
36882 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
36883 break;
36884 case V8DFmode:
36885 if (TARGET_AVX512F)
36886 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
36887 else
36888 return NULL_TREE;
36889 break;
36890 case V8DImode:
36891 if (TARGET_AVX512F)
36892 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
36893 else
36894 return NULL_TREE;
36895 break;
36896 case V16SFmode:
36897 if (TARGET_AVX512F)
36898 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
36899 else
36900 return NULL_TREE;
36901 break;
36902 case V16SImode:
36903 if (TARGET_AVX512F)
36904 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
36905 else
36906 return NULL_TREE;
36907 break;
36908 default:
36909 return NULL_TREE;
36912 return ix86_get_builtin (code);
36915 /* Returns a decl of a target-specific builtin that implements the
36916 reciprocal of the function, or NULL_TREE if not available. */
36918 static tree
36919 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
36920 bool sqrt ATTRIBUTE_UNUSED)
36922 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
36923 && flag_finite_math_only && !flag_trapping_math
36924 && flag_unsafe_math_optimizations))
36925 return NULL_TREE;
36927 if (md_fn)
36928 /* Machine dependent builtins. */
36929 switch (fn)
36931 /* Vectorized version of sqrt to rsqrt conversion. */
36932 case IX86_BUILTIN_SQRTPS_NR:
36933 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
36935 case IX86_BUILTIN_SQRTPS_NR256:
36936 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
36938 default:
36939 return NULL_TREE;
36941 else
36942 /* Normal builtins. */
36943 switch (fn)
36945 /* Sqrt to rsqrt conversion. */
36946 case BUILT_IN_SQRTF:
36947 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
36949 default:
36950 return NULL_TREE;
36954 /* Helper for avx_vpermilps256_operand et al. This is also used by
36955 the expansion functions to turn the parallel back into a mask.
36956 The return value is 0 for no match and the imm8+1 for a match. */
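/* A worked example (illustrative): for V4SFmode each element selector
   occupies two bits of the immediate, so the parallel (2 3 0 1) gives
   mask 0x4e and the function returns 0x4f, i.e. the imm8 plus one.  */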
36959 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
36961 unsigned i, nelt = GET_MODE_NUNITS (mode);
36962 unsigned mask = 0;
36963 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
36965 if (XVECLEN (par, 0) != (int) nelt)
36966 return 0;
36968 /* Validate that all of the elements are constants, and not totally
36969 out of range. Copy the data into an integral array to make the
36970 subsequent checks easier. */
36971 for (i = 0; i < nelt; ++i)
36973 rtx er = XVECEXP (par, 0, i);
36974 unsigned HOST_WIDE_INT ei;
36976 if (!CONST_INT_P (er))
36977 return 0;
36978 ei = INTVAL (er);
36979 if (ei >= nelt)
36980 return 0;
36981 ipar[i] = ei;
36984 switch (mode)
36986 case V8DFmode:
36987 /* In the 512-bit DFmode case, we can only move elements within
36988 a 128-bit lane. First fill the second part of the mask,
36989 then fallthru. */
36990 for (i = 4; i < 6; ++i)
36992 if (ipar[i] < 4 || ipar[i] >= 6)
36993 return 0;
36994 mask |= (ipar[i] - 4) << i;
36996 for (i = 6; i < 8; ++i)
36998 if (ipar[i] < 6)
36999 return 0;
37000 mask |= (ipar[i] - 6) << i;
37002 /* FALLTHRU */
37004 case V4DFmode:
37005 /* In the 256-bit DFmode case, we can only move elements within
37006 a 128-bit lane. */
37007 for (i = 0; i < 2; ++i)
37009 if (ipar[i] >= 2)
37010 return 0;
37011 mask |= ipar[i] << i;
37013 for (i = 2; i < 4; ++i)
37015 if (ipar[i] < 2)
37016 return 0;
37017 mask |= (ipar[i] - 2) << i;
37019 break;
37021 case V16SFmode:
37022 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
37023 must mirror the permutation in the lower 256 bits. */
37024 for (i = 0; i < 8; ++i)
37025 if (ipar[i] + 8 != ipar[i + 8])
37026 return 0;
37027 /* FALLTHRU */
37029 case V8SFmode:
37030 /* In the 256-bit SFmode case, we have full freedom of
37031 movement within the low 128-bit lane, but the high 128-bit
37032 lane must mirror the exact same pattern. */
37033 for (i = 0; i < 4; ++i)
37034 if (ipar[i] + 4 != ipar[i + 4])
37035 return 0;
37036 nelt = 4;
37037 /* FALLTHRU */
37039 case V2DFmode:
37040 case V4SFmode:
37041 /* In the 128-bit case, we have full freedom in the placement of
37042 the elements from the source operand. */
37043 for (i = 0; i < nelt; ++i)
37044 mask |= ipar[i] << (i * (nelt / 2));
37045 break;
37047 default:
37048 gcc_unreachable ();
37051 /* Make sure success has a non-zero value by adding one. */
37052 return mask + 1;
37055 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
37056 the expansion functions to turn the parallel back into a mask.
37057 The return value is 0 for no match and the imm8+1 for a match. */
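/* A worked example (illustrative): for V4DFmode the parallel (2 3 4 5)
   selects the high 128-bit lane of the first operand and the low lane
   of the second, giving mask 0x21 and a return value of 0x22.  */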
37060 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
37062 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
37063 unsigned mask = 0;
37064 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
37066 if (XVECLEN (par, 0) != (int) nelt)
37067 return 0;
37069 /* Validate that all of the elements are constants, and not totally
37070 out of range. Copy the data into an integral array to make the
37071 subsequent checks easier. */
37072 for (i = 0; i < nelt; ++i)
37074 rtx er = XVECEXP (par, 0, i);
37075 unsigned HOST_WIDE_INT ei;
37077 if (!CONST_INT_P (er))
37078 return 0;
37079 ei = INTVAL (er);
37080 if (ei >= 2 * nelt)
37081 return 0;
37082 ipar[i] = ei;
37085 /* Validate that the halves of the permute are halves. */
37086 for (i = 0; i < nelt2 - 1; ++i)
37087 if (ipar[i] + 1 != ipar[i + 1])
37088 return 0;
37089 for (i = nelt2; i < nelt - 1; ++i)
37090 if (ipar[i] + 1 != ipar[i + 1])
37091 return 0;
37093 /* Reconstruct the mask. */
37094 for (i = 0; i < 2; ++i)
37096 unsigned e = ipar[i * nelt2];
37097 if (e % nelt2)
37098 return 0;
37099 e /= nelt2;
37100 mask |= e << (i * 4);
37103 /* Make sure success has a non-zero value by adding one. */
37104 return mask + 1;
37107 /* Return a register priority for hard reg REGNO. */
37108 static int
37109 ix86_register_priority (int hard_regno)
37111 /* ebp and r13 as the base always want a displacement, and r12 as the
37112 base always wants an index.  So discourage their use in an
37113 address. */
37114 if (hard_regno == R12_REG || hard_regno == R13_REG)
37115 return 0;
37116 if (hard_regno == BP_REG)
37117 return 1;
37118 /* New x86-64 int registers result in bigger code size. Discourage
37119 them. */
37120 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
37121 return 2;
37122 /* New x86-64 SSE registers result in bigger code size. Discourage
37123 them. */
37124 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
37125 return 2;
37126 /* Usage of AX register results in smaller code. Prefer it. */
37127 if (hard_regno == 0)
37128 return 4;
37129 return 3;
37132 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
37134 Put float CONST_DOUBLE in the constant pool instead of fp regs.
37135 QImode must go into class Q_REGS.
37136 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
37137 movdf to do mem-to-mem moves through integer regs. */
37139 static reg_class_t
37140 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
37142 enum machine_mode mode = GET_MODE (x);
37144 /* We're only allowed to return a subclass of CLASS. Many of the
37145 following checks fail for NO_REGS, so eliminate that early. */
37146 if (regclass == NO_REGS)
37147 return NO_REGS;
37149 /* All classes can load zeros. */
37150 if (x == CONST0_RTX (mode))
37151 return regclass;
37153 /* Force constants into memory if we are loading a (nonzero) constant into
37154 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
37155 instructions to load from a constant. */
37156 if (CONSTANT_P (x)
37157 && (MAYBE_MMX_CLASS_P (regclass)
37158 || MAYBE_SSE_CLASS_P (regclass)
37159 || MAYBE_MASK_CLASS_P (regclass)))
37160 return NO_REGS;
37162 /* Prefer SSE regs only, if we can use them for math. */
37163 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
37164 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
37166 /* Floating-point constants need more complex checks. */
37167 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
37169 /* General regs can load everything. */
37170 if (reg_class_subset_p (regclass, GENERAL_REGS))
37171 return regclass;
37173 /* Floats can load 0 and 1 plus some others. Note that we eliminated
37174 zero above. We only want to wind up preferring 80387 registers if
37175 we plan on doing computation with them. */
37176 if (TARGET_80387
37177 && standard_80387_constant_p (x) > 0)
37179 /* Limit class to non-sse. */
37180 if (regclass == FLOAT_SSE_REGS)
37181 return FLOAT_REGS;
37182 if (regclass == FP_TOP_SSE_REGS)
37183 return FP_TOP_REG;
37184 if (regclass == FP_SECOND_SSE_REGS)
37185 return FP_SECOND_REG;
37186 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
37187 return regclass;
37190 return NO_REGS;
37193 /* Generally when we see PLUS here, it's the function invariant
37194 (plus soft-fp const_int), which can only be computed into general
37195 regs. */
37196 if (GET_CODE (x) == PLUS)
37197 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
37199 /* QImode constants are easy to load, but non-constant QImode data
37200 must go into Q_REGS. */
37201 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
37203 if (reg_class_subset_p (regclass, Q_REGS))
37204 return regclass;
37205 if (reg_class_subset_p (Q_REGS, regclass))
37206 return Q_REGS;
37207 return NO_REGS;
37210 return regclass;
37213 /* Discourage putting floating-point values in SSE registers unless
37214 SSE math is being used, and likewise for the 387 registers. */
37215 static reg_class_t
37216 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
37218 enum machine_mode mode = GET_MODE (x);
37220 /* Restrict the output reload class to the register bank that we are doing
37221 math on. If we would like not to return a subset of CLASS, reject this
37222 alternative: if reload cannot do this, it will still use its choice. */
37223 mode = GET_MODE (x);
37224 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
37225 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
37227 if (X87_FLOAT_MODE_P (mode))
37229 if (regclass == FP_TOP_SSE_REGS)
37230 return FP_TOP_REG;
37231 else if (regclass == FP_SECOND_SSE_REGS)
37232 return FP_SECOND_REG;
37233 else
37234 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
37237 return regclass;
37240 static reg_class_t
37241 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
37242 enum machine_mode mode, secondary_reload_info *sri)
37244 /* Double-word spills from general registers to non-offsettable memory
37245 references (zero-extended addresses) require special handling. */
37246 if (TARGET_64BIT
37247 && MEM_P (x)
37248 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
37249 && INTEGER_CLASS_P (rclass)
37250 && !offsettable_memref_p (x))
37252 sri->icode = (in_p
37253 ? CODE_FOR_reload_noff_load
37254 : CODE_FOR_reload_noff_store);
37255 /* Add the cost of moving address to a temporary. */
37256 sri->extra_cost = 1;
37258 return NO_REGS;
37261 /* QImode spills from non-QI registers require an
37262 intermediate register on 32-bit targets. */
37263 if (mode == QImode
37264 && (MAYBE_MASK_CLASS_P (rclass)
37265 || (!TARGET_64BIT && !in_p
37266 && INTEGER_CLASS_P (rclass)
37267 && MAYBE_NON_Q_CLASS_P (rclass))))
37269 int regno;
37271 if (REG_P (x))
37272 regno = REGNO (x);
37273 else
37274 regno = -1;
37276 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
37277 regno = true_regnum (x);
37279 /* Return Q_REGS if the operand is in memory. */
37280 if (regno == -1)
37281 return Q_REGS;
37284 /* This condition handles the corner case where an expression involving
37285 pointers gets vectorized. We're trying to use the address of a
37286 stack slot as a vector initializer.
37288 (set (reg:V2DI 74 [ vect_cst_.2 ])
37289 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
37291 Eventually frame gets turned into sp+offset like this:
37293 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37294 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37295 (const_int 392 [0x188]))))
37297 That later gets turned into:
37299 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37300 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37301 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
37303 We'll have the following reload recorded:
37305 Reload 0: reload_in (DI) =
37306 (plus:DI (reg/f:DI 7 sp)
37307 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
37308 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37309 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
37310 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
37311 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37312 reload_reg_rtx: (reg:V2DI 22 xmm1)
37314 Which isn't going to work since SSE instructions can't handle scalar
37315 additions. Returning GENERAL_REGS forces the addition into integer
37316 register and reload can handle subsequent reloads without problems. */
37318 if (in_p && GET_CODE (x) == PLUS
37319 && SSE_CLASS_P (rclass)
37320 && SCALAR_INT_MODE_P (mode))
37321 return GENERAL_REGS;
37323 return NO_REGS;
37326 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
37328 static bool
37329 ix86_class_likely_spilled_p (reg_class_t rclass)
37331 switch (rclass)
37333 case AREG:
37334 case DREG:
37335 case CREG:
37336 case BREG:
37337 case AD_REGS:
37338 case SIREG:
37339 case DIREG:
37340 case SSE_FIRST_REG:
37341 case FP_TOP_REG:
37342 case FP_SECOND_REG:
37343 return true;
37345 default:
37346 break;
37349 return false;
37352 /* If we are copying between general and FP registers, we need a memory
37353 location. The same is true for SSE and MMX registers.
37355 To optimize register_move_cost performance, allow inline variant.
37357 The macro can't work reliably when one of the CLASSES is a class containing
37358 registers from multiple units (SSE, MMX, integer). We avoid this by never
37359 combining those units in single alternative in the machine description.
37360 Ensure that this constraint holds to avoid unexpected surprises.
37362 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
37363 enforce these sanity checks. */
37365 static inline bool
37366 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37367 enum machine_mode mode, int strict)
37369 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
37370 return false;
37371 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
37372 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
37373 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
37374 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
37375 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
37376 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
37378 gcc_assert (!strict || lra_in_progress);
37379 return true;
37382 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
37383 return true;
37385 /* ??? This is a lie. We do have moves between mmx/general, and for
37386 mmx/sse2. But by saying we need secondary memory we discourage the
37387 register allocator from using the mmx registers unless needed. */
37388 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
37389 return true;
37391 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37393 /* SSE1 doesn't have any direct moves from other classes. */
37394 if (!TARGET_SSE2)
37395 return true;
37397 /* If the target says that inter-unit moves are more expensive
37398 than moving through memory, then don't generate them. */
37399 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
37400 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
37401 return true;
37403 /* Between SSE and general, we have moves no larger than word size. */
37404 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37405 return true;
37408 return false;
37411 bool
37412 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37413 enum machine_mode mode, int strict)
37415 return inline_secondary_memory_needed (class1, class2, mode, strict);
37418 /* Implement the TARGET_CLASS_MAX_NREGS hook.
37420 On the 80386, this is the size of MODE in words,
37421 except in the FP regs, where a single reg is always enough. */
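/* Illustrative values from the function below: XFmode in an integer
   class needs 3 registers on ia32 and 2 on x86-64, XCmode needs 6 and 4
   respectively, and any non-complex mode in a non-integer class needs
   just one register.  */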
37423 static unsigned char
37424 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
37426 if (MAYBE_INTEGER_CLASS_P (rclass))
37428 if (mode == XFmode)
37429 return (TARGET_64BIT ? 2 : 3);
37430 else if (mode == XCmode)
37431 return (TARGET_64BIT ? 4 : 6);
37432 else
37433 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
37435 else
37437 if (COMPLEX_MODE_P (mode))
37438 return 2;
37439 else
37440 return 1;
37444 /* Return true if the registers in CLASS cannot represent the change from
37445 modes FROM to TO. */
37447 bool
37448 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
37449 enum reg_class regclass)
37451 if (from == to)
37452 return false;
37454 /* x87 registers can't do subreg at all, as all values are reformatted
37455 to extended precision. */
37456 if (MAYBE_FLOAT_CLASS_P (regclass))
37457 return true;
37459 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
37461 /* Vector registers do not support QI or HImode loads. If we don't
37462 disallow a change to these modes, reload will assume it's ok to
37463 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
37464 the vec_dupv4hi pattern. */
37465 if (GET_MODE_SIZE (from) < 4)
37466 return true;
37468 /* Vector registers do not support subreg with nonzero offsets, which
37469 are otherwise valid for integer registers. Since we can't see
37470 whether we have a nonzero offset from here, prohibit all
37471 nonparadoxical subregs changing size. */
37472 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
37473 return true;
37476 return false;
37479 /* Return the cost of moving data of mode M between a
37480 register and memory. A value of 2 is the default; this cost is
37481 relative to those in `REGISTER_MOVE_COST'.
37483 This function is used extensively by register_move_cost that is used to
37484 build tables at startup. Make it inline in this case.
37485 When IN is 2, return maximum of in and out move cost.
37487 If moving between registers and memory is more expensive than
37488 between two registers, you should define this macro to express the
37489 relative cost.
37491 Also model the increased cost of moving QImode registers in
37492 non-Q_REGS classes.
37494 static inline int
37495 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
37496 int in)
37498 int cost;
37499 if (FLOAT_CLASS_P (regclass))
37501 int index;
37502 switch (mode)
37504 case SFmode:
37505 index = 0;
37506 break;
37507 case DFmode:
37508 index = 1;
37509 break;
37510 case XFmode:
37511 index = 2;
37512 break;
37513 default:
37514 return 100;
37516 if (in == 2)
37517 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
37518 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
37520 if (SSE_CLASS_P (regclass))
37522 int index;
37523 switch (GET_MODE_SIZE (mode))
37525 case 4:
37526 index = 0;
37527 break;
37528 case 8:
37529 index = 1;
37530 break;
37531 case 16:
37532 index = 2;
37533 break;
37534 default:
37535 return 100;
37537 if (in == 2)
37538 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
37539 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
37541 if (MMX_CLASS_P (regclass))
37543 int index;
37544 switch (GET_MODE_SIZE (mode))
37546 case 4:
37547 index = 0;
37548 break;
37549 case 8:
37550 index = 1;
37551 break;
37552 default:
37553 return 100;
37555 if (in)
37556 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
37557 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
37559 switch (GET_MODE_SIZE (mode))
37561 case 1:
37562 if (Q_CLASS_P (regclass) || TARGET_64BIT)
37564 if (!in)
37565 return ix86_cost->int_store[0];
37566 if (TARGET_PARTIAL_REG_DEPENDENCY
37567 && optimize_function_for_speed_p (cfun))
37568 cost = ix86_cost->movzbl_load;
37569 else
37570 cost = ix86_cost->int_load[0];
37571 if (in == 2)
37572 return MAX (cost, ix86_cost->int_store[0]);
37573 return cost;
37575 else
37577 if (in == 2)
37578 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
37579 if (in)
37580 return ix86_cost->movzbl_load;
37581 else
37582 return ix86_cost->int_store[0] + 4;
37584 break;
37585 case 2:
37586 if (in == 2)
37587 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
37588 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
37589 default:
37590 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
37591 if (mode == TFmode)
37592 mode = XFmode;
37593 if (in == 2)
37594 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
37595 else if (in)
37596 cost = ix86_cost->int_load[2];
37597 else
37598 cost = ix86_cost->int_store[2];
37599 return (cost * (((int) GET_MODE_SIZE (mode)
37600 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
37604 static int
37605 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
37606 bool in)
37608 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
37612 /* Return the cost of moving data from a register in class CLASS1 to
37613 one in class CLASS2.
37615 It is not required that the cost always equal 2 when FROM is the same as TO;
37616 on some machines it is expensive to move between registers if they are not
37617 general registers. */
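/* An illustrative data point for the function below: when no secondary
   memory is needed, a move between an SSE class and an integer class
   costs MAX (8, ix86_cost->mmxsse_to_integer), a value kept
   deliberately high to discourage cross-unit moves.  */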
37619 static int
37620 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
37621 reg_class_t class2_i)
37623 enum reg_class class1 = (enum reg_class) class1_i;
37624 enum reg_class class2 = (enum reg_class) class2_i;
37626 /* In case we require secondary memory, compute cost of the store followed
37627 by load. In order to avoid bad register allocation choices, we need
37628 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
37630 if (inline_secondary_memory_needed (class1, class2, mode, 0))
37632 int cost = 1;
37634 cost += inline_memory_move_cost (mode, class1, 2);
37635 cost += inline_memory_move_cost (mode, class2, 2);
37637 /* When copying from a general-purpose register we may emit multiple
37638 stores followed by a single load, causing a memory size mismatch stall.
37639 Count this as an arbitrarily high cost of 20. */
37640 if (targetm.class_max_nregs (class1, mode)
37641 > targetm.class_max_nregs (class2, mode))
37642 cost += 20;
37644 /* In the case of FP/MMX moves, the registers actually overlap, and we
37645 have to switch modes in order to treat them differently. */
37646 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
37647 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
37648 cost += 20;
37650 return cost;
37653 /* Moves between SSE/MMX and integer unit are expensive. */
37654 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
37655 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37657 /* ??? By keeping the returned value relatively high, we limit the number
37658 of moves between integer and MMX/SSE registers for all targets.
37659 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
37660 where integer modes in MMX/SSE registers are not tieable
37661 because of missing QImode and HImode moves to, from or between
37662 MMX/SSE registers. */
37663 return MAX (8, ix86_cost->mmxsse_to_integer);
37665 if (MAYBE_FLOAT_CLASS_P (class1))
37666 return ix86_cost->fp_move;
37667 if (MAYBE_SSE_CLASS_P (class1))
37668 return ix86_cost->sse_move;
37669 if (MAYBE_MMX_CLASS_P (class1))
37670 return ix86_cost->mmx_move;
37671 return 2;
37674 /* Return TRUE if hard register REGNO can hold a value of machine-mode
37675 MODE. */
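/* An illustrative consequence of the checks below: the extended
   registers xmm16-xmm31 accept a mode only through the TARGET_AVX512F
   branch (XImode, 512-bit vector modes or scalar modes); without
   AVX-512 they are rejected for every mode.  */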
37677 bool
37678 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
37680 /* The flags register can hold only CCmode values, and only it can hold them. */
37681 if (CC_REGNO_P (regno))
37682 return GET_MODE_CLASS (mode) == MODE_CC;
37683 if (GET_MODE_CLASS (mode) == MODE_CC
37684 || GET_MODE_CLASS (mode) == MODE_RANDOM
37685 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
37686 return false;
37687 if (STACK_REGNO_P (regno))
37688 return VALID_FP_MODE_P (mode);
37689 if (MASK_REGNO_P (regno))
37690 return VALID_MASK_REG_MODE (mode);
37691 if (SSE_REGNO_P (regno))
37693 /* We implement the move patterns for all vector modes into and
37694 out of SSE registers, even when no operation instructions
37695 are available. */
37697 /* For AVX-512 we allow, regardless of regno:
37698 - XI mode
37699 - any 512-bit wide vector mode
37700 - any scalar mode. */
37701 if (TARGET_AVX512F
37702 && (mode == XImode
37703 || VALID_AVX512F_REG_MODE (mode)
37704 || VALID_AVX512F_SCALAR_MODE (mode)))
37705 return true;
37707 /* xmm16-xmm31 are only available for AVX-512. */
37708 if (EXT_REX_SSE_REGNO_P (regno))
37709 return false;
37711 /* OImode and AVX modes are available only when AVX is enabled. */
37712 return ((TARGET_AVX
37713 && VALID_AVX256_REG_OR_OI_MODE (mode))
37714 || VALID_SSE_REG_MODE (mode)
37715 || VALID_SSE2_REG_MODE (mode)
37716 || VALID_MMX_REG_MODE (mode)
37717 || VALID_MMX_REG_MODE_3DNOW (mode));
37719 if (MMX_REGNO_P (regno))
37721 /* We implement the move patterns for 3DNOW modes even in MMX mode,
37722 so if the register is available at all, then we can move data of
37723 the given mode into or out of it. */
37724 return (VALID_MMX_REG_MODE (mode)
37725 || VALID_MMX_REG_MODE_3DNOW (mode));
37728 if (mode == QImode)
37730 /* Take care with QImode values - they can live in non-QI regs,
37731 but then they cause partial register stalls. */
37732 if (ANY_QI_REGNO_P (regno))
37733 return true;
37734 if (!TARGET_PARTIAL_REG_STALL)
37735 return true;
37736 /* LRA checks if the hard register is OK for the given mode.
37737 QImode values can live in non-QI regs, so we allow all
37738 registers here. */
37739 if (lra_in_progress)
37740 return true;
37741 return !can_create_pseudo_p ();
37743 /* We handle both integers and floats in the general purpose registers. */
37744 else if (VALID_INT_MODE_P (mode))
37745 return true;
37746 else if (VALID_FP_MODE_P (mode))
37747 return true;
37748 else if (VALID_DFP_MODE_P (mode))
37749 return true;
37750 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
37751 on to use that value in smaller contexts, this can easily force a
37752 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
37753 supporting DImode, allow it. */
37754 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
37755 return true;
37757 return false;
37760 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
37761 tieable integer mode. */
37763 static bool
37764 ix86_tieable_integer_mode_p (enum machine_mode mode)
37766 switch (mode)
37768 case HImode:
37769 case SImode:
37770 return true;
37772 case QImode:
37773 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
37775 case DImode:
37776 return TARGET_64BIT;
37778 default:
37779 return false;
37783 /* Return true if MODE1 is accessible in a register that can hold MODE2
37784 without copying. That is, all register classes that can hold MODE2
37785 can also hold MODE1. */
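/* Illustrative examples of the rules below: SFmode ties with DFmode and
   XFmode, since every class that can hold the wider mode can also hold
   SFmode, whereas a 32-byte mode such as V8SFmode ties only with other
   32-byte modes that are valid in SSE registers.  */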
37787 bool
37788 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
37790 if (mode1 == mode2)
37791 return true;
37793 if (ix86_tieable_integer_mode_p (mode1)
37794 && ix86_tieable_integer_mode_p (mode2))
37795 return true;
37797 /* MODE2 being XFmode implies fp stack or general regs, which means we
37798 can tie any smaller floating point modes to it. Note that we do not
37799 tie this with TFmode. */
37800 if (mode2 == XFmode)
37801 return mode1 == SFmode || mode1 == DFmode;
37803 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
37804 that we can tie it with SFmode. */
37805 if (mode2 == DFmode)
37806 return mode1 == SFmode;
37808 /* If MODE2 is only appropriate for an SSE register, then tie with
37809 any other mode acceptable to SSE registers. */
37810 if (GET_MODE_SIZE (mode2) == 32
37811 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37812 return (GET_MODE_SIZE (mode1) == 32
37813 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37814 if (GET_MODE_SIZE (mode2) == 16
37815 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37816 return (GET_MODE_SIZE (mode1) == 16
37817 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37819 /* If MODE2 is appropriate for an MMX register, then tie
37820 with any other mode acceptable to MMX registers. */
37821 if (GET_MODE_SIZE (mode2) == 8
37822 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
37823 return (GET_MODE_SIZE (mode1) == 8
37824 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
37826 return false;
37829 /* Return the cost of moving between two registers of mode MODE. */
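/* An illustrative reading of the function below: the cost is the number
   of UNITS-sized pieces needed for the move, expressed via
   COSTS_N_INSNS; e.g. with AVX enabled a V8SFmode register copy counts
   as a single insn because UNITS is widened to the full mode size.  */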
37831 static int
37832 ix86_set_reg_reg_cost (enum machine_mode mode)
37834 unsigned int units = UNITS_PER_WORD;
37836 switch (GET_MODE_CLASS (mode))
37838 default:
37839 break;
37841 case MODE_CC:
37842 units = GET_MODE_SIZE (CCmode);
37843 break;
37845 case MODE_FLOAT:
37846 if ((TARGET_SSE && mode == TFmode)
37847 || (TARGET_80387 && mode == XFmode)
37848 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
37849 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
37850 units = GET_MODE_SIZE (mode);
37851 break;
37853 case MODE_COMPLEX_FLOAT:
37854 if ((TARGET_SSE && mode == TCmode)
37855 || (TARGET_80387 && mode == XCmode)
37856 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
37857 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
37858 units = GET_MODE_SIZE (mode);
37859 break;
37861 case MODE_VECTOR_INT:
37862 case MODE_VECTOR_FLOAT:
37863 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
37864 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
37865 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
37866 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
37867 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
37868 units = GET_MODE_SIZE (mode);
37871 /* Return the cost of moving between two registers of mode MODE,
37872 assuming that the move will be in pieces of at most UNITS bytes. */
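/* Worked example (illustrative): with UNITS_PER_WORD == 4 on ia32, a
   DImode register-register set costs COSTS_N_INSNS ((8 + 4 - 1) / 4)
   == COSTS_N_INSNS (2), while an SImode set costs COSTS_N_INSNS (1).  */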
37873 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
37876 /* Compute a (partial) cost for rtx X. Return true if the complete
37877 cost has been computed, and false if subexpressions should be
37878 scanned. In either case, *TOTAL contains the cost result. */
37880 static bool
37881 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
37882 bool speed)
37884 rtx mask;
37885 enum rtx_code code = (enum rtx_code) code_i;
37886 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
37887 enum machine_mode mode = GET_MODE (x);
37888 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
37890 switch (code)
37892 case SET:
37893 if (register_operand (SET_DEST (x), VOIDmode)
37894 && reg_or_0_operand (SET_SRC (x), VOIDmode))
37896 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
37897 return true;
37899 return false;
37901 case CONST_INT:
37902 case CONST:
37903 case LABEL_REF:
37904 case SYMBOL_REF:
37905 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
37906 *total = 3;
37907 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
37908 *total = 2;
37909 else if (flag_pic && SYMBOLIC_CONST (x)
37910 && !(TARGET_64BIT
37911 && (GET_CODE (x) == LABEL_REF
37912 || (GET_CODE (x) == SYMBOL_REF
37913 && SYMBOL_REF_LOCAL_P (x)))))
37914 *total = 1;
37915 else
37916 *total = 0;
37917 return true;
37919 case CONST_DOUBLE:
37920 if (mode == VOIDmode)
37922 *total = 0;
37923 return true;
37925 switch (standard_80387_constant_p (x))
37927 case 1: /* 0.0 */
37928 *total = 1;
37929 return true;
37930 default: /* Other constants */
37931 *total = 2;
37932 return true;
37933 case 0:
37934 case -1:
37935 break;
37937 if (SSE_FLOAT_MODE_P (mode))
37939 case CONST_VECTOR:
37940 switch (standard_sse_constant_p (x))
37942 case 0:
37943 break;
37944 case 1: /* 0: xor eliminates false dependency */
37945 *total = 0;
37946 return true;
37947 default: /* -1: cmp contains false dependency */
37948 *total = 1;
37949 return true;
37952 /* Fall back to (MEM (SYMBOL_REF)), since that's where
37953 it'll probably end up. Add a penalty for size. */
37954 *total = (COSTS_N_INSNS (1)
37955 + (flag_pic != 0 && !TARGET_64BIT)
37956 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
37957 return true;
37959 case ZERO_EXTEND:
37960 /* The zero extension is often completely free on x86_64, so make
37961 it as cheap as possible. */
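/* E.g. on x86_64 a move into %eax already clears the upper half of
   %rax, so a (zero_extend:DI (reg:SI)) typically needs no extra insn
   (illustrative).  */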
37962 if (TARGET_64BIT && mode == DImode
37963 && GET_MODE (XEXP (x, 0)) == SImode)
37964 *total = 1;
37965 else if (TARGET_ZERO_EXTEND_WITH_AND)
37966 *total = cost->add;
37967 else
37968 *total = cost->movzx;
37969 return false;
37971 case SIGN_EXTEND:
37972 *total = cost->movsx;
37973 return false;
37975 case ASHIFT:
37976 if (SCALAR_INT_MODE_P (mode)
37977 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
37978 && CONST_INT_P (XEXP (x, 1)))
37980 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
37981 if (value == 1)
37983 *total = cost->add;
37984 return false;
37986 if ((value == 2 || value == 3)
37987 && cost->lea <= cost->shift_const)
37989 *total = cost->lea;
37990 return false;
37993 /* FALLTHRU */
37995 case ROTATE:
37996 case ASHIFTRT:
37997 case LSHIFTRT:
37998 case ROTATERT:
37999 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38001 /* ??? Should be SSE vector operation cost. */
38002 /* At least for published AMD latencies, this really is the same
38003 as the latency for a simple fpu operation like fabs. */
38004 /* V*QImode is emulated with 1-11 insns. */
38005 if (mode == V16QImode || mode == V32QImode)
38007 int count = 11;
38008 if (TARGET_XOP && mode == V16QImode)
38010 /* For XOP we use vpshab, which requires a broadcast of the
38011 value to the variable shift insn.  For constants this means
38012 a V16QImode constant in memory; even when we can perform the
38013 shift with one insn, set the cost so as to prefer paddb. */
38014 if (CONSTANT_P (XEXP (x, 1)))
38016 *total = (cost->fabs
38017 + rtx_cost (XEXP (x, 0), code, 0, speed)
38018 + (speed ? 2 : COSTS_N_BYTES (16)));
38019 return true;
38021 count = 3;
38023 else if (TARGET_SSSE3)
38024 count = 7;
38025 *total = cost->fabs * count;
38027 else
38028 *total = cost->fabs;
38030 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38032 if (CONST_INT_P (XEXP (x, 1)))
38034 if (INTVAL (XEXP (x, 1)) > 32)
38035 *total = cost->shift_const + COSTS_N_INSNS (2);
38036 else
38037 *total = cost->shift_const * 2;
38039 else
38041 if (GET_CODE (XEXP (x, 1)) == AND)
38042 *total = cost->shift_var * 2;
38043 else
38044 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
38047 else
38049 if (CONST_INT_P (XEXP (x, 1)))
38050 *total = cost->shift_const;
38051 else if (GET_CODE (XEXP (x, 1)) == SUBREG
38052 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
38054 /* Return the cost after shift-and truncation. */
38055 *total = cost->shift_var;
38056 return true;
38058 else
38059 *total = cost->shift_var;
38061 return false;
38063 case FMA:
38065 rtx sub;
38067 gcc_assert (FLOAT_MODE_P (mode));
38068 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
38070 /* ??? SSE scalar/vector cost should be used here. */
38071 /* ??? Bald assumption that fma has the same cost as fmul. */
38072 *total = cost->fmul;
38073 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
38075 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
38076 sub = XEXP (x, 0);
38077 if (GET_CODE (sub) == NEG)
38078 sub = XEXP (sub, 0);
38079 *total += rtx_cost (sub, FMA, 0, speed);
38081 sub = XEXP (x, 2);
38082 if (GET_CODE (sub) == NEG)
38083 sub = XEXP (sub, 0);
38084 *total += rtx_cost (sub, FMA, 2, speed);
38085 return true;
38088 case MULT:
38089 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38091 /* ??? SSE scalar cost should be used here. */
38092 *total = cost->fmul;
38093 return false;
38095 else if (X87_FLOAT_MODE_P (mode))
38097 *total = cost->fmul;
38098 return false;
38100 else if (FLOAT_MODE_P (mode))
38102 /* ??? SSE vector cost should be used here. */
38103 *total = cost->fmul;
38104 return false;
38106 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38108 /* V*QImode is emulated with 7-13 insns. */
38109 if (mode == V16QImode || mode == V32QImode)
38111 int extra = 11;
38112 if (TARGET_XOP && mode == V16QImode)
38113 extra = 5;
38114 else if (TARGET_SSSE3)
38115 extra = 6;
38116 *total = cost->fmul * 2 + cost->fabs * extra;
38118 /* V*DImode is emulated with 5-8 insns. */
38119 else if (mode == V2DImode || mode == V4DImode)
38121 if (TARGET_XOP && mode == V2DImode)
38122 *total = cost->fmul * 2 + cost->fabs * 3;
38123 else
38124 *total = cost->fmul * 3 + cost->fabs * 5;
38126 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
38127 insns, including two PMULUDQ. */
38128 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
38129 *total = cost->fmul * 2 + cost->fabs * 5;
38130 else
38131 *total = cost->fmul;
38132 return false;
38134 else
38136 rtx op0 = XEXP (x, 0);
38137 rtx op1 = XEXP (x, 1);
38138 int nbits;
38139 if (CONST_INT_P (XEXP (x, 1)))
38141 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38142 for (nbits = 0; value != 0; value &= value - 1)
38143 nbits++;
38145 else
38146 /* This is arbitrary. */
38147 nbits = 7;
38149 /* Compute costs correctly for widening multiplication. */
38150 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
38151 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
38152 == GET_MODE_SIZE (mode))
38154 int is_mulwiden = 0;
38155 enum machine_mode inner_mode = GET_MODE (op0);
38157 if (GET_CODE (op0) == GET_CODE (op1))
38158 is_mulwiden = 1, op1 = XEXP (op1, 0);
38159 else if (CONST_INT_P (op1))
38161 if (GET_CODE (op0) == SIGN_EXTEND)
38162 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
38163 == INTVAL (op1);
38164 else
38165 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
38168 if (is_mulwiden)
38169 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
38172 *total = (cost->mult_init[MODE_INDEX (mode)]
38173 + nbits * cost->mult_bit
38174 + rtx_cost (op0, outer_code, opno, speed)
38175 + rtx_cost (op1, outer_code, opno, speed));
38177 return true;
38180 case DIV:
38181 case UDIV:
38182 case MOD:
38183 case UMOD:
38184 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38185 /* ??? SSE cost should be used here. */
38186 *total = cost->fdiv;
38187 else if (X87_FLOAT_MODE_P (mode))
38188 *total = cost->fdiv;
38189 else if (FLOAT_MODE_P (mode))
38190 /* ??? SSE vector cost should be used here. */
38191 *total = cost->fdiv;
38192 else
38193 *total = cost->divide[MODE_INDEX (mode)];
38194 return false;
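/* Illustrative example for the PLUS cases below: an address-like
   expression such as (plus (plus (mult r1 4) r2) 8) maps onto a single
   lea, e.g. leal 8(%ebx,%ecx,4), %eax, so when the scale is 2, 4 or 8
   we charge one lea plus the costs of the operands.  */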
38196 case PLUS:
38197 if (GET_MODE_CLASS (mode) == MODE_INT
38198 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
38200 if (GET_CODE (XEXP (x, 0)) == PLUS
38201 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
38202 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
38203 && CONSTANT_P (XEXP (x, 1)))
38205 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
38206 if (val == 2 || val == 4 || val == 8)
38208 *total = cost->lea;
38209 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38210 outer_code, opno, speed);
38211 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
38212 outer_code, opno, speed);
38213 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38214 return true;
38217 else if (GET_CODE (XEXP (x, 0)) == MULT
38218 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
38220 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
38221 if (val == 2 || val == 4 || val == 8)
38223 *total = cost->lea;
38224 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38225 outer_code, opno, speed);
38226 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38227 return true;
38230 else if (GET_CODE (XEXP (x, 0)) == PLUS)
38232 *total = cost->lea;
38233 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38234 outer_code, opno, speed);
38235 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38236 outer_code, opno, speed);
38237 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38238 return true;
38241 /* FALLTHRU */
38243 case MINUS:
38244 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38246 /* ??? SSE cost should be used here. */
38247 *total = cost->fadd;
38248 return false;
38250 else if (X87_FLOAT_MODE_P (mode))
38252 *total = cost->fadd;
38253 return false;
38255 else if (FLOAT_MODE_P (mode))
38257 /* ??? SSE vector cost should be used here. */
38258 *total = cost->fadd;
38259 return false;
38261 /* FALLTHRU */
38263 case AND:
38264 case IOR:
38265 case XOR:
38266 if (GET_MODE_CLASS (mode) == MODE_INT
38267 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38269 *total = (cost->add * 2
38270 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
38271 << (GET_MODE (XEXP (x, 0)) != DImode))
38272 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
38273 << (GET_MODE (XEXP (x, 1)) != DImode)));
38274 return true;
38276 /* FALLTHRU */
38278 case NEG:
38279 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38281 /* ??? SSE cost should be used here. */
38282 *total = cost->fchs;
38283 return false;
38285 else if (X87_FLOAT_MODE_P (mode))
38287 *total = cost->fchs;
38288 return false;
38290 else if (FLOAT_MODE_P (mode))
38292 /* ??? SSE vector cost should be used here. */
38293 *total = cost->fchs;
38294 return false;
38296 /* FALLTHRU */
38298 case NOT:
38299 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38301 /* ??? Should be SSE vector operation cost. */
38302 /* At least for published AMD latencies, this really is the same
38303 as the latency for a simple fpu operation like fabs. */
38304 *total = cost->fabs;
38306 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38307 *total = cost->add * 2;
38308 else
38309 *total = cost->add;
38310 return false;
38312 case COMPARE:
38313 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
38314 && XEXP (XEXP (x, 0), 1) == const1_rtx
38315 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
38316 && XEXP (x, 1) == const0_rtx)
38318 /* This kind of construct is implemented using test[bwl].
38319 Treat it as if we had an AND. */
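/* E.g. a compare of bit 3 of a register against zero becomes
   testb $8, %al (illustrative), hence the AND-like cost.  */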
38320 *total = (cost->add
38321 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
38322 + rtx_cost (const1_rtx, outer_code, opno, speed));
38323 return true;
38325 return false;
38327 case FLOAT_EXTEND:
38328 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
38329 *total = 0;
38330 return false;
38332 case ABS:
38333 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38334 /* ??? SSE cost should be used here. */
38335 *total = cost->fabs;
38336 else if (X87_FLOAT_MODE_P (mode))
38337 *total = cost->fabs;
38338 else if (FLOAT_MODE_P (mode))
38339 /* ??? SSE vector cost should be used here. */
38340 *total = cost->fabs;
38341 return false;
38343 case SQRT:
38344 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38345 /* ??? SSE cost should be used here. */
38346 *total = cost->fsqrt;
38347 else if (X87_FLOAT_MODE_P (mode))
38348 *total = cost->fsqrt;
38349 else if (FLOAT_MODE_P (mode))
38350 /* ??? SSE vector cost should be used here. */
38351 *total = cost->fsqrt;
38352 return false;
38354 case UNSPEC:
38355 if (XINT (x, 1) == UNSPEC_TP)
38356 *total = 0;
38357 return false;
38359 case VEC_SELECT:
38360 case VEC_CONCAT:
38361 case VEC_DUPLICATE:
38362 /* ??? Assume all of these vector manipulation patterns are
38363 recognizable, in which case they all have pretty much the
38364 same cost. */
38365 *total = cost->fabs;
38366 return true;
38367 case VEC_MERGE:
38368 mask = XEXP (x, 2);
38369 /* This is a masked instruction; assume the same cost
38370 as the nonmasked variant. */
38371 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
38372 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
38373 else
38374 *total = cost->fabs;
38375 return true;
38377 default:
38378 return false;
38382 #if TARGET_MACHO
38384 static int current_machopic_label_num;
38386 /* Given a symbol name and its associated stub, write out the
38387 definition of the stub. */
38389 void
38390 machopic_output_stub (FILE *file, const char *symb, const char *stub)
38392 unsigned int length;
38393 char *binder_name, *symbol_name, lazy_ptr_name[32];
38394 int label = ++current_machopic_label_num;
38396 /* For 64-bit we shouldn't get here. */
38397 gcc_assert (!TARGET_64BIT);
38399 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
38400 symb = targetm.strip_name_encoding (symb);
38402 length = strlen (stub);
38403 binder_name = XALLOCAVEC (char, length + 32);
38404 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
38406 length = strlen (symb);
38407 symbol_name = XALLOCAVEC (char, length + 32);
38408 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
38410 sprintf (lazy_ptr_name, "L%d$lz", label);
38412 if (MACHOPIC_ATT_STUB)
38413 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
38414 else if (MACHOPIC_PURE)
38415 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
38416 else
38417 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
38419 fprintf (file, "%s:\n", stub);
38420 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38422 if (MACHOPIC_ATT_STUB)
38424 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
38426 else if (MACHOPIC_PURE)
38428 /* PIC stub. */
38429 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38430 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
38431 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
38432 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
38433 label, lazy_ptr_name, label);
38434 fprintf (file, "\tjmp\t*%%ecx\n");
38436 else
38437 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
38439 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
38440 it needs no stub-binding-helper. */
38441 if (MACHOPIC_ATT_STUB)
38442 return;
38444 fprintf (file, "%s:\n", binder_name);
38446 if (MACHOPIC_PURE)
38448 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
38449 fprintf (file, "\tpushl\t%%ecx\n");
38451 else
38452 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
38454 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
38456 /* N.B. Keep the correspondence of these
38457 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
38458 old-pic/new-pic/non-pic stubs; altering this will break
38459 compatibility with existing dylibs. */
38460 if (MACHOPIC_PURE)
38462 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38463 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
38465 else
38466 /* 16-byte -mdynamic-no-pic stub. */
38467 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
38469 fprintf (file, "%s:\n", lazy_ptr_name);
38470 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38471 fprintf (file, ASM_LONG "%s\n", binder_name);
38473 #endif /* TARGET_MACHO */
38475 /* Order the registers for register allocator. */
38477 void
38478 x86_order_regs_for_local_alloc (void)
38480 int pos = 0;
38481 int i;
38483 /* First allocate the local general purpose registers. */
38484 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38485 if (GENERAL_REGNO_P (i) && call_used_regs[i])
38486 reg_alloc_order [pos++] = i;
38488 /* Global general purpose registers. */
38489 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38490 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
38491 reg_alloc_order [pos++] = i;
38493 /* x87 registers come first in case we are doing FP math
38494 using them. */
38495 if (!TARGET_SSE_MATH)
38496 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38497 reg_alloc_order [pos++] = i;
38499 /* SSE registers. */
38500 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
38501 reg_alloc_order [pos++] = i;
38502 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
38503 reg_alloc_order [pos++] = i;
38505 /* Extended REX SSE registers. */
38506 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
38507 reg_alloc_order [pos++] = i;
38509 /* Mask registers. */
38510 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
38511 reg_alloc_order [pos++] = i;
38513 /* x87 registers. */
38514 if (TARGET_SSE_MATH)
38515 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38516 reg_alloc_order [pos++] = i;
38518 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
38519 reg_alloc_order [pos++] = i;
38521 /* Initialize the rest of the array, as we do not allocate some
38522 registers at all. */
38523 while (pos < FIRST_PSEUDO_REGISTER)
38524 reg_alloc_order [pos++] = 0;
38527 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
38528 in struct attribute_spec.handler. */
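/* Illustrative usage (32-bit only):

     struct big f (void) __attribute__ ((callee_pop_aggregate_return (1)));

   where the argument says whether the callee (1) or the caller (0) pops
   the hidden aggregate-return pointer; the checks below enforce that it
   is an integer constant of 0 or 1.  */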
38529 static tree
38530 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
38531 tree args,
38532 int flags ATTRIBUTE_UNUSED,
38533 bool *no_add_attrs)
38535 if (TREE_CODE (*node) != FUNCTION_TYPE
38536 && TREE_CODE (*node) != METHOD_TYPE
38537 && TREE_CODE (*node) != FIELD_DECL
38538 && TREE_CODE (*node) != TYPE_DECL)
38540 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38541 name);
38542 *no_add_attrs = true;
38543 return NULL_TREE;
38545 if (TARGET_64BIT)
38547 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
38548 name);
38549 *no_add_attrs = true;
38550 return NULL_TREE;
38552 if (is_attribute_p ("callee_pop_aggregate_return", name))
38554 tree cst;
38556 cst = TREE_VALUE (args);
38557 if (TREE_CODE (cst) != INTEGER_CST)
38559 warning (OPT_Wattributes,
38560 "%qE attribute requires an integer constant argument",
38561 name);
38562 *no_add_attrs = true;
38564 else if (compare_tree_int (cst, 0) != 0
38565 && compare_tree_int (cst, 1) != 0)
38567 warning (OPT_Wattributes,
38568 "argument to %qE attribute is neither zero, nor one",
38569 name);
38570 *no_add_attrs = true;
38573 return NULL_TREE;
38576 return NULL_TREE;
38579 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
38580 struct attribute_spec.handler. */
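/* Illustrative usage:

     void f (void) __attribute__ ((ms_abi));

   selects the Microsoft calling convention for f even when the default
   ABI is the SYSV one; sysv_abi works the other way round, and the two
   attributes are rejected in combination below.  */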
38581 static tree
38582 ix86_handle_abi_attribute (tree *node, tree name,
38583 tree args ATTRIBUTE_UNUSED,
38584 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38586 if (TREE_CODE (*node) != FUNCTION_TYPE
38587 && TREE_CODE (*node) != METHOD_TYPE
38588 && TREE_CODE (*node) != FIELD_DECL
38589 && TREE_CODE (*node) != TYPE_DECL)
38591 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38592 name);
38593 *no_add_attrs = true;
38594 return NULL_TREE;
38597 /* Can combine regparm with all attributes but fastcall. */
38598 if (is_attribute_p ("ms_abi", name))
38600 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
38602 error ("ms_abi and sysv_abi attributes are not compatible");
38605 return NULL_TREE;
38607 else if (is_attribute_p ("sysv_abi", name))
38609 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
38611 error ("ms_abi and sysv_abi attributes are not compatible");
38614 return NULL_TREE;
38617 return NULL_TREE;
38620 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
38621 struct attribute_spec.handler. */
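/* Illustrative usage:

     struct __attribute__ ((ms_struct)) S { char c; int i : 4; };

   requests the Microsoft record layout for S, while gcc_struct requests
   the native GCC layout; combining the two is rejected below.  */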
38622 static tree
38623 ix86_handle_struct_attribute (tree *node, tree name,
38624 tree args ATTRIBUTE_UNUSED,
38625 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38627 tree *type = NULL;
38628 if (DECL_P (*node))
38630 if (TREE_CODE (*node) == TYPE_DECL)
38631 type = &TREE_TYPE (*node);
38633 else
38634 type = node;
38636 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
38638 warning (OPT_Wattributes, "%qE attribute ignored",
38639 name);
38640 *no_add_attrs = true;
38643 else if ((is_attribute_p ("ms_struct", name)
38644 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
38645 || ((is_attribute_p ("gcc_struct", name)
38646 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
38648 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
38649 name);
38650 *no_add_attrs = true;
38653 return NULL_TREE;
38656 static tree
38657 ix86_handle_fndecl_attribute (tree *node, tree name,
38658 tree args ATTRIBUTE_UNUSED,
38659 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38661 if (TREE_CODE (*node) != FUNCTION_DECL)
38663 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38664 name);
38665 *no_add_attrs = true;
38667 return NULL_TREE;
38670 static bool
38671 ix86_ms_bitfield_layout_p (const_tree record_type)
38673 return ((TARGET_MS_BITFIELD_LAYOUT
38674 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
38675 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
38678 /* Returns an expression indicating where the this parameter is
38679 located on entry to the FUNCTION. */
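/* For example (illustrative): a 32-bit fastcall or thiscall method gets
   THIS in %ecx, the plain ia32 ABI finds it at 4(%esp), and in 64-bit
   mode it arrives in the first integer parameter register (%rdi for the
   SYSV ABI, %rcx for the MS ABI), as computed below.  */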
38681 static rtx
38682 x86_this_parameter (tree function)
38684 tree type = TREE_TYPE (function);
38685 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
38686 int nregs;
38688 if (TARGET_64BIT)
38690 const int *parm_regs;
38692 if (ix86_function_type_abi (type) == MS_ABI)
38693 parm_regs = x86_64_ms_abi_int_parameter_registers;
38694 else
38695 parm_regs = x86_64_int_parameter_registers;
38696 return gen_rtx_REG (Pmode, parm_regs[aggr]);
38699 nregs = ix86_function_regparm (type, function);
38701 if (nregs > 0 && !stdarg_p (type))
38703 int regno;
38704 unsigned int ccvt = ix86_get_callcvt (type);
38706 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38707 regno = aggr ? DX_REG : CX_REG;
38708 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38710 regno = CX_REG;
38711 if (aggr)
38712 return gen_rtx_MEM (SImode,
38713 plus_constant (Pmode, stack_pointer_rtx, 4));
38715 else
38717 regno = AX_REG;
38718 if (aggr)
38720 regno = DX_REG;
38721 if (nregs == 1)
38722 return gen_rtx_MEM (SImode,
38723 plus_constant (Pmode,
38724 stack_pointer_rtx, 4));
38727 return gen_rtx_REG (SImode, regno);
38730 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
38731 aggr ? 8 : 4));
38734 /* Determine whether x86_output_mi_thunk can succeed. */
38736 static bool
38737 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
38738 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
38739 HOST_WIDE_INT vcall_offset, const_tree function)
38741 /* 64-bit can handle anything. */
38742 if (TARGET_64BIT)
38743 return true;
38745 /* For 32-bit, everything's fine if we have one free register. */
38746 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
38747 return true;
38749 /* Need a free register for vcall_offset. */
38750 if (vcall_offset)
38751 return false;
38753 /* Need a free register for GOT references. */
38754 if (flag_pic && !targetm.binds_local_p (function))
38755 return false;
38757 /* Otherwise ok. */
38758 return true;
38761 /* Output the assembler code for a thunk function. THUNK_DECL is the
38762 declaration for the thunk function itself, FUNCTION is the decl for
38763 the target function. DELTA is an immediate constant offset to be
38764 added to THIS. If VCALL_OFFSET is nonzero, the word at
38765 *(*this + vcall_offset) should be added to THIS. */
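/* Sketch of typical output (illustrative; 64-bit SYSV ABI, DELTA == -8,
   no VCALL_OFFSET):

       addq    $-8, %rdi
       jmp     <function>

   With a nonzero VCALL_OFFSET the vtable word at *(*this + vcall_offset)
   is first loaded into a scratch register and added to THIS.  */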
38767 static void
38768 x86_output_mi_thunk (FILE *file,
38769 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
38770 HOST_WIDE_INT vcall_offset, tree function)
38772 rtx this_param = x86_this_parameter (function);
38773 rtx this_reg, tmp, fnaddr;
38774 unsigned int tmp_regno;
38776 if (TARGET_64BIT)
38777 tmp_regno = R10_REG;
38778 else
38780 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
38781 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38782 tmp_regno = AX_REG;
38783 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38784 tmp_regno = DX_REG;
38785 else
38786 tmp_regno = CX_REG;
38789 emit_note (NOTE_INSN_PROLOGUE_END);
38791 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
38792 pull it in now and let DELTA benefit. */
38793 if (REG_P (this_param))
38794 this_reg = this_param;
38795 else if (vcall_offset)
38797 /* Put the this parameter into %eax. */
38798 this_reg = gen_rtx_REG (Pmode, AX_REG);
38799 emit_move_insn (this_reg, this_param);
38801 else
38802 this_reg = NULL_RTX;
38804 /* Adjust the this parameter by a fixed constant. */
38805 if (delta)
38807 rtx delta_rtx = GEN_INT (delta);
38808 rtx delta_dst = this_reg ? this_reg : this_param;
38810 if (TARGET_64BIT)
38812 if (!x86_64_general_operand (delta_rtx, Pmode))
38814 tmp = gen_rtx_REG (Pmode, tmp_regno);
38815 emit_move_insn (tmp, delta_rtx);
38816 delta_rtx = tmp;
38820 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
38823 /* Adjust the this parameter by a value stored in the vtable. */
38824 if (vcall_offset)
38826 rtx vcall_addr, vcall_mem, this_mem;
38828 tmp = gen_rtx_REG (Pmode, tmp_regno);
38830 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
38831 if (Pmode != ptr_mode)
38832 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
38833 emit_move_insn (tmp, this_mem);
38835 /* Adjust the this parameter. */
38836 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
38837 if (TARGET_64BIT
38838 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
38840 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
38841 emit_move_insn (tmp2, GEN_INT (vcall_offset));
38842 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
38845 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
38846 if (Pmode != ptr_mode)
38847 emit_insn (gen_addsi_1_zext (this_reg,
38848 gen_rtx_REG (ptr_mode,
38849 REGNO (this_reg)),
38850 vcall_mem));
38851 else
38852 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
38855 /* If necessary, drop THIS back to its stack slot. */
38856 if (this_reg && this_reg != this_param)
38857 emit_move_insn (this_param, this_reg);
38859 fnaddr = XEXP (DECL_RTL (function), 0);
38860 if (TARGET_64BIT)
38862 if (!flag_pic || targetm.binds_local_p (function)
38863 || TARGET_PECOFF)
38865 else
38867 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
38868 tmp = gen_rtx_CONST (Pmode, tmp);
38869 fnaddr = gen_const_mem (Pmode, tmp);
38872 else
38874 if (!flag_pic || targetm.binds_local_p (function))
38876 #if TARGET_MACHO
38877 else if (TARGET_MACHO)
38879 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
38880 fnaddr = XEXP (fnaddr, 0);
38882 #endif /* TARGET_MACHO */
38883 else
38885 tmp = gen_rtx_REG (Pmode, CX_REG);
38886 output_set_got (tmp, NULL_RTX);
38888 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
38889 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
38890 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
38891 fnaddr = gen_const_mem (Pmode, fnaddr);
38895 /* Our sibling call patterns do not allow memories, because we have no
38896 predicate that can distinguish between frame and non-frame memory.
38897 For our purposes here, we can get away with (ab)using a jump pattern,
38898 because we're going to do no optimization. */
38899 if (MEM_P (fnaddr))
38901 if (sibcall_insn_operand (fnaddr, word_mode))
38903 tmp = gen_rtx_CALL (VOIDmode, fnaddr, const0_rtx);
38904 tmp = emit_call_insn (tmp);
38905 SIBLING_CALL_P (tmp) = 1;
38907 else
38908 emit_jump_insn (gen_indirect_jump (fnaddr));
38910 else
38912 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
38913 fnaddr = legitimize_pic_address (fnaddr,
38914 gen_rtx_REG (Pmode, tmp_regno));
38916 if (!sibcall_insn_operand (fnaddr, word_mode))
38918 tmp = gen_rtx_REG (word_mode, tmp_regno);
38919 if (GET_MODE (fnaddr) != word_mode)
38920 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
38921 emit_move_insn (tmp, fnaddr);
38922 fnaddr = tmp;
38925 tmp = gen_rtx_MEM (QImode, fnaddr);
38926 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
38927 tmp = emit_call_insn (tmp);
38928 SIBLING_CALL_P (tmp) = 1;
38930 emit_barrier ();
38932 /* Emit just enough of rest_of_compilation to get the insns emitted.
38933 Note that use_thunk calls assemble_start_function et al. */
38934 tmp = get_insns ();
38935 shorten_branches (tmp);
38936 final_start_function (tmp, file, 1);
38937 final (tmp, file, 1);
38938 final_end_function ();
38941 static void
38942 x86_file_start (void)
38944 default_file_start ();
38945 if (TARGET_16BIT)
38946 fputs ("\t.code16gcc\n", asm_out_file);
38947 #if TARGET_MACHO
38948 darwin_file_start ();
38949 #endif
38950 if (X86_FILE_START_VERSION_DIRECTIVE)
38951 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
38952 if (X86_FILE_START_FLTUSED)
38953 fputs ("\t.global\t__fltused\n", asm_out_file);
38954 if (ix86_asm_dialect == ASM_INTEL)
38955 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
38959 x86_field_alignment (tree field, int computed)
38961 enum machine_mode mode;
38962 tree type = TREE_TYPE (field);
38964 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
38965 return computed;
38966 mode = TYPE_MODE (strip_array_types (type));
38967 if (mode == DFmode || mode == DCmode
38968 || GET_MODE_CLASS (mode) == MODE_INT
38969 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
38970 return MIN (32, computed);
38971 return computed;
38974 /* Output assembler code to FILE to increment profiler label # LABELNO
38975 for profiling a function entry. */
38976 void
38977 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
38979 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
38980 : MCOUNT_NAME);
38982 if (TARGET_64BIT)
38984 #ifndef NO_PROFILE_COUNTERS
38985 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
38986 #endif
38988 if (!TARGET_PECOFF && flag_pic)
38989 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
38990 else
38991 fprintf (file, "\tcall\t%s\n", mcount_name);
38993 else if (flag_pic)
38995 #ifndef NO_PROFILE_COUNTERS
38996 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
38997 LPREFIX, labelno);
38998 #endif
38999 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
39001 else
39003 #ifndef NO_PROFILE_COUNTERS
39004 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
39005 LPREFIX, labelno);
39006 #endif
39007 fprintf (file, "\tcall\t%s\n", mcount_name);
39011 /* We don't have exact information about the insn sizes, but we may assume
39012 quite safely that we are informed about all 1 byte insns and memory
39013 address sizes. This is enough to eliminate unnecessary padding in
39014 99% of cases. */
39016 static int
39017 min_insn_size (rtx insn)
39019 int l = 0, len;
39021 if (!INSN_P (insn) || !active_insn_p (insn))
39022 return 0;
39024 /* Discard alignments we've emitted, and jump instructions. */
39025 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
39026 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
39027 return 0;
39029 /* Important case - calls are always 5 bytes.
39030 It is common to have many calls in a row. */
39031 if (CALL_P (insn)
39032 && symbolic_reference_mentioned_p (PATTERN (insn))
39033 && !SIBLING_CALL_P (insn))
39034 return 5;
39035 len = get_attr_length (insn);
39036 if (len <= 1)
39037 return 1;
39039 /* For normal instructions we rely on get_attr_length being exact,
39040 with a few exceptions. */
39041 if (!JUMP_P (insn))
39043 enum attr_type type = get_attr_type (insn);
39045 switch (type)
39047 case TYPE_MULTI:
39048 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
39049 || asm_noperands (PATTERN (insn)) >= 0)
39050 return 0;
39051 break;
39052 case TYPE_OTHER:
39053 case TYPE_FCMP:
39054 break;
39055 default:
39056 /* Otherwise trust get_attr_length. */
39057 return len;
39060 l = get_attr_length_address (insn);
39061 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
39062 l = 4;
39064 if (l)
39065 return 1+l;
39066 else
39067 return 2;
39070 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39072 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps
39073 in a 16-byte window. */
39075 static void
39076 ix86_avoid_jump_mispredicts (void)
39078 rtx insn, start = get_insns ();
39079 int nbytes = 0, njumps = 0;
39080 int isjump = 0;
39082 /* Look for all minimal intervals of instructions containing 4 jumps.
39083 The intervals are bounded by START and INSN.  NBYTES is the total
39084 size of the instructions in the interval, including INSN and not
39085 including START.  When NBYTES is smaller than 16, it is possible
39086 that the ends of START and INSN land in the same 16-byte page.
39088 The smallest offset at which INSN can start is when START ends at
39089 offset 0; the offset of INSN is then NBYTES - sizeof (INSN), so we
39090 add a p2align to the 16-byte window with max skip 15 - NBYTES + sizeof (INSN).
39092 Don't consider an asm goto as a jump: while it can contain a jump, it
39093 doesn't have to, since control can transfer to its label(s) by other
39094 means; we also estimate the minimum length of all asm stmts as 0. */
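/* Worked example (illustrative): if the test below fires with
   NBYTES == 12 and INSN being a 2-byte jump, we emit a pad of
   15 - 12 + 2 == 5 bytes, which pushes INSN out of the 16-byte page
   holding the previous three jumps.  */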
39095 for (insn = start; insn; insn = NEXT_INSN (insn))
39097 int min_size;
39099 if (LABEL_P (insn))
39101 int align = label_to_alignment (insn);
39102 int max_skip = label_to_max_skip (insn);
39104 if (max_skip > 15)
39105 max_skip = 15;
39106 /* If align > 3, only up to 16 - max_skip - 1 bytes can already be
39107 in the current 16-byte page, because otherwise
39108 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
39109 bytes to reach a 16-byte boundary. */
39110 if (align <= 0
39111 || (align <= 3 && max_skip != (1 << align) - 1))
39112 max_skip = 0;
39113 if (dump_file)
39114 fprintf (dump_file, "Label %i with max_skip %i\n",
39115 INSN_UID (insn), max_skip);
39116 if (max_skip)
39118 while (nbytes + max_skip >= 16)
39120 start = NEXT_INSN (start);
39121 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39122 || CALL_P (start))
39123 njumps--, isjump = 1;
39124 else
39125 isjump = 0;
39126 nbytes -= min_insn_size (start);
39129 continue;
39132 min_size = min_insn_size (insn);
39133 nbytes += min_size;
39134 if (dump_file)
39135 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
39136 INSN_UID (insn), min_size);
39137 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
39138 || CALL_P (insn))
39139 njumps++;
39140 else
39141 continue;
39143 while (njumps > 3)
39145 start = NEXT_INSN (start);
39146 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39147 || CALL_P (start))
39148 njumps--, isjump = 1;
39149 else
39150 isjump = 0;
39151 nbytes -= min_insn_size (start);
39153 gcc_assert (njumps >= 0);
39154 if (dump_file)
39155 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
39156 INSN_UID (start), INSN_UID (insn), nbytes);
39158 if (njumps == 3 && isjump && nbytes < 16)
39160 int padsize = 15 - nbytes + min_insn_size (insn);
39162 if (dump_file)
39163 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
39164 INSN_UID (insn), padsize);
39165 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
39169 #endif
39171 /* AMD Athlon works faster
39172 when RET is not the destination of a conditional jump or directly
39173 preceded by another jump instruction.  We avoid the penalty by
39174 inserting a NOP just before RET instructions in such cases. */
39175 static void
39176 ix86_pad_returns (void)
39178 edge e;
39179 edge_iterator ei;
39181 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39183 basic_block bb = e->src;
39184 rtx ret = BB_END (bb);
39185 rtx prev;
39186 bool replace = false;
39188 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
39189 || optimize_bb_for_size_p (bb))
39190 continue;
39191 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
39192 if (active_insn_p (prev) || LABEL_P (prev))
39193 break;
39194 if (prev && LABEL_P (prev))
39196 edge e;
39197 edge_iterator ei;
39199 FOR_EACH_EDGE (e, ei, bb->preds)
39200 if (EDGE_FREQUENCY (e) && e->src->index >= 0
39201 && !(e->flags & EDGE_FALLTHRU))
39203 replace = true;
39204 break;
39207 if (!replace)
39209 prev = prev_active_insn (ret);
39210 if (prev
39211 && ((JUMP_P (prev) && any_condjump_p (prev))
39212 || CALL_P (prev)))
39213 replace = true;
39214 /* Empty functions get a branch mispredict even when
39215 the jump destination is not visible to us. */
39216 if (!prev && !optimize_function_for_size_p (cfun))
39217 replace = true;
39219 if (replace)
39221 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
39222 delete_insn (ret);
39227 /* Count the minimum number of instructions in BB. Return 4 if the
39228 number of instructions >= 4. */
39230 static int
39231 ix86_count_insn_bb (basic_block bb)
39233 rtx insn;
39234 int insn_count = 0;
39236 /* Count number of instructions in this block. Return 4 if the number
39237 of instructions >= 4. */
39238 FOR_BB_INSNS (bb, insn)
39240 /* A return only happens in exit blocks. */
39241 if (JUMP_P (insn)
39242 && ANY_RETURN_P (PATTERN (insn)))
39243 break;
39245 if (NONDEBUG_INSN_P (insn)
39246 && GET_CODE (PATTERN (insn)) != USE
39247 && GET_CODE (PATTERN (insn)) != CLOBBER)
39249 insn_count++;
39250 if (insn_count >= 4)
39251 return insn_count;
39255 return insn_count;
39259 /* Count the minimum number of instructions in code path in BB.
39260 Return 4 if the number of instructions >= 4. */
39262 static int
39263 ix86_count_insn (basic_block bb)
39265 edge e;
39266 edge_iterator ei;
39267 int min_prev_count;
39269 /* Only bother counting instructions along paths with no
39270 more than 2 basic blocks between entry and exit. Given
39271 that BB has an edge to exit, determine if a predecessor
39272 of BB has an edge from entry. If so, compute the number
39273 of instructions in the predecessor block. If there
39274 happen to be multiple such blocks, compute the minimum. */
39275 min_prev_count = 4;
39276 FOR_EACH_EDGE (e, ei, bb->preds)
39278 edge prev_e;
39279 edge_iterator prev_ei;
39281 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39283 min_prev_count = 0;
39284 break;
39286 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
39288 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39290 int count = ix86_count_insn_bb (e->src);
39291 if (count < min_prev_count)
39292 min_prev_count = count;
39293 break;
39298 if (min_prev_count < 4)
39299 min_prev_count += ix86_count_insn_bb (bb);
39301 return min_prev_count;
39304 /* Pad short function to 4 instructions. */
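/* Worked example (illustrative): a function with only two real insns
   gets gen_nops (GEN_INT (2 * (4 - 2))), i.e. 4 NOPs, emitted just
   before its epilogue; two NOPs count as one instruction here.  */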
39306 static void
39307 ix86_pad_short_function (void)
39309 edge e;
39310 edge_iterator ei;
39312 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39314 rtx ret = BB_END (e->src);
39315 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
39317 int insn_count = ix86_count_insn (e->src);
39319 /* Pad short function. */
39320 if (insn_count < 4)
39322 rtx insn = ret;
39324 /* Find epilogue. */
39325 while (insn
39326 && (!NOTE_P (insn)
39327 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
39328 insn = PREV_INSN (insn);
39330 if (!insn)
39331 insn = ret;
39333 /* Two NOPs count as one instruction. */
39334 insn_count = 2 * (4 - insn_count);
39335 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
39341 /* Fix up a Windows system unwinder issue. If an EH region falls through into
39342 the epilogue, the Windows system unwinder will apply epilogue logic and
39343 produce incorrect offsets. This can be avoided by adding a nop between
39344 the last insn that can throw and the first insn of the epilogue. */
39346 static void
39347 ix86_seh_fixup_eh_fallthru (void)
39349 edge e;
39350 edge_iterator ei;
39352 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39354 rtx insn, next;
39356 /* Find the beginning of the epilogue. */
39357 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
39358 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
39359 break;
39360 if (insn == NULL)
39361 continue;
39363 /* We only care about preceding insns that can throw. */
39364 insn = prev_active_insn (insn);
39365 if (insn == NULL || !can_throw_internal (insn))
39366 continue;
39368 /* Do not separate calls from their debug information. */
39369 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
39370 if (NOTE_P (next)
39371 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
39372 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
39373 insn = next;
39374 else
39375 break;
39377 emit_insn_after (gen_nops (const1_rtx), insn);
39381 /* Implement machine specific optimizations.  We implement padding of returns
39382 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
39383 static void
39384 ix86_reorg (void)
39386 /* We are freeing block_for_insn in the toplev to keep compatibility
39387 with old MDEP_REORGS that are not CFG based. Recompute it now. */
39388 compute_bb_for_insn ();
39390 if (TARGET_SEH && current_function_has_exception_handlers ())
39391 ix86_seh_fixup_eh_fallthru ();
39393 if (optimize && optimize_function_for_speed_p (cfun))
39395 if (TARGET_PAD_SHORT_FUNCTION)
39396 ix86_pad_short_function ();
39397 else if (TARGET_PAD_RETURNS)
39398 ix86_pad_returns ();
39399 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39400 if (TARGET_FOUR_JUMP_LIMIT)
39401 ix86_avoid_jump_mispredicts ();
39402 #endif
39406 /* Return nonzero when a QImode register that must be represented via a REX
39407 prefix is used. */
39408 bool
39409 x86_extended_QIreg_mentioned_p (rtx insn)
39411 int i;
39412 extract_insn_cached (insn);
39413 for (i = 0; i < recog_data.n_operands; i++)
39414 if (GENERAL_REG_P (recog_data.operand[i])
39415 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
39416 return true;
39417 return false;
39420 /* Return nonzero when P points to a register encoded via a REX prefix.
39421 Called via for_each_rtx. */
39422 static int
39423 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
39425 unsigned int regno;
39426 if (!REG_P (*p))
39427 return 0;
39428 regno = REGNO (*p);
39429 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
39432 /* Return true when INSN mentions a register that must be encoded using
39433 a REX prefix. */
39434 bool
39435 x86_extended_reg_mentioned_p (rtx insn)
39437 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
39438 extended_reg_mentioned_1, NULL);
39441 /* If profitable, negate (without causing overflow) integer constant
39442 of mode MODE at location LOC. Return true in this case. */
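/* E.g. (illustrative): -4 is negated so the caller can emit
   `subl $4,%eax' instead of `addl $-4,%eax'; 128 is negated as well
   because -128 still fits the sign-extended 8-bit immediate form
   while 128 does not.  */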
39443 bool
39444 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
39446 HOST_WIDE_INT val;
39448 if (!CONST_INT_P (*loc))
39449 return false;
39451 switch (mode)
39453 case DImode:
39454 /* DImode x86_64 constants must fit in 32 bits. */
39455 gcc_assert (x86_64_immediate_operand (*loc, mode));
39457 mode = SImode;
39458 break;
39460 case SImode:
39461 case HImode:
39462 case QImode:
39463 break;
39465 default:
39466 gcc_unreachable ();
39469 /* Avoid overflows. */
39470 if (mode_signbit_p (mode, *loc))
39471 return false;
39473 val = INTVAL (*loc);
39475 /* Make things pretty: emit `subl $4,%eax' rather than `addl $-4,%eax'.
39476 Exceptions: -128 encodes in fewer bytes than 128 does, so swap sign and op. */
39477 if ((val < 0 && val != -128)
39478 || val == 128)
39480 *loc = GEN_INT (-val);
39481 return true;
39484 return false;
39487 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
39488 optabs would emit if we didn't have TFmode patterns. */
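/* Rough C equivalent of the emitted sequence (illustrative; DImode
   input, double result):

     if ((long long) in >= 0)
       out = (double) in;
     else
       {
         unsigned long long half = (in >> 1) | (in & 1);
         out = (double) half;
         out = out + out;
       }

   OR-ing the low bit back into the halved value keeps the final
   doubling correctly rounded.  */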
39490 void
39491 x86_emit_floatuns (rtx operands[2])
39493 rtx neglab, donelab, i0, i1, f0, in, out;
39494 enum machine_mode mode, inmode;
39496 inmode = GET_MODE (operands[1]);
39497 gcc_assert (inmode == SImode || inmode == DImode);
39499 out = operands[0];
39500 in = force_reg (inmode, operands[1]);
39501 mode = GET_MODE (out);
39502 neglab = gen_label_rtx ();
39503 donelab = gen_label_rtx ();
39504 f0 = gen_reg_rtx (mode);
39506 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
39508 expand_float (out, in, 0);
39510 emit_jump_insn (gen_jump (donelab));
39511 emit_barrier ();
39513 emit_label (neglab);
39515 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
39516 1, OPTAB_DIRECT);
39517 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
39518 1, OPTAB_DIRECT);
39519 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
39521 expand_float (f0, i0, 0);
39523 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
39525 emit_label (donelab);
39528 /* AVX512F supports 64-byte integer vector operations,
39529 so the longest vector we are faced with is V64QImode. */
39530 #define MAX_VECT_LEN 64
39532 struct expand_vec_perm_d
39534 rtx target, op0, op1;
39535 unsigned char perm[MAX_VECT_LEN];
39536 enum machine_mode vmode;
39537 unsigned char nelt;
39538 bool one_operand_p;
39539 bool testing_p;
39542 static bool canonicalize_perm (struct expand_vec_perm_d *d);
39543 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
39544 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
39546 /* Get a vector mode of the same size as the original but with elements
39547 twice as wide. This is only guaranteed to apply to integral vectors. */
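/* E.g. V16QImode -> V8HImode and V8HImode -> V4SImode (illustrative).  */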
39549 static inline enum machine_mode
39550 get_mode_wider_vector (enum machine_mode o)
39552 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
39553 enum machine_mode n = GET_MODE_WIDER_MODE (o);
39554 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
39555 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
39556 return n;
39559 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
39560 fill target with val via vec_duplicate. */
39562 static bool
39563 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
39565 bool ok;
39566 rtx insn, dup;
39568 /* First attempt to recognize VAL as-is. */
39569 dup = gen_rtx_VEC_DUPLICATE (mode, val);
39570 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
39571 if (recog_memoized (insn) < 0)
39573 rtx seq;
39574 /* If that fails, force VAL into a register. */
39576 start_sequence ();
39577 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
39578 seq = get_insns ();
39579 end_sequence ();
39580 if (seq)
39581 emit_insn_before (seq, insn);
39583 ok = recog_memoized (insn) >= 0;
39584 gcc_assert (ok);
39586 return true;
39589 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39590 with all elements equal to VAR. Return true if successful. */
39592 static bool
39593 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
39594 rtx target, rtx val)
39596 bool ok;
39598 switch (mode)
39600 case V2SImode:
39601 case V2SFmode:
39602 if (!mmx_ok)
39603 return false;
39604 /* FALLTHRU */
39606 case V4DFmode:
39607 case V4DImode:
39608 case V8SFmode:
39609 case V8SImode:
39610 case V2DFmode:
39611 case V2DImode:
39612 case V4SFmode:
39613 case V4SImode:
39614 case V16SImode:
39615 case V8DImode:
39616 case V16SFmode:
39617 case V8DFmode:
39618 return ix86_vector_duplicate_value (mode, target, val);
39620 case V4HImode:
39621 if (!mmx_ok)
39622 return false;
39623 if (TARGET_SSE || TARGET_3DNOW_A)
39625 rtx x;
39627 val = gen_lowpart (SImode, val);
39628 x = gen_rtx_TRUNCATE (HImode, val);
39629 x = gen_rtx_VEC_DUPLICATE (mode, x);
39630 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39631 return true;
39633 goto widen;
39635 case V8QImode:
39636 if (!mmx_ok)
39637 return false;
39638 goto widen;
39640 case V8HImode:
39641 if (TARGET_SSE2)
39643 struct expand_vec_perm_d dperm;
39644 rtx tmp1, tmp2;
39646 permute:
39647 memset (&dperm, 0, sizeof (dperm));
39648 dperm.target = target;
39649 dperm.vmode = mode;
39650 dperm.nelt = GET_MODE_NUNITS (mode);
39651 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
39652 dperm.one_operand_p = true;
39654 /* Extend to SImode using a paradoxical SUBREG. */
39655 tmp1 = gen_reg_rtx (SImode);
39656 emit_move_insn (tmp1, gen_lowpart (SImode, val));
39658 /* Insert the SImode value as low element of a V4SImode vector. */
39659 tmp2 = gen_reg_rtx (V4SImode);
39660 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
39661 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
39663 ok = (expand_vec_perm_1 (&dperm)
39664 || expand_vec_perm_broadcast_1 (&dperm));
39665 gcc_assert (ok);
39666 return ok;
39668 goto widen;
39670 case V16QImode:
39671 if (TARGET_SSE2)
39672 goto permute;
39673 goto widen;
39675 widen:
39676 /* Replicate the value once into the next wider mode and recurse. */
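/* E.g. (illustrative) for V8QImode the QImode value V is first widened
   to the HImode word (V << 8) | V and the duplication is then done in
   V4HImode, which spans the same 8 bytes.  */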
39678 enum machine_mode smode, wsmode, wvmode;
39679 rtx x;
39681 smode = GET_MODE_INNER (mode);
39682 wvmode = get_mode_wider_vector (mode);
39683 wsmode = GET_MODE_INNER (wvmode);
39685 val = convert_modes (wsmode, smode, val, true);
39686 x = expand_simple_binop (wsmode, ASHIFT, val,
39687 GEN_INT (GET_MODE_BITSIZE (smode)),
39688 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39689 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
39691 x = gen_reg_rtx (wvmode);
39692 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
39693 gcc_assert (ok);
39694 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
39695 return ok;
39698 case V16HImode:
39699 case V32QImode:
39701 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
39702 rtx x = gen_reg_rtx (hvmode);
39704 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39705 gcc_assert (ok);
39707 x = gen_rtx_VEC_CONCAT (mode, x, x);
39708 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39710 return true;
39712 default:
39713 return false;
39717 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39718 whose ONE_VAR element is VAR, and other elements are zero. Return true
39719 if successful. */
39721 static bool
39722 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
39723 rtx target, rtx var, int one_var)
39725 enum machine_mode vsimode;
39726 rtx new_target;
39727 rtx x, tmp;
39728 bool use_vector_set = false;
39730 switch (mode)
39732 case V2DImode:
39733 /* For SSE4.1, we normally use vector set. But if the second
39734 element is zero and inter-unit moves are OK, we use movq
39735 instead. */
39736 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
39737 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
39738 && one_var == 0));
39739 break;
39740 case V16QImode:
39741 case V4SImode:
39742 case V4SFmode:
39743 use_vector_set = TARGET_SSE4_1;
39744 break;
39745 case V8HImode:
39746 use_vector_set = TARGET_SSE2;
39747 break;
39748 case V4HImode:
39749 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
39750 break;
39751 case V32QImode:
39752 case V16HImode:
39753 case V8SImode:
39754 case V8SFmode:
39755 case V4DFmode:
39756 use_vector_set = TARGET_AVX;
39757 break;
39758 case V4DImode:
39759 /* Use ix86_expand_vector_set in 64bit mode only. */
39760 use_vector_set = TARGET_AVX && TARGET_64BIT;
39761 break;
39762 default:
39763 break;
39766 if (use_vector_set)
39768 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
39769 var = force_reg (GET_MODE_INNER (mode), var);
39770 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39771 return true;
39774 switch (mode)
39776 case V2SFmode:
39777 case V2SImode:
39778 if (!mmx_ok)
39779 return false;
39780 /* FALLTHRU */
39782 case V2DFmode:
39783 case V2DImode:
39784 if (one_var != 0)
39785 return false;
39786 var = force_reg (GET_MODE_INNER (mode), var);
39787 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
39788 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39789 return true;
39791 case V4SFmode:
39792 case V4SImode:
39793 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
39794 new_target = gen_reg_rtx (mode);
39795 else
39796 new_target = target;
39797 var = force_reg (GET_MODE_INNER (mode), var);
39798 x = gen_rtx_VEC_DUPLICATE (mode, var);
39799 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
39800 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
39801 if (one_var != 0)
39803 /* We need to shuffle the value to the correct position, so
39804 create a new pseudo to store the intermediate result. */
39806 /* With SSE2, we can use the integer shuffle insns. */
39807 if (mode != V4SFmode && TARGET_SSE2)
39809 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
39810 const1_rtx,
39811 GEN_INT (one_var == 1 ? 0 : 1),
39812 GEN_INT (one_var == 2 ? 0 : 1),
39813 GEN_INT (one_var == 3 ? 0 : 1)));
39814 if (target != new_target)
39815 emit_move_insn (target, new_target);
39816 return true;
39819 /* Otherwise convert the intermediate result to V4SFmode and
39820 use the SSE1 shuffle instructions. */
39821 if (mode != V4SFmode)
39823 tmp = gen_reg_rtx (V4SFmode);
39824 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
39826 else
39827 tmp = new_target;
39829 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
39830 const1_rtx,
39831 GEN_INT (one_var == 1 ? 0 : 1),
39832 GEN_INT (one_var == 2 ? 0+4 : 1+4),
39833 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
39835 if (mode != V4SFmode)
39836 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
39837 else if (tmp != target)
39838 emit_move_insn (target, tmp);
39840 else if (target != new_target)
39841 emit_move_insn (target, new_target);
39842 return true;
39844 case V8HImode:
39845 case V16QImode:
39846 vsimode = V4SImode;
39847 goto widen;
39848 case V4HImode:
39849 case V8QImode:
39850 if (!mmx_ok)
39851 return false;
39852 vsimode = V2SImode;
39853 goto widen;
39854 widen:
39855 if (one_var != 0)
39856 return false;
39858 /* Zero extend the variable element to SImode and recurse. */
39859 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
39861 x = gen_reg_rtx (vsimode);
39862 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
39863 var, one_var))
39864 gcc_unreachable ();
39866 emit_move_insn (target, gen_lowpart (mode, x));
39867 return true;
39869 default:
39870 return false;
39874 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39875 consisting of the values in VALS. It is known that all elements
39876 except ONE_VAR are constants. Return true if successful. */
39878 static bool
39879 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
39880 rtx target, rtx vals, int one_var)
39882 rtx var = XVECEXP (vals, 0, one_var);
39883 enum machine_mode wmode;
39884 rtx const_vec, x;
39886 const_vec = copy_rtx (vals);
39887 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
39888 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
39890 switch (mode)
39892 case V2DFmode:
39893 case V2DImode:
39894 case V2SFmode:
39895 case V2SImode:
39896 /* For the two element vectors, it's just as easy to use
39897 the general case. */
39898 return false;
39900 case V4DImode:
39901 /* Use ix86_expand_vector_set in 64bit mode only. */
39902 if (!TARGET_64BIT)
39903 return false;
39904 case V4DFmode:
39905 case V8SFmode:
39906 case V8SImode:
39907 case V16HImode:
39908 case V32QImode:
39909 case V4SFmode:
39910 case V4SImode:
39911 case V8HImode:
39912 case V4HImode:
39913 break;
39915 case V16QImode:
39916 if (TARGET_SSE4_1)
39917 break;
39918 wmode = V8HImode;
39919 goto widen;
39920 case V8QImode:
39921 wmode = V4HImode;
39922 goto widen;
39923 widen:
39924 /* There's no way to set one QImode entry easily. Combine
39925 the variable value with its adjacent constant value, and
39926 promote to an HImode set. */
39927 x = XVECEXP (vals, 0, one_var ^ 1);
39928 if (one_var & 1)
39930 var = convert_modes (HImode, QImode, var, true);
39931 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
39932 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39933 x = GEN_INT (INTVAL (x) & 0xff);
39935 else
39937 var = convert_modes (HImode, QImode, var, true);
39938 x = gen_int_mode (INTVAL (x) << 8, HImode);
39940 if (x != const0_rtx)
39941 var = expand_simple_binop (HImode, IOR, var, x, var,
39942 1, OPTAB_LIB_WIDEN);
39944 x = gen_reg_rtx (wmode);
39945 emit_move_insn (x, gen_lowpart (wmode, const_vec));
39946 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
39948 emit_move_insn (target, gen_lowpart (mode, x));
39949 return true;
39951 default:
39952 return false;
39955 emit_move_insn (target, const_vec);
39956 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39957 return true;
39960 /* A subroutine of ix86_expand_vector_init_general. Use vector
39961 concatenate to handle the most general case: all values variable,
39962 and none identical. */
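/* Sketch of the recursion: for a V8SImode build the eight scalar
   operands are first combined pairwise into V2SImode vectors, those
   are concatenated pairwise into V4SImode halves, and a final
   VEC_CONCAT produces the V8SImode result.  */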
39964 static void
39965 ix86_expand_vector_init_concat (enum machine_mode mode,
39966 rtx target, rtx *ops, int n)
39968 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
39969 rtx first[16], second[8], third[4];
39970 rtvec v;
39971 int i, j;
39973 switch (n)
39975 case 2:
39976 switch (mode)
39978 case V16SImode:
39979 cmode = V8SImode;
39980 break;
39981 case V16SFmode:
39982 cmode = V8SFmode;
39983 break;
39984 case V8DImode:
39985 cmode = V4DImode;
39986 break;
39987 case V8DFmode:
39988 cmode = V4DFmode;
39989 break;
39990 case V8SImode:
39991 cmode = V4SImode;
39992 break;
39993 case V8SFmode:
39994 cmode = V4SFmode;
39995 break;
39996 case V4DImode:
39997 cmode = V2DImode;
39998 break;
39999 case V4DFmode:
40000 cmode = V2DFmode;
40001 break;
40002 case V4SImode:
40003 cmode = V2SImode;
40004 break;
40005 case V4SFmode:
40006 cmode = V2SFmode;
40007 break;
40008 case V2DImode:
40009 cmode = DImode;
40010 break;
40011 case V2SImode:
40012 cmode = SImode;
40013 break;
40014 case V2DFmode:
40015 cmode = DFmode;
40016 break;
40017 case V2SFmode:
40018 cmode = SFmode;
40019 break;
40020 default:
40021 gcc_unreachable ();
40024 if (!register_operand (ops[1], cmode))
40025 ops[1] = force_reg (cmode, ops[1]);
40026 if (!register_operand (ops[0], cmode))
40027 ops[0] = force_reg (cmode, ops[0]);
40028 emit_insn (gen_rtx_SET (VOIDmode, target,
40029 gen_rtx_VEC_CONCAT (mode, ops[0],
40030 ops[1])));
40031 break;
40033 case 4:
40034 switch (mode)
40036 case V4DImode:
40037 cmode = V2DImode;
40038 break;
40039 case V4DFmode:
40040 cmode = V2DFmode;
40041 break;
40042 case V4SImode:
40043 cmode = V2SImode;
40044 break;
40045 case V4SFmode:
40046 cmode = V2SFmode;
40047 break;
40048 default:
40049 gcc_unreachable ();
40051 goto half;
40053 case 8:
40054 switch (mode)
40056 case V8DImode:
40057 cmode = V2DImode;
40058 hmode = V4DImode;
40059 break;
40060 case V8DFmode:
40061 cmode = V2DFmode;
40062 hmode = V4DFmode;
40063 break;
40064 case V8SImode:
40065 cmode = V2SImode;
40066 hmode = V4SImode;
40067 break;
40068 case V8SFmode:
40069 cmode = V2SFmode;
40070 hmode = V4SFmode;
40071 break;
40072 default:
40073 gcc_unreachable ();
40075 goto half;
40077 case 16:
40078 switch (mode)
40080 case V16SImode:
40081 cmode = V2SImode;
40082 hmode = V4SImode;
40083 gmode = V8SImode;
40084 break;
40085 case V16SFmode:
40086 cmode = V2SFmode;
40087 hmode = V4SFmode;
40088 gmode = V8SFmode;
40089 break;
40090 default:
40091 gcc_unreachable ();
40093 goto half;
40095 half:
40096 /* FIXME: We process inputs backward to help RA. PR 36222. */
40097 i = n - 1;
40098 j = (n >> 1) - 1;
40099 for (; i > 0; i -= 2, j--)
40101 first[j] = gen_reg_rtx (cmode);
40102 v = gen_rtvec (2, ops[i - 1], ops[i]);
40103 ix86_expand_vector_init (false, first[j],
40104 gen_rtx_PARALLEL (cmode, v));
40107 n >>= 1;
40108 if (n > 4)
40110 gcc_assert (hmode != VOIDmode);
40111 gcc_assert (gmode != VOIDmode);
40112 for (i = j = 0; i < n; i += 2, j++)
40114 second[j] = gen_reg_rtx (hmode);
40115 ix86_expand_vector_init_concat (hmode, second [j],
40116 &first [i], 2);
40118 n >>= 1;
40119 for (i = j = 0; i < n; i += 2, j++)
40121 third[j] = gen_reg_rtx (gmode);
40122 ix86_expand_vector_init_concat (gmode, third[j],
40123 &second[i], 2);
40125 n >>= 1;
40126 ix86_expand_vector_init_concat (mode, target, third, n);
40128 else if (n > 2)
40130 gcc_assert (hmode != VOIDmode);
40131 for (i = j = 0; i < n; i += 2, j++)
40133 second[j] = gen_reg_rtx (hmode);
40134 ix86_expand_vector_init_concat (hmode, second [j],
40135 &first [i], 2);
40137 n >>= 1;
40138 ix86_expand_vector_init_concat (mode, target, second, n);
40140 else
40141 ix86_expand_vector_init_concat (mode, target, first, n);
40142 break;
40144 default:
40145 gcc_unreachable ();
40149 /* A subroutine of ix86_expand_vector_init_general. Use vector
40150 interleave to handle the most general case: all values variable,
40151 and none identical. */
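/* Sketch of the approach: each loop iteration below packs two adjacent
   input elements into lanes 0 and 1 of one vector register (the first
   via an SImode lowpart move, the second via the vec_set pattern), and
   the partial vectors are then merged with interleave-low (punpckl)
   operations at successively wider element widths.  */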
40153 static void
40154 ix86_expand_vector_init_interleave (enum machine_mode mode,
40155 rtx target, rtx *ops, int n)
40157 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
40158 int i, j;
40159 rtx op0, op1;
40160 rtx (*gen_load_even) (rtx, rtx, rtx);
40161 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
40162 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
40164 switch (mode)
40166 case V8HImode:
40167 gen_load_even = gen_vec_setv8hi;
40168 gen_interleave_first_low = gen_vec_interleave_lowv4si;
40169 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40170 inner_mode = HImode;
40171 first_imode = V4SImode;
40172 second_imode = V2DImode;
40173 third_imode = VOIDmode;
40174 break;
40175 case V16QImode:
40176 gen_load_even = gen_vec_setv16qi;
40177 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
40178 gen_interleave_second_low = gen_vec_interleave_lowv4si;
40179 inner_mode = QImode;
40180 first_imode = V8HImode;
40181 second_imode = V4SImode;
40182 third_imode = V2DImode;
40183 break;
40184 default:
40185 gcc_unreachable ();
40188 for (i = 0; i < n; i++)
40190 /* Extend the odd element to SImode using a paradoxical SUBREG. */
40191 op0 = gen_reg_rtx (SImode);
40192 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
40194 /* Insert the SImode value as low element of V4SImode vector. */
40195 op1 = gen_reg_rtx (V4SImode);
40196 op0 = gen_rtx_VEC_MERGE (V4SImode,
40197 gen_rtx_VEC_DUPLICATE (V4SImode,
40198 op0),
40199 CONST0_RTX (V4SImode),
40200 const1_rtx);
40201 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
40203 /* Cast the V4SImode vector back to a vector in the original mode. */
40204 op0 = gen_reg_rtx (mode);
40205 emit_move_insn (op0, gen_lowpart (mode, op1));
40207 /* Load even elements into the second position. */
40208 emit_insn (gen_load_even (op0,
40209 force_reg (inner_mode,
40210 ops [i + i + 1]),
40211 const1_rtx));
40213 /* Cast vector to FIRST_IMODE vector. */
40214 ops[i] = gen_reg_rtx (first_imode);
40215 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
40218 /* Interleave low FIRST_IMODE vectors. */
40219 for (i = j = 0; i < n; i += 2, j++)
40221 op0 = gen_reg_rtx (first_imode);
40222 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
40224 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
40225 ops[j] = gen_reg_rtx (second_imode);
40226 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
40229 /* Interleave low SECOND_IMODE vectors. */
40230 switch (second_imode)
40232 case V4SImode:
40233 for (i = j = 0; i < n / 2; i += 2, j++)
40235 op0 = gen_reg_rtx (second_imode);
40236 emit_insn (gen_interleave_second_low (op0, ops[i],
40237 ops[i + 1]));
40239 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
40240 vector. */
40241 ops[j] = gen_reg_rtx (third_imode);
40242 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
40244 second_imode = V2DImode;
40245 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40246 /* FALLTHRU */
40248 case V2DImode:
40249 op0 = gen_reg_rtx (second_imode);
40250 emit_insn (gen_interleave_second_low (op0, ops[0],
40251 ops[1]));
40253 /* Cast the SECOND_IMODE vector back to a vector in the original
40254 mode. */
40255 emit_insn (gen_rtx_SET (VOIDmode, target,
40256 gen_lowpart (mode, op0)));
40257 break;
40259 default:
40260 gcc_unreachable ();
40264 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
40265 all values variable, and none identical. */
40267 static void
40268 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
40269 rtx target, rtx vals)
40271 rtx ops[64], op0, op1;
40272 enum machine_mode half_mode = VOIDmode;
40273 int n, i;
40275 switch (mode)
40277 case V2SFmode:
40278 case V2SImode:
40279 if (!mmx_ok && !TARGET_SSE)
40280 break;
40281 /* FALLTHRU */
40283 case V16SImode:
40284 case V16SFmode:
40285 case V8DFmode:
40286 case V8DImode:
40287 case V8SFmode:
40288 case V8SImode:
40289 case V4DFmode:
40290 case V4DImode:
40291 case V4SFmode:
40292 case V4SImode:
40293 case V2DFmode:
40294 case V2DImode:
40295 n = GET_MODE_NUNITS (mode);
40296 for (i = 0; i < n; i++)
40297 ops[i] = XVECEXP (vals, 0, i);
40298 ix86_expand_vector_init_concat (mode, target, ops, n);
40299 return;
40301 case V32QImode:
40302 half_mode = V16QImode;
40303 goto half;
40305 case V16HImode:
40306 half_mode = V8HImode;
40307 goto half;
40309 half:
40310 n = GET_MODE_NUNITS (mode);
40311 for (i = 0; i < n; i++)
40312 ops[i] = XVECEXP (vals, 0, i);
40313 op0 = gen_reg_rtx (half_mode);
40314 op1 = gen_reg_rtx (half_mode);
40315 ix86_expand_vector_init_interleave (half_mode, op0, ops,
40316 n >> 2);
40317 ix86_expand_vector_init_interleave (half_mode, op1,
40318 &ops [n >> 1], n >> 2);
40319 emit_insn (gen_rtx_SET (VOIDmode, target,
40320 gen_rtx_VEC_CONCAT (mode, op0, op1)));
40321 return;
40323 case V16QImode:
40324 if (!TARGET_SSE4_1)
40325 break;
40326 /* FALLTHRU */
40328 case V8HImode:
40329 if (!TARGET_SSE2)
40330 break;
40332 /* Don't use ix86_expand_vector_init_interleave if we can't
40333 move from GPR to SSE register directly. */
40334 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
40335 break;
40337 n = GET_MODE_NUNITS (mode);
40338 for (i = 0; i < n; i++)
40339 ops[i] = XVECEXP (vals, 0, i);
40340 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
40341 return;
40343 case V4HImode:
40344 case V8QImode:
40345 break;
40347 default:
40348 gcc_unreachable ();
40352 int i, j, n_elts, n_words, n_elt_per_word;
40353 enum machine_mode inner_mode;
40354 rtx words[4], shift;
40356 inner_mode = GET_MODE_INNER (mode);
40357 n_elts = GET_MODE_NUNITS (mode);
40358 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
40359 n_elt_per_word = n_elts / n_words;
40360 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
40362 for (i = 0; i < n_words; ++i)
40364 rtx word = NULL_RTX;
40366 for (j = 0; j < n_elt_per_word; ++j)
40368 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
40369 elt = convert_modes (word_mode, inner_mode, elt, true);
40371 if (j == 0)
40372 word = elt;
40373 else
40375 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
40376 word, 1, OPTAB_LIB_WIDEN);
40377 word = expand_simple_binop (word_mode, IOR, word, elt,
40378 word, 1, OPTAB_LIB_WIDEN);
40382 words[i] = word;
40385 if (n_words == 1)
40386 emit_move_insn (target, gen_lowpart (mode, words[0]));
40387 else if (n_words == 2)
40389 rtx tmp = gen_reg_rtx (mode);
40390 emit_clobber (tmp);
40391 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
40392 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
40393 emit_move_insn (target, tmp);
40395 else if (n_words == 4)
40397 rtx tmp = gen_reg_rtx (V4SImode);
40398 gcc_assert (word_mode == SImode);
40399 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
40400 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
40401 emit_move_insn (target, gen_lowpart (mode, tmp));
40403 else
40404 gcc_unreachable ();
40408 /* Initialize vector TARGET via VALS. Suppress the use of MMX
40409 instructions unless MMX_OK is true. */
40411 void
40412 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
40414 enum machine_mode mode = GET_MODE (target);
40415 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40416 int n_elts = GET_MODE_NUNITS (mode);
40417 int n_var = 0, one_var = -1;
40418 bool all_same = true, all_const_zero = true;
40419 int i;
40420 rtx x;
40422 for (i = 0; i < n_elts; ++i)
40424 x = XVECEXP (vals, 0, i);
40425 if (!(CONST_INT_P (x)
40426 || GET_CODE (x) == CONST_DOUBLE
40427 || GET_CODE (x) == CONST_FIXED))
40428 n_var++, one_var = i;
40429 else if (x != CONST0_RTX (inner_mode))
40430 all_const_zero = false;
40431 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
40432 all_same = false;
40435 /* Constants are best loaded from the constant pool. */
40436 if (n_var == 0)
40438 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
40439 return;
40442 /* If all values are identical, broadcast the value. */
40443 if (all_same
40444 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
40445 XVECEXP (vals, 0, 0)))
40446 return;
40448 /* Values where only one field is non-constant are best loaded from
40449 the pool and overwritten via move later. */
40450 if (n_var == 1)
40452 if (all_const_zero
40453 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
40454 XVECEXP (vals, 0, one_var),
40455 one_var))
40456 return;
40458 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
40459 return;
40462 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
40465 void
40466 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
40468 enum machine_mode mode = GET_MODE (target);
40469 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40470 enum machine_mode half_mode;
40471 bool use_vec_merge = false;
40472 rtx tmp;
40473 static rtx (*gen_extract[6][2]) (rtx, rtx)
40475 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
40476 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
40477 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
40478 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
40479 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
40480 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
40482 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
40484 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
40485 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
40486 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
40487 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
40488 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
40489 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
40491 int i, j, n;
40493 switch (mode)
40495 case V2SFmode:
40496 case V2SImode:
40497 if (mmx_ok)
40499 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40500 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
40501 if (elt == 0)
40502 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40503 else
40504 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40505 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40506 return;
40508 break;
40510 case V2DImode:
40511 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
40512 if (use_vec_merge)
40513 break;
40515 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40516 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
40517 if (elt == 0)
40518 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40519 else
40520 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40521 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40522 return;
40524 case V2DFmode:
40526 rtx op0, op1;
40528 /* For the two element vectors, we implement a VEC_CONCAT with
40529 the extraction of the other element. */
40531 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
40532 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
40534 if (elt == 0)
40535 op0 = val, op1 = tmp;
40536 else
40537 op0 = tmp, op1 = val;
40539 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
40540 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40542 return;
40544 case V4SFmode:
40545 use_vec_merge = TARGET_SSE4_1;
40546 if (use_vec_merge)
40547 break;
40549 switch (elt)
40551 case 0:
40552 use_vec_merge = true;
40553 break;
40555 case 1:
40556 /* tmp = target = A B C D */
40557 tmp = copy_to_reg (target);
40558 /* target = A A B B */
40559 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
40560 /* target = X A B B */
40561 ix86_expand_vector_set (false, target, val, 0);
40562 /* target = A X C D */
40563 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40564 const1_rtx, const0_rtx,
40565 GEN_INT (2+4), GEN_INT (3+4)));
40566 return;
40568 case 2:
40569 /* tmp = target = A B C D */
40570 tmp = copy_to_reg (target);
40571 /* tmp = X B C D */
40572 ix86_expand_vector_set (false, tmp, val, 0);
40573 /* target = A B X D */
40574 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40575 const0_rtx, const1_rtx,
40576 GEN_INT (0+4), GEN_INT (3+4)));
40577 return;
40579 case 3:
40580 /* tmp = target = A B C D */
40581 tmp = copy_to_reg (target);
40582 /* tmp = X B C D */
40583 ix86_expand_vector_set (false, tmp, val, 0);
40584 /* target = A B C X */
40585 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40586 const0_rtx, const1_rtx,
40587 GEN_INT (2+4), GEN_INT (0+4)));
40588 return;
40590 default:
40591 gcc_unreachable ();
40593 break;
40595 case V4SImode:
40596 use_vec_merge = TARGET_SSE4_1;
40597 if (use_vec_merge)
40598 break;
40600 /* Element 0 handled by vec_merge below. */
40601 if (elt == 0)
40603 use_vec_merge = true;
40604 break;
40607 if (TARGET_SSE2)
40609 /* With SSE2, use integer shuffles to swap element 0 and ELT,
40610 store into element 0, then shuffle them back. */
40612 rtx order[4];
40614 order[0] = GEN_INT (elt);
40615 order[1] = const1_rtx;
40616 order[2] = const2_rtx;
40617 order[3] = GEN_INT (3);
40618 order[elt] = const0_rtx;
40620 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40621 order[1], order[2], order[3]));
40623 ix86_expand_vector_set (false, target, val, 0);
40625 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40626 order[1], order[2], order[3]));
40628 else
40630 /* For SSE1, we have to reuse the V4SF code. */
40631 rtx t = gen_reg_rtx (V4SFmode);
40632 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
40633 emit_move_insn (target, gen_lowpart (mode, t));
40635 return;
40637 case V8HImode:
40638 use_vec_merge = TARGET_SSE2;
40639 break;
40640 case V4HImode:
40641 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40642 break;
40644 case V16QImode:
40645 use_vec_merge = TARGET_SSE4_1;
40646 break;
40648 case V8QImode:
40649 break;
40651 case V32QImode:
40652 half_mode = V16QImode;
40653 j = 0;
40654 n = 16;
40655 goto half;
40657 case V16HImode:
40658 half_mode = V8HImode;
40659 j = 1;
40660 n = 8;
40661 goto half;
40663 case V8SImode:
40664 half_mode = V4SImode;
40665 j = 2;
40666 n = 4;
40667 goto half;
40669 case V4DImode:
40670 half_mode = V2DImode;
40671 j = 3;
40672 n = 2;
40673 goto half;
40675 case V8SFmode:
40676 half_mode = V4SFmode;
40677 j = 4;
40678 n = 4;
40679 goto half;
40681 case V4DFmode:
40682 half_mode = V2DFmode;
40683 j = 5;
40684 n = 2;
40685 goto half;
40687 half:
40688 /* Compute offset. */
40689 i = elt / n;
40690 elt %= n;
40692 gcc_assert (i <= 1);
40694 /* Extract the half. */
40695 tmp = gen_reg_rtx (half_mode);
40696 emit_insn (gen_extract[j][i] (tmp, target));
40698 /* Put val in tmp at elt. */
40699 ix86_expand_vector_set (false, tmp, val, elt);
40701 /* Put it back. */
40702 emit_insn (gen_insert[j][i] (target, target, tmp));
40703 return;
40705 default:
40706 break;
40709 if (use_vec_merge)
40711 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
40712 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
40713 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40715 else
40717 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40719 emit_move_insn (mem, target);
40721 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40722 emit_move_insn (tmp, val);
40724 emit_move_insn (target, mem);
40728 void
40729 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
40731 enum machine_mode mode = GET_MODE (vec);
40732 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40733 bool use_vec_extr = false;
40734 rtx tmp;
40736 switch (mode)
40738 case V2SImode:
40739 case V2SFmode:
40740 if (!mmx_ok)
40741 break;
40742 /* FALLTHRU */
40744 case V2DFmode:
40745 case V2DImode:
40746 use_vec_extr = true;
40747 break;
40749 case V4SFmode:
40750 use_vec_extr = TARGET_SSE4_1;
40751 if (use_vec_extr)
40752 break;
40754 switch (elt)
40756 case 0:
40757 tmp = vec;
40758 break;
40760 case 1:
40761 case 3:
40762 tmp = gen_reg_rtx (mode);
40763 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
40764 GEN_INT (elt), GEN_INT (elt),
40765 GEN_INT (elt+4), GEN_INT (elt+4)));
40766 break;
40768 case 2:
40769 tmp = gen_reg_rtx (mode);
40770 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
40771 break;
40773 default:
40774 gcc_unreachable ();
40776 vec = tmp;
40777 use_vec_extr = true;
40778 elt = 0;
40779 break;
40781 case V4SImode:
40782 use_vec_extr = TARGET_SSE4_1;
40783 if (use_vec_extr)
40784 break;
40786 if (TARGET_SSE2)
40788 switch (elt)
40790 case 0:
40791 tmp = vec;
40792 break;
40794 case 1:
40795 case 3:
40796 tmp = gen_reg_rtx (mode);
40797 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
40798 GEN_INT (elt), GEN_INT (elt),
40799 GEN_INT (elt), GEN_INT (elt)));
40800 break;
40802 case 2:
40803 tmp = gen_reg_rtx (mode);
40804 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
40805 break;
40807 default:
40808 gcc_unreachable ();
40810 vec = tmp;
40811 use_vec_extr = true;
40812 elt = 0;
40814 else
40816 /* For SSE1, we have to reuse the V4SF code. */
40817 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
40818 gen_lowpart (V4SFmode, vec), elt);
40819 return;
40821 break;
40823 case V8HImode:
40824 use_vec_extr = TARGET_SSE2;
40825 break;
40826 case V4HImode:
40827 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40828 break;
40830 case V16QImode:
40831 use_vec_extr = TARGET_SSE4_1;
40832 break;
40834 case V8SFmode:
40835 if (TARGET_AVX)
40837 tmp = gen_reg_rtx (V4SFmode);
40838 if (elt < 4)
40839 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
40840 else
40841 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
40842 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40843 return;
40845 break;
40847 case V4DFmode:
40848 if (TARGET_AVX)
40850 tmp = gen_reg_rtx (V2DFmode);
40851 if (elt < 2)
40852 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
40853 else
40854 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
40855 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40856 return;
40858 break;
40860 case V32QImode:
40861 if (TARGET_AVX)
40863 tmp = gen_reg_rtx (V16QImode);
40864 if (elt < 16)
40865 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
40866 else
40867 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
40868 ix86_expand_vector_extract (false, target, tmp, elt & 15);
40869 return;
40871 break;
40873 case V16HImode:
40874 if (TARGET_AVX)
40876 tmp = gen_reg_rtx (V8HImode);
40877 if (elt < 8)
40878 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
40879 else
40880 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
40881 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40882 return;
40884 break;
40886 case V8SImode:
40887 if (TARGET_AVX)
40889 tmp = gen_reg_rtx (V4SImode);
40890 if (elt < 4)
40891 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
40892 else
40893 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
40894 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40895 return;
40897 break;
40899 case V4DImode:
40900 if (TARGET_AVX)
40902 tmp = gen_reg_rtx (V2DImode);
40903 if (elt < 2)
40904 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
40905 else
40906 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
40907 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40908 return;
40910 break;
40912 case V16SFmode:
40913 tmp = gen_reg_rtx (V8SFmode);
40914 if (elt < 8)
40915 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
40916 else
40917 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
40918 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40919 return;
40921 case V8DFmode:
40922 tmp = gen_reg_rtx (V4DFmode);
40923 if (elt < 4)
40924 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
40925 else
40926 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
40927 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40928 return;
40930 case V16SImode:
40931 tmp = gen_reg_rtx (V8SImode);
40932 if (elt < 8)
40933 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
40934 else
40935 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
40936 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40937 return;
40939 case V8DImode:
40940 tmp = gen_reg_rtx (V4DImode);
40941 if (elt < 4)
40942 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
40943 else
40944 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
40945 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40946 return;
40948 case V8QImode:
40949 /* ??? Could extract the appropriate HImode element and shift. */
40950 default:
40951 break;
40954 if (use_vec_extr)
40956 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
40957 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
40959 /* Let the rtl optimizers know about the zero extension performed. */
40960 if (inner_mode == QImode || inner_mode == HImode)
40962 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
40963 target = gen_lowpart (SImode, target);
40966 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40968 else
40970 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40972 emit_move_insn (mem, vec);
40974 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40975 emit_move_insn (target, tmp);
40979 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
40980 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
40981 The upper bits of DEST are undefined, though they shouldn't cause
40982 exceptions (some bits from src or all zeros are ok). */
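/* For example, for a V4SFmode SRC and i == 64 the shufps form copies
   element 1 into element 0 (the upper elements are don't-care), while
   i == 128 uses movhlps to move the high 64 bits into the low half.  */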
40984 static void
40985 emit_reduc_half (rtx dest, rtx src, int i)
40987 rtx tem, d = dest;
40988 switch (GET_MODE (src))
40990 case V4SFmode:
40991 if (i == 128)
40992 tem = gen_sse_movhlps (dest, src, src);
40993 else
40994 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
40995 GEN_INT (1 + 4), GEN_INT (1 + 4));
40996 break;
40997 case V2DFmode:
40998 tem = gen_vec_interleave_highv2df (dest, src, src);
40999 break;
41000 case V16QImode:
41001 case V8HImode:
41002 case V4SImode:
41003 case V2DImode:
41004 d = gen_reg_rtx (V1TImode);
41005 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
41006 GEN_INT (i / 2));
41007 break;
41008 case V8SFmode:
41009 if (i == 256)
41010 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
41011 else
41012 tem = gen_avx_shufps256 (dest, src, src,
41013 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
41014 break;
41015 case V4DFmode:
41016 if (i == 256)
41017 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
41018 else
41019 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
41020 break;
41021 case V32QImode:
41022 case V16HImode:
41023 case V8SImode:
41024 case V4DImode:
41025 if (i == 256)
41027 if (GET_MODE (dest) != V4DImode)
41028 d = gen_reg_rtx (V4DImode);
41029 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
41030 gen_lowpart (V4DImode, src),
41031 const1_rtx);
41033 else
41035 d = gen_reg_rtx (V2TImode);
41036 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
41037 GEN_INT (i / 2));
41039 break;
41040 case V16SImode:
41041 case V16SFmode:
41042 case V8DImode:
41043 case V8DFmode:
41044 if (i > 128)
41045 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
41046 gen_lowpart (V16SImode, src),
41047 gen_lowpart (V16SImode, src),
41048 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
41049 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
41050 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
41051 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
41052 GEN_INT (0xC), GEN_INT (0xD),
41053 GEN_INT (0xE), GEN_INT (0xF),
41054 GEN_INT (0x10), GEN_INT (0x11),
41055 GEN_INT (0x12), GEN_INT (0x13),
41056 GEN_INT (0x14), GEN_INT (0x15),
41057 GEN_INT (0x16), GEN_INT (0x17));
41058 else
41059 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
41060 gen_lowpart (V16SImode, src),
41061 GEN_INT (i == 128 ? 0x2 : 0x1),
41062 GEN_INT (0x3),
41063 GEN_INT (0x3),
41064 GEN_INT (0x3),
41065 GEN_INT (i == 128 ? 0x6 : 0x5),
41066 GEN_INT (0x7),
41067 GEN_INT (0x7),
41068 GEN_INT (0x7),
41069 GEN_INT (i == 128 ? 0xA : 0x9),
41070 GEN_INT (0xB),
41071 GEN_INT (0xB),
41072 GEN_INT (0xB),
41073 GEN_INT (i == 128 ? 0xE : 0xD),
41074 GEN_INT (0xF),
41075 GEN_INT (0xF),
41076 GEN_INT (0xF));
41077 break;
41078 default:
41079 gcc_unreachable ();
41081 emit_insn (tem);
41082 if (d != dest)
41083 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
41086 /* Expand a vector reduction. FN is the binary pattern to reduce;
41087 DEST is the destination; IN is the input vector. */
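/* The loop below halves the active width on each pass: for V4SFmode it
   runs with i = 128 and i = 64, each time applying FN to the vector and
   its shifted half; the reduced value ends up in element 0 of DEST and
   the remaining elements are unspecified.  */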
41089 void
41090 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
41092 rtx half, dst, vec = in;
41093 enum machine_mode mode = GET_MODE (in);
41094 int i;
41096 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
41097 if (TARGET_SSE4_1
41098 && mode == V8HImode
41099 && fn == gen_uminv8hi3)
41101 emit_insn (gen_sse4_1_phminposuw (dest, in));
41102 return;
41105 for (i = GET_MODE_BITSIZE (mode);
41106 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
41107 i >>= 1)
41109 half = gen_reg_rtx (mode);
41110 emit_reduc_half (half, vec, i);
41111 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
41112 dst = dest;
41113 else
41114 dst = gen_reg_rtx (mode);
41115 emit_insn (fn (dst, half, vec));
41116 vec = dst;
41120 /* Target hook for scalar_mode_supported_p. */
41121 static bool
41122 ix86_scalar_mode_supported_p (enum machine_mode mode)
41124 if (DECIMAL_FLOAT_MODE_P (mode))
41125 return default_decimal_float_supported_p ();
41126 else if (mode == TFmode)
41127 return true;
41128 else
41129 return default_scalar_mode_supported_p (mode);
41132 /* Implements target hook vector_mode_supported_p. */
41133 static bool
41134 ix86_vector_mode_supported_p (enum machine_mode mode)
41136 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41137 return true;
41138 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41139 return true;
41140 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41141 return true;
41142 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41143 return true;
41144 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
41145 return true;
41146 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
41147 return true;
41148 return false;
41151 /* Target hook for c_mode_for_suffix. */
41152 static enum machine_mode
41153 ix86_c_mode_for_suffix (char suffix)
41155 if (suffix == 'q')
41156 return TFmode;
41157 if (suffix == 'w')
41158 return XFmode;
41160 return VOIDmode;
41163 /* Worker function for TARGET_MD_ASM_CLOBBERS.
41165 We do this in the new i386 backend to maintain source compatibility
41166 with the old cc0-based compiler. */
41168 static tree
41169 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
41170 tree inputs ATTRIBUTE_UNUSED,
41171 tree clobbers)
41173 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
41174 clobbers);
41175 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
41176 clobbers);
41177 return clobbers;
41180 /* Implements target vector targetm.asm.encode_section_info. */
41182 static void ATTRIBUTE_UNUSED
41183 ix86_encode_section_info (tree decl, rtx rtl, int first)
41185 default_encode_section_info (decl, rtl, first);
41187 if (TREE_CODE (decl) == VAR_DECL
41188 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
41189 && ix86_in_large_data_p (decl))
41190 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
41193 /* Worker function for REVERSE_CONDITION. */
41195 enum rtx_code
41196 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
41198 return (mode != CCFPmode && mode != CCFPUmode
41199 ? reverse_condition (code)
41200 : reverse_condition_maybe_unordered (code));
41203 /* Output code to perform an x87 FP register move, from OPERANDS[1]
41204 to OPERANDS[0]. */
41206 const char *
41207 output_387_reg_move (rtx insn, rtx *operands)
41209 if (REG_P (operands[0]))
41211 if (REG_P (operands[1])
41212 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41214 if (REGNO (operands[0]) == FIRST_STACK_REG)
41215 return output_387_ffreep (operands, 0);
41216 return "fstp\t%y0";
41218 if (STACK_TOP_P (operands[0]))
41219 return "fld%Z1\t%y1";
41220 return "fst\t%y0";
41222 else if (MEM_P (operands[0]))
41224 gcc_assert (REG_P (operands[1]));
41225 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41226 return "fstp%Z0\t%y0";
41227 else
41229 /* There is no non-popping store to memory for XFmode.
41230 So if we need one, follow the store with a load. */
41231 if (GET_MODE (operands[0]) == XFmode)
41232 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
41233 else
41234 return "fst%Z0\t%y0";
41237 else
41238 gcc_unreachable();
41241 /* Output code to perform a conditional jump to LABEL, if C2 flag in
41242 FP status register is set. */
41244 void
41245 ix86_emit_fp_unordered_jump (rtx label)
41247 rtx reg = gen_reg_rtx (HImode);
41248 rtx temp;
41250 emit_insn (gen_x86_fnstsw_1 (reg));
41252 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
41254 emit_insn (gen_x86_sahf_1 (reg));
41256 temp = gen_rtx_REG (CCmode, FLAGS_REG);
41257 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
41259 else
41261 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
41263 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
41264 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
41267 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
41268 gen_rtx_LABEL_REF (VOIDmode, label),
41269 pc_rtx);
41270 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
41272 emit_jump_insn (temp);
41273 predict_jump (REG_BR_PROB_BASE * 10 / 100);
41276 /* Output code to perform a log1p XFmode calculation. */
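/* Sketch of the method: the threshold below is 1 - sqrt(2)/2, the
   documented input range of fyl2xp1.  For |op1| below it, op0 is
   computed directly as fldln2 * log2 (op1 + 1.0) via fyl2xp1; otherwise
   1.0 is added first and fyl2x is used.  Either way the fldln2 factor
   converts the base-2 logarithm into ln (1 + op1).  */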
41278 void ix86_emit_i387_log1p (rtx op0, rtx op1)
41280 rtx label1 = gen_label_rtx ();
41281 rtx label2 = gen_label_rtx ();
41283 rtx tmp = gen_reg_rtx (XFmode);
41284 rtx tmp2 = gen_reg_rtx (XFmode);
41285 rtx test;
41287 emit_insn (gen_absxf2 (tmp, op1));
41288 test = gen_rtx_GE (VOIDmode, tmp,
41289 CONST_DOUBLE_FROM_REAL_VALUE (
41290 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
41291 XFmode));
41292 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
41294 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41295 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
41296 emit_jump (label2);
41298 emit_label (label1);
41299 emit_move_insn (tmp, CONST1_RTX (XFmode));
41300 emit_insn (gen_addxf3 (tmp, op1, tmp));
41301 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41302 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
41304 emit_label (label2);
41307 /* Emit code for round calculation. */
41308 void ix86_emit_i387_round (rtx op0, rtx op1)
41310 enum machine_mode inmode = GET_MODE (op1);
41311 enum machine_mode outmode = GET_MODE (op0);
41312 rtx e1, e2, res, tmp, tmp1, half;
41313 rtx scratch = gen_reg_rtx (HImode);
41314 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
41315 rtx jump_label = gen_label_rtx ();
41316 rtx insn;
41317 rtx (*gen_abs) (rtx, rtx);
41318 rtx (*gen_neg) (rtx, rtx);
41320 switch (inmode)
41322 case SFmode:
41323 gen_abs = gen_abssf2;
41324 break;
41325 case DFmode:
41326 gen_abs = gen_absdf2;
41327 break;
41328 case XFmode:
41329 gen_abs = gen_absxf2;
41330 break;
41331 default:
41332 gcc_unreachable ();
41335 switch (outmode)
41337 case SFmode:
41338 gen_neg = gen_negsf2;
41339 break;
41340 case DFmode:
41341 gen_neg = gen_negdf2;
41342 break;
41343 case XFmode:
41344 gen_neg = gen_negxf2;
41345 break;
41346 case HImode:
41347 gen_neg = gen_neghi2;
41348 break;
41349 case SImode:
41350 gen_neg = gen_negsi2;
41351 break;
41352 case DImode:
41353 gen_neg = gen_negdi2;
41354 break;
41355 default:
41356 gcc_unreachable ();
41359 e1 = gen_reg_rtx (inmode);
41360 e2 = gen_reg_rtx (inmode);
41361 res = gen_reg_rtx (outmode);
41363 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
41365 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
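/* Worked example: round (-2.5): fabs gives 2.5, adding 0.5 gives 3.0,
   floor gives 3, and the fxam sign bit (C1, tested below as 0x02 in the
   high status-word byte) negates the result to -3.  Halfway cases thus
   round away from zero, as round () requires.  */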
41367 /* scratch = fxam(op1) */
41368 emit_insn (gen_rtx_SET (VOIDmode, scratch,
41369 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
41370 UNSPEC_FXAM)));
41371 /* e1 = fabs(op1) */
41372 emit_insn (gen_abs (e1, op1));
41374 /* e2 = e1 + 0.5 */
41375 half = force_reg (inmode, half);
41376 emit_insn (gen_rtx_SET (VOIDmode, e2,
41377 gen_rtx_PLUS (inmode, e1, half)));
41379 /* res = floor(e2) */
41380 if (inmode != XFmode)
41382 tmp1 = gen_reg_rtx (XFmode);
41384 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
41385 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
41387 else
41388 tmp1 = e2;
41390 switch (outmode)
41392 case SFmode:
41393 case DFmode:
41395 rtx tmp0 = gen_reg_rtx (XFmode);
41397 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
41399 emit_insn (gen_rtx_SET (VOIDmode, res,
41400 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
41401 UNSPEC_TRUNC_NOOP)));
41403 break;
41404 case XFmode:
41405 emit_insn (gen_frndintxf2_floor (res, tmp1));
41406 break;
41407 case HImode:
41408 emit_insn (gen_lfloorxfhi2 (res, tmp1));
41409 break;
41410 case SImode:
41411 emit_insn (gen_lfloorxfsi2 (res, tmp1));
41412 break;
41413 case DImode:
41414 emit_insn (gen_lfloorxfdi2 (res, tmp1));
41415 break;
41416 default:
41417 gcc_unreachable ();
41420 /* flags = signbit(a) */
41421 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
41423 /* if (flags) then res = -res */
41424 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
41425 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
41426 gen_rtx_LABEL_REF (VOIDmode, jump_label),
41427 pc_rtx);
41428 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41429 predict_jump (REG_BR_PROB_BASE * 50 / 100);
41430 JUMP_LABEL (insn) = jump_label;
41432 emit_insn (gen_neg (res, res));
41434 emit_label (jump_label);
41435 LABEL_NUSES (jump_label) = 1;
41437 emit_move_insn (op0, res);
41440 /* Output code to perform a Newton-Raphson approximation of a single precision
41441 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
41443 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
41445 rtx x0, x1, e0, e1;
41447 x0 = gen_reg_rtx (mode);
41448 e0 = gen_reg_rtx (mode);
41449 e1 = gen_reg_rtx (mode);
41450 x1 = gen_reg_rtx (mode);
41452 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
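/* This is one Newton-Raphson step for 1/b: with x0 = rcp (b),
   x1 = x0 * (2 - b * x0) = 2*x0 - b*x0*x0, which the code forms as
   e1 - e0 with e1 = x0 + x0 and e0 = (x0 * b) * x0, and then
   multiplies by a.  */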
41454 b = force_reg (mode, b);
41456 /* x0 = rcp(b) estimate */
41457 if (mode == V16SFmode || mode == V8DFmode)
41458 emit_insn (gen_rtx_SET (VOIDmode, x0,
41459 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41460 UNSPEC_RCP14)));
41461 else
41462 emit_insn (gen_rtx_SET (VOIDmode, x0,
41463 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41464 UNSPEC_RCP)));
41466 /* e0 = x0 * b */
41467 emit_insn (gen_rtx_SET (VOIDmode, e0,
41468 gen_rtx_MULT (mode, x0, b)));
41470 /* e0 = x0 * e0 */
41471 emit_insn (gen_rtx_SET (VOIDmode, e0,
41472 gen_rtx_MULT (mode, x0, e0)));
41474 /* e1 = x0 + x0 */
41475 emit_insn (gen_rtx_SET (VOIDmode, e1,
41476 gen_rtx_PLUS (mode, x0, x0)));
41478 /* x1 = e1 - e0 */
41479 emit_insn (gen_rtx_SET (VOIDmode, x1,
41480 gen_rtx_MINUS (mode, e1, e0)));
41482 /* res = a * x1 */
41483 emit_insn (gen_rtx_SET (VOIDmode, res,
41484 gen_rtx_MULT (mode, a, x1)));
41487 /* Output code to perform a Newton-Raphson approximation of a
41488 single precision floating point [reciprocal] square root. */
41490 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
41491 bool recip)
41493 rtx x0, e0, e1, e2, e3, mthree, mhalf;
41494 REAL_VALUE_TYPE r;
41495 int unspec;
41497 x0 = gen_reg_rtx (mode);
41498 e0 = gen_reg_rtx (mode);
41499 e1 = gen_reg_rtx (mode);
41500 e2 = gen_reg_rtx (mode);
41501 e3 = gen_reg_rtx (mode);
41503 real_from_integer (&r, VOIDmode, -3, SIGNED);
41504 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41506 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
41507 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41508 unspec = UNSPEC_RSQRT;
41510 if (VECTOR_MODE_P (mode))
41512 mthree = ix86_build_const_vector (mode, true, mthree);
41513 mhalf = ix86_build_const_vector (mode, true, mhalf);
41514 /* There is no 512-bit rsqrt. There is however rsqrt14. */
41515 if (GET_MODE_SIZE (mode) == 64)
41516 unspec = UNSPEC_RSQRT14;
41519 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
41520 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
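/* This is the Newton-Raphson step x1 = 0.5 * x0 * (3 - a*x0*x0),
   rewritten as -0.5 * x0 * (a*x0*x0 - 3).  The extra factor of a in
   the sqrt form uses sqrt (a) = a * rsqrt (a).  */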
41522 a = force_reg (mode, a);
41524 /* x0 = rsqrt(a) estimate */
41525 emit_insn (gen_rtx_SET (VOIDmode, x0,
41526 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
41527 unspec)));
41529 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent a NaN for sqrt(0.0). */
41530 if (!recip)
41532 rtx zero, mask;
41534 zero = gen_reg_rtx (mode);
41535 mask = gen_reg_rtx (mode);
41537 zero = force_reg (mode, CONST0_RTX(mode));
41539 /* Handle masked compare. */
41540 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
41542 mask = gen_reg_rtx (HImode);
41543 /* Imm value 0x4 corresponds to not-equal comparison. */
41544 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
41545 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
41547 else
41549 emit_insn (gen_rtx_SET (VOIDmode, mask,
41550 gen_rtx_NE (mode, zero, a)));
41552 emit_insn (gen_rtx_SET (VOIDmode, x0,
41553 gen_rtx_AND (mode, x0, mask)));
41557 /* e0 = x0 * a */
41558 emit_insn (gen_rtx_SET (VOIDmode, e0,
41559 gen_rtx_MULT (mode, x0, a)));
41560 /* e1 = e0 * x0 */
41561 emit_insn (gen_rtx_SET (VOIDmode, e1,
41562 gen_rtx_MULT (mode, e0, x0)));
41564 /* e2 = e1 - 3. */
41565 mthree = force_reg (mode, mthree);
41566 emit_insn (gen_rtx_SET (VOIDmode, e2,
41567 gen_rtx_PLUS (mode, e1, mthree)));
41569 mhalf = force_reg (mode, mhalf);
41570 if (recip)
41571 /* e3 = -.5 * x0 */
41572 emit_insn (gen_rtx_SET (VOIDmode, e3,
41573 gen_rtx_MULT (mode, x0, mhalf)));
41574 else
41575 /* e3 = -.5 * e0 */
41576 emit_insn (gen_rtx_SET (VOIDmode, e3,
41577 gen_rtx_MULT (mode, e0, mhalf)));
41578 /* ret = e2 * e3 */
41579 emit_insn (gen_rtx_SET (VOIDmode, res,
41580 gen_rtx_MULT (mode, e2, e3)));
41583 #ifdef TARGET_SOLARIS
41584 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
41586 static void
41587 i386_solaris_elf_named_section (const char *name, unsigned int flags,
41588 tree decl)
41590 /* With Binutils 2.15, the "@unwind" marker must be specified on
41591 every occurrence of the ".eh_frame" section, not just the first
41592 one. */
41593 if (TARGET_64BIT
41594 && strcmp (name, ".eh_frame") == 0)
41596 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
41597 flags & SECTION_WRITE ? "aw" : "a");
41598 return;
41601 #ifndef USE_GAS
41602 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
41604 solaris_elf_asm_comdat_section (name, flags, decl);
41605 return;
41607 #endif
41609 default_elf_asm_named_section (name, flags, decl);
41611 #endif /* TARGET_SOLARIS */
41613 /* Return the mangling of TYPE if it is an extended fundamental type. */
41615 static const char *
41616 ix86_mangle_type (const_tree type)
41618 type = TYPE_MAIN_VARIANT (type);
41620 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
41621 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
41622 return NULL;
41624 switch (TYPE_MODE (type))
41626 case TFmode:
41627 /* __float128 is "g". */
41628 return "g";
41629 case XFmode:
41630 /* "long double" or __float80 is "e". */
41631 return "e";
41632 default:
41633 return NULL;
41637 /* For 32-bit code we can save PIC register setup by using
41638 __stack_chk_fail_local hidden function instead of calling
41639 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
41640 register, so it is better to call __stack_chk_fail directly. */
41642 static tree ATTRIBUTE_UNUSED
41643 ix86_stack_protect_fail (void)
41645 return TARGET_64BIT
41646 ? default_external_stack_protect_fail ()
41647 : default_hidden_stack_protect_fail ();
41650 /* Select a format to encode pointers in exception handling data. CODE
41651 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
41652 true if the symbol may be affected by dynamic relocations.
41654 ??? All x86 object file formats are capable of representing this.
41655 After all, the relocation needed is the same as for the call insn.
41656 Whether or not a particular assembler allows us to enter such, I
41657 guess we'll have to see. */
41659 asm_preferred_eh_data_format (int code, int global)
41661 if (flag_pic)
41663 int type = DW_EH_PE_sdata8;
41664 if (!TARGET_64BIT
41665 || ix86_cmodel == CM_SMALL_PIC
41666 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
41667 type = DW_EH_PE_sdata4;
41668 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
41670 if (ix86_cmodel == CM_SMALL
41671 || (ix86_cmodel == CM_MEDIUM && code))
41672 return DW_EH_PE_udata4;
41673 return DW_EH_PE_absptr;
41676 /* Expand copysign from SIGN to the positive value ABS_VALUE
41677 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
41678 the sign-bit. */
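/* In bit terms: RESULT = ABS_VALUE | (SIGN & sign-bit).  When MASK is
   supplied it is the fabs mask (all bits except the sign bit), so it is
   inverted before the AND; otherwise a fresh sign-bit mask is built.  */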
41679 static void
41680 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
41682 enum machine_mode mode = GET_MODE (sign);
41683 rtx sgn = gen_reg_rtx (mode);
41684 if (mask == NULL_RTX)
41686 enum machine_mode vmode;
41688 if (mode == SFmode)
41689 vmode = V4SFmode;
41690 else if (mode == DFmode)
41691 vmode = V2DFmode;
41692 else
41693 vmode = mode;
41695 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
41696 if (!VECTOR_MODE_P (mode))
41698 /* We need to generate a scalar mode mask in this case. */
41699 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41700 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41701 mask = gen_reg_rtx (mode);
41702 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41705 else
41706 mask = gen_rtx_NOT (mode, mask);
41707 emit_insn (gen_rtx_SET (VOIDmode, sgn,
41708 gen_rtx_AND (mode, mask, sign)));
41709 emit_insn (gen_rtx_SET (VOIDmode, result,
41710 gen_rtx_IOR (mode, abs_value, sgn)));
41713 /* Expand fabs (OP0) and return a new rtx that holds the result. The
41714 mask for masking out the sign-bit is stored in *SMASK, if that is
41715 non-null. */
41716 static rtx
41717 ix86_expand_sse_fabs (rtx op0, rtx *smask)
41719 enum machine_mode vmode, mode = GET_MODE (op0);
41720 rtx xa, mask;
41722 xa = gen_reg_rtx (mode);
41723 if (mode == SFmode)
41724 vmode = V4SFmode;
41725 else if (mode == DFmode)
41726 vmode = V2DFmode;
41727 else
41728 vmode = mode;
41729 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
41730 if (!VECTOR_MODE_P (mode))
41732 /* We need to generate a scalar mode mask in this case. */
41733 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41734 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41735 mask = gen_reg_rtx (mode);
41736 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41738 emit_insn (gen_rtx_SET (VOIDmode, xa,
41739 gen_rtx_AND (mode, op0, mask)));
41741 if (smask)
41742 *smask = mask;
41744 return xa;
41747 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
41748 swapping the operands if SWAP_OPERANDS is true. The expanded
41749 code is a forward jump to a newly created label in case the
41750 comparison is true. The generated label rtx is returned. */
41751 static rtx
41752 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
41753 bool swap_operands)
41755 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
41756 rtx label, tmp;
41758 if (swap_operands)
41760 tmp = op0;
41761 op0 = op1;
41762 op1 = tmp;
41765 label = gen_label_rtx ();
41766 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
41767 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41768 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
41769 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
41770 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
41771 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
41772 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41773 JUMP_LABEL (tmp) = label;
41775 return label;
41778 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
41779 using comparison code CODE. Operands are swapped for the comparison if
41780 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
41781 static rtx
41782 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
41783 bool swap_operands)
41785 rtx (*insn)(rtx, rtx, rtx, rtx);
41786 enum machine_mode mode = GET_MODE (op0);
41787 rtx mask = gen_reg_rtx (mode);
41789 if (swap_operands)
41791 rtx tmp = op0;
41792 op0 = op1;
41793 op1 = tmp;
41796 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
41798 emit_insn (insn (mask, op0, op1,
41799 gen_rtx_fmt_ee (code, mode, op0, op1)));
41800 return mask;
41803 /* Generate and return a rtx of mode MODE for 2**n where n is the number
41804 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
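/* Adding and then subtracting this constant rounds a nonnegative value
   below 2**52 (2**23 for SFmode) to an integer: at that magnitude the
   unit in the last place is 1.0, so the addition drops the fractional
   bits under the current rounding mode and the subtraction restores the
   now-integral value exactly.  The rint/floor/ceil/round expanders
   below rely on this.  */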
41805 static rtx
41806 ix86_gen_TWO52 (enum machine_mode mode)
41808 REAL_VALUE_TYPE TWO52r;
41809 rtx TWO52;
41811 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
41812 TWO52 = const_double_from_real_value (TWO52r, mode);
41813 TWO52 = force_reg (mode, TWO52);
41815 return TWO52;
41818 /* Expand SSE sequence for computing lround from OP1 storing
41819 into OP0. */
41820 void
41821 ix86_expand_lround (rtx op0, rtx op1)
41823 /* C code for the stuff we're doing below:
41824 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
41825 return (long)tmp;
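/* The adjustment is nextafter (0.5, 0.0) rather than 0.5 so that values
   just below a half-way point are not pushed over it; e.g. for the
   largest double below 0.5, adding exactly 0.5 would round to 1.0 and
   lround would wrongly return 1, while adding the predecessor of 0.5
   keeps the sum below 1.0 and yields 0.  */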
41827 enum machine_mode mode = GET_MODE (op1);
41828 const struct real_format *fmt;
41829 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
41830 rtx adj;
41832 /* load nextafter (0.5, 0.0) */
41833 fmt = REAL_MODE_FORMAT (mode);
41834 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
41835 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
41837 /* adj = copysign (0.5, op1) */
41838 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
41839 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
41841 /* adj = op1 + adj */
41842 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
41844 /* op0 = (imode)adj */
41845 expand_fix (op0, adj, 0);
41848 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
41849 into OPERAND0. */
41850 void
41851 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
41853 /* C code for the stuff we're doing below (for do_floor):
41854 xi = (long)op1;
41855 xi -= (double)xi > op1 ? 1 : 0;
41856 return xi;
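/* Worked example for do_floor: lfloor (-2.3): the fix conversion
   truncates toward zero giving xi = -2; since (double) -2 > -2.3 the
   compensation subtracts 1, yielding -3.  For lceil the comparison is
   mirrored and 1 is added instead.  */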
41858 enum machine_mode fmode = GET_MODE (op1);
41859 enum machine_mode imode = GET_MODE (op0);
41860 rtx ireg, freg, label, tmp;
41862 /* reg = (long)op1 */
41863 ireg = gen_reg_rtx (imode);
41864 expand_fix (ireg, op1, 0);
41866 /* freg = (double)reg */
41867 freg = gen_reg_rtx (fmode);
41868 expand_float (freg, ireg, 0);
41870 /* ireg = (freg > op1) ? ireg - 1 : ireg */
41871 label = ix86_expand_sse_compare_and_jump (UNLE,
41872 freg, op1, !do_floor);
41873 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
41874 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
41875 emit_move_insn (ireg, tmp);
41877 emit_label (label);
41878 LABEL_NUSES (label) = 1;
41880 emit_move_insn (op0, ireg);
41883 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
41884 result in OPERAND0. */
41885 void
41886 ix86_expand_rint (rtx operand0, rtx operand1)
41888 /* C code for the stuff we're doing below:
41889 xa = fabs (operand1);
41890 if (!isless (xa, 2**52))
41891 return operand1;
41892 xa = xa + 2**52 - 2**52;
41893 return copysign (xa, operand1);
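/* E.g. for operand1 = 3.3: xa = 3.3 < 2**52, so xa + 2**52 rounds to
   2**52 + 3 and subtracting 2**52 leaves 3.0; copysign then restores
   the original sign.  Values with |x| >= 2**52 are already integral and
   are returned unchanged via the early branch.  */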
41895 enum machine_mode mode = GET_MODE (operand0);
41896 rtx res, xa, label, TWO52, mask;
41898 res = gen_reg_rtx (mode);
41899 emit_move_insn (res, operand1);
41901 /* xa = abs (operand1) */
41902 xa = ix86_expand_sse_fabs (res, &mask);
41904 /* if (!isless (xa, TWO52)) goto label; */
41905 TWO52 = ix86_gen_TWO52 (mode);
41906 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41908 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41909 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41911 ix86_sse_copysign_to_positive (res, xa, res, mask);
41913 emit_label (label);
41914 LABEL_NUSES (label) = 1;
41916 emit_move_insn (operand0, res);
41919 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41920 into OPERAND0. */
41921 void
41922 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
41924 /* C code for the stuff we expand below.
41925 double xa = fabs (x), x2;
41926 if (!isless (xa, TWO52))
41927 return x;
41928 xa = xa + TWO52 - TWO52;
41929 x2 = copysign (xa, x);
41930 Compensate. Floor:
41931 if (x2 > x)
41932 x2 -= 1;
41933 Compensate. Ceil:
41934 if (x2 < x)
41935 x2 -= -1;
41936 return x2;
41938 enum machine_mode mode = GET_MODE (operand0);
41939 rtx xa, TWO52, tmp, label, one, res, mask;
41941 TWO52 = ix86_gen_TWO52 (mode);
41943 /* Temporary for holding the result, initialized to the input
41944 operand to ease control flow. */
41945 res = gen_reg_rtx (mode);
41946 emit_move_insn (res, operand1);
41948 /* xa = abs (operand1) */
41949 xa = ix86_expand_sse_fabs (res, &mask);
41951 /* if (!isless (xa, TWO52)) goto label; */
41952 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41954 /* xa = xa + TWO52 - TWO52; */
41955 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41956 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41958 /* xa = copysign (xa, operand1) */
41959 ix86_sse_copysign_to_positive (xa, xa, res, mask);
41961 /* generate 1.0 or -1.0 */
41962 one = force_reg (mode,
41963 const_double_from_real_value (do_floor
41964 ? dconst1 : dconstm1, mode));
41966 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
41967 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
41968 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41969 gen_rtx_AND (mode, one, tmp)));
41970 /* We always need to subtract here to preserve signed zero. */
41971 tmp = expand_simple_binop (mode, MINUS,
41972 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41973 emit_move_insn (res, tmp);
41975 emit_label (label);
41976 LABEL_NUSES (label) = 1;
41978 emit_move_insn (operand0, res);
41981 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41982 into OPERAND0. */
41983 void
41984 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
41986 /* C code for the stuff we expand below.
41987 double xa = fabs (x), x2;
41988 if (!isless (xa, TWO52))
41989 return x;
41990 x2 = (double)(long)x;
41991 Compensate. Floor:
41992 if (x2 > x)
41993 x2 -= 1;
41994 Compensate. Ceil:
41995 if (x2 < x)
41996 x2 += 1;
41997 if (HONOR_SIGNED_ZEROS (mode))
41998 return copysign (x2, x);
41999 return x2;
42001 enum machine_mode mode = GET_MODE (operand0);
42002 rtx xa, xi, TWO52, tmp, label, one, res, mask;
42004 TWO52 = ix86_gen_TWO52 (mode);
42006 /* Temporary for holding the result, initialized to the input
42007 operand to ease control flow. */
42008 res = gen_reg_rtx (mode);
42009 emit_move_insn (res, operand1);
42011 /* xa = abs (operand1) */
42012 xa = ix86_expand_sse_fabs (res, &mask);
42014 /* if (!isless (xa, TWO52)) goto label; */
42015 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42017 /* xa = (double)(long)x */
42018 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42019 expand_fix (xi, res, 0);
42020 expand_float (xa, xi, 0);
42022 /* generate 1.0 */
42023 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42025 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42026 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42027 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42028 gen_rtx_AND (mode, one, tmp)));
42029 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
42030 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42031 emit_move_insn (res, tmp);
42033 if (HONOR_SIGNED_ZEROS (mode))
42034 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42036 emit_label (label);
42037 LABEL_NUSES (label) = 1;
42039 emit_move_insn (operand0, res);
42042 /* Expand SSE sequence for computing round from OPERAND1 storing
42043 into OPERAND0. Sequence that works without relying on DImode truncation
42044 via cvttsd2siq that is only available on 64bit targets. */
42045 void
42046 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
42048 /* C code for the stuff we expand below.
42049 double xa = fabs (x), xa2, x2;
42050 if (!isless (xa, TWO52))
42051 return x;
42052 Using the absolute value and copying back sign makes
42053 -0.0 -> -0.0 correct.
42054 xa2 = xa + TWO52 - TWO52;
42055 Compensate.
42056 dxa = xa2 - xa;
42057 if (dxa <= -0.5)
42058 xa2 += 1;
42059 else if (dxa > 0.5)
42060 xa2 -= 1;
42061 x2 = copysign (xa2, x);
42062 return x2;
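/* Worked example: x = 2.5: xa2 = 2.5 + 2**52 - 2**52 = 2.0 under
   round-to-nearest-even, dxa = -0.5, and the dxa <= -0.5 compensation
   adds 1, so the result is 3.0 with the sign of x copied back; halfway
   cases therefore round away from zero as round () requires.  */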
42064 enum machine_mode mode = GET_MODE (operand0);
42065 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
42067 TWO52 = ix86_gen_TWO52 (mode);
42069 /* Temporary for holding the result, initialized to the input
42070 operand to ease control flow. */
42071 res = gen_reg_rtx (mode);
42072 emit_move_insn (res, operand1);
42074 /* xa = abs (operand1) */
42075 xa = ix86_expand_sse_fabs (res, &mask);
42077 /* if (!isless (xa, TWO52)) goto label; */
42078 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42080 /* xa2 = xa + TWO52 - TWO52; */
42081 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42082 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
42084 /* dxa = xa2 - xa; */
42085 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
42087 /* generate 0.5, 1.0 and -0.5 */
42088 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
42089 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
42090 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
42091 0, OPTAB_DIRECT);
42093 /* Compensate. */
42094 tmp = gen_reg_rtx (mode);
42095 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
42096 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
42097 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42098 gen_rtx_AND (mode, one, tmp)));
42099 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42100 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
42101 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
42102 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42103 gen_rtx_AND (mode, one, tmp)));
42104 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42106 /* res = copysign (xa2, operand1) */
42107 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
42109 emit_label (label);
42110 LABEL_NUSES (label) = 1;
42112 emit_move_insn (operand0, res);
42115 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42116 into OPERAND0. */
42117 void
42118 ix86_expand_trunc (rtx operand0, rtx operand1)
42120 /* C code for SSE variant we expand below.
42121 double xa = fabs (x), x2;
42122 if (!isless (xa, TWO52))
42123 return x;
42124 x2 = (double)(long)x;
42125 if (HONOR_SIGNED_ZEROS (mode))
42126 return copysign (x2, x);
42127 return x2;
42129 enum machine_mode mode = GET_MODE (operand0);
42130 rtx xa, xi, TWO52, label, res, mask;
42132 TWO52 = ix86_gen_TWO52 (mode);
42134 /* Temporary for holding the result, initialized to the input
42135 operand to ease control flow. */
42136 res = gen_reg_rtx (mode);
42137 emit_move_insn (res, operand1);
42139 /* xa = abs (operand1) */
42140 xa = ix86_expand_sse_fabs (res, &mask);
42142 /* if (!isless (xa, TWO52)) goto label; */
42143 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42145 /* x = (double)(long)x */
42146 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42147 expand_fix (xi, res, 0);
42148 expand_float (res, xi, 0);
42150 if (HONOR_SIGNED_ZEROS (mode))
42151 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42153 emit_label (label);
42154 LABEL_NUSES (label) = 1;
42156 emit_move_insn (operand0, res);
42159 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42160 into OPERAND0 without relying on 64-bit-only DImode truncation. */
42161 void
42162 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
42164 enum machine_mode mode = GET_MODE (operand0);
42165 rtx xa, mask, TWO52, label, one, res, smask, tmp;
42167 /* C code for SSE variant we expand below.
42168 double xa = fabs (x), xa2, x2;
42169 if (!isless (xa, TWO52))
42170 return x;
42171 xa2 = xa + TWO52 - TWO52;
42172 Compensate:
42173 if (xa2 > xa)
42174 xa2 -= 1.0;
42175 x2 = copysign (xa2, x);
42176 return x2;
42179 TWO52 = ix86_gen_TWO52 (mode);
42181 /* Temporary for holding the result, initialized to the input
42182 operand to ease control flow. */
42183 res = gen_reg_rtx (mode);
42184 emit_move_insn (res, operand1);
42186 /* xa = abs (operand1) */
42187 xa = ix86_expand_sse_fabs (res, &smask);
42189 /* if (!isless (xa, TWO52)) goto label; */
42190 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42192 /* res = xa + TWO52 - TWO52; */
42193 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42194 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
42195 emit_move_insn (res, tmp);
42197 /* generate 1.0 */
42198 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42200 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
42201 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
42202 emit_insn (gen_rtx_SET (VOIDmode, mask,
42203 gen_rtx_AND (mode, mask, one)));
42204 tmp = expand_simple_binop (mode, MINUS,
42205 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
42206 emit_move_insn (res, tmp);
42208 /* res = copysign (res, operand1) */
42209 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
42211 emit_label (label);
42212 LABEL_NUSES (label) = 1;
42214 emit_move_insn (operand0, res);
42217 /* Expand SSE sequence for computing round from OPERAND1 storing
42218 into OPERAND0. */
42219 void
42220 ix86_expand_round (rtx operand0, rtx operand1)
42222 /* C code for the stuff we're doing below:
42223 double xa = fabs (x);
42224 if (!isless (xa, TWO52))
42225 return x;
42226 xa = (double)(long)(xa + nextafter (0.5, 0.0));
42227 return copysign (xa, x);
42229 enum machine_mode mode = GET_MODE (operand0);
42230 rtx res, TWO52, xa, label, xi, half, mask;
42231 const struct real_format *fmt;
42232 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42234 /* Temporary for holding the result, initialized to the input
42235 operand to ease control flow. */
42236 res = gen_reg_rtx (mode);
42237 emit_move_insn (res, operand1);
42239 TWO52 = ix86_gen_TWO52 (mode);
42240 xa = ix86_expand_sse_fabs (res, &mask);
42241 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42243 /* load nextafter (0.5, 0.0) */
42244 fmt = REAL_MODE_FORMAT (mode);
42245 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42246 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
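/* pred_half is the largest representable value below 0.5.  Using an exact
   0.5 here could go wrong for inputs just under 0.5: e.g. for the largest
   double below 0.5, xa + 0.5 rounds up to 1.0 in the addition and would
   truncate to 1, whereas xa + pred_half stays below 1.0 and truncates
   to 0.  */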
42248 /* xa = xa + 0.5 */
42249 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
42250 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
42252 /* xa = (double)(int64_t)xa */
42253 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42254 expand_fix (xi, xa, 0);
42255 expand_float (xa, xi, 0);
42257 /* res = copysign (xa, operand1) */
42258 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
42260 emit_label (label);
42261 LABEL_NUSES (label) = 1;
42263 emit_move_insn (operand0, res);
42266 /* Expand SSE sequence for computing round
42267 from OP1 storing into OP0 using sse4 round insn. */
42268 void
42269 ix86_expand_round_sse4 (rtx op0, rtx op1)
42271 enum machine_mode mode = GET_MODE (op0);
42272 rtx e1, e2, res, half;
42273 const struct real_format *fmt;
42274 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42275 rtx (*gen_copysign) (rtx, rtx, rtx);
42276 rtx (*gen_round) (rtx, rtx, rtx);
42278 switch (mode)
42280 case SFmode:
42281 gen_copysign = gen_copysignsf3;
42282 gen_round = gen_sse4_1_roundsf2;
42283 break;
42284 case DFmode:
42285 gen_copysign = gen_copysigndf3;
42286 gen_round = gen_sse4_1_rounddf2;
42287 break;
42288 default:
42289 gcc_unreachable ();
42292 /* round (a) = trunc (a + copysign (0.5, a)) */
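/* E.g. round (-2.7): copysign (0.5, -2.7) is -0.5, -2.7 + -0.5 is -3.2,
   and trunc (-3.2) is -3.  The 0.5 actually used is pred_half, loaded
   below, so that the addition itself cannot round up across an integer
   boundary.  */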
42294 /* load nextafter (0.5, 0.0) */
42295 fmt = REAL_MODE_FORMAT (mode);
42296 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42297 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42298 half = const_double_from_real_value (pred_half, mode);
42300 /* e1 = copysign (0.5, op1) */
42301 e1 = gen_reg_rtx (mode);
42302 emit_insn (gen_copysign (e1, half, op1));
42304 /* e2 = op1 + e1 */
42305 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
42307 /* res = trunc (e2) */
42308 res = gen_reg_rtx (mode);
42309 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
42311 emit_move_insn (op0, res);
42315 /* Table of valid machine attributes. */
42316 static const struct attribute_spec ix86_attribute_table[] =
42318 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
42319 affects_type_identity } */
42320 /* Stdcall attribute says callee is responsible for popping arguments
42321 if they are not variable. */
42322 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42323 true },
42324 /* Fastcall attribute says callee is responsible for popping arguments
42325 if they are not variable. */
42326 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42327 true },
42328 /* Thiscall attribute says callee is responsible for popping arguments
42329 if they are not variable. */
42330 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42331 true },
42332 /* Cdecl attribute says the callee is a normal C declaration */
42333 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42334 true },
42335 /* Regparm attribute specifies how many integer arguments are to be
42336 passed in registers. */
42337 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
42338 true },
42339 /* Sseregparm attribute says we are using x86_64 calling conventions
42340 for FP arguments. */
42341 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42342 true },
42343 /* The transactional memory builtins are implicitly regparm or fastcall
42344 depending on the ABI. Override the generic do-nothing attribute that
42345 these builtins were declared with. */
42346 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
42347 true },
42348 /* force_align_arg_pointer says this function realigns the stack at entry. */
42349 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
42350 false, true, true, ix86_handle_cconv_attribute, false },
42351 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42352 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
42353 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
42354 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
42355 false },
42356 #endif
42357 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42358 false },
42359 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42360 false },
42361 #ifdef SUBTARGET_ATTRIBUTE_TABLE
42362 SUBTARGET_ATTRIBUTE_TABLE,
42363 #endif
42364 /* ms_abi and sysv_abi calling convention function attributes. */
42365 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42366 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42367 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
42368 false },
42369 { "callee_pop_aggregate_return", 1, 1, false, true, true,
42370 ix86_handle_callee_pop_aggregate_return, true },
42371 /* End element. */
42372 { NULL, 0, 0, false, false, false, NULL, false }
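/* For illustration, these attributes appear in user code roughly as
     int __attribute__((fastcall)) f (int a, int b);
   which passes the first two integer arguments in ECX and EDX, or
     int __attribute__((regparm(3))) g (int a, int b, int c);
   which passes up to three integer arguments in registers.  */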
42375 /* Implement targetm.vectorize.builtin_vectorization_cost. */
42376 static int
42377 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
42378 tree vectype,
42379 int misalign ATTRIBUTE_UNUSED)
42381 unsigned elements;
42383 switch (type_of_cost)
42385 case scalar_stmt:
42386 return ix86_cost->scalar_stmt_cost;
42388 case scalar_load:
42389 return ix86_cost->scalar_load_cost;
42391 case scalar_store:
42392 return ix86_cost->scalar_store_cost;
42394 case vector_stmt:
42395 return ix86_cost->vec_stmt_cost;
42397 case vector_load:
42398 return ix86_cost->vec_align_load_cost;
42400 case vector_store:
42401 return ix86_cost->vec_store_cost;
42403 case vec_to_scalar:
42404 return ix86_cost->vec_to_scalar_cost;
42406 case scalar_to_vec:
42407 return ix86_cost->scalar_to_vec_cost;
42409 case unaligned_load:
42410 case unaligned_store:
42411 return ix86_cost->vec_unalign_load_cost;
42413 case cond_branch_taken:
42414 return ix86_cost->cond_taken_branch_cost;
42416 case cond_branch_not_taken:
42417 return ix86_cost->cond_not_taken_branch_cost;
42419 case vec_perm:
42420 case vec_promote_demote:
42421 return ix86_cost->vec_stmt_cost;
42423 case vec_construct:
42424 elements = TYPE_VECTOR_SUBPARTS (vectype);
42425 return elements / 2 + 1;
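/* E.g. constructing a vector with 8 subparts (such as V8SImode) is
   costed 8 / 2 + 1 = 5 by the computation above.  */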
42427 default:
42428 gcc_unreachable ();
42432 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
42433 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
42434 insn every time. */
42436 static GTY(()) rtx vselect_insn;
42438 /* Initialize vselect_insn. */
42440 static void
42441 init_vselect_insn (void)
42443 unsigned i;
42444 rtx x;
42446 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
42447 for (i = 0; i < MAX_VECT_LEN; ++i)
42448 XVECEXP (x, 0, i) = const0_rtx;
42449 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
42450 const0_rtx), x);
42451 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
42452 start_sequence ();
42453 vselect_insn = emit_insn (x);
42454 end_sequence ();
42457 /* Construct (set target (vec_select op0 (parallel perm))) and
42458 return true if that's a valid instruction in the active ISA. */
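/* For instance, with a V4SFmode TARGET and PERM { 2, 3, 0, 1 } the pattern
   tried is (set target (vec_select:V4SF op0 (parallel [2 3 0 1])));
   recog_memoized decides whether some insn in sse.md accepts it for the
   active ISA.  */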
42460 static bool
42461 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
42462 unsigned nelt, bool testing_p)
42464 unsigned int i;
42465 rtx x, save_vconcat;
42466 int icode;
42468 if (vselect_insn == NULL_RTX)
42469 init_vselect_insn ();
42471 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
42472 PUT_NUM_ELEM (XVEC (x, 0), nelt);
42473 for (i = 0; i < nelt; ++i)
42474 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
42475 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42476 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
42477 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
42478 SET_DEST (PATTERN (vselect_insn)) = target;
42479 icode = recog_memoized (vselect_insn);
42481 if (icode >= 0 && !testing_p)
42482 emit_insn (copy_rtx (PATTERN (vselect_insn)));
42484 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
42485 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
42486 INSN_CODE (vselect_insn) = -1;
42488 return icode >= 0;
42491 /* Similar, but generate a vec_concat from op0 and op1 as well. */
42493 static bool
42494 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
42495 const unsigned char *perm, unsigned nelt,
42496 bool testing_p)
42498 enum machine_mode v2mode;
42499 rtx x;
42500 bool ok;
42502 if (vselect_insn == NULL_RTX)
42503 init_vselect_insn ();
42505 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
42506 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42507 PUT_MODE (x, v2mode);
42508 XEXP (x, 0) = op0;
42509 XEXP (x, 1) = op1;
42510 ok = expand_vselect (target, x, perm, nelt, testing_p);
42511 XEXP (x, 0) = const0_rtx;
42512 XEXP (x, 1) = const0_rtx;
42513 return ok;
42516 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42517 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
42519 static bool
42520 expand_vec_perm_blend (struct expand_vec_perm_d *d)
42522 enum machine_mode vmode = d->vmode;
42523 unsigned i, mask, nelt = d->nelt;
42524 rtx target, op0, op1, x;
42525 rtx rperm[32], vperm;
42527 if (d->one_operand_p)
42528 return false;
42529 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
42531 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
42533 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
42535 else
42536 return false;
42538 /* This is a blend, not a permute. Elements must stay in their
42539 respective lanes. */
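/* E.g. the V4SImode permutation { 0, 5, 2, 7 } is a blend: element i
   comes from position i of one of the two operands.  { 1, 5, 2, 7 }
   is not, since element 0 comes from position 1.  */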
42540 for (i = 0; i < nelt; ++i)
42542 unsigned e = d->perm[i];
42543 if (!(e == i || e == i + nelt))
42544 return false;
42547 if (d->testing_p)
42548 return true;
42550 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
42551 decision should be extracted elsewhere, so that we only try that
42552 sequence once all budget==3 options have been tried. */
42553 target = d->target;
42554 op0 = d->op0;
42555 op1 = d->op1;
42556 mask = 0;
42558 switch (vmode)
42560 case V4DFmode:
42561 case V8SFmode:
42562 case V2DFmode:
42563 case V4SFmode:
42564 case V8HImode:
42565 case V8SImode:
42566 for (i = 0; i < nelt; ++i)
42567 mask |= (d->perm[i] >= nelt) << i;
42568 break;
42570 case V2DImode:
42571 for (i = 0; i < 2; ++i)
42572 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
42573 vmode = V8HImode;
42574 goto do_subreg;
42576 case V4SImode:
42577 for (i = 0; i < 4; ++i)
42578 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42579 vmode = V8HImode;
42580 goto do_subreg;
42582 case V16QImode:
42583 /* See if bytes move in pairs so we can use pblendw with
42584 an immediate argument, rather than pblendvb with a vector
42585 argument. */
42586 for (i = 0; i < 16; i += 2)
42587 if (d->perm[i] + 1 != d->perm[i + 1])
42589 use_pblendvb:
42590 for (i = 0; i < nelt; ++i)
42591 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
42593 finish_pblendvb:
42594 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
42595 vperm = force_reg (vmode, vperm);
42597 if (GET_MODE_SIZE (vmode) == 16)
42598 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
42599 else
42600 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
42601 if (target != d->target)
42602 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42603 return true;
42606 for (i = 0; i < 8; ++i)
42607 mask |= (d->perm[i * 2] >= 16) << i;
42608 vmode = V8HImode;
42609 /* FALLTHRU */
42611 do_subreg:
42612 target = gen_reg_rtx (vmode);
42613 op0 = gen_lowpart (vmode, op0);
42614 op1 = gen_lowpart (vmode, op1);
42615 break;
42617 case V32QImode:
42618 /* See if bytes move in pairs. If not, vpblendvb must be used. */
42619 for (i = 0; i < 32; i += 2)
42620 if (d->perm[i] + 1 != d->perm[i + 1])
42621 goto use_pblendvb;
42622 /* See if bytes move in quadruplets. If yes, vpblendd
42623 with immediate can be used. */
42624 for (i = 0; i < 32; i += 4)
42625 if (d->perm[i] + 2 != d->perm[i + 2])
42626 break;
42627 if (i < 32)
42629 /* See if bytes move the same in both lanes. If yes,
42630 vpblendw with immediate can be used. */
42631 for (i = 0; i < 16; i += 2)
42632 if (d->perm[i] + 16 != d->perm[i + 16])
42633 goto use_pblendvb;
42635 /* Use vpblendw. */
42636 for (i = 0; i < 16; ++i)
42637 mask |= (d->perm[i * 2] >= 32) << i;
42638 vmode = V16HImode;
42639 goto do_subreg;
42642 /* Use vpblendd. */
42643 for (i = 0; i < 8; ++i)
42644 mask |= (d->perm[i * 4] >= 32) << i;
42645 vmode = V8SImode;
42646 goto do_subreg;
42648 case V16HImode:
42649 /* See if words move in pairs. If yes, vpblendd can be used. */
42650 for (i = 0; i < 16; i += 2)
42651 if (d->perm[i] + 1 != d->perm[i + 1])
42652 break;
42653 if (i < 16)
42655 /* See if words move the same in both lanes. If not,
42656 vpblendvb must be used. */
42657 for (i = 0; i < 8; i++)
42658 if (d->perm[i] + 8 != d->perm[i + 8])
42660 /* Use vpblendvb. */
42661 for (i = 0; i < 32; ++i)
42662 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
42664 vmode = V32QImode;
42665 nelt = 32;
42666 target = gen_reg_rtx (vmode);
42667 op0 = gen_lowpart (vmode, op0);
42668 op1 = gen_lowpart (vmode, op1);
42669 goto finish_pblendvb;
42672 /* Use vpblendw. */
42673 for (i = 0; i < 16; ++i)
42674 mask |= (d->perm[i] >= 16) << i;
42675 break;
42678 /* Use vpblendd. */
42679 for (i = 0; i < 8; ++i)
42680 mask |= (d->perm[i * 2] >= 16) << i;
42681 vmode = V8SImode;
42682 goto do_subreg;
42684 case V4DImode:
42685 /* Use vpblendd. */
42686 for (i = 0; i < 4; ++i)
42687 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42688 vmode = V8SImode;
42689 goto do_subreg;
42691 default:
42692 gcc_unreachable ();
42695 /* This matches five different patterns with the different modes. */
42696 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
42697 x = gen_rtx_SET (VOIDmode, target, x);
42698 emit_insn (x);
42699 if (target != d->target)
42700 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42702 return true;
42705 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42706 in terms of the variable form of vpermilps.
42708 Note that we will have already failed the immediate input vpermilps,
42709 which requires that the high and low part shuffle be identical; the
42710 variable form doesn't require that. */
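/* For example, the V8SFmode permutation { 1, 0, 3, 2, 7, 6, 5, 4 } swaps
   pairs in the low lane but reverses the high lane; the two in-lane
   shuffles differ, so the immediate form is not usable, but the selector
   vector { 1, 0, 3, 2, 3, 2, 1, 0 } built below drives the variable
   vpermilps.  */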
42712 static bool
42713 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
42715 rtx rperm[8], vperm;
42716 unsigned i;
42718 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
42719 return false;
42721 /* We can only permute within the 128-bit lane. */
42722 for (i = 0; i < 8; ++i)
42724 unsigned e = d->perm[i];
42725 if (i < 4 ? e >= 4 : e < 4)
42726 return false;
42729 if (d->testing_p)
42730 return true;
42732 for (i = 0; i < 8; ++i)
42734 unsigned e = d->perm[i];
42736 /* Within each 128-bit lane, the elements of op0 are numbered
42737 from 0 and the elements of op1 are numbered from 4. */
42738 if (e >= 8 + 4)
42739 e -= 8;
42740 else if (e >= 4)
42741 e -= 4;
42743 rperm[i] = GEN_INT (e);
42746 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
42747 vperm = force_reg (V8SImode, vperm);
42748 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
42750 return true;
42753 /* Return true if permutation D can be performed as VMODE permutation
42754 instead. */
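/* For instance, the V16QImode permutation
   { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 }
   moves whole 4-byte chunks, so it can equally be performed as the
   V4SImode permutation { 1, 0, 3, 2 }.  */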
42756 static bool
42757 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
42759 unsigned int i, j, chunk;
42761 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
42762 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
42763 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
42764 return false;
42766 if (GET_MODE_NUNITS (vmode) >= d->nelt)
42767 return true;
42769 chunk = d->nelt / GET_MODE_NUNITS (vmode);
42770 for (i = 0; i < d->nelt; i += chunk)
42771 if (d->perm[i] & (chunk - 1))
42772 return false;
42773 else
42774 for (j = 1; j < chunk; ++j)
42775 if (d->perm[i] + j != d->perm[i + j])
42776 return false;
42778 return true;
42781 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42782 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
42784 static bool
42785 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
42787 unsigned i, nelt, eltsz, mask;
42788 unsigned char perm[32];
42789 enum machine_mode vmode = V16QImode;
42790 rtx rperm[32], vperm, target, op0, op1;
42792 nelt = d->nelt;
42794 if (!d->one_operand_p)
42796 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
42798 if (TARGET_AVX2
42799 && valid_perm_using_mode_p (V2TImode, d))
42801 if (d->testing_p)
42802 return true;
42804 /* Use vperm2i128 insn. The pattern uses
42805 V4DImode instead of V2TImode. */
42806 target = d->target;
42807 if (d->vmode != V4DImode)
42808 target = gen_reg_rtx (V4DImode);
42809 op0 = gen_lowpart (V4DImode, d->op0);
42810 op1 = gen_lowpart (V4DImode, d->op1);
42811 rperm[0]
42812 = GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0)
42813 || ((d->perm[nelt / 2] & (nelt / 2)) ? 2 : 0));
42814 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
42815 if (target != d->target)
42816 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42817 return true;
42819 return false;
42822 else
42824 if (GET_MODE_SIZE (d->vmode) == 16)
42826 if (!TARGET_SSSE3)
42827 return false;
42829 else if (GET_MODE_SIZE (d->vmode) == 32)
42831 if (!TARGET_AVX2)
42832 return false;
42834 /* V4DImode should already have been handled through
42835 expand_vselect by the vpermq instruction. */
42836 gcc_assert (d->vmode != V4DImode);
42838 vmode = V32QImode;
42839 if (d->vmode == V8SImode
42840 || d->vmode == V16HImode
42841 || d->vmode == V32QImode)
42843 /* First see if vpermq can be used for
42844 V8SImode/V16HImode/V32QImode. */
42845 if (valid_perm_using_mode_p (V4DImode, d))
42847 for (i = 0; i < 4; i++)
42848 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
42849 if (d->testing_p)
42850 return true;
42851 target = gen_reg_rtx (V4DImode);
42852 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
42853 perm, 4, false))
42855 emit_move_insn (d->target,
42856 gen_lowpart (d->vmode, target));
42857 return true;
42859 return false;
42862 /* Next see if vpermd can be used. */
42863 if (valid_perm_using_mode_p (V8SImode, d))
42864 vmode = V8SImode;
42866 /* Or if vpermps can be used. */
42867 else if (d->vmode == V8SFmode)
42868 vmode = V8SImode;
42870 if (vmode == V32QImode)
42872 /* vpshufb only works within 128-bit lanes; it is not
42873 possible to shuffle bytes between the lanes. */
42874 for (i = 0; i < nelt; ++i)
42875 if ((d->perm[i] ^ i) & (nelt / 2))
42876 return false;
42879 else
42880 return false;
42883 if (d->testing_p)
42884 return true;
42886 if (vmode == V8SImode)
42887 for (i = 0; i < 8; ++i)
42888 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
42889 else
42891 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
42892 if (!d->one_operand_p)
42893 mask = 2 * nelt - 1;
42894 else if (vmode == V16QImode)
42895 mask = nelt - 1;
42896 else
42897 mask = nelt / 2 - 1;
42899 for (i = 0; i < nelt; ++i)
42901 unsigned j, e = d->perm[i] & mask;
42902 for (j = 0; j < eltsz; ++j)
42903 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
42907 vperm = gen_rtx_CONST_VECTOR (vmode,
42908 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
42909 vperm = force_reg (vmode, vperm);
42911 target = d->target;
42912 if (d->vmode != vmode)
42913 target = gen_reg_rtx (vmode);
42914 op0 = gen_lowpart (vmode, d->op0);
42915 if (d->one_operand_p)
42917 if (vmode == V16QImode)
42918 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
42919 else if (vmode == V32QImode)
42920 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
42921 else if (vmode == V8SFmode)
42922 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
42923 else
42924 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
42926 else
42928 op1 = gen_lowpart (vmode, d->op1);
42929 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
42931 if (target != d->target)
42932 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42934 return true;
42937 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
42938 in a single instruction. */
42940 static bool
42941 expand_vec_perm_1 (struct expand_vec_perm_d *d)
42943 unsigned i, nelt = d->nelt;
42944 unsigned char perm2[MAX_VECT_LEN];
42946 /* Check plain VEC_SELECT first, because AVX has instructions that could
42947 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
42948 input where SEL+CONCAT may not. */
42949 if (d->one_operand_p)
42951 int mask = nelt - 1;
42952 bool identity_perm = true;
42953 bool broadcast_perm = true;
42955 for (i = 0; i < nelt; i++)
42957 perm2[i] = d->perm[i] & mask;
42958 if (perm2[i] != i)
42959 identity_perm = false;
42960 if (perm2[i])
42961 broadcast_perm = false;
42964 if (identity_perm)
42966 if (!d->testing_p)
42967 emit_move_insn (d->target, d->op0);
42968 return true;
42970 else if (broadcast_perm && TARGET_AVX2)
42972 /* Use vpbroadcast{b,w,d}. */
42973 rtx (*gen) (rtx, rtx) = NULL;
42974 switch (d->vmode)
42976 case V32QImode:
42977 gen = gen_avx2_pbroadcastv32qi_1;
42978 break;
42979 case V16HImode:
42980 gen = gen_avx2_pbroadcastv16hi_1;
42981 break;
42982 case V8SImode:
42983 gen = gen_avx2_pbroadcastv8si_1;
42984 break;
42985 case V16QImode:
42986 gen = gen_avx2_pbroadcastv16qi;
42987 break;
42988 case V8HImode:
42989 gen = gen_avx2_pbroadcastv8hi;
42990 break;
42991 case V8SFmode:
42992 gen = gen_avx2_vec_dupv8sf_1;
42993 break;
42994 /* For other modes prefer other shuffles this function creates. */
42995 default: break;
42997 if (gen != NULL)
42999 if (!d->testing_p)
43000 emit_insn (gen (d->target, d->op0));
43001 return true;
43005 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
43006 return true;
43008 /* There are plenty of patterns in sse.md that are written for
43009 SEL+CONCAT and are not replicated for a single op. Perhaps
43010 that should be changed, to avoid the nastiness here. */
43012 /* Recognize interleave style patterns, which means incrementing
43013 every other permutation operand. */
43014 for (i = 0; i < nelt; i += 2)
43016 perm2[i] = d->perm[i] & mask;
43017 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
43019 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43020 d->testing_p))
43021 return true;
43023 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
43024 if (nelt >= 4)
43026 for (i = 0; i < nelt; i += 4)
43028 perm2[i + 0] = d->perm[i + 0] & mask;
43029 perm2[i + 1] = d->perm[i + 1] & mask;
43030 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
43031 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
43034 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43035 d->testing_p))
43036 return true;
43040 /* Finally, try the fully general two operand permute. */
43041 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
43042 d->testing_p))
43043 return true;
43045 /* Recognize interleave style patterns with reversed operands. */
43046 if (!d->one_operand_p)
43048 for (i = 0; i < nelt; ++i)
43050 unsigned e = d->perm[i];
43051 if (e >= nelt)
43052 e -= nelt;
43053 else
43054 e += nelt;
43055 perm2[i] = e;
43058 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
43059 d->testing_p))
43060 return true;
43063 /* Try the SSE4.1 blend variable merge instructions. */
43064 if (expand_vec_perm_blend (d))
43065 return true;
43067 /* Try one of the AVX vpermil variable permutations. */
43068 if (expand_vec_perm_vpermil (d))
43069 return true;
43071 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
43072 vpshufb, vpermd, vpermps or vpermq variable permutation. */
43073 if (expand_vec_perm_pshufb (d))
43074 return true;
43076 /* Try the AVX512F vpermi2 instructions. */
43077 rtx vec[64];
43078 enum machine_mode mode = d->vmode;
43079 if (mode == V8DFmode)
43080 mode = V8DImode;
43081 else if (mode == V16SFmode)
43082 mode = V16SImode;
43083 for (i = 0; i < nelt; ++i)
43084 vec[i] = GEN_INT (d->perm[i]);
43085 rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
43086 if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
43087 return true;
43089 return false;
43092 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43093 in terms of a pair of pshuflw + pshufhw instructions. */
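/* E.g. the V8HImode permutation { 3, 1, 2, 0, 7, 5, 6, 4 } becomes a
   pshuflw with { 3, 1, 2, 0, 4, 5, 6, 7 } followed by a pshufhw with
   { 0, 1, 2, 3, 7, 5, 6, 4 }.  */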
43095 static bool
43096 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
43098 unsigned char perm2[MAX_VECT_LEN];
43099 unsigned i;
43100 bool ok;
43102 if (d->vmode != V8HImode || !d->one_operand_p)
43103 return false;
43105 /* The two permutations only operate in 64-bit lanes. */
43106 for (i = 0; i < 4; ++i)
43107 if (d->perm[i] >= 4)
43108 return false;
43109 for (i = 4; i < 8; ++i)
43110 if (d->perm[i] < 4)
43111 return false;
43113 if (d->testing_p)
43114 return true;
43116 /* Emit the pshuflw. */
43117 memcpy (perm2, d->perm, 4);
43118 for (i = 4; i < 8; ++i)
43119 perm2[i] = i;
43120 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
43121 gcc_assert (ok);
43123 /* Emit the pshufhw. */
43124 memcpy (perm2 + 4, d->perm + 4, 4);
43125 for (i = 0; i < 4; ++i)
43126 perm2[i] = i;
43127 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
43128 gcc_assert (ok);
43130 return true;
43133 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43134 the permutation using the SSSE3 palignr instruction. This succeeds
43135 when all of the elements in PERM fit within one vector and we merely
43136 need to shift them down so that a single vector permutation has a
43137 chance to succeed. */
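/* For example, the V8HImode permutation { 3, 4, 5, 6, 7, 8, 9, 10 } has
   min == 3: after a palignr by three 16-bit elements the required values
   sit at positions 0..7 of the shifted vector, so the remaining
   single-operand permutation is the identity and no pshufb is needed.  */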
43139 static bool
43140 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
43142 unsigned i, nelt = d->nelt;
43143 unsigned min, max;
43144 bool in_order, ok;
43145 rtx shift, target;
43146 struct expand_vec_perm_d dcopy;
43148 /* Even with AVX, palignr only operates on 128-bit vectors. */
43149 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43150 return false;
43152 min = nelt, max = 0;
43153 for (i = 0; i < nelt; ++i)
43155 unsigned e = d->perm[i];
43156 if (e < min)
43157 min = e;
43158 if (e > max)
43159 max = e;
43161 if (min == 0 || max - min >= nelt)
43162 return false;
43164 /* Given that we have SSSE3, we know we'll be able to implement the
43165 single operand permutation after the palignr with pshufb. */
43166 if (d->testing_p)
43167 return true;
43169 dcopy = *d;
43170 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
43171 target = gen_reg_rtx (TImode);
43172 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
43173 gen_lowpart (TImode, d->op0), shift));
43175 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
43176 dcopy.one_operand_p = true;
43178 in_order = true;
43179 for (i = 0; i < nelt; ++i)
43181 unsigned e = dcopy.perm[i] - min;
43182 if (e != i)
43183 in_order = false;
43184 dcopy.perm[i] = e;
43187 /* Test for the degenerate case where the alignment by itself
43188 produces the desired permutation. */
43189 if (in_order)
43191 emit_move_insn (d->target, dcopy.op0);
43192 return true;
43195 ok = expand_vec_perm_1 (&dcopy);
43196 gcc_assert (ok);
43198 return ok;
43201 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
43202 the permutation using the SSE4_1 pblendv instruction. Potentially
43203 reduces the permutation from two pshufb plus an ior to one pshufb plus a pblendv. */
43205 static bool
43206 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
43208 unsigned i, which, nelt = d->nelt;
43209 struct expand_vec_perm_d dcopy, dcopy1;
43210 enum machine_mode vmode = d->vmode;
43211 bool ok;
43213 /* Use the same checks as in expand_vec_perm_blend, but skipping
43214 AVX2 as it requires more than 2 instructions for general case. */
43215 if (d->one_operand_p)
43216 return false;
43217 if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
43219 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
43221 else
43222 return false;
43224 /* Figure out which permutation elements do not stay in their
43225 respective lanes. */
43226 for (i = 0, which = 0; i < nelt; ++i)
43228 unsigned e = d->perm[i];
43229 if (e != i)
43230 which |= (e < nelt ? 1 : 2);
43232 /* We can pblend the part where elements do not stay in their
43233 respective lanes only when these elements all come from the same
43234 half of the permutation.
43235 {0 1 8 3 4 5 9 7} is ok: 8 and 9 are not in their respective
43236 lanes, but both are >= 8.
43237 {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are not in their respective
43238 lanes, and 8 >= 8 but 2 is not. */
43239 if (which != 1 && which != 2)
43240 return false;
43241 if (d->testing_p)
43242 return true;
43244 /* First we apply a one-operand permutation to the part where
43245 elements do not stay in their respective lanes. */
43246 dcopy = *d;
43247 if (which == 2)
43248 dcopy.op0 = dcopy.op1 = d->op1;
43249 else
43250 dcopy.op0 = dcopy.op1 = d->op0;
43251 dcopy.one_operand_p = true;
43253 for (i = 0; i < nelt; ++i)
43254 dcopy.perm[i] = d->perm[i] & (nelt - 1);
43256 ok = expand_vec_perm_1 (&dcopy);
43257 gcc_assert (ok);
43259 /* Next we put permuted elements into their positions. */
43260 dcopy1 = *d;
43261 if (which == 2)
43262 dcopy1.op1 = dcopy.target;
43263 else
43264 dcopy1.op0 = dcopy.target;
43266 for (i = 0; i < nelt; ++i)
43267 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
43269 ok = expand_vec_perm_blend (&dcopy1);
43270 gcc_assert (ok);
43272 return true;
43275 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
43277 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43278 a two vector permutation into a single vector permutation by using
43279 an interleave operation to merge the vectors. */
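/* For instance, the V4SImode permutation { 0, 5, 1, 4 } draws only on the
   low halves of both operands: interleave low (punpckldq) yields
   { 0, 4, 1, 5 }, and the single-operand shuffle { 0, 3, 2, 1 } of that
   result produces the requested order.  */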
43281 static bool
43282 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
43284 struct expand_vec_perm_d dremap, dfinal;
43285 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
43286 unsigned HOST_WIDE_INT contents;
43287 unsigned char remap[2 * MAX_VECT_LEN];
43288 rtx seq;
43289 bool ok, same_halves = false;
43291 if (GET_MODE_SIZE (d->vmode) == 16)
43293 if (d->one_operand_p)
43294 return false;
43296 else if (GET_MODE_SIZE (d->vmode) == 32)
43298 if (!TARGET_AVX)
43299 return false;
43300 /* For 32-byte modes allow even d->one_operand_p.
43301 The lack of cross-lane shuffling in some instructions
43302 might prevent a single insn shuffle. */
43303 dfinal = *d;
43304 dfinal.testing_p = true;
43305 /* If expand_vec_perm_interleave3 can expand this into
43306 a 3 insn sequence, give up and let it be expanded as a
43307 3 insn sequence. While that is one insn longer, it
43308 doesn't need a memory operand, and in the common case
43309 where the interleave low and interleave high permutations
43310 with the same operands are adjacent, the pair needs only
43311 4 insns in total after CSE. */
43312 if (expand_vec_perm_interleave3 (&dfinal))
43313 return false;
43315 else
43316 return false;
43318 /* Examine from whence the elements come. */
43319 contents = 0;
43320 for (i = 0; i < nelt; ++i)
43321 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
43323 memset (remap, 0xff, sizeof (remap));
43324 dremap = *d;
43326 if (GET_MODE_SIZE (d->vmode) == 16)
43328 unsigned HOST_WIDE_INT h1, h2, h3, h4;
43330 /* Split the two input vectors into 4 halves. */
43331 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
43332 h2 = h1 << nelt2;
43333 h3 = h2 << nelt2;
43334 h4 = h3 << nelt2;
43336 /* If the elements come only from the low halves, use interleave low;
43337 similarly for interleave high. If the elements are from mis-matched
43338 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
43339 if ((contents & (h1 | h3)) == contents)
43341 /* punpckl* */
43342 for (i = 0; i < nelt2; ++i)
43344 remap[i] = i * 2;
43345 remap[i + nelt] = i * 2 + 1;
43346 dremap.perm[i * 2] = i;
43347 dremap.perm[i * 2 + 1] = i + nelt;
43349 if (!TARGET_SSE2 && d->vmode == V4SImode)
43350 dremap.vmode = V4SFmode;
43352 else if ((contents & (h2 | h4)) == contents)
43354 /* punpckh* */
43355 for (i = 0; i < nelt2; ++i)
43357 remap[i + nelt2] = i * 2;
43358 remap[i + nelt + nelt2] = i * 2 + 1;
43359 dremap.perm[i * 2] = i + nelt2;
43360 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
43362 if (!TARGET_SSE2 && d->vmode == V4SImode)
43363 dremap.vmode = V4SFmode;
43365 else if ((contents & (h1 | h4)) == contents)
43367 /* shufps */
43368 for (i = 0; i < nelt2; ++i)
43370 remap[i] = i;
43371 remap[i + nelt + nelt2] = i + nelt2;
43372 dremap.perm[i] = i;
43373 dremap.perm[i + nelt2] = i + nelt + nelt2;
43375 if (nelt != 4)
43377 /* shufpd */
43378 dremap.vmode = V2DImode;
43379 dremap.nelt = 2;
43380 dremap.perm[0] = 0;
43381 dremap.perm[1] = 3;
43384 else if ((contents & (h2 | h3)) == contents)
43386 /* shufps */
43387 for (i = 0; i < nelt2; ++i)
43389 remap[i + nelt2] = i;
43390 remap[i + nelt] = i + nelt2;
43391 dremap.perm[i] = i + nelt2;
43392 dremap.perm[i + nelt2] = i + nelt;
43394 if (nelt != 4)
43396 /* shufpd */
43397 dremap.vmode = V2DImode;
43398 dremap.nelt = 2;
43399 dremap.perm[0] = 1;
43400 dremap.perm[1] = 2;
43403 else
43404 return false;
43406 else
43408 unsigned int nelt4 = nelt / 4, nzcnt = 0;
43409 unsigned HOST_WIDE_INT q[8];
43410 unsigned int nonzero_halves[4];
43412 /* Split the two input vectors into 8 quarters. */
43413 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
43414 for (i = 1; i < 8; ++i)
43415 q[i] = q[0] << (nelt4 * i);
43416 for (i = 0; i < 4; ++i)
43417 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
43419 nonzero_halves[nzcnt] = i;
43420 ++nzcnt;
43423 if (nzcnt == 1)
43425 gcc_assert (d->one_operand_p);
43426 nonzero_halves[1] = nonzero_halves[0];
43427 same_halves = true;
43429 else if (d->one_operand_p)
43431 gcc_assert (nonzero_halves[0] == 0);
43432 gcc_assert (nonzero_halves[1] == 1);
43435 if (nzcnt <= 2)
43437 if (d->perm[0] / nelt2 == nonzero_halves[1])
43439 /* Attempt to increase the likelihood that dfinal
43440 shuffle will be intra-lane. */
43441 char tmph = nonzero_halves[0];
43442 nonzero_halves[0] = nonzero_halves[1];
43443 nonzero_halves[1] = tmph;
43446 /* vperm2f128 or vperm2i128. */
43447 for (i = 0; i < nelt2; ++i)
43449 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
43450 remap[i + nonzero_halves[0] * nelt2] = i;
43451 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
43452 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
43455 if (d->vmode != V8SFmode
43456 && d->vmode != V4DFmode
43457 && d->vmode != V8SImode)
43459 dremap.vmode = V8SImode;
43460 dremap.nelt = 8;
43461 for (i = 0; i < 4; ++i)
43463 dremap.perm[i] = i + nonzero_halves[0] * 4;
43464 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
43468 else if (d->one_operand_p)
43469 return false;
43470 else if (TARGET_AVX2
43471 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
43473 /* vpunpckl* */
43474 for (i = 0; i < nelt4; ++i)
43476 remap[i] = i * 2;
43477 remap[i + nelt] = i * 2 + 1;
43478 remap[i + nelt2] = i * 2 + nelt2;
43479 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
43480 dremap.perm[i * 2] = i;
43481 dremap.perm[i * 2 + 1] = i + nelt;
43482 dremap.perm[i * 2 + nelt2] = i + nelt2;
43483 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
43486 else if (TARGET_AVX2
43487 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
43489 /* vpunpckh* */
43490 for (i = 0; i < nelt4; ++i)
43492 remap[i + nelt4] = i * 2;
43493 remap[i + nelt + nelt4] = i * 2 + 1;
43494 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
43495 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
43496 dremap.perm[i * 2] = i + nelt4;
43497 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
43498 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
43499 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
43502 else
43503 return false;
43506 /* Use the remapping array set up above to move the elements from their
43507 swizzled locations into their final destinations. */
43508 dfinal = *d;
43509 for (i = 0; i < nelt; ++i)
43511 unsigned e = remap[d->perm[i]];
43512 gcc_assert (e < nelt);
43513 /* If same_halves is true, both halves of the remapped vector are the
43514 same. Avoid cross-lane accesses if possible. */
43515 if (same_halves && i >= nelt2)
43517 gcc_assert (e < nelt2);
43518 dfinal.perm[i] = e + nelt2;
43520 else
43521 dfinal.perm[i] = e;
43523 if (!d->testing_p)
43525 dremap.target = gen_reg_rtx (dremap.vmode);
43526 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43528 dfinal.op1 = dfinal.op0;
43529 dfinal.one_operand_p = true;
43531 /* Test if the final remap can be done with a single insn. For V4SFmode or
43532 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
43533 start_sequence ();
43534 ok = expand_vec_perm_1 (&dfinal);
43535 seq = get_insns ();
43536 end_sequence ();
43538 if (!ok)
43539 return false;
43541 if (d->testing_p)
43542 return true;
43544 if (dremap.vmode != dfinal.vmode)
43546 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
43547 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
43550 ok = expand_vec_perm_1 (&dremap);
43551 gcc_assert (ok);
43553 emit_insn (seq);
43554 return true;
43557 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43558 a single vector cross-lane permutation into vpermq followed
43559 by any of the single insn permutations. */
43561 static bool
43562 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
43564 struct expand_vec_perm_d dremap, dfinal;
43565 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
43566 unsigned contents[2];
43567 bool ok;
43569 if (!(TARGET_AVX2
43570 && (d->vmode == V32QImode || d->vmode == V16HImode)
43571 && d->one_operand_p))
43572 return false;
43574 contents[0] = 0;
43575 contents[1] = 0;
43576 for (i = 0; i < nelt2; ++i)
43578 contents[0] |= 1u << (d->perm[i] / nelt4);
43579 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
43582 for (i = 0; i < 2; ++i)
43584 unsigned int cnt = 0;
43585 for (j = 0; j < 4; ++j)
43586 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
43587 return false;
43590 if (d->testing_p)
43591 return true;
43593 dremap = *d;
43594 dremap.vmode = V4DImode;
43595 dremap.nelt = 4;
43596 dremap.target = gen_reg_rtx (V4DImode);
43597 dremap.op0 = gen_lowpart (V4DImode, d->op0);
43598 dremap.op1 = dremap.op0;
43599 dremap.one_operand_p = true;
43600 for (i = 0; i < 2; ++i)
43602 unsigned int cnt = 0;
43603 for (j = 0; j < 4; ++j)
43604 if ((contents[i] & (1u << j)) != 0)
43605 dremap.perm[2 * i + cnt++] = j;
43606 for (; cnt < 2; ++cnt)
43607 dremap.perm[2 * i + cnt] = 0;
43610 dfinal = *d;
43611 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43612 dfinal.op1 = dfinal.op0;
43613 dfinal.one_operand_p = true;
43614 for (i = 0, j = 0; i < nelt; ++i)
43616 if (i == nelt2)
43617 j = 2;
43618 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
43619 if ((d->perm[i] / nelt4) == dremap.perm[j])
43621 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
43622 dfinal.perm[i] |= nelt4;
43623 else
43624 gcc_unreachable ();
43627 ok = expand_vec_perm_1 (&dremap);
43628 gcc_assert (ok);
43630 ok = expand_vec_perm_1 (&dfinal);
43631 gcc_assert (ok);
43633 return true;
43636 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
43637 a vector permutation using two instructions, vperm2f128 resp.
43638 vperm2i128 followed by any single in-lane permutation. */
43640 static bool
43641 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
43643 struct expand_vec_perm_d dfirst, dsecond;
43644 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
43645 bool ok;
43647 if (!TARGET_AVX
43648 || GET_MODE_SIZE (d->vmode) != 32
43649 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
43650 return false;
43652 dsecond = *d;
43653 dsecond.one_operand_p = false;
43654 dsecond.testing_p = true;
43656 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
43657 immediate. For perm < 16 the second permutation uses
43658 d->op0 as first operand, for perm >= 16 it uses d->op1
43659 as first operand. The second operand is the result of
43660 vperm2[fi]128. */
43661 for (perm = 0; perm < 32; perm++)
43663 /* Ignore permutations which do not move anything cross-lane. */
43664 if (perm < 16)
43666 /* The second shuffle for e.g. V4DFmode has
43667 0123 and ABCD operands.
43668 Ignore AB23, as 23 is already in the second lane
43669 of the first operand. */
43670 if ((perm & 0xc) == (1 << 2)) continue;
43671 /* And 01CD, as 01 is in the first lane of the first
43672 operand. */
43673 if ((perm & 3) == 0) continue;
43674 /* And 4567, as then the vperm2[fi]128 doesn't change
43675 anything on the original 4567 second operand. */
43676 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
43678 else
43680 /* The second shuffle for e.g. V4DFmode has
43681 4567 and ABCD operands.
43682 Ignore AB67, as 67 is already in the second lane
43683 of the first operand. */
43684 if ((perm & 0xc) == (3 << 2)) continue;
43685 /* And 45CD, as 45 is in the first lane of the first
43686 operand. */
43687 if ((perm & 3) == 2) continue;
43688 /* And 0123, as then the vperm2[fi]128 doesn't change
43689 anything on the original 0123 first operand. */
43690 if ((perm & 0xf) == (1 << 2)) continue;
43693 for (i = 0; i < nelt; i++)
43695 j = d->perm[i] / nelt2;
43696 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
43697 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
43698 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
43699 dsecond.perm[i] = d->perm[i] & (nelt - 1);
43700 else
43701 break;
43704 if (i == nelt)
43706 start_sequence ();
43707 ok = expand_vec_perm_1 (&dsecond);
43708 end_sequence ();
43710 else
43711 ok = false;
43713 if (ok)
43715 if (d->testing_p)
43716 return true;
43718 /* Found a usable second shuffle. dfirst will be
43719 vperm2f128 on d->op0 and d->op1. */
43720 dsecond.testing_p = false;
43721 dfirst = *d;
43722 dfirst.target = gen_reg_rtx (d->vmode);
43723 for (i = 0; i < nelt; i++)
43724 dfirst.perm[i] = (i & (nelt2 - 1))
43725 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
43727 ok = expand_vec_perm_1 (&dfirst);
43728 gcc_assert (ok);
43730 /* And dsecond is some single insn shuffle, taking
43731 d->op0 and result of vperm2f128 (if perm < 16) or
43732 d->op1 and result of vperm2f128 (otherwise). */
43733 dsecond.op1 = dfirst.target;
43734 if (perm >= 16)
43735 dsecond.op0 = dfirst.op1;
43737 ok = expand_vec_perm_1 (&dsecond);
43738 gcc_assert (ok);
43740 return true;
43743 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
43744 if (d->one_operand_p)
43745 return false;
43748 return false;
43751 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43752 a two vector permutation using 2 intra-lane interleave insns
43753 and cross-lane shuffle for 32-byte vectors. */
43755 static bool
43756 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
43758 unsigned i, nelt;
43759 rtx (*gen) (rtx, rtx, rtx);
43761 if (d->one_operand_p)
43762 return false;
43763 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
43765 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
43767 else
43768 return false;
43770 nelt = d->nelt;
43771 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
43772 return false;
43773 for (i = 0; i < nelt; i += 2)
43774 if (d->perm[i] != d->perm[0] + i / 2
43775 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
43776 return false;
43778 if (d->testing_p)
43779 return true;
43781 switch (d->vmode)
43783 case V32QImode:
43784 if (d->perm[0])
43785 gen = gen_vec_interleave_highv32qi;
43786 else
43787 gen = gen_vec_interleave_lowv32qi;
43788 break;
43789 case V16HImode:
43790 if (d->perm[0])
43791 gen = gen_vec_interleave_highv16hi;
43792 else
43793 gen = gen_vec_interleave_lowv16hi;
43794 break;
43795 case V8SImode:
43796 if (d->perm[0])
43797 gen = gen_vec_interleave_highv8si;
43798 else
43799 gen = gen_vec_interleave_lowv8si;
43800 break;
43801 case V4DImode:
43802 if (d->perm[0])
43803 gen = gen_vec_interleave_highv4di;
43804 else
43805 gen = gen_vec_interleave_lowv4di;
43806 break;
43807 case V8SFmode:
43808 if (d->perm[0])
43809 gen = gen_vec_interleave_highv8sf;
43810 else
43811 gen = gen_vec_interleave_lowv8sf;
43812 break;
43813 case V4DFmode:
43814 if (d->perm[0])
43815 gen = gen_vec_interleave_highv4df;
43816 else
43817 gen = gen_vec_interleave_lowv4df;
43818 break;
43819 default:
43820 gcc_unreachable ();
43823 emit_insn (gen (d->target, d->op0, d->op1));
43824 return true;
43827 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
43828 a single vector permutation using a single intra-lane vector
43829 permutation, vperm2f128 swapping the lanes and vblend* insn blending
43830 the non-swapped and swapped vectors together. */
43832 static bool
43833 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
43835 struct expand_vec_perm_d dfirst, dsecond;
43836 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
43837 rtx seq;
43838 bool ok;
43839 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
43841 if (!TARGET_AVX
43842 || TARGET_AVX2
43843 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
43844 || !d->one_operand_p)
43845 return false;
43847 dfirst = *d;
43848 for (i = 0; i < nelt; i++)
43849 dfirst.perm[i] = 0xff;
43850 for (i = 0, msk = 0; i < nelt; i++)
43852 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
43853 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
43854 return false;
43855 dfirst.perm[j] = d->perm[i];
43856 if (j != i)
43857 msk |= (1 << i);
43859 for (i = 0; i < nelt; i++)
43860 if (dfirst.perm[i] == 0xff)
43861 dfirst.perm[i] = i;
43863 if (!d->testing_p)
43864 dfirst.target = gen_reg_rtx (dfirst.vmode);
43866 start_sequence ();
43867 ok = expand_vec_perm_1 (&dfirst);
43868 seq = get_insns ();
43869 end_sequence ();
43871 if (!ok)
43872 return false;
43874 if (d->testing_p)
43875 return true;
43877 emit_insn (seq);
43879 dsecond = *d;
43880 dsecond.op0 = dfirst.target;
43881 dsecond.op1 = dfirst.target;
43882 dsecond.one_operand_p = true;
43883 dsecond.target = gen_reg_rtx (dsecond.vmode);
43884 for (i = 0; i < nelt; i++)
43885 dsecond.perm[i] = i ^ nelt2;
43887 ok = expand_vec_perm_1 (&dsecond);
43888 gcc_assert (ok);
43890 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
43891 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
43892 return true;
43895 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
43896 permutation using two vperm2f128, followed by a vshufpd insn blending
43897 the two vectors together. */
43899 static bool
43900 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
43902 struct expand_vec_perm_d dfirst, dsecond, dthird;
43903 bool ok;
43905 if (!TARGET_AVX || (d->vmode != V4DFmode))
43906 return false;
43908 if (d->testing_p)
43909 return true;
43911 dfirst = *d;
43912 dsecond = *d;
43913 dthird = *d;
43915 dfirst.perm[0] = (d->perm[0] & ~1);
43916 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
43917 dfirst.perm[2] = (d->perm[2] & ~1);
43918 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
43919 dsecond.perm[0] = (d->perm[1] & ~1);
43920 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
43921 dsecond.perm[2] = (d->perm[3] & ~1);
43922 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
43923 dthird.perm[0] = (d->perm[0] % 2);
43924 dthird.perm[1] = (d->perm[1] % 2) + 4;
43925 dthird.perm[2] = (d->perm[2] % 2) + 2;
43926 dthird.perm[3] = (d->perm[3] % 2) + 6;
43928 dfirst.target = gen_reg_rtx (dfirst.vmode);
43929 dsecond.target = gen_reg_rtx (dsecond.vmode);
43930 dthird.op0 = dfirst.target;
43931 dthird.op1 = dsecond.target;
43932 dthird.one_operand_p = false;
43934 canonicalize_perm (&dfirst);
43935 canonicalize_perm (&dsecond);
43937 ok = expand_vec_perm_1 (&dfirst)
43938 && expand_vec_perm_1 (&dsecond)
43939 && expand_vec_perm_1 (&dthird);
43941 gcc_assert (ok);
43943 return true;
43946 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
43947 permutation with two pshufb insns and an ior. We should have already
43948 failed all two instruction sequences. */
43950 static bool
43951 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
43953 rtx rperm[2][16], vperm, l, h, op, m128;
43954 unsigned int i, nelt, eltsz;
43956 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43957 return false;
43958 gcc_assert (!d->one_operand_p);
43960 if (d->testing_p)
43961 return true;
43963 nelt = d->nelt;
43964 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43966 /* Generate two permutation masks. If the required element is within
43967 the given vector it is shuffled into the proper lane. If the required
43968 element is in the other vector, force a zero into the lane by setting
43969 bit 7 in the permutation mask. */
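/* E.g. with V16QImode, if d->perm[1] is 18 (element 2 of the second
   vector), mask 1 gets byte index 2 at slot 1 and mask 0 gets -128
   there; the ior below then merges the two shuffled results.  */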
43970 m128 = GEN_INT (-128);
43971 for (i = 0; i < nelt; ++i)
43973 unsigned j, e = d->perm[i];
43974 unsigned which = (e >= nelt);
43975 if (e >= nelt)
43976 e -= nelt;
43978 for (j = 0; j < eltsz; ++j)
43980 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
43981 rperm[1-which][i*eltsz + j] = m128;
43985 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
43986 vperm = force_reg (V16QImode, vperm);
43988 l = gen_reg_rtx (V16QImode);
43989 op = gen_lowpart (V16QImode, d->op0);
43990 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
43992 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
43993 vperm = force_reg (V16QImode, vperm);
43995 h = gen_reg_rtx (V16QImode);
43996 op = gen_lowpart (V16QImode, d->op1);
43997 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
43999 op = d->target;
44000 if (d->vmode != V16QImode)
44001 op = gen_reg_rtx (V16QImode);
44002 emit_insn (gen_iorv16qi3 (op, l, h));
44003 if (op != d->target)
44004 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44006 return true;
44009 /* Implement an arbitrary permutation of one V32QImode or V16HImode operand
44010 with two vpshufb insns, a vpermq and a vpor. We should have already failed
44011 all two or three instruction sequences. */
44013 static bool
44014 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
44016 rtx rperm[2][32], vperm, l, h, hp, op, m128;
44017 unsigned int i, nelt, eltsz;
44019 if (!TARGET_AVX2
44020 || !d->one_operand_p
44021 || (d->vmode != V32QImode && d->vmode != V16HImode))
44022 return false;
44024 if (d->testing_p)
44025 return true;
44027 nelt = d->nelt;
44028 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44030 /* Generate two permutation masks. If the required element is within
44031 the same lane, it is shuffled in. If the required element is from the
44032 other lane, force a zero by setting bit 7 in the permutation mask.
44033 The other mask has a non-negative element wherever an element is
44034 requested from the other lane, but that element is also moved to the
44035 other lane, so that the result of vpshufb can have its two V2TImode
44036 halves swapped. */
44037 m128 = GEN_INT (-128);
44038 for (i = 0; i < nelt; ++i)
44040 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44041 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44043 for (j = 0; j < eltsz; ++j)
44045 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
44046 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
44050 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44051 vperm = force_reg (V32QImode, vperm);
44053 h = gen_reg_rtx (V32QImode);
44054 op = gen_lowpart (V32QImode, d->op0);
44055 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44057 /* Swap the 128-bit lanes of h into hp. */
44058 hp = gen_reg_rtx (V4DImode);
44059 op = gen_lowpart (V4DImode, h);
44060 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
44061 const1_rtx));
44063 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44064 vperm = force_reg (V32QImode, vperm);
44066 l = gen_reg_rtx (V32QImode);
44067 op = gen_lowpart (V32QImode, d->op0);
44068 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44070 op = d->target;
44071 if (d->vmode != V32QImode)
44072 op = gen_reg_rtx (V32QImode);
44073 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
44074 if (op != d->target)
44075 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44077 return true;
44080 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
44081 and extract-odd permutations of two V32QImode or V16HImode operands
44082 with two vpshufb insns, vpor and vpermq.  We should have already
44083 failed all two- or three-instruction sequences. */
44085 static bool
44086 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
44088 rtx rperm[2][32], vperm, l, h, ior, op, m128;
44089 unsigned int i, nelt, eltsz;
44091 if (!TARGET_AVX2
44092 || d->one_operand_p
44093 || (d->vmode != V32QImode && d->vmode != V16HImode))
44094 return false;
44096 for (i = 0; i < d->nelt; ++i)
44097 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
44098 return false;
44100 if (d->testing_p)
44101 return true;
44103 nelt = d->nelt;
44104 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44106 /* Generate two permutation masks.  In the first permutation mask
44107 the first quarter will contain indexes for the first half
44108 of op0, the second quarter will contain bit 7 set, the third quarter
44109 will contain indexes for the second half of op0 and the
44110 last quarter bit 7 set.  In the second permutation mask
44111 the first quarter will contain bit 7 set, the second quarter
44112 indexes for the first half of op1, the third quarter bit 7 set
44113 and the last quarter indexes for the second half of op1.
44114 I.e. the first mask, e.g. for a V32QImode extract-even, will be:
44115 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
44116 (all values masked with 0xf except for -128) and the second mask
44117 for extract-even will be
44118 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
44119 m128 = GEN_INT (-128);
44120 for (i = 0; i < nelt; ++i)
44122 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44123 unsigned which = d->perm[i] >= nelt;
44124 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
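      /* XORing the byte position with 24 swaps the second and third
	 quarters of the mask, so each quarter is gathered from the 128-bit
	 lane that actually holds its source bytes (vpshufb only shuffles
	 within a lane); the { 0, 2, 1, 3 } vpermq at the end swaps those
	 quarters back into place.  */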
44126 for (j = 0; j < eltsz; ++j)
44128 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
44129 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
44133 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44134 vperm = force_reg (V32QImode, vperm);
44136 l = gen_reg_rtx (V32QImode);
44137 op = gen_lowpart (V32QImode, d->op0);
44138 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44140 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44141 vperm = force_reg (V32QImode, vperm);
44143 h = gen_reg_rtx (V32QImode);
44144 op = gen_lowpart (V32QImode, d->op1);
44145 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44147 ior = gen_reg_rtx (V32QImode);
44148 emit_insn (gen_iorv32qi3 (ior, l, h));
44150 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
44151 op = gen_reg_rtx (V4DImode);
44152 ior = gen_lowpart (V4DImode, ior);
44153 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
44154 const1_rtx, GEN_INT (3)));
44155 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44157 return true;
44160 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
44161 and extract-odd permutations. */
44163 static bool
44164 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
44166 rtx t1, t2, t3, t4, t5;
44168 switch (d->vmode)
44170 case V4DFmode:
44171 if (d->testing_p)
44172 break;
44173 t1 = gen_reg_rtx (V4DFmode);
44174 t2 = gen_reg_rtx (V4DFmode);
44176 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44177 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
44178 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
44180 /* Now an unpck[lh]pd will produce the result required. */
44181 if (odd)
44182 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
44183 else
44184 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
44185 emit_insn (t3);
44186 break;
44188 case V8SFmode:
44190 int mask = odd ? 0xdd : 0x88;
44192 if (d->testing_p)
44193 break;
44194 t1 = gen_reg_rtx (V8SFmode);
44195 t2 = gen_reg_rtx (V8SFmode);
44196 t3 = gen_reg_rtx (V8SFmode);
44198 /* Shuffle within the 128-bit lanes to produce:
44199 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
44200 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
44201 GEN_INT (mask)));
44203 /* Shuffle the lanes around to produce:
44204 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
44205 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
44206 GEN_INT (0x3)));
44208 /* Shuffle within the 128-bit lanes to produce:
44209 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
44210 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
44212 /* Shuffle within the 128-bit lanes to produce:
44213 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
44214 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
44216 /* Shuffle the lanes around to produce:
44217 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
44218 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
44219 GEN_INT (0x20)));
44221 break;
44223 case V2DFmode:
44224 case V4SFmode:
44225 case V2DImode:
44226 case V4SImode:
44227 /* These are always directly implementable by expand_vec_perm_1. */
44228 gcc_unreachable ();
44230 case V8HImode:
44231 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44232 return expand_vec_perm_pshufb2 (d);
44233 else
44235 if (d->testing_p)
44236 break;
44237 /* We need 2*log2(N)-1 operations to achieve odd/even
44238 with interleave. */
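	  /* For V8HImode, N == 8, so the sequence below uses
	     2 * 3 - 1 == 5 interleave insns.  */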
44239 t1 = gen_reg_rtx (V8HImode);
44240 t2 = gen_reg_rtx (V8HImode);
44241 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
44242 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
44243 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
44244 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
44245 if (odd)
44246 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
44247 else
44248 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
44249 emit_insn (t3);
44251 break;
44253 case V16QImode:
44254 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44255 return expand_vec_perm_pshufb2 (d);
44256 else
44258 if (d->testing_p)
44259 break;
44260 t1 = gen_reg_rtx (V16QImode);
44261 t2 = gen_reg_rtx (V16QImode);
44262 t3 = gen_reg_rtx (V16QImode);
44263 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
44264 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
44265 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
44266 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
44267 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
44268 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
44269 if (odd)
44270 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
44271 else
44272 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
44273 emit_insn (t3);
44275 break;
44277 case V16HImode:
44278 case V32QImode:
44279 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
44281 case V4DImode:
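      /* Without AVX2, fall back to the V4DFmode lowering above; viewing
	 the V4DImode operands as V4DFmode only relabels the mode, the bit
	 pattern is unchanged.  */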
44282 if (!TARGET_AVX2)
44284 struct expand_vec_perm_d d_copy = *d;
44285 d_copy.vmode = V4DFmode;
44286 if (d->testing_p)
44287 d_copy.target = gen_lowpart (V4DFmode, d->target);
44288 else
44289 d_copy.target = gen_reg_rtx (V4DFmode);
44290 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
44291 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
44292 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44294 if (!d->testing_p)
44295 emit_move_insn (d->target,
44296 gen_lowpart (V4DImode, d_copy.target));
44297 return true;
44299 return false;
44302 if (d->testing_p)
44303 break;
44305 t1 = gen_reg_rtx (V4DImode);
44306 t2 = gen_reg_rtx (V4DImode);
44308 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44309 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
44310 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
44312 /* Now a vpunpck[lh]qdq will produce the result required. */
44313 if (odd)
44314 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
44315 else
44316 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
44317 emit_insn (t3);
44318 break;
44320 case V8SImode:
44321 if (!TARGET_AVX2)
44323 struct expand_vec_perm_d d_copy = *d;
44324 d_copy.vmode = V8SFmode;
44325 if (d->testing_p)
44326 d_copy.target = gen_lowpart (V8SFmode, d->target);
44327 else
44328 d_copy.target = gen_reg_rtx (V8SFmode);
44329 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
44330 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
44331 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44333 if (!d->testing_p)
44334 emit_move_insn (d->target,
44335 gen_lowpart (V8SImode, d_copy.target));
44336 return true;
44338 return false;
44341 if (d->testing_p)
44342 break;
44344 t1 = gen_reg_rtx (V8SImode);
44345 t2 = gen_reg_rtx (V8SImode);
44346 t3 = gen_reg_rtx (V4DImode);
44347 t4 = gen_reg_rtx (V4DImode);
44348 t5 = gen_reg_rtx (V4DImode);
44350 /* Shuffle the lanes around into
44351 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
44352 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
44353 gen_lowpart (V4DImode, d->op1),
44354 GEN_INT (0x20)));
44355 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
44356 gen_lowpart (V4DImode, d->op1),
44357 GEN_INT (0x31)));
44359 /* Swap the 2nd and 3rd position in each lane into
44360 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
44361 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
44362 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44363 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
44364 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44366 /* Now a vpunpck[lh]qdq will produce
44367 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
44368 if (odd)
44369 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
44370 gen_lowpart (V4DImode, t2));
44371 else
44372 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
44373 gen_lowpart (V4DImode, t2));
44374 emit_insn (t3);
44375 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
44376 break;
44378 default:
44379 gcc_unreachable ();
44382 return true;
44385 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44386 extract-even and extract-odd permutations. */
44388 static bool
44389 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
44391 unsigned i, odd, nelt = d->nelt;
44393 odd = d->perm[0];
44394 if (odd != 0 && odd != 1)
44395 return false;
44397 for (i = 1; i < nelt; ++i)
44398 if (d->perm[i] != 2 * i + odd)
44399 return false;
44401 return expand_vec_perm_even_odd_1 (d, odd);
44404 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
44405 permutations. We assume that expand_vec_perm_1 has already failed. */
44407 static bool
44408 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
44410 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
44411 enum machine_mode vmode = d->vmode;
44412 unsigned char perm2[4];
44413 rtx op0 = d->op0, dest;
44414 bool ok;
44416 switch (vmode)
44418 case V4DFmode:
44419 case V8SFmode:
44420 /* These are special-cased in sse.md so that we can optionally
44421 use the vbroadcast instruction. They expand to two insns
44422 if the input happens to be in a register. */
44423 gcc_unreachable ();
44425 case V2DFmode:
44426 case V2DImode:
44427 case V4SFmode:
44428 case V4SImode:
44429 /* These are always implementable using standard shuffle patterns. */
44430 gcc_unreachable ();
44432 case V8HImode:
44433 case V16QImode:
44434 /* These can be implemented via interleave. We save one insn by
44435 stopping once we have promoted to V4SImode and then use pshufd. */
44436 if (d->testing_p)
44437 return true;
44440 rtx dest;
44441 rtx (*gen) (rtx, rtx, rtx)
44442 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
44443 : gen_vec_interleave_lowv8hi;
44445 if (elt >= nelt2)
44447 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
44448 : gen_vec_interleave_highv8hi;
44449 elt -= nelt2;
44451 nelt2 /= 2;
44453 dest = gen_reg_rtx (vmode);
44454 emit_insn (gen (dest, op0, op0));
44455 vmode = get_mode_wider_vector (vmode);
44456 op0 = gen_lowpart (vmode, dest);
44458 while (vmode != V4SImode);
44460 memset (perm2, elt, 4);
44461 dest = gen_reg_rtx (V4SImode);
44462 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
44463 gcc_assert (ok);
44464 if (!d->testing_p)
44465 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
44466 return true;
44468 case V32QImode:
44469 case V16HImode:
44470 case V8SImode:
44471 case V4DImode:
44472 /* For AVX2, broadcasts of the first element should already have been
44473 handled by expand_vec_perm_1 via vpbroadcast* or vpermq. */
44474 gcc_assert (!TARGET_AVX2 || d->perm[0]);
44475 return false;
44477 default:
44478 gcc_unreachable ();
44482 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44483 broadcast permutations. */
44485 static bool
44486 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
44488 unsigned i, elt, nelt = d->nelt;
44490 if (!d->one_operand_p)
44491 return false;
44493 elt = d->perm[0];
44494 for (i = 1; i < nelt; ++i)
44495 if (d->perm[i] != elt)
44496 return false;
44498 return expand_vec_perm_broadcast_1 (d);
44501 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
44502 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
44503 all the shorter instruction sequences. */
44505 static bool
44506 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
44508 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
44509 unsigned int i, nelt, eltsz;
44510 bool used[4];
44512 if (!TARGET_AVX2
44513 || d->one_operand_p
44514 || (d->vmode != V32QImode && d->vmode != V16HImode))
44515 return false;
44517 if (d->testing_p)
44518 return true;
44520 nelt = d->nelt;
44521 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44523 /* Generate 4 permutation masks.  If the required element is within
44524 the same lane, it is shuffled in.  If the required element is from the
44525 other lane, force a zero into the lane by setting bit 7 in the
44526 permutation mask.  The other mask has non-negative elements only where
44527 the element is requested from the other lane, and those elements are
44528 also placed into the other lane of the mask, so that the two V2TImode
44529 halves of the vpshufb result can simply be swapped. */
44530 m128 = GEN_INT (-128);
44531 for (i = 0; i < 32; ++i)
44533 rperm[0][i] = m128;
44534 rperm[1][i] = m128;
44535 rperm[2][i] = m128;
44536 rperm[3][i] = m128;
44538 used[0] = false;
44539 used[1] = false;
44540 used[2] = false;
44541 used[3] = false;
44542 for (i = 0; i < nelt; ++i)
44544 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44545 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44546 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
44548 for (j = 0; j < eltsz; ++j)
44549 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
44550 used[which] = true;
44553 for (i = 0; i < 2; ++i)
44555 if (!used[2 * i + 1])
44557 h[i] = NULL_RTX;
44558 continue;
44560 vperm = gen_rtx_CONST_VECTOR (V32QImode,
44561 gen_rtvec_v (32, rperm[2 * i + 1]));
44562 vperm = force_reg (V32QImode, vperm);
44563 h[i] = gen_reg_rtx (V32QImode);
44564 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44565 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
44568 /* Swap the 128-bit lanes of h[X]. */
44569 for (i = 0; i < 2; ++i)
44571 if (h[i] == NULL_RTX)
44572 continue;
44573 op = gen_reg_rtx (V4DImode);
44574 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
44575 const2_rtx, GEN_INT (3), const0_rtx,
44576 const1_rtx));
44577 h[i] = gen_lowpart (V32QImode, op);
44580 for (i = 0; i < 2; ++i)
44582 if (!used[2 * i])
44584 l[i] = NULL_RTX;
44585 continue;
44587 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
44588 vperm = force_reg (V32QImode, vperm);
44589 l[i] = gen_reg_rtx (V32QImode);
44590 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44591 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
44594 for (i = 0; i < 2; ++i)
44596 if (h[i] && l[i])
44598 op = gen_reg_rtx (V32QImode);
44599 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
44600 l[i] = op;
44602 else if (h[i])
44603 l[i] = h[i];
44606 gcc_assert (l[0] && l[1]);
44607 op = d->target;
44608 if (d->vmode != V32QImode)
44609 op = gen_reg_rtx (V32QImode);
44610 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
44611 if (op != d->target)
44612 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44613 return true;
44616 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
44617 With all of the interface bits taken care of, perform the expansion
44618 in D and return true on success. */
44620 static bool
44621 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
44623 /* Try a single instruction expansion. */
44624 if (expand_vec_perm_1 (d))
44625 return true;
44627 /* Try sequences of two instructions. */
44629 if (expand_vec_perm_pshuflw_pshufhw (d))
44630 return true;
44632 if (expand_vec_perm_palignr (d))
44633 return true;
44635 if (expand_vec_perm_interleave2 (d))
44636 return true;
44638 if (expand_vec_perm_broadcast (d))
44639 return true;
44641 if (expand_vec_perm_vpermq_perm_1 (d))
44642 return true;
44644 if (expand_vec_perm_vperm2f128 (d))
44645 return true;
44647 if (expand_vec_perm_pblendv (d))
44648 return true;
44650 /* Try sequences of three instructions. */
44652 if (expand_vec_perm_2vperm2f128_vshuf (d))
44653 return true;
44655 if (expand_vec_perm_pshufb2 (d))
44656 return true;
44658 if (expand_vec_perm_interleave3 (d))
44659 return true;
44661 if (expand_vec_perm_vperm2f128_vblend (d))
44662 return true;
44664 /* Try sequences of four instructions. */
44666 if (expand_vec_perm_vpshufb2_vpermq (d))
44667 return true;
44669 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
44670 return true;
44672 /* ??? Look for narrow permutations whose element orderings would
44673 allow the promotion to a wider mode. */
44675 /* ??? Look for sequences of interleave or a wider permute that place
44676 the data into the correct lanes for a half-vector shuffle like
44677 pshuf[lh]w or vpermilps. */
44679 /* ??? Look for sequences of interleave that produce the desired results.
44680 The combinatorics of punpck[lh] get pretty ugly... */
44682 if (expand_vec_perm_even_odd (d))
44683 return true;
44685 /* Even longer sequences. */
44686 if (expand_vec_perm_vpshufb4_vpermq2 (d))
44687 return true;
44689 return false;
44692 /* If a permutation only uses one operand, make it clear. Returns true
44693 if the permutation references both operands. */
44695 static bool
44696 canonicalize_perm (struct expand_vec_perm_d *d)
44698 int i, which, nelt = d->nelt;
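  /* WHICH accumulates bit 0 if any element selects from the first operand
     (index < nelt) and bit 1 if any element selects from the second.  */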
44700 for (i = which = 0; i < nelt; ++i)
44701 which |= (d->perm[i] < nelt ? 1 : 2);
44703 d->one_operand_p = true;
44704 switch (which)
44706 default:
44707 gcc_unreachable();
44709 case 3:
44710 if (!rtx_equal_p (d->op0, d->op1))
44712 d->one_operand_p = false;
44713 break;
44715 /* The elements of PERM do not suggest that only the first operand
44716 is used, but both operands are identical. Allow easier matching
44717 of the permutation by folding the permutation into the single
44718 input vector. */
44719 /* FALLTHRU */
44721 case 2:
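      /* Only the second operand is referenced (or, via the fall-through
	 above, both operands are identical); rewrite the indices and use
	 op1 as the single input.  */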
44722 for (i = 0; i < nelt; ++i)
44723 d->perm[i] &= nelt - 1;
44724 d->op0 = d->op1;
44725 break;
44727 case 1:
44728 d->op1 = d->op0;
44729 break;
44732 return (which == 3);
44735 bool
44736 ix86_expand_vec_perm_const (rtx operands[4])
44738 struct expand_vec_perm_d d;
44739 unsigned char perm[MAX_VECT_LEN];
44740 int i, nelt;
44741 bool two_args;
44742 rtx sel;
44744 d.target = operands[0];
44745 d.op0 = operands[1];
44746 d.op1 = operands[2];
44747 sel = operands[3];
44749 d.vmode = GET_MODE (d.target);
44750 gcc_assert (VECTOR_MODE_P (d.vmode));
44751 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44752 d.testing_p = false;
44754 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
44755 gcc_assert (XVECLEN (sel, 0) == nelt);
44756 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
44758 for (i = 0; i < nelt; ++i)
44760 rtx e = XVECEXP (sel, 0, i);
44761 int ei = INTVAL (e) & (2 * nelt - 1);
44762 d.perm[i] = ei;
44763 perm[i] = ei;
44766 two_args = canonicalize_perm (&d);
44768 if (ix86_expand_vec_perm_const_1 (&d))
44769 return true;
44771 /* If the selector says both arguments are needed, but the operands are the
44772 same, the above tried to expand with one_operand_p and flattened selector.
44773 If that didn't work, retry without one_operand_p; we succeeded with that
44774 during testing. */
44775 if (two_args && d.one_operand_p)
44777 d.one_operand_p = false;
44778 memcpy (d.perm, perm, sizeof (perm));
44779 return ix86_expand_vec_perm_const_1 (&d);
44782 return false;
44785 /* Implement targetm.vectorize.vec_perm_const_ok. */
44787 static bool
44788 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
44789 const unsigned char *sel)
44791 struct expand_vec_perm_d d;
44792 unsigned int i, nelt, which;
44793 bool ret;
44795 d.vmode = vmode;
44796 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44797 d.testing_p = true;
44799 /* Given sufficient ISA support we can just return true here
44800 for selected vector modes. */
44801 if (d.vmode == V16SImode || d.vmode == V16SFmode
44802 || d.vmode == V8DFmode || d.vmode == V8DImode)
44803 /* All implementable with a single vpermi2 insn. */
44804 return true;
44805 if (GET_MODE_SIZE (d.vmode) == 16)
44807 /* All implementable with a single vpperm insn. */
44808 if (TARGET_XOP)
44809 return true;
44810 /* All implementable with 2 pshufb + 1 ior. */
44811 if (TARGET_SSSE3)
44812 return true;
44813 /* All implementable with shufpd or unpck[lh]pd. */
44814 if (d.nelt == 2)
44815 return true;
44818 /* Extract the values from the vector CST into the permutation
44819 array in D. */
44820 memcpy (d.perm, sel, nelt);
44821 for (i = which = 0; i < nelt; ++i)
44823 unsigned char e = d.perm[i];
44824 gcc_assert (e < 2 * nelt);
44825 which |= (e < nelt ? 1 : 2);
44828 /* If all elements select from the second vector, fold them to the first. */
44829 if (which == 2)
44830 for (i = 0; i < nelt; ++i)
44831 d.perm[i] -= nelt;
44833 /* Check whether the mask can be applied to the vector type. */
44834 d.one_operand_p = (which != 3);
44836 /* Implementable with shufps or pshufd. */
44837 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
44838 return true;
44840 /* Otherwise we have to go through the motions and see if we can
44841 figure out how to generate the requested permutation. */
44842 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
44843 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
44844 if (!d.one_operand_p)
44845 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
44847 start_sequence ();
44848 ret = ix86_expand_vec_perm_const_1 (&d);
44849 end_sequence ();
44851 return ret;
44854 void
44855 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
44857 struct expand_vec_perm_d d;
44858 unsigned i, nelt;
44860 d.target = targ;
44861 d.op0 = op0;
44862 d.op1 = op1;
44863 d.vmode = GET_MODE (targ);
44864 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44865 d.one_operand_p = false;
44866 d.testing_p = false;
44868 for (i = 0; i < nelt; ++i)
44869 d.perm[i] = i * 2 + odd;
44871 /* We'll either be able to implement the permutation directly... */
44872 if (expand_vec_perm_1 (&d))
44873 return;
44875 /* ... or we use the special-case patterns. */
44876 expand_vec_perm_even_odd_1 (&d, odd);
44879 static void
44880 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
44882 struct expand_vec_perm_d d;
44883 unsigned i, nelt, base;
44884 bool ok;
44886 d.target = targ;
44887 d.op0 = op0;
44888 d.op1 = op1;
44889 d.vmode = GET_MODE (targ);
44890 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44891 d.one_operand_p = false;
44892 d.testing_p = false;
44894 base = high_p ? nelt / 2 : 0;
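  /* E.g. for V4SImode this builds { 0, 4, 1, 5 } when HIGH_P is false
     and { 2, 6, 3, 7 } when it is true.  */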
44895 for (i = 0; i < nelt / 2; ++i)
44897 d.perm[i * 2] = i + base;
44898 d.perm[i * 2 + 1] = i + base + nelt;
44901 /* Note that for AVX this isn't one instruction. */
44902 ok = ix86_expand_vec_perm_const_1 (&d);
44903 gcc_assert (ok);
44907 /* Expand a vector operation CODE for a V*QImode in terms of the
44908 same operation on V*HImode. */
44910 void
44911 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
44913 enum machine_mode qimode = GET_MODE (dest);
44914 enum machine_mode himode;
44915 rtx (*gen_il) (rtx, rtx, rtx);
44916 rtx (*gen_ih) (rtx, rtx, rtx);
44917 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
44918 struct expand_vec_perm_d d;
44919 bool ok, full_interleave;
44920 bool uns_p = false;
44921 int i;
44923 switch (qimode)
44925 case V16QImode:
44926 himode = V8HImode;
44927 gen_il = gen_vec_interleave_lowv16qi;
44928 gen_ih = gen_vec_interleave_highv16qi;
44929 break;
44930 case V32QImode:
44931 himode = V16HImode;
44932 gen_il = gen_avx2_interleave_lowv32qi;
44933 gen_ih = gen_avx2_interleave_highv32qi;
44934 break;
44935 default:
44936 gcc_unreachable ();
44939 op2_l = op2_h = op2;
44940 switch (code)
44942 case MULT:
44943 /* Unpack data such that we've got a source byte in each low byte of
44944 each word. We don't care what goes into the high byte of each word.
44945 Rather than trying to get zero in there, most convenient is to let
44946 it be a copy of the low byte. */
44947 op2_l = gen_reg_rtx (qimode);
44948 op2_h = gen_reg_rtx (qimode);
44949 emit_insn (gen_il (op2_l, op2, op2));
44950 emit_insn (gen_ih (op2_h, op2, op2));
44951 /* FALLTHRU */
44953 op1_l = gen_reg_rtx (qimode);
44954 op1_h = gen_reg_rtx (qimode);
44955 emit_insn (gen_il (op1_l, op1, op1));
44956 emit_insn (gen_ih (op1_h, op1, op1));
44957 full_interleave = qimode == V16QImode;
44958 break;
44960 case ASHIFT:
44961 case LSHIFTRT:
44962 uns_p = true;
44963 /* FALLTHRU */
44964 case ASHIFTRT:
44965 op1_l = gen_reg_rtx (himode);
44966 op1_h = gen_reg_rtx (himode);
44967 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
44968 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
44969 full_interleave = true;
44970 break;
44971 default:
44972 gcc_unreachable ();
44975 /* Perform the operation. */
44976 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
44977 1, OPTAB_DIRECT);
44978 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
44979 1, OPTAB_DIRECT);
44980 gcc_assert (res_l && res_h);
44982 /* Merge the data back into the right place. */
44983 d.target = dest;
44984 d.op0 = gen_lowpart (qimode, res_l);
44985 d.op1 = gen_lowpart (qimode, res_h);
44986 d.vmode = qimode;
44987 d.nelt = GET_MODE_NUNITS (qimode);
44988 d.one_operand_p = false;
44989 d.testing_p = false;
44991 if (full_interleave)
44993 /* For SSE2, we used a full interleave, so the desired
44994 results are in the even elements. */
44995 for (i = 0; i < 32; ++i)
44996 d.perm[i] = i * 2;
44998 else
45000 /* For AVX, the interleave used above was not cross-lane. So the
45001 extraction is of the even elements, but with the second and third quarters
45002 swapped.  Happily, that is even one insn shorter than a plain even extraction. */
45003 for (i = 0; i < 32; ++i)
45004 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
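      /* I.e. { 0, 2, ..., 14, 32, 34, ..., 46, 16, 18, ..., 30,
	       48, 50, ..., 62 }.  */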
45007 ok = ix86_expand_vec_perm_const_1 (&d);
45008 gcc_assert (ok);
45010 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45011 gen_rtx_fmt_ee (code, qimode, op1, op2));
45014 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
45015 if op is a CONST_VECTOR with all odd elements equal to their
45016 preceding element. */
45018 static bool
45019 const_vector_equal_evenodd_p (rtx op)
45021 enum machine_mode mode = GET_MODE (op);
45022 int i, nunits = GET_MODE_NUNITS (mode);
45023 if (GET_CODE (op) != CONST_VECTOR
45024 || nunits != CONST_VECTOR_NUNITS (op))
45025 return false;
45026 for (i = 0; i < nunits; i += 2)
45027 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
45028 return false;
45029 return true;
45032 void
45033 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
45034 bool uns_p, bool odd_p)
45036 enum machine_mode mode = GET_MODE (op1);
45037 enum machine_mode wmode = GET_MODE (dest);
45038 rtx x;
45039 rtx orig_op1 = op1, orig_op2 = op2;
45041 if (!nonimmediate_operand (op1, mode))
45042 op1 = force_reg (mode, op1);
45043 if (!nonimmediate_operand (op2, mode))
45044 op2 = force_reg (mode, op2);
45046 /* We only play even/odd games with vectors of SImode. */
45047 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
45049 /* If we're looking for the odd results, shift those members down to
45050 the even slots.  For some CPUs this is faster than a PSHUFD. */
45051 if (odd_p)
45053 /* For XOP use vpmacsdqh, but only for smult, as it is only
45054 signed. */
45055 if (TARGET_XOP && mode == V4SImode && !uns_p)
45057 x = force_reg (wmode, CONST0_RTX (wmode));
45058 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
45059 return;
45062 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
45063 if (!const_vector_equal_evenodd_p (orig_op1))
45064 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
45065 x, NULL, 1, OPTAB_DIRECT);
45066 if (!const_vector_equal_evenodd_p (orig_op2))
45067 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
45068 x, NULL, 1, OPTAB_DIRECT);
45069 op1 = gen_lowpart (mode, op1);
45070 op2 = gen_lowpart (mode, op2);
45073 if (mode == V16SImode)
45075 if (uns_p)
45076 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
45077 else
45078 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
45080 else if (mode == V8SImode)
45082 if (uns_p)
45083 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
45084 else
45085 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
45087 else if (uns_p)
45088 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
45089 else if (TARGET_SSE4_1)
45090 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
45091 else
45093 rtx s1, s2, t0, t1, t2;
45095 /* The easiest way to implement this without PMULDQ is to go through
45096 the motions as if we were performing a full 64-bit multiply, with
45097 the exception that we need to do less shuffling of the elements. */
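      /* A sketch of the identity used: with s1 = (op1 < 0 ? -1 : 0) and
	 s2 = (op2 < 0 ? -1 : 0) per element, the unsigned even-multiplies
	 below satisfy
	   signed (a) * signed (b)
	     == umul (a, b) + ((umul (s1, b) + umul (s2, a)) << 32)
	 modulo 2^64, since the low 32 bits of umul (s1, b) are -b when
	 a < 0 and 0 otherwise (and symmetrically for s2).  */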
45099 /* Compute the sign-extension, aka highparts, of the two operands. */
45100 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45101 op1, pc_rtx, pc_rtx);
45102 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45103 op2, pc_rtx, pc_rtx);
45105 /* Multiply LO(A) * HI(B), and vice-versa. */
45106 t1 = gen_reg_rtx (wmode);
45107 t2 = gen_reg_rtx (wmode);
45108 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
45109 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
45111 /* Multiply LO(A) * LO(B). */
45112 t0 = gen_reg_rtx (wmode);
45113 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
45115 /* Combine and shift the highparts into place. */
45116 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
45117 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
45118 1, OPTAB_DIRECT);
45120 /* Combine high and low parts. */
45121 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
45122 return;
45124 emit_insn (x);
45127 void
45128 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
45129 bool uns_p, bool high_p)
45131 enum machine_mode wmode = GET_MODE (dest);
45132 enum machine_mode mode = GET_MODE (op1);
45133 rtx t1, t2, t3, t4, mask;
45135 switch (mode)
45137 case V4SImode:
45138 t1 = gen_reg_rtx (mode);
45139 t2 = gen_reg_rtx (mode);
45140 if (TARGET_XOP && !uns_p)
45142 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
45143 shuffle the elements once so that all elements are in the right
45144 place for immediate use: { A C B D }. */
45145 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
45146 const1_rtx, GEN_INT (3)));
45147 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
45148 const1_rtx, GEN_INT (3)));
45150 else
45152 /* Put the elements into place for the multiply. */
45153 ix86_expand_vec_interleave (t1, op1, op1, high_p);
45154 ix86_expand_vec_interleave (t2, op2, op2, high_p);
45155 high_p = false;
45157 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
45158 break;
45160 case V8SImode:
45161 /* Shuffle the elements between the lanes. After this we
45162 have { A B E F | C D G H } for each operand. */
45163 t1 = gen_reg_rtx (V4DImode);
45164 t2 = gen_reg_rtx (V4DImode);
45165 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
45166 const0_rtx, const2_rtx,
45167 const1_rtx, GEN_INT (3)));
45168 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
45169 const0_rtx, const2_rtx,
45170 const1_rtx, GEN_INT (3)));
45172 /* Shuffle the elements within the lanes. After this we
45173 have { A A B B | C C D D } or { E E F F | G G H H }. */
45174 t3 = gen_reg_rtx (V8SImode);
45175 t4 = gen_reg_rtx (V8SImode);
45176 mask = GEN_INT (high_p
45177 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
45178 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
45179 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
45180 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
45182 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
45183 break;
45185 case V8HImode:
45186 case V16HImode:
45187 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
45188 uns_p, OPTAB_DIRECT);
45189 t2 = expand_binop (mode,
45190 uns_p ? umul_highpart_optab : smul_highpart_optab,
45191 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
45192 gcc_assert (t1 && t2);
45194 t3 = gen_reg_rtx (mode);
45195 ix86_expand_vec_interleave (t3, t1, t2, high_p);
45196 emit_move_insn (dest, gen_lowpart (wmode, t3));
45197 break;
45199 case V16QImode:
45200 case V32QImode:
45201 t1 = gen_reg_rtx (wmode);
45202 t2 = gen_reg_rtx (wmode);
45203 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
45204 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
45206 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
45207 break;
45209 default:
45210 gcc_unreachable ();
45214 void
45215 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
45217 rtx res_1, res_2, res_3, res_4;
45219 res_1 = gen_reg_rtx (V4SImode);
45220 res_2 = gen_reg_rtx (V4SImode);
45221 res_3 = gen_reg_rtx (V2DImode);
45222 res_4 = gen_reg_rtx (V2DImode);
45223 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
45224 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
45226 /* Move the results in element 2 down to element 1; we don't care
45227 what goes in elements 2 and 3. Then we can merge the parts
45228 back together with an interleave.
45230 Note that two other sequences were tried:
45231 (1) Use interleaves at the start instead of psrldq, which allows
45232 us to use a single shufps to merge things back at the end.
45233 (2) Use shufps here to combine the two vectors, then pshufd to
45234 put the elements in the correct order.
45235 In both cases the cost of the reformatting stall was too high
45236 and the overall sequence slower. */
45238 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
45239 const0_rtx, const2_rtx,
45240 const0_rtx, const0_rtx));
45241 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
45242 const0_rtx, const2_rtx,
45243 const0_rtx, const0_rtx));
45244 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
45246 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
45249 void
45250 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
45252 enum machine_mode mode = GET_MODE (op0);
45253 rtx t1, t2, t3, t4, t5, t6;
45255 if (TARGET_XOP && mode == V2DImode)
45257 /* op1: A,B,C,D, op2: E,F,G,H */
45258 op1 = gen_lowpart (V4SImode, op1);
45259 op2 = gen_lowpart (V4SImode, op2);
45261 t1 = gen_reg_rtx (V4SImode);
45262 t2 = gen_reg_rtx (V4SImode);
45263 t3 = gen_reg_rtx (V2DImode);
45264 t4 = gen_reg_rtx (V2DImode);
45266 /* t1: B,A,D,C */
45267 emit_insn (gen_sse2_pshufd_1 (t1, op1,
45268 GEN_INT (1),
45269 GEN_INT (0),
45270 GEN_INT (3),
45271 GEN_INT (2)));
45273 /* t2: (B*E),(A*F),(D*G),(C*H) */
45274 emit_insn (gen_mulv4si3 (t2, t1, op2));
45276 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
45277 emit_insn (gen_xop_phadddq (t3, t2));
45279 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
45280 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
45282 /* Multiply lower parts and add all */
45283 t5 = gen_reg_rtx (V2DImode);
45284 emit_insn (gen_vec_widen_umult_even_v4si (t5,
45285 gen_lowpart (V4SImode, op1),
45286 gen_lowpart (V4SImode, op2)));
45287 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
45290 else
45292 enum machine_mode nmode;
45293 rtx (*umul) (rtx, rtx, rtx);
45295 if (mode == V2DImode)
45297 umul = gen_vec_widen_umult_even_v4si;
45298 nmode = V4SImode;
45300 else if (mode == V4DImode)
45302 umul = gen_vec_widen_umult_even_v8si;
45303 nmode = V8SImode;
45305 else if (mode == V8DImode)
45307 umul = gen_vec_widen_umult_even_v16si;
45308 nmode = V16SImode;
45310 else
45311 gcc_unreachable ();
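      /* The full product is assembled from 32-bit halves:
	   (hi(op1) * 2^32 + lo(op1)) * (hi(op2) * 2^32 + lo(op2))
	     == lo(op1) * lo(op2)
		+ ((hi(op1) * lo(op2) + hi(op2) * lo(op1)) << 32)
	 modulo 2^64, which is exactly the sequence emitted below.  */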
45314 /* Multiply low parts. */
45315 t1 = gen_reg_rtx (mode);
45316 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
45318 /* Shift input vectors right 32 bits so we can multiply high parts. */
45319 t6 = GEN_INT (32);
45320 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
45321 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
45323 /* Multiply high parts by low parts. */
45324 t4 = gen_reg_rtx (mode);
45325 t5 = gen_reg_rtx (mode);
45326 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
45327 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
45329 /* Combine and shift the highparts back. */
45330 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
45331 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
45333 /* Combine high and low parts. */
45334 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
45337 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45338 gen_rtx_MULT (mode, op1, op2));
45341 /* Calculate integer abs() using only SSE2 instructions. */
45343 void
45344 ix86_expand_sse2_abs (rtx target, rtx input)
45346 enum machine_mode mode = GET_MODE (target);
45347 rtx tmp0, tmp1, x;
45349 switch (mode)
45351 /* For 32-bit signed integer X, the best way to calculate the absolute
45352 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
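    /* For example, X = -5: the arithmetic shift gives -1, the xor gives 4,
       and 4 - (-1) == 5.  */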
45353 case V4SImode:
45354 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
45355 GEN_INT (GET_MODE_BITSIZE
45356 (GET_MODE_INNER (mode)) - 1),
45357 NULL, 0, OPTAB_DIRECT);
45358 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
45359 NULL, 0, OPTAB_DIRECT);
45360 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
45361 target, 0, OPTAB_DIRECT);
45362 break;
45364 /* For 16-bit signed integer X, the best way to calculate the absolute
45365 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
45366 case V8HImode:
45367 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45369 x = expand_simple_binop (mode, SMAX, tmp0, input,
45370 target, 0, OPTAB_DIRECT);
45371 break;
45373 /* For 8-bit signed integer X, the best way to calculate the absolute
45374 value of X is min ((unsigned char) X, (unsigned char) (-X)),
45375 as SSE2 provides the PMINUB insn. */
45376 case V16QImode:
45377 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45379 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
45380 target, 0, OPTAB_DIRECT);
45381 break;
45383 default:
45384 gcc_unreachable ();
45387 if (x != target)
45388 emit_move_insn (target, x);
45391 /* Expand an insert into a vector register through pinsr insn.
45392 Return true if successful. */
45394 bool
45395 ix86_expand_pinsr (rtx *operands)
45397 rtx dst = operands[0];
45398 rtx src = operands[3];
45400 unsigned int size = INTVAL (operands[1]);
45401 unsigned int pos = INTVAL (operands[2]);
45403 if (GET_CODE (dst) == SUBREG)
45405 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
45406 dst = SUBREG_REG (dst);
45409 if (GET_CODE (src) == SUBREG)
45410 src = SUBREG_REG (src);
45412 switch (GET_MODE (dst))
45414 case V16QImode:
45415 case V8HImode:
45416 case V4SImode:
45417 case V2DImode:
45419 enum machine_mode srcmode, dstmode;
45420 rtx (*pinsr)(rtx, rtx, rtx, rtx);
45422 srcmode = mode_for_size (size, MODE_INT, 0);
45424 switch (srcmode)
45426 case QImode:
45427 if (!TARGET_SSE4_1)
45428 return false;
45429 dstmode = V16QImode;
45430 pinsr = gen_sse4_1_pinsrb;
45431 break;
45433 case HImode:
45434 if (!TARGET_SSE2)
45435 return false;
45436 dstmode = V8HImode;
45437 pinsr = gen_sse2_pinsrw;
45438 break;
45440 case SImode:
45441 if (!TARGET_SSE4_1)
45442 return false;
45443 dstmode = V4SImode;
45444 pinsr = gen_sse4_1_pinsrd;
45445 break;
45447 case DImode:
45448 gcc_assert (TARGET_64BIT);
45449 if (!TARGET_SSE4_1)
45450 return false;
45451 dstmode = V2DImode;
45452 pinsr = gen_sse4_1_pinsrq;
45453 break;
45455 default:
45456 return false;
45459 rtx d = dst;
45460 if (GET_MODE (dst) != dstmode)
45461 d = gen_reg_rtx (dstmode);
45462 src = gen_lowpart (srcmode, src);
45464 pos /= size;
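      /* The pinsr expanders used here take the element position encoded as
	 a single-bit mask, hence the 1 << pos immediate below.  */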
45466 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
45467 GEN_INT (1 << pos)));
45468 if (d != dst)
45469 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
45470 return true;
45473 default:
45474 return false;
45478 /* This function returns the calling-ABI-specific va_list type node,
45479 i.e. the va_list type appropriate for FNDECL. */
45481 static tree
45482 ix86_fn_abi_va_list (tree fndecl)
45484 if (!TARGET_64BIT)
45485 return va_list_type_node;
45486 gcc_assert (fndecl != NULL_TREE);
45488 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
45489 return ms_va_list_type_node;
45490 else
45491 return sysv_va_list_type_node;
45494 /* Returns the canonical va_list type specified by TYPE. If there
45495 is no valid TYPE provided, it returns NULL_TREE. */
45497 static tree
45498 ix86_canonical_va_list_type (tree type)
45500 tree wtype, htype;
45502 /* Resolve references and pointers to va_list type. */
45503 if (TREE_CODE (type) == MEM_REF)
45504 type = TREE_TYPE (type);
45505 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
45506 type = TREE_TYPE (type);
45507 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
45508 type = TREE_TYPE (type);
45510 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
45512 wtype = va_list_type_node;
45513 gcc_assert (wtype != NULL_TREE);
45514 htype = type;
45515 if (TREE_CODE (wtype) == ARRAY_TYPE)
45517 /* If va_list is an array type, the argument may have decayed
45518 to a pointer type, e.g. by being passed to another function.
45519 In that case, unwrap both types so that we can compare the
45520 underlying records. */
45521 if (TREE_CODE (htype) == ARRAY_TYPE
45522 || POINTER_TYPE_P (htype))
45524 wtype = TREE_TYPE (wtype);
45525 htype = TREE_TYPE (htype);
45528 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45529 return va_list_type_node;
45530 wtype = sysv_va_list_type_node;
45531 gcc_assert (wtype != NULL_TREE);
45532 htype = type;
45533 if (TREE_CODE (wtype) == ARRAY_TYPE)
45535 /* If va_list is an array type, the argument may have decayed
45536 to a pointer type, e.g. by being passed to another function.
45537 In that case, unwrap both types so that we can compare the
45538 underlying records. */
45539 if (TREE_CODE (htype) == ARRAY_TYPE
45540 || POINTER_TYPE_P (htype))
45542 wtype = TREE_TYPE (wtype);
45543 htype = TREE_TYPE (htype);
45546 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45547 return sysv_va_list_type_node;
45548 wtype = ms_va_list_type_node;
45549 gcc_assert (wtype != NULL_TREE);
45550 htype = type;
45551 if (TREE_CODE (wtype) == ARRAY_TYPE)
45553 /* If va_list is an array type, the argument may have decayed
45554 to a pointer type, e.g. by being passed to another function.
45555 In that case, unwrap both types so that we can compare the
45556 underlying records. */
45557 if (TREE_CODE (htype) == ARRAY_TYPE
45558 || POINTER_TYPE_P (htype))
45560 wtype = TREE_TYPE (wtype);
45561 htype = TREE_TYPE (htype);
45564 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45565 return ms_va_list_type_node;
45566 return NULL_TREE;
45568 return std_canonical_va_list_type (type);
45571 /* Iterate through the target-specific builtin types for va_list.
45572 IDX denotes the iterator, *PTREE is set to the result type of
45573 the va_list builtin, and *PNAME to its internal type.
45574 Returns zero if there is no element for this index, otherwise
45575 IDX should be increased upon the next call.
45576 Note, do not iterate a base builtin's name like __builtin_va_list.
45577 Used from c_common_nodes_and_builtins. */
45579 static int
45580 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
45582 if (TARGET_64BIT)
45584 switch (idx)
45586 default:
45587 break;
45589 case 0:
45590 *ptree = ms_va_list_type_node;
45591 *pname = "__builtin_ms_va_list";
45592 return 1;
45594 case 1:
45595 *ptree = sysv_va_list_type_node;
45596 *pname = "__builtin_sysv_va_list";
45597 return 1;
45601 return 0;
45604 #undef TARGET_SCHED_DISPATCH
45605 #define TARGET_SCHED_DISPATCH has_dispatch
45606 #undef TARGET_SCHED_DISPATCH_DO
45607 #define TARGET_SCHED_DISPATCH_DO do_dispatch
45608 #undef TARGET_SCHED_REASSOCIATION_WIDTH
45609 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
45610 #undef TARGET_SCHED_REORDER
45611 #define TARGET_SCHED_REORDER ix86_sched_reorder
45612 #undef TARGET_SCHED_ADJUST_PRIORITY
45613 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
45614 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
45615 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
45616 ix86_dependencies_evaluation_hook
45618 /* The size of the dispatch window is the total number of bytes of
45619 object code allowed in a window. */
45620 #define DISPATCH_WINDOW_SIZE 16
45622 /* Number of dispatch windows considered for scheduling. */
45623 #define MAX_DISPATCH_WINDOWS 3
45625 /* Maximum number of instructions in a window. */
45626 #define MAX_INSN 4
45628 /* Maximum number of immediate operands in a window. */
45629 #define MAX_IMM 4
45631 /* Maximum number of immediate bits allowed in a window. */
45632 #define MAX_IMM_SIZE 128
45634 /* Maximum number of 32 bit immediates allowed in a window. */
45635 #define MAX_IMM_32 4
45637 /* Maximum number of 64 bit immediates allowed in a window. */
45638 #define MAX_IMM_64 2
45640 /* Maximum total of loads or prefetches allowed in a window. */
45641 #define MAX_LOAD 2
45643 /* Maximum total of stores allowed in a window. */
45644 #define MAX_STORE 1
45646 #undef BIG
45647 #define BIG 100
45650 /* Dispatch groups.  Instructions that affect the mix in a dispatch window. */
45651 enum dispatch_group {
45652 disp_no_group = 0,
45653 disp_load,
45654 disp_store,
45655 disp_load_store,
45656 disp_prefetch,
45657 disp_imm,
45658 disp_imm_32,
45659 disp_imm_64,
45660 disp_branch,
45661 disp_cmp,
45662 disp_jcc,
45663 disp_last
45666 /* Number of allowable groups in a dispatch window.  It is an array
45667 indexed by the dispatch_group enum.  100 is used as a big number,
45668 because the number of these kinds of operations does not have any
45669 effect on the dispatch window, but we still need entries for them in
45670 the table. */
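/* In enum dispatch_group order the limits are: 0 for disp_no_group (these
   insns are unrestricted), 2 loads, 1 store, 1 load+store, 2 prefetches,
   4 immediates, 4 32-bit immediates, 2 64-bit immediates, 1 branch, and
   BIG (i.e. effectively unlimited) compares and jcc's.  */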
45671 static unsigned int num_allowable_groups[disp_last] = {
45672 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
45675 char group_name[disp_last + 1][16] = {
45676 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
45677 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
45678 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
45681 /* Instruction path. */
45682 enum insn_path {
45683 no_path = 0,
45684 path_single, /* Single micro op. */
45685 path_double, /* Double micro op. */
45686 path_multi, /* Instructions with more than 2 micro ops. */
45687 last_path
45690 /* sched_insn_info defines a window to the instructions scheduled in
45691 the basic block. It contains a pointer to the insn_info table and
45692 the instruction scheduled.
45694 Windows are allocated for each basic block and are linked
45695 together. */
45696 typedef struct sched_insn_info_s {
45697 rtx insn;
45698 enum dispatch_group group;
45699 enum insn_path path;
45700 int byte_len;
45701 int imm_bytes;
45702 } sched_insn_info;
45704 /* Linked list of dispatch windows. This is a two-way list of
45705 dispatch windows of a basic block. It contains information about
45706 the number of uops in the window and the total number of
45707 instructions and of bytes in the object code for this dispatch
45708 window. */
45709 typedef struct dispatch_windows_s {
45710 int num_insn; /* Number of insn in the window. */
45711 int num_uops; /* Number of uops in the window. */
45712 int window_size; /* Number of bytes in the window. */
45713 int window_num; /* Window number, either 0 or 1. */
45714 int num_imm; /* Number of immediates in an insn. */
45715 int num_imm_32; /* Number of 32 bit immediates in an insn. */
45716 int num_imm_64; /* Number of 64 bit immediates in an insn. */
45717 int imm_size; /* Total immediates in the window. */
45718 int num_loads; /* Total memory loads in the window. */
45719 int num_stores; /* Total memory stores in the window. */
45720 int violation; /* Violation exists in window. */
45721 sched_insn_info *window; /* Pointer to the window. */
45722 struct dispatch_windows_s *next;
45723 struct dispatch_windows_s *prev;
45724 } dispatch_windows;
45726 /* Immediate values used in an insn. */
45727 typedef struct imm_info_s
45729 int imm;
45730 int imm32;
45731 int imm64;
45732 } imm_info;
45734 static dispatch_windows *dispatch_window_list;
45735 static dispatch_windows *dispatch_window_list1;
45737 /* Get dispatch group of insn. */
45739 static enum dispatch_group
45740 get_mem_group (rtx insn)
45742 enum attr_memory memory;
45744 if (INSN_CODE (insn) < 0)
45745 return disp_no_group;
45746 memory = get_attr_memory (insn);
45747 if (memory == MEMORY_STORE)
45748 return disp_store;
45750 if (memory == MEMORY_LOAD)
45751 return disp_load;
45753 if (memory == MEMORY_BOTH)
45754 return disp_load_store;
45756 return disp_no_group;
45759 /* Return true if insn is a compare instruction. */
45761 static bool
45762 is_cmp (rtx insn)
45764 enum attr_type type;
45766 type = get_attr_type (insn);
45767 return (type == TYPE_TEST
45768 || type == TYPE_ICMP
45769 || type == TYPE_FCMP
45770 || GET_CODE (PATTERN (insn)) == COMPARE);
45773 /* Return true if a dispatch violation was encountered. */
45775 static bool
45776 dispatch_violation (void)
45778 if (dispatch_window_list->next)
45779 return dispatch_window_list->next->violation;
45780 return dispatch_window_list->violation;
45783 /* Return true if insn is a branch instruction. */
45785 static bool
45786 is_branch (rtx insn)
45788 return (CALL_P (insn) || JUMP_P (insn));
45791 /* Return true if insn is a prefetch instruction. */
45793 static bool
45794 is_prefetch (rtx insn)
45796 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
45799 /* This function initializes a dispatch window and the list container holding a
45800 pointer to the window. */
45802 static void
45803 init_window (int window_num)
45805 int i;
45806 dispatch_windows *new_list;
45808 if (window_num == 0)
45809 new_list = dispatch_window_list;
45810 else
45811 new_list = dispatch_window_list1;
45813 new_list->num_insn = 0;
45814 new_list->num_uops = 0;
45815 new_list->window_size = 0;
45816 new_list->next = NULL;
45817 new_list->prev = NULL;
45818 new_list->window_num = window_num;
45819 new_list->num_imm = 0;
45820 new_list->num_imm_32 = 0;
45821 new_list->num_imm_64 = 0;
45822 new_list->imm_size = 0;
45823 new_list->num_loads = 0;
45824 new_list->num_stores = 0;
45825 new_list->violation = false;
45827 for (i = 0; i < MAX_INSN; i++)
45829 new_list->window[i].insn = NULL;
45830 new_list->window[i].group = disp_no_group;
45831 new_list->window[i].path = no_path;
45832 new_list->window[i].byte_len = 0;
45833 new_list->window[i].imm_bytes = 0;
45835 return;
45838 /* This function allocates and initializes a dispatch window and the
45839 list container holding a pointer to the window. */
45841 static dispatch_windows *
45842 allocate_window (void)
45844 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
45845 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
45847 return new_list;
45850 /* This routine initializes the dispatch scheduling information. It
45851 initiates building dispatch scheduler tables and constructs the
45852 first dispatch window. */
45854 static void
45855 init_dispatch_sched (void)
45857 /* Allocate a dispatch list and a window. */
45858 dispatch_window_list = allocate_window ();
45859 dispatch_window_list1 = allocate_window ();
45860 init_window (0);
45861 init_window (1);
45864 /* This function returns true if a branch is detected.  The end of a
45865 basic block does not have to be a branch, but here we assume only
45866 branches end a window. */
45868 static bool
45869 is_end_basic_block (enum dispatch_group group)
45871 return group == disp_branch;
45874 /* This function is called when the end of a window processing is reached. */
45876 static void
45877 process_end_window (void)
45879 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
45880 if (dispatch_window_list->next)
45882 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
45883 gcc_assert (dispatch_window_list->window_size
45884 + dispatch_window_list1->window_size <= 48);
45885 init_window (1);
45887 init_window (0);
45890 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
45891 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
45892 for 48 bytes of instructions.  Note that these windows are not dispatch
45893 windows whose sizes are DISPATCH_WINDOW_SIZE. */
45895 static dispatch_windows *
45896 allocate_next_window (int window_num)
45898 if (window_num == 0)
45900 if (dispatch_window_list->next)
45901 init_window (1);
45902 init_window (0);
45903 return dispatch_window_list;
45906 dispatch_window_list->next = dispatch_window_list1;
45907 dispatch_window_list1->prev = dispatch_window_list;
45909 return dispatch_window_list1;
45912 /* for_each_rtx callback: count an immediate operand of an instruction into IMM_VALUES, classifying it as 32-bit or 64-bit. */
45914 static int
45915 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
45917 if (*in_rtx == 0)
45918 return 0;
45920 switch ( GET_CODE (*in_rtx))
45922 case CONST:
45923 case SYMBOL_REF:
45924 case CONST_INT:
45925 (imm_values->imm)++;
45926 if (x86_64_immediate_operand (*in_rtx, SImode))
45927 (imm_values->imm32)++;
45928 else
45929 (imm_values->imm64)++;
45930 break;
45932 case CONST_DOUBLE:
45933 (imm_values->imm)++;
45934 (imm_values->imm64)++;
45935 break;
45937 case CODE_LABEL:
45938 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
45940 (imm_values->imm)++;
45941 (imm_values->imm32)++;
45943 break;
45945 default:
45946 break;
45949 return 0;
45952 /* Compute number of immediate operands of an instruction. */
45954 static void
45955 find_constant (rtx in_rtx, imm_info *imm_values)
45957 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
45958 (rtx_function) find_constant_1, (void *) imm_values);
45961 /* Return total size of immediate operands of an instruction along with number
45962 of corresponding immediate operands.  It initializes its parameters to zero
45963 before calling FIND_CONSTANT.
45964 INSN is the input instruction. IMM is the total of immediates.
45965 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
45966 bit immediates. */
45968 static int
45969 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
45971 imm_info imm_values = {0, 0, 0};
45973 find_constant (insn, &imm_values);
45974 *imm = imm_values.imm;
45975 *imm32 = imm_values.imm32;
45976 *imm64 = imm_values.imm64;
45977 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
45980 /* This function indicates whether an instruction has any immediate
45981 operands. */
45983 static bool
45984 has_immediate (rtx insn)
45986 int num_imm_operand;
45987 int num_imm32_operand;
45988 int num_imm64_operand;
45990 if (insn)
45991 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45992 &num_imm64_operand);
45993 return false;
45996 /* Return the decode path (single, double or multi) of instruction INSN. */
45998 static enum insn_path
45999 get_insn_path (rtx insn)
46001 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
46003 if ((int)path == 0)
46004 return path_single;
46006 if ((int)path == 1)
46007 return path_double;
46009 return path_multi;
46012 /* Return insn dispatch group. */
46014 static enum dispatch_group
46015 get_insn_group (rtx insn)
46017 enum dispatch_group group = get_mem_group (insn);
46018 if (group)
46019 return group;
46021 if (is_branch (insn))
46022 return disp_branch;
46024 if (is_cmp (insn))
46025 return disp_cmp;
46027 if (has_immediate (insn))
46028 return disp_imm;
46030 if (is_prefetch (insn))
46031 return disp_prefetch;
46033 return disp_no_group;
46036 /* Count number of GROUP restricted instructions in a dispatch
46037 window WINDOW_LIST. */
46039 static int
46040 count_num_restricted (rtx insn, dispatch_windows *window_list)
46042 enum dispatch_group group = get_insn_group (insn);
46043 int imm_size;
46044 int num_imm_operand;
46045 int num_imm32_operand;
46046 int num_imm64_operand;
46048 if (group == disp_no_group)
46049 return 0;
46051 if (group == disp_imm)
46053 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46054 &num_imm64_operand);
46055 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
46056 || num_imm_operand + window_list->num_imm > MAX_IMM
46057 || (num_imm32_operand > 0
46058 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
46059 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
46060 || (num_imm64_operand > 0
46061 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
46062 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
46063 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
46064 && num_imm64_operand > 0
46065 && ((window_list->num_imm_64 > 0
46066 && window_list->num_insn >= 2)
46067 || window_list->num_insn >= 3)))
46068 return BIG;
46070 return 1;
46073 if ((group == disp_load_store
46074 && (window_list->num_loads >= MAX_LOAD
46075 || window_list->num_stores >= MAX_STORE))
46076 || ((group == disp_load
46077 || group == disp_prefetch)
46078 && window_list->num_loads >= MAX_LOAD)
46079 || (group == disp_store
46080 && window_list->num_stores >= MAX_STORE))
46081 return BIG;
46083 return 1;
46086 /* Return true if INSN satisfies the dispatch rules on the last
46087 window scheduled. */
46089 static bool
46090 fits_dispatch_window (rtx insn)
46092 dispatch_windows *window_list = dispatch_window_list;
46093 dispatch_windows *window_list_next = dispatch_window_list->next;
46094 unsigned int num_restrict;
46095 enum dispatch_group group = get_insn_group (insn);
46096 enum insn_path path = get_insn_path (insn);
46097 int sum;
46099 /* Make disp_cmp and disp_jcc get scheduled as late as possible. These
46100 instructions should be given the lowest priority in the Haifa
46101 scheduler so that they are scheduled in the same dispatch
46102 window as the instruction they refer to. */
46103 if (group == disp_jcc || group == disp_cmp)
46104 return false;
46106 /* Check nonrestricted. */
46107 if (group == disp_no_group || group == disp_branch)
46108 return true;
46110 /* Get last dispatch window. */
46111 if (window_list_next)
46112 window_list = window_list_next;
46114 if (window_list->window_num == 1)
46116 sum = window_list->prev->window_size + window_list->window_size;
46118 if (sum == 32
46119 || (min_insn_size (insn) + sum) >= 48)
46120 /* Window 1 is full. Go for next window. */
46121 return true;
46124 num_restrict = count_num_restricted (insn, window_list);
46126 if (num_restrict > num_allowable_groups[group])
46127 return false;
46129 /* See if it fits in the first window. */
46130 if (window_list->window_num == 0)
46132 /* The first window should contain only single- and double-path
46133 uops. */
46134 if (path == path_double
46135 && (window_list->num_uops + 2) > MAX_INSN)
46136 return false;
46137 else if (path != path_single)
46138 return false;
46140 return true;
46143 /* Add an instruction INSN with NUM_UOPS micro-operations to the
46144 dispatch window WINDOW_LIST. */
46146 static void
46147 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
46149 int byte_len = min_insn_size (insn);
46150 int num_insn = window_list->num_insn;
46151 int imm_size;
46152 sched_insn_info *window = window_list->window;
46153 enum dispatch_group group = get_insn_group (insn);
46154 enum insn_path path = get_insn_path (insn);
46155 int num_imm_operand;
46156 int num_imm32_operand;
46157 int num_imm64_operand;
46159 if (!window_list->violation && group != disp_cmp
46160 && !fits_dispatch_window (insn))
46161 window_list->violation = true;
46163 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46164 &num_imm64_operand);
46166 /* Initialize window with new instruction. */
46167 window[num_insn].insn = insn;
46168 window[num_insn].byte_len = byte_len;
46169 window[num_insn].group = group;
46170 window[num_insn].path = path;
46171 window[num_insn].imm_bytes = imm_size;
46173 window_list->window_size += byte_len;
46174 window_list->num_insn = num_insn + 1;
46175 window_list->num_uops = window_list->num_uops + num_uops;
46176 window_list->imm_size += imm_size;
46177 window_list->num_imm += num_imm_operand;
46178 window_list->num_imm_32 += num_imm32_operand;
46179 window_list->num_imm_64 += num_imm64_operand;
46181 if (group == disp_store)
46182 window_list->num_stores += 1;
46183 else if (group == disp_load
46184 || group == disp_prefetch)
46185 window_list->num_loads += 1;
46186 else if (group == disp_load_store)
46188 window_list->num_stores += 1;
46189 window_list->num_loads += 1;
46193 /* Adds a scheduled instruction, INSN, to the current dispatch window.
46194 If the total bytes of instructions or the number of instructions in
46195 the window exceed the allowable limits, a new window is allocated. */
46197 static void
46198 add_to_dispatch_window (rtx insn)
46200 int byte_len;
46201 dispatch_windows *window_list;
46202 dispatch_windows *next_list;
46203 dispatch_windows *window0_list;
46204 enum insn_path path;
46205 enum dispatch_group insn_group;
46206 bool insn_fits;
46207 int num_insn;
46208 int num_uops;
46209 int window_num;
46210 int insn_num_uops;
46211 int sum;
46213 if (INSN_CODE (insn) < 0)
46214 return;
46216 byte_len = min_insn_size (insn);
46217 window_list = dispatch_window_list;
46218 next_list = window_list->next;
46219 path = get_insn_path (insn);
46220 insn_group = get_insn_group (insn);
46222 /* Get the last dispatch window. */
46223 if (next_list)
46224 window_list = dispatch_window_list->next;
46226 if (path == path_single)
46227 insn_num_uops = 1;
46228 else if (path == path_double)
46229 insn_num_uops = 2;
46230 else
46231 insn_num_uops = (int) path;
46233 /* If the current window is full, get a new window.
46234 Window number zero is full if MAX_INSN uops are scheduled in it.
46235 Window number one is full if window zero's bytes plus window
46236 one's bytes equal 32, or if adding the new instruction's bytes
46237 to that total makes it at least 48, or if it already has MAX_INSN
46238 instructions in it. */
46239 num_insn = window_list->num_insn;
46240 num_uops = window_list->num_uops;
46241 window_num = window_list->window_num;
46242 insn_fits = fits_dispatch_window (insn);
46244 if (num_insn >= MAX_INSN
46245 || num_uops + insn_num_uops > MAX_INSN
46246 || !(insn_fits))
46248 window_num = ~window_num & 1;
46249 window_list = allocate_next_window (window_num);
46252 if (window_num == 0)
46254 add_insn_window (insn, window_list, insn_num_uops);
46255 if (window_list->num_insn >= MAX_INSN
46256 && insn_group == disp_branch)
46258 process_end_window ();
46259 return;
46262 else if (window_num == 1)
46264 window0_list = window_list->prev;
46265 sum = window0_list->window_size + window_list->window_size;
46266 if (sum == 32
46267 || (byte_len + sum) >= 48)
46269 process_end_window ();
46270 window_list = dispatch_window_list;
46273 add_insn_window (insn, window_list, insn_num_uops);
46275 else
46276 gcc_unreachable ();
46278 if (is_end_basic_block (insn_group))
46280 /* The end of the basic block is reached; do end-of-basic-block processing. */
46281 process_end_window ();
46282 return;
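/* Editor's note: an illustrative sketch (not part of the scheduler) of the
   window-1 "full" test used above: the two windows together hold at most
   48 bytes, and window 1 is considered full once the combined size is
   exactly 32 bytes or adding the new insn would reach 48. Kept under
   "#if 0" so it is not part of the build. */
#if 0
static bool
example_window1_full_p (dispatch_windows *window1, rtx insn)
{
  int sum = window1->prev->window_size + window1->window_size;
  return sum == 32 || sum + min_insn_size (insn) >= 48;
}
#endif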
46286 /* Print the dispatch window, WINDOW_NUM, to FILE. */
46288 DEBUG_FUNCTION static void
46289 debug_dispatch_window_file (FILE *file, int window_num)
46291 dispatch_windows *list;
46292 int i;
46294 if (window_num == 0)
46295 list = dispatch_window_list;
46296 else
46297 list = dispatch_window_list1;
46299 fprintf (file, "Window #%d:\n", list->window_num);
46300 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
46301 list->num_insn, list->num_uops, list->window_size);
46302 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46303 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
46305 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
46306 list->num_stores);
46307 fprintf (file, " insn info:\n");
46309 for (i = 0; i < MAX_INSN; i++)
46311 if (!list->window[i].insn)
46312 break;
46313 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
46314 i, group_name[list->window[i].group],
46315 i, (void *)list->window[i].insn,
46316 i, list->window[i].path,
46317 i, list->window[i].byte_len,
46318 i, list->window[i].imm_bytes);
46322 /* Print to stdout a dispatch window. */
46324 DEBUG_FUNCTION void
46325 debug_dispatch_window (int window_num)
46327 debug_dispatch_window_file (stdout, window_num);
46330 /* Print INSN dispatch information to FILE. */
46332 DEBUG_FUNCTION static void
46333 debug_insn_dispatch_info_file (FILE *file, rtx insn)
46335 int byte_len;
46336 enum insn_path path;
46337 enum dispatch_group group;
46338 int imm_size;
46339 int num_imm_operand;
46340 int num_imm32_operand;
46341 int num_imm64_operand;
46343 if (INSN_CODE (insn) < 0)
46344 return;
46346 byte_len = min_insn_size (insn);
46347 path = get_insn_path (insn);
46348 group = get_insn_group (insn);
46349 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46350 &num_imm64_operand);
46352 fprintf (file, " insn info:\n");
46353 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
46354 group_name[group], path, byte_len);
46355 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46356 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
46359 /* Print to stdout the status of the ready list with respect to
46360 dispatch windows. */
46362 DEBUG_FUNCTION void
46363 debug_ready_dispatch (void)
46365 int i;
46366 int no_ready = number_in_ready ();
46368 fprintf (stdout, "Number of ready: %d\n", no_ready);
46370 for (i = 0; i < no_ready; i++)
46371 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
46374 /* This routine is the driver of the dispatch scheduler. */
46376 static void
46377 do_dispatch (rtx insn, int mode)
46379 if (mode == DISPATCH_INIT)
46380 init_dispatch_sched ();
46381 else if (mode == ADD_TO_DISPATCH_WINDOW)
46382 add_to_dispatch_window (insn);
46385 /* Answer dispatch scheduling query ACTION for INSN. Return false if dispatch scheduling is not enabled. */
46387 static bool
46388 has_dispatch (rtx insn, int action)
46390 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
46391 && flag_dispatch_scheduler)
46392 switch (action)
46394 default:
46395 return false;
46397 case IS_DISPATCH_ON:
46398 return true;
46399 break;
46401 case IS_CMP:
46402 return is_cmp (insn);
46404 case DISPATCH_VIOLATION:
46405 return dispatch_violation ();
46407 case FITS_DISPATCH_WINDOW:
46408 return fits_dispatch_window (insn);
46411 return false;
46414 /* Implementation of reassociation_width target hook used by
46415 reassoc phase to identify parallelism level in reassociated
46416 tree. The statement's tree_code is passed in OPC; the mode of the
46417 operands' type is passed in MODE.
46419 Currently parallel reassociation is enabled only for Atom
46420 processors, and we set the reassociation width to 2 because
46421 Atom may issue up to 2 instructions per cycle.
46423 The return value should be adjusted if parallel reassociation is
46424 enabled for other processors. */
46426 static int
46427 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
46428 enum machine_mode mode)
46430 int res = 1;
46432 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
46433 res = 2;
46434 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
46435 res = 2;
46437 return res;
46440 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
46441 place emms and femms instructions. */
46443 static enum machine_mode
46444 ix86_preferred_simd_mode (enum machine_mode mode)
46446 if (!TARGET_SSE)
46447 return word_mode;
46449 switch (mode)
46451 case QImode:
46452 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
46453 case HImode:
46454 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
46455 case SImode:
46456 return TARGET_AVX512F ? V16SImode :
46457 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
46458 case DImode:
46459 return TARGET_AVX512F ? V8DImode :
46460 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
46462 case SFmode:
46463 if (TARGET_AVX512F)
46464 return V16SFmode;
46465 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46466 return V8SFmode;
46467 else
46468 return V4SFmode;
46470 case DFmode:
46471 if (!TARGET_VECTORIZE_DOUBLE)
46472 return word_mode;
46473 else if (TARGET_AVX512F)
46474 return V8DFmode;
46475 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46476 return V4DFmode;
46477 else if (TARGET_SSE2)
46478 return V2DFmode;
46479 /* FALLTHRU */
46481 default:
46482 return word_mode;
46486 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
46487 vectors. If AVX512F is enabled then try vectorizing with 512bit,
46488 256bit and 128bit vectors. */
46490 static unsigned int
46491 ix86_autovectorize_vector_sizes (void)
46493 return TARGET_AVX512F ? 64 | 32 | 16 :
46494 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
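/* Editor's note: a small sketch showing how the bit mask returned above can
   be interpreted; each set bit is a vector size in bytes the vectorizer may
   additionally try (0 means only the preferred SIMD mode is used).
   Illustrative only, kept under "#if 0". */
#if 0
static void
example_print_vector_sizes (FILE *file)
{
  unsigned int sizes = ix86_autovectorize_vector_sizes ();
  unsigned int s;
  for (s = 64; s >= 16; s /= 2)
    if (sizes & s)
      fprintf (file, "may try %u-byte vectors\n", s);
}
#endif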
46499 /* Return class of registers which could be used for pseudo of MODE
46500 and of class RCLASS for spilling instead of memory. Return NO_REGS
46501 if it is not possible or not profitable. */
46502 static reg_class_t
46503 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
46505 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
46506 && (mode == SImode || (TARGET_64BIT && mode == DImode))
46507 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
46508 return ALL_SSE_REGS;
46509 return NO_REGS;
46512 /* Implement targetm.vectorize.init_cost. */
46514 static void *
46515 ix86_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
46517 unsigned *cost = XNEWVEC (unsigned, 3);
46518 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
46519 return cost;
46522 /* Implement targetm.vectorize.add_stmt_cost. */
46524 static unsigned
46525 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
46526 struct _stmt_vec_info *stmt_info, int misalign,
46527 enum vect_cost_model_location where)
46529 unsigned *cost = (unsigned *) data;
46530 unsigned retval = 0;
46532 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
46533 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
46535 /* Statements in an inner loop relative to the loop being
46536 vectorized are weighted more heavily. The value here is
46537 arbitrary and could potentially be improved with analysis. */
46538 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
46539 count *= 50; /* FIXME. */
46541 retval = (unsigned) (count * stmt_cost);
46543 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
46544 for Silvermont, as it has an out-of-order integer pipeline that can
46545 execute 2 scalar instructions per tick, but an in-order SIMD pipeline. */
46546 if (TARGET_SILVERMONT || TARGET_INTEL)
46547 if (stmt_info && stmt_info->stmt)
46549 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
46550 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
46551 retval = (retval * 17) / 10;
46554 cost[where] += retval;
46556 return retval;
46559 /* Implement targetm.vectorize.finish_cost. */
46561 static void
46562 ix86_finish_cost (void *data, unsigned *prologue_cost,
46563 unsigned *body_cost, unsigned *epilogue_cost)
46565 unsigned *cost = (unsigned *) data;
46566 *prologue_cost = cost[vect_prologue];
46567 *body_cost = cost[vect_body];
46568 *epilogue_cost = cost[vect_epilogue];
46571 /* Implement targetm.vectorize.destroy_cost_data. */
46573 static void
46574 ix86_destroy_cost_data (void *data)
46576 free (data);
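/* Editor's note: a hedged sketch of how the vectorizer drives the four cost
   hooks defined above (init, add_stmt, finish, destroy). The driver loop
   itself is hypothetical; the hook calls use the real signatures. Kept
   under "#if 0" so it is not part of the build. */
#if 0
static unsigned
example_estimate_vector_cost (struct loop *loop)
{
  void *data = ix86_init_cost (loop);
  unsigned prologue, body, epilogue;
  /* ... one call per statement that would be vectorized ...  */
  ix86_add_stmt_cost (data, 1, vector_stmt, NULL, 0, vect_body);
  ix86_finish_cost (data, &prologue, &body, &epilogue);
  ix86_destroy_cost_data (data);
  return prologue + body + epilogue;
}
#endif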
46579 /* Validate target-specific memory model bits in VAL. */
46581 static unsigned HOST_WIDE_INT
46582 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
46584 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
46585 bool strong;
46587 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
46588 |MEMMODEL_MASK)
46589 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
46591 warning (OPT_Winvalid_memory_model,
46592 "Unknown architecture specific memory model");
46593 return MEMMODEL_SEQ_CST;
46595 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
46596 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
46598 warning (OPT_Winvalid_memory_model,
46599 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
46600 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
46602 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
46604 warning (OPT_Winvalid_memory_model,
46605 "HLE_RELEASE not used with RELEASE or stronger memory model");
46606 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
46608 return val;
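/* Editor's note: a sketch of the user-level code this hook validates; the
   HLE bits must be combined with a memory model at least as strong as
   ACQUIRE (resp. RELEASE), otherwise the hook warns and falls back to
   SEQ_CST. Hypothetical example, not target code; kept under "#if 0". */
#if 0
static void
example_hle_spinlock (volatile int *lock)
{
  int expected = 0;
  while (!__atomic_compare_exchange_n (lock, &expected, 1, false,
				       __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE,
				       __ATOMIC_ACQUIRE))
    expected = 0;
  /* ... critical section ...  */
  __atomic_store_n (lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
}
#endif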
46611 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
46612 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
46613 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
46614 or the number of vecsize_mangle variants that should be emitted. */
46616 static int
46617 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
46618 struct cgraph_simd_clone *clonei,
46619 tree base_type, int num)
46621 int ret = 1;
46623 if (clonei->simdlen
46624 && (clonei->simdlen < 2
46625 || clonei->simdlen > 16
46626 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
46628 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46629 "unsupported simdlen %d", clonei->simdlen);
46630 return 0;
46633 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
46634 if (TREE_CODE (ret_type) != VOID_TYPE)
46635 switch (TYPE_MODE (ret_type))
46637 case QImode:
46638 case HImode:
46639 case SImode:
46640 case DImode:
46641 case SFmode:
46642 case DFmode:
46643 /* case SCmode: */
46644 /* case DCmode: */
46645 break;
46646 default:
46647 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46648 "unsupported return type %qT for simd\n", ret_type);
46649 return 0;
46652 tree t;
46653 int i;
46655 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
46656 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
46657 switch (TYPE_MODE (TREE_TYPE (t)))
46659 case QImode:
46660 case HImode:
46661 case SImode:
46662 case DImode:
46663 case SFmode:
46664 case DFmode:
46665 /* case SCmode: */
46666 /* case DCmode: */
46667 break;
46668 default:
46669 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46670 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
46671 return 0;
46674 if (clonei->cilk_elemental)
46676 /* Parse the processor clause here. If not present, default to 'b'. */
46677 clonei->vecsize_mangle = 'b';
46679 else if (!TREE_PUBLIC (node->decl))
46681 /* If the function isn't exported, we can pick up just one ISA
46682 for the clones. */
46683 if (TARGET_AVX2)
46684 clonei->vecsize_mangle = 'd';
46685 else if (TARGET_AVX)
46686 clonei->vecsize_mangle = 'c';
46687 else
46688 clonei->vecsize_mangle = 'b';
46689 ret = 1;
46691 else
46693 clonei->vecsize_mangle = "bcd"[num];
46694 ret = 3;
46696 switch (clonei->vecsize_mangle)
46698 case 'b':
46699 clonei->vecsize_int = 128;
46700 clonei->vecsize_float = 128;
46701 break;
46702 case 'c':
46703 clonei->vecsize_int = 128;
46704 clonei->vecsize_float = 256;
46705 break;
46706 case 'd':
46707 clonei->vecsize_int = 256;
46708 clonei->vecsize_float = 256;
46709 break;
46711 if (clonei->simdlen == 0)
46713 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
46714 clonei->simdlen = clonei->vecsize_int;
46715 else
46716 clonei->simdlen = clonei->vecsize_float;
46717 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
46718 if (clonei->simdlen > 16)
46719 clonei->simdlen = 16;
46721 return ret;
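/* Editor's note: a hypothetical declaration of the kind this hook handles.
   For an exported function the hook is called with NUM = 0, 1, 2 and
   returns 3, producing the 'b' (SSE2), 'c' (AVX) and 'd' (AVX2) variants;
   a non-exported function gets only the best single ISA variant. Kept
   under "#if 0". */
#if 0
#pragma omp declare simd simdlen(8)
extern int example_simd_callee (int x);
#endif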
46724 /* Add target attribute to SIMD clone NODE if needed. */
46726 static void
46727 ix86_simd_clone_adjust (struct cgraph_node *node)
46729 const char *str = NULL;
46730 gcc_assert (node->decl == cfun->decl);
46731 switch (node->simdclone->vecsize_mangle)
46733 case 'b':
46734 if (!TARGET_SSE2)
46735 str = "sse2";
46736 break;
46737 case 'c':
46738 if (!TARGET_AVX)
46739 str = "avx";
46740 break;
46741 case 'd':
46742 if (!TARGET_AVX2)
46743 str = "avx2";
46744 break;
46745 default:
46746 gcc_unreachable ();
46748 if (str == NULL)
46749 return;
46750 push_cfun (NULL);
46751 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
46752 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
46753 gcc_assert (ok);
46754 pop_cfun ();
46755 ix86_previous_fndecl = NULL_TREE;
46756 ix86_set_current_function (node->decl);
46759 /* If SIMD clone NODE can't be used in a vectorized loop
46760 in the current function, return -1; otherwise return the badness of using it
46761 (0 if it is the most desirable from the vecsize_mangle point of view, 1
46762 slightly less desirable, etc.). */
46764 static int
46765 ix86_simd_clone_usable (struct cgraph_node *node)
46767 switch (node->simdclone->vecsize_mangle)
46769 case 'b':
46770 if (!TARGET_SSE2)
46771 return -1;
46772 if (!TARGET_AVX)
46773 return 0;
46774 return TARGET_AVX2 ? 2 : 1;
46775 case 'c':
46776 if (!TARGET_AVX)
46777 return -1;
46778 return TARGET_AVX2 ? 1 : 0;
46779 break;
46780 case 'd':
46781 if (!TARGET_AVX2)
46782 return -1;
46783 return 0;
46784 default:
46785 gcc_unreachable ();
46789 /* For_each_rtx callback: if *X is a memory reference, increment
46790 *MEM_COUNT (by 2 for accesses wider than 4 words); the total determines
46791 the unrolling factor for the bdver3 and bdver4 architectures. */
46793 static int
46794 ix86_loop_memcount (rtx *x, unsigned *mem_count)
46796 if (*x != NULL_RTX && MEM_P (*x))
46798 enum machine_mode mode;
46799 unsigned int n_words;
46801 mode = GET_MODE (*x);
46802 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
46804 if (n_words > 4)
46805 (*mem_count)+=2;
46806 else
46807 (*mem_count)+=1;
46809 return 0;
46812 /* This function adjusts the unroll factor based on
46813 the hardware capabilities. For example, bdver3 has
46814 a loop buffer which makes unrolling of smaller
46815 loops less important. The unroll factor is decided
46816 using the number of memory references in the loop body
46817 (with a limit of 32) as a heuristic. */
46819 static unsigned
46820 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
46822 basic_block *bbs;
46823 rtx insn;
46824 unsigned i;
46825 unsigned mem_count = 0;
46827 if (!TARGET_ADJUST_UNROLL)
46828 return nunroll;
46830 /* Count the number of memory references within the loop body. */
46831 bbs = get_loop_body (loop);
46832 for (i = 0; i < loop->num_nodes; i++)
46834 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
46835 if (NONDEBUG_INSN_P (insn))
46836 for_each_rtx (&insn, (rtx_function) ix86_loop_memcount, &mem_count);
46838 free (bbs);
46840 if (mem_count && mem_count <= 32)
46841 return 32 / mem_count;
46843 return nunroll;
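/* Editor's note: a sketch of the capping arithmetic used above. For
   example, a loop body with 8 counted memory references is unrolled at
   most 32 / 8 = 4 times; above 32 references the requested factor is
   returned unchanged. Kept under "#if 0" so it is not part of the build. */
#if 0
static unsigned
example_unroll_cap (unsigned nunroll, unsigned mem_count)
{
  if (mem_count && mem_count <= 32)
    return 32 / mem_count;
  return nunroll;
}
#endif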
46847 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
46849 static bool
46850 ix86_float_exceptions_rounding_supported_p (void)
46852 /* For x87 floating point with standard excess precision handling,
46853 there is no adddf3 pattern (since x87 floating point only has
46854 XFmode operations) so the default hook implementation gets this
46855 wrong. */
46856 return TARGET_80387 || TARGET_SSE_MATH;
46859 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
46861 static void
46862 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
46864 if (!TARGET_80387 && !TARGET_SSE_MATH)
46865 return;
46866 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
46867 if (TARGET_80387)
46869 tree fenv_index_type = build_index_type (size_int (6));
46870 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
46871 tree fenv_var = create_tmp_var (fenv_type, NULL);
46872 mark_addressable (fenv_var);
46873 tree fenv_ptr = build_pointer_type (fenv_type);
46874 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
46875 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
46876 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
46877 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
46878 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
46879 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
46880 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
46881 tree hold_fnclex = build_call_expr (fnclex, 0);
46882 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
46883 hold_fnclex);
46884 *clear = build_call_expr (fnclex, 0);
46885 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
46886 mark_addressable (sw_var);
46887 tree su_ptr = build_pointer_type (short_unsigned_type_node);
46888 tree sw_addr = build1 (ADDR_EXPR, su_ptr, sw_var);
46889 tree fnstsw_call = build_call_expr (fnstsw, 1, sw_addr);
46890 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
46891 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
46892 exceptions_var, exceptions_x87);
46893 *update = build2 (COMPOUND_EXPR, integer_type_node,
46894 fnstsw_call, update_mod);
46895 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
46896 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
46898 if (TARGET_SSE_MATH)
46900 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
46901 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
46902 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
46903 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
46904 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
46905 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
46906 mxcsr_orig_var, stmxcsr_hold_call);
46907 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
46908 mxcsr_orig_var,
46909 build_int_cst (unsigned_type_node, 0x1f80));
46910 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
46911 build_int_cst (unsigned_type_node, 0xffffffc0));
46912 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
46913 mxcsr_mod_var, hold_mod_val);
46914 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46915 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
46916 hold_assign_orig, hold_assign_mod);
46917 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
46918 ldmxcsr_hold_call);
46919 if (*hold)
46920 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
46921 else
46922 *hold = hold_all;
46923 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46924 if (*clear)
46925 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
46926 ldmxcsr_clear_call);
46927 else
46928 *clear = ldmxcsr_clear_call;
46929 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
46930 tree exceptions_sse = fold_convert (integer_type_node,
46931 stxmcsr_update_call);
46932 if (*update)
46934 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
46935 exceptions_var, exceptions_sse);
46936 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
46937 exceptions_var, exceptions_mod);
46938 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
46939 exceptions_assign);
46941 else
46942 *update = build2 (MODIFY_EXPR, integer_type_node,
46943 exceptions_var, exceptions_sse);
46944 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
46945 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46946 ldmxcsr_update_call);
46948 tree atomic_feraiseexcept
46949 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
46950 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
46951 1, exceptions_var);
46952 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46953 atomic_feraiseexcept_call);
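/* Editor's note: a sketch of the user-level construct the hook above
   supports. A compound assignment on an _Atomic floating-point object is
   expanded as a compare-and-exchange loop; *HOLD saves the FP environment
   before the loop, *CLEAR discards exceptions raised by a failed iteration,
   and *UPDATE re-raises the exceptions of the successful iteration.
   Hypothetical user code, kept under "#if 0". */
#if 0
static void
example_atomic_fp_add (_Atomic double *d, double x)
{
  /* Must only raise the FP exceptions of the final, stored result,
     which is what the hold/clear/update sequences arrange.  */
  *d += x;
}
#endif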
46956 /* Initialize the GCC target structure. */
46957 #undef TARGET_RETURN_IN_MEMORY
46958 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
46960 #undef TARGET_LEGITIMIZE_ADDRESS
46961 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
46963 #undef TARGET_ATTRIBUTE_TABLE
46964 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
46965 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
46966 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
46967 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46968 # undef TARGET_MERGE_DECL_ATTRIBUTES
46969 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
46970 #endif
46972 #undef TARGET_COMP_TYPE_ATTRIBUTES
46973 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
46975 #undef TARGET_INIT_BUILTINS
46976 #define TARGET_INIT_BUILTINS ix86_init_builtins
46977 #undef TARGET_BUILTIN_DECL
46978 #define TARGET_BUILTIN_DECL ix86_builtin_decl
46979 #undef TARGET_EXPAND_BUILTIN
46980 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
46982 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
46983 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
46984 ix86_builtin_vectorized_function
46986 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
46987 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
46989 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
46990 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
46992 #undef TARGET_VECTORIZE_BUILTIN_GATHER
46993 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
46995 #undef TARGET_BUILTIN_RECIPROCAL
46996 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
46998 #undef TARGET_ASM_FUNCTION_EPILOGUE
46999 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
47001 #undef TARGET_ENCODE_SECTION_INFO
47002 #ifndef SUBTARGET_ENCODE_SECTION_INFO
47003 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
47004 #else
47005 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
47006 #endif
47008 #undef TARGET_ASM_OPEN_PAREN
47009 #define TARGET_ASM_OPEN_PAREN ""
47010 #undef TARGET_ASM_CLOSE_PAREN
47011 #define TARGET_ASM_CLOSE_PAREN ""
47013 #undef TARGET_ASM_BYTE_OP
47014 #define TARGET_ASM_BYTE_OP ASM_BYTE
47016 #undef TARGET_ASM_ALIGNED_HI_OP
47017 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
47018 #undef TARGET_ASM_ALIGNED_SI_OP
47019 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
47020 #ifdef ASM_QUAD
47021 #undef TARGET_ASM_ALIGNED_DI_OP
47022 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
47023 #endif
47025 #undef TARGET_PROFILE_BEFORE_PROLOGUE
47026 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
47028 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
47029 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
47031 #undef TARGET_ASM_UNALIGNED_HI_OP
47032 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
47033 #undef TARGET_ASM_UNALIGNED_SI_OP
47034 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
47035 #undef TARGET_ASM_UNALIGNED_DI_OP
47036 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
47038 #undef TARGET_PRINT_OPERAND
47039 #define TARGET_PRINT_OPERAND ix86_print_operand
47040 #undef TARGET_PRINT_OPERAND_ADDRESS
47041 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
47042 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
47043 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
47044 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
47045 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
47047 #undef TARGET_SCHED_INIT_GLOBAL
47048 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
47049 #undef TARGET_SCHED_ADJUST_COST
47050 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
47051 #undef TARGET_SCHED_ISSUE_RATE
47052 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
47053 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
47054 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
47055 ia32_multipass_dfa_lookahead
47056 #undef TARGET_SCHED_MACRO_FUSION_P
47057 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
47058 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
47059 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
47061 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
47062 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
47064 #undef TARGET_MEMMODEL_CHECK
47065 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
47067 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
47068 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
47070 #ifdef HAVE_AS_TLS
47071 #undef TARGET_HAVE_TLS
47072 #define TARGET_HAVE_TLS true
47073 #endif
47074 #undef TARGET_CANNOT_FORCE_CONST_MEM
47075 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
47076 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
47077 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
47079 #undef TARGET_DELEGITIMIZE_ADDRESS
47080 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
47082 #undef TARGET_MS_BITFIELD_LAYOUT_P
47083 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
47085 #if TARGET_MACHO
47086 #undef TARGET_BINDS_LOCAL_P
47087 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
47088 #endif
47089 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47090 #undef TARGET_BINDS_LOCAL_P
47091 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
47092 #endif
47094 #undef TARGET_ASM_OUTPUT_MI_THUNK
47095 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
47096 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
47097 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
47099 #undef TARGET_ASM_FILE_START
47100 #define TARGET_ASM_FILE_START x86_file_start
47102 #undef TARGET_OPTION_OVERRIDE
47103 #define TARGET_OPTION_OVERRIDE ix86_option_override
47105 #undef TARGET_REGISTER_MOVE_COST
47106 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
47107 #undef TARGET_MEMORY_MOVE_COST
47108 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
47109 #undef TARGET_RTX_COSTS
47110 #define TARGET_RTX_COSTS ix86_rtx_costs
47111 #undef TARGET_ADDRESS_COST
47112 #define TARGET_ADDRESS_COST ix86_address_cost
47114 #undef TARGET_FIXED_CONDITION_CODE_REGS
47115 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
47116 #undef TARGET_CC_MODES_COMPATIBLE
47117 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
47119 #undef TARGET_MACHINE_DEPENDENT_REORG
47120 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
47122 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
47123 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
47125 #undef TARGET_BUILD_BUILTIN_VA_LIST
47126 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
47128 #undef TARGET_FOLD_BUILTIN
47129 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
47131 #undef TARGET_COMPARE_VERSION_PRIORITY
47132 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
47134 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
47135 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
47136 ix86_generate_version_dispatcher_body
47138 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
47139 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
47140 ix86_get_function_versions_dispatcher
47142 #undef TARGET_ENUM_VA_LIST_P
47143 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
47145 #undef TARGET_FN_ABI_VA_LIST
47146 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
47148 #undef TARGET_CANONICAL_VA_LIST_TYPE
47149 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
47151 #undef TARGET_EXPAND_BUILTIN_VA_START
47152 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
47154 #undef TARGET_MD_ASM_CLOBBERS
47155 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
47157 #undef TARGET_PROMOTE_PROTOTYPES
47158 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
47159 #undef TARGET_SETUP_INCOMING_VARARGS
47160 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
47161 #undef TARGET_MUST_PASS_IN_STACK
47162 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
47163 #undef TARGET_FUNCTION_ARG_ADVANCE
47164 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
47165 #undef TARGET_FUNCTION_ARG
47166 #define TARGET_FUNCTION_ARG ix86_function_arg
47167 #undef TARGET_FUNCTION_ARG_BOUNDARY
47168 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
47169 #undef TARGET_PASS_BY_REFERENCE
47170 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
47171 #undef TARGET_INTERNAL_ARG_POINTER
47172 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
47173 #undef TARGET_UPDATE_STACK_BOUNDARY
47174 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
47175 #undef TARGET_GET_DRAP_RTX
47176 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
47177 #undef TARGET_STRICT_ARGUMENT_NAMING
47178 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
47179 #undef TARGET_STATIC_CHAIN
47180 #define TARGET_STATIC_CHAIN ix86_static_chain
47181 #undef TARGET_TRAMPOLINE_INIT
47182 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
47183 #undef TARGET_RETURN_POPS_ARGS
47184 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
47186 #undef TARGET_LEGITIMATE_COMBINED_INSN
47187 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
47189 #undef TARGET_ASAN_SHADOW_OFFSET
47190 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
47192 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
47193 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
47195 #undef TARGET_SCALAR_MODE_SUPPORTED_P
47196 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
47198 #undef TARGET_VECTOR_MODE_SUPPORTED_P
47199 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
47201 #undef TARGET_C_MODE_FOR_SUFFIX
47202 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
47204 #ifdef HAVE_AS_TLS
47205 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
47206 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
47207 #endif
47209 #ifdef SUBTARGET_INSERT_ATTRIBUTES
47210 #undef TARGET_INSERT_ATTRIBUTES
47211 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
47212 #endif
47214 #undef TARGET_MANGLE_TYPE
47215 #define TARGET_MANGLE_TYPE ix86_mangle_type
47217 #if !TARGET_MACHO
47218 #undef TARGET_STACK_PROTECT_FAIL
47219 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
47220 #endif
47222 #undef TARGET_FUNCTION_VALUE
47223 #define TARGET_FUNCTION_VALUE ix86_function_value
47225 #undef TARGET_FUNCTION_VALUE_REGNO_P
47226 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
47228 #undef TARGET_PROMOTE_FUNCTION_MODE
47229 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
47231 #undef TARGET_MEMBER_TYPE_FORCES_BLK
47232 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
47234 #undef TARGET_INSTANTIATE_DECLS
47235 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
47237 #undef TARGET_SECONDARY_RELOAD
47238 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
47240 #undef TARGET_CLASS_MAX_NREGS
47241 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
47243 #undef TARGET_PREFERRED_RELOAD_CLASS
47244 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
47245 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
47246 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
47247 #undef TARGET_CLASS_LIKELY_SPILLED_P
47248 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
47250 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
47251 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
47252 ix86_builtin_vectorization_cost
47253 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
47254 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
47255 ix86_vectorize_vec_perm_const_ok
47256 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
47257 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
47258 ix86_preferred_simd_mode
47259 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
47260 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
47261 ix86_autovectorize_vector_sizes
47262 #undef TARGET_VECTORIZE_INIT_COST
47263 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
47264 #undef TARGET_VECTORIZE_ADD_STMT_COST
47265 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
47266 #undef TARGET_VECTORIZE_FINISH_COST
47267 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
47268 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
47269 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
47271 #undef TARGET_SET_CURRENT_FUNCTION
47272 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
47274 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
47275 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
47277 #undef TARGET_OPTION_SAVE
47278 #define TARGET_OPTION_SAVE ix86_function_specific_save
47280 #undef TARGET_OPTION_RESTORE
47281 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
47283 #undef TARGET_OPTION_PRINT
47284 #define TARGET_OPTION_PRINT ix86_function_specific_print
47286 #undef TARGET_OPTION_FUNCTION_VERSIONS
47287 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
47289 #undef TARGET_CAN_INLINE_P
47290 #define TARGET_CAN_INLINE_P ix86_can_inline_p
47292 #undef TARGET_EXPAND_TO_RTL_HOOK
47293 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
47295 #undef TARGET_LEGITIMATE_ADDRESS_P
47296 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
47298 #undef TARGET_LRA_P
47299 #define TARGET_LRA_P hook_bool_void_true
47301 #undef TARGET_REGISTER_PRIORITY
47302 #define TARGET_REGISTER_PRIORITY ix86_register_priority
47304 #undef TARGET_REGISTER_USAGE_LEVELING_P
47305 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
47307 #undef TARGET_LEGITIMATE_CONSTANT_P
47308 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
47310 #undef TARGET_FRAME_POINTER_REQUIRED
47311 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
47313 #undef TARGET_CAN_ELIMINATE
47314 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
47316 #undef TARGET_EXTRA_LIVE_ON_ENTRY
47317 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
47319 #undef TARGET_ASM_CODE_END
47320 #define TARGET_ASM_CODE_END ix86_code_end
47322 #undef TARGET_CONDITIONAL_REGISTER_USAGE
47323 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
47325 #if TARGET_MACHO
47326 #undef TARGET_INIT_LIBFUNCS
47327 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
47328 #endif
47330 #undef TARGET_LOOP_UNROLL_ADJUST
47331 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
47333 #undef TARGET_SPILL_CLASS
47334 #define TARGET_SPILL_CLASS ix86_spill_class
47336 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
47337 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
47338 ix86_simd_clone_compute_vecsize_and_simdlen
47340 #undef TARGET_SIMD_CLONE_ADJUST
47341 #define TARGET_SIMD_CLONE_ADJUST \
47342 ix86_simd_clone_adjust
47344 #undef TARGET_SIMD_CLONE_USABLE
47345 #define TARGET_SIMD_CLONE_USABLE \
47346 ix86_simd_clone_usable
47348 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
47349 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
47350 ix86_float_exceptions_rounding_supported_p
47352 #undef TARGET_MODE_EMIT
47353 #define TARGET_MODE_EMIT ix86_emit_mode_set
47355 #undef TARGET_MODE_NEEDED
47356 #define TARGET_MODE_NEEDED ix86_mode_needed
47358 #undef TARGET_MODE_AFTER
47359 #define TARGET_MODE_AFTER ix86_mode_after
47361 #undef TARGET_MODE_ENTRY
47362 #define TARGET_MODE_ENTRY ix86_mode_entry
47364 #undef TARGET_MODE_EXIT
47365 #define TARGET_MODE_EXIT ix86_mode_exit
47367 #undef TARGET_MODE_PRIORITY
47368 #define TARGET_MODE_PRIORITY ix86_mode_priority
47370 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
47371 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
47373 struct gcc_target targetm = TARGET_INITIALIZER;
47375 #include "gt-i386.h"